'use strict';
// Interop shim: wraps a CommonJS export in `{ default: ... }` unless the
// module is already flagged as an ES module.
var __importDefault =
    (this && this.__importDefault) ||
    function (mod) {
        return mod && mod.__esModule ? mod : { default: mod };
    };
Object.defineProperty(exports, '__esModule', { value: true });
exports.decodeXML =
    exports.decodeHTMLStrict =
    exports.decodeHTML =
    exports.determineBranch =
    exports.BinTrieFlags =
    exports.fromCodePoint =
    exports.replaceCodePoint =
    exports.decodeCodePoint =
    exports.xmlDecodeTree =
    exports.htmlDecodeTree =
        void 0;
var decode_data_html_js_1 = __importDefault(require('./generated/decode-data-html.js'));
exports.htmlDecodeTree = decode_data_html_js_1.default;
var decode_data_xml_js_1 = __importDefault(require('./generated/decode-data-xml.js'));
exports.xmlDecodeTree = decode_data_xml_js_1.default;
var decode_codepoint_js_1 = __importDefault(require('./decode_codepoint.js'));
exports.decodeCodePoint = decode_codepoint_js_1.default;
var decode_codepoint_js_2 = require('./decode_codepoint.js');
Object.defineProperty(exports, 'replaceCodePoint', {
    enumerable: true,
    get: function () {
        return decode_codepoint_js_2.replaceCodePoint;
    },
});
Object.defineProperty(exports, 'fromCodePoint', {
    enumerable: true,
    get: function () {
        return decode_codepoint_js_2.fromCodePoint;
    },
});

/** Character codes (UTF-16 code units) the decoder compares against. */
var CharCodes;
(function (CharCodes) {
    CharCodes[(CharCodes['NUM'] = 35)] = 'NUM'; // "#"
    CharCodes[(CharCodes['SEMI'] = 59)] = 'SEMI'; // ";"
    CharCodes[(CharCodes['ZERO'] = 48)] = 'ZERO'; // "0"
    CharCodes[(CharCodes['NINE'] = 57)] = 'NINE'; // "9"
    CharCodes[(CharCodes['LOWER_A'] = 97)] = 'LOWER_A'; // "a"
    CharCodes[(CharCodes['LOWER_F'] = 102)] = 'LOWER_F'; // "f"
    CharCodes[(CharCodes['LOWER_X'] = 120)] = 'LOWER_X'; // "x"
    /** Bit that needs to be set to convert an upper case ASCII character to lower case */
    CharCodes[(CharCodes['To_LOWER_BIT'] = 32)] = 'To_LOWER_BIT';
})(CharCodes || (CharCodes = {}));

/**
 * Bit masks applied to a node of the decode tree.
 *
 * - VALUE_LENGTH  (49152 = 0xC000): bits 14-15, read as `(node & mask) >> 14`.
 * - BRANCH_LENGTH (16256 = 0x3F80): bits 7-13, read as `(node & mask) >> 7`.
 * - JUMP_TABLE    (127   = 0x007F): bits 0-6.
 */
var BinTrieFlags;
(function (BinTrieFlags) {
    BinTrieFlags[(BinTrieFlags['VALUE_LENGTH'] = 49152)] = 'VALUE_LENGTH';
    BinTrieFlags[(BinTrieFlags['BRANCH_LENGTH'] = 16256)] = 'BRANCH_LENGTH';
    BinTrieFlags[(BinTrieFlags['JUMP_TABLE'] = 127)] = 'JUMP_TABLE';
})((BinTrieFlags = exports.BinTrieFlags || (exports.BinTrieFlags = {})));

/**
 * Builds a decoder function bound to the given decode tree.
 *
 * The returned function scans `str` for "&", decodes numeric references
 * ("&#...", "&#x...") and named references found in `decodeTree`, and copies
 * everything else through unchanged.
 *
 * @param decodeTree Indexable sequence of numeric trie nodes (see BinTrieFlags).
 * @returns A function `(str, strict) => string`. When `strict` is truthy, a
 *     reference is only decoded if it is terminated by ";".
 */
function getDecoder(decodeTree) {
    return function decodeHTMLBinary(str, strict) {
        var ret = '';
        // Index of the first character of `str` not yet copied into `ret`.
        var lastIdx = 0;
        var strIdx = 0;
        while ((strIdx = str.indexOf('&', strIdx)) >= 0) {
            ret += str.slice(lastIdx, strIdx);
            // Until a reference is successfully decoded, the "&" itself is
            // still pending output, so leave `lastIdx` pointing at it.
            lastIdx = strIdx;
            // Skip the "&"
            strIdx += 1;
            // If we have a numeric entity, handle this separately.
            if (str.charCodeAt(strIdx) === CharCodes.NUM) {
                // Skip the leading "&#". For hex entities, also skip the leading "x".
                var start = strIdx + 1;
                var base = 10;
                var cp = str.charCodeAt(start);
                // OR-ing in the lower-case bit matches both "x" and "X".
                if ((cp | CharCodes.To_LOWER_BIT) === CharCodes.LOWER_X) {
                    base = 16;
                    strIdx += 1;
                    start += 1;
                }
                // Advance `strIdx` to the first character that is not a digit
                // of the selected base.
                do {
                    cp = str.charCodeAt(++strIdx);
                } while (
                    (cp >= CharCodes.ZERO && cp <= CharCodes.NINE) ||
                    (base === 16 &&
                        (cp | CharCodes.To_LOWER_BIT) >= CharCodes.LOWER_A &&
                        (cp | CharCodes.To_LOWER_BIT) <= CharCodes.LOWER_F)
                );
                // Only decode if at least one digit was consumed.
                if (start !== strIdx) {
                    var entity = str.substring(start, strIdx);
                    var parsed = parseInt(entity, base);
                    if (str.charCodeAt(strIdx) === CharCodes.SEMI) {
                        strIdx += 1;
                    } else if (strict) {
                        // Unterminated reference in strict mode: leave the raw
                        // text in place (lastIdx still points at the "&").
                        continue;
                    }
                    ret += (0, decode_codepoint_js_1.default)(parsed);
                    lastIdx = strIdx;
                }
                continue;
            }
            // Named entity: walk the trie one character at a time.
            // `resultIdx` is the tree index of the last accepted value node
            // (0 means none). `excess` is how far `strIdx` has advanced beyond
            // the character that produced that match.
            var resultIdx = 0;
            var excess = 1;
            var treeIdx = 0;
            var current = decodeTree[treeIdx];
            for (; strIdx < str.length; strIdx++, excess++) {
                treeIdx = determineBranch(decodeTree, current, treeIdx + 1, str.charCodeAt(strIdx));
                if (treeIdx < 0) {
                    break;
                }
                current = decodeTree[treeIdx];
                var masked = current & BinTrieFlags.VALUE_LENGTH;
                // If the branch is a value, store it and continue
                if (masked) {
                    // If we have a legacy entity while parsing strictly, just skip the number of bytes
                    if (!strict || str.charCodeAt(strIdx) === CharCodes.SEMI) {
                        resultIdx = treeIdx;
                        excess = 0;
                    }
                    // The mask is the number of bytes of the value, including the current byte.
                    var valueLength = (masked >> 14) - 1;
                    if (valueLength === 0) {
                        break;
                    }
                    // Step over the value words stored after this node.
                    treeIdx += valueLength;
                }
            }
            if (resultIdx !== 0) {
                var valueLength = (decodeTree[resultIdx] & BinTrieFlags.VALUE_LENGTH) >> 14;
                // Length 1: the value shares the node with the flag bits.
                // Length 2: the value is the next element.
                // Otherwise: the value is the next two elements.
                ret +=
                    valueLength === 1
                        ? String.fromCharCode(decodeTree[resultIdx] & ~BinTrieFlags.VALUE_LENGTH)
                        : valueLength === 2
                          ? String.fromCharCode(decodeTree[resultIdx + 1])
                          : String.fromCharCode(decodeTree[resultIdx + 1], decodeTree[resultIdx + 2]);
                // Resume copying right after the last character of the match.
                lastIdx = strIdx - excess + 1;
            }
        }
        return ret + str.slice(lastIdx);
    };
}

/**
 * Finds the tree index reached from a node when consuming one character.
 *
 * @param decodeTree The decode tree being walked.
 * @param current The value of the node being branched from.
 * @param nodeIdx Index at which that node's branch data starts.
 * @param char Character code to look up.
 * @returns The index of the next node, or -1 if there is no branch for `char`.
 */
function determineBranch(decodeTree, current, nodeIdx, char) {
    var branchCount = (current & BinTrieFlags.BRANCH_LENGTH) >> 7;
    var jumpOffset = current & BinTrieFlags.JUMP_TABLE;
    // Case 1: Single branch encoded in jump offset
    if (branchCount === 0) {
        return jumpOffset !== 0 && char === jumpOffset ? nodeIdx : -1;
    }
    // Case 2: Multiple branches encoded in jump table
    if (jumpOffset) {
        var value = char - jumpOffset;
        // Table entries are read minus one, so an entry of 0 yields -1.
        return value < 0 || value >= branchCount ? -1 : decodeTree[nodeIdx + value] - 1;
    }
    // Case 3: Multiple branches encoded in dictionary
    // Binary search for the character. The `branchCount` keys start at
    // `nodeIdx`; the matching destinations follow at `+ branchCount`.
    // NOTE(review): relies on the keys being sorted ascending - confirm
    // against the tree generator.
    var lo = nodeIdx;
    var hi = lo + branchCount - 1;
    while (lo <= hi) {
        var mid = (lo + hi) >>> 1;
        var midVal = decodeTree[mid];
        if (midVal < char) {
            lo = mid + 1;
        } else if (midVal > char) {
            hi = mid - 1;
        } else {
            return decodeTree[mid + branchCount];
        }
    }
    return -1;
}
exports.determineBranch = determineBranch;
var htmlDecoder = getDecoder(decode_data_html_js_1.default);
var xmlDecoder = getDecoder(decode_data_xml_js_1.default);
/**
 * Decodes an HTML string, allowing for entities not terminated by a semi-colon.
 *
 * @param str The string to decode.
 * @returns The decoded string.
 */
function decodeHTML(str) {
    return htmlDecoder(str, false);
}
exports.decodeHTML = decodeHTML;
/**
 * Decodes an HTML string, requiring all entities to be terminated by a semi-colon.
 *
 * @param str The string to decode.
 * @returns The decoded string.
 */
function decodeHTMLStrict(str) {
    return htmlDecoder(str, true);
}
exports.decodeHTMLStrict = decodeHTMLStrict;
/**
 * Decodes an XML string, requiring all entities to be terminated by a semi-colon.
 *
 * @param str The string to decode.
 * @returns The decoded string.
 */
function decodeXML(str) {
    return xmlDecoder(str, true);
}
exports.decodeXML = decodeXML;
//# sourceMappingURL=decode.js.map