123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203 |
'use strict';
/**
 * CommonJS/ES-module interop helper.
 *
 * Re-uses an `__importDefault` already present on `this` when there is one;
 * otherwise falls back to the local implementation below.
 *
 * @param mod The value returned by `require(...)`.
 * @returns `mod` itself when it is flagged with `__esModule`, otherwise a
 *     wrapper object exposing `mod` as its `default` property.
 */
var __importDefault =
    (this && this.__importDefault) ||
    function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        return { default: mod };
    };
// Flag this CommonJS module as a transpiled ES module, so that interop helpers
// such as `__importDefault` hand it back unwrapped.
Object.defineProperty(exports, '__esModule', { value: true });
// Create every public export slot up front with the value `undefined`.
// The actual values are assigned later in this file.
exports.decodeXML =
    exports.decodeHTMLStrict =
    exports.decodeHTML =
    exports.determineBranch =
    exports.BinTrieFlags =
    exports.fromCodePoint =
    exports.replaceCodePoint =
    exports.decodeCodePoint =
    exports.xmlDecodeTree =
    exports.htmlDecodeTree =
        void 0;
// Entity tries (one for HTML, one for XML); both are re-exported as-is.
var decode_data_html_js_1 = __importDefault(require('./generated/decode-data-html.js'));
exports.htmlDecodeTree = decode_data_html_js_1.default;
var decode_data_xml_js_1 = __importDefault(require('./generated/decode-data-xml.js'));
exports.xmlDecodeTree = decode_data_xml_js_1.default;

// Code point helpers: the default export is used by the decoder below and
// re-exported as `decodeCodePoint`.
var decode_codepoint_js_1 = __importDefault(require('./decode_codepoint.js'));
exports.decodeCodePoint = decode_codepoint_js_1.default;

// The named helpers are re-exported through getters, so each read of the
// export looks the property up on the required module again.
var decode_codepoint_js_2 = require('./decode_codepoint.js');
Object.defineProperty(exports, 'replaceCodePoint', {
    enumerable: true,
    get: function () {
        return decode_codepoint_js_2.replaceCodePoint;
    }
});
Object.defineProperty(exports, 'fromCodePoint', {
    enumerable: true,
    get: function () {
        return decode_codepoint_js_2.fromCodePoint;
    }
});
/**
 * Character codes the decoder compares against while scanning.
 *
 * The object maps in both directions: name -> code and code -> name.
 */
var CharCodes = CharCodes || {};
[
    ['NUM', 35], // "#"
    ['SEMI', 59], // ";"
    ['ZERO', 48], // "0"
    ['NINE', 57], // "9"
    ['LOWER_A', 97], // "a"
    ['LOWER_F', 102], // "f"
    ['LOWER_X', 120], // "x"
    // Bit that needs to be set to convert an upper case ASCII character to lower case
    ['To_LOWER_BIT', 32]
].forEach(function (entry) {
    var name = entry[0];
    var code = entry[1];
    CharCodes[name] = code;
    CharCodes[code] = name;
});
/**
 * Bit masks used to read a node of the binary entity trie.
 *
 * The object is shared with `exports.BinTrieFlags` and maps in both
 * directions: name -> mask and mask -> name.
 */
var BinTrieFlags = exports.BinTrieFlags || (exports.BinTrieFlags = {});
[
    ['VALUE_LENGTH', 49152], // 0b1100_0000_0000_0000
    ['BRANCH_LENGTH', 16256], // 0b0011_1111_1000_0000
    ['JUMP_TABLE', 127] // 0b0000_0000_0111_1111
].forEach(function (entry) {
    var name = entry[0];
    var mask = entry[1];
    BinTrieFlags[name] = mask;
    BinTrieFlags[mask] = name;
});
/**
 * Creates a decoder for the entity set encoded in `decodeTree`.
 *
 * @param decodeTree Indexable sequence of integers holding the entity trie;
 *     index 0 is the root node.
 * @returns A function `(str, strict)` returning `str` with its character
 *     references replaced. With `strict` truthy, a reference is only
 *     replaced when it is terminated by ";"; otherwise it is left as-is.
 */
function getDecoder(decodeTree) {
    return function decodeHTMLBinary(str, strict) {
        var out = '';
        // Index of the first character of `str` not yet copied into `out`.
        var copiedUpTo = 0;
        var pos = str.indexOf('&');
        while (pos >= 0) {
            // Flush the literal text in front of the "&".
            out += str.slice(copiedUpTo, pos);
            copiedUpTo = pos;
            // Step over the "&".
            pos += 1;
            if (str.charCodeAt(pos) === CharCodes.NUM) {
                // Numeric reference: "&#" digits, or "&#x" / "&#X" hex digits.
                var digitsStart = pos + 1;
                var radix = 10;
                var code = str.charCodeAt(digitsStart);
                if ((code | CharCodes.To_LOWER_BIT) === CharCodes.LOWER_X) {
                    radix = 16;
                    pos += 1;
                    digitsStart += 1;
                }
                // Advance `pos` to the first character that is not a digit
                // in the selected radix.
                var scanning = true;
                while (scanning) {
                    pos += 1;
                    code = str.charCodeAt(pos);
                    if (code >= CharCodes.ZERO && code <= CharCodes.NINE) {
                        continue;
                    }
                    var folded = code | CharCodes.To_LOWER_BIT;
                    scanning = radix === 16 && folded >= CharCodes.LOWER_A && folded <= CharCodes.LOWER_F;
                }
                if (pos !== digitsStart) {
                    var terminated = str.charCodeAt(pos) === CharCodes.SEMI;
                    // In strict mode an unterminated reference stays in the
                    // output verbatim (it is copied by a later flush).
                    if (terminated || !strict) {
                        var codePoint = parseInt(str.substring(digitsStart, pos), radix);
                        if (terminated) {
                            pos += 1;
                        }
                        out += (0, decode_codepoint_js_1.default)(codePoint);
                        copiedUpTo = pos;
                    }
                }
            } else {
                // Named reference: walk the trie one character at a time.
                // Trie index of the last accepted value node (0 = none).
                var matchIdx = 0;
                // Number of characters consumed past the last accepted match, plus one.
                var overshoot = 1;
                var nodeIdx = 0;
                var node = decodeTree[nodeIdx];
                while (pos < str.length) {
                    nodeIdx = determineBranch(decodeTree, node, nodeIdx + 1, str.charCodeAt(pos));
                    if (nodeIdx < 0) {
                        break;
                    }
                    node = decodeTree[nodeIdx];
                    var lengthBits = node & BinTrieFlags.VALUE_LENGTH;
                    if (lengthBits !== 0) {
                        // This node carries a value. In strict mode it only
                        // counts when the current character is ";".
                        if (!strict || str.charCodeAt(pos) === CharCodes.SEMI) {
                            matchIdx = nodeIdx;
                            overshoot = 0;
                        }
                        // The length counts the node itself; skip the extra
                        // value slots to reach this node's branch data.
                        var extraSlots = (lengthBits >> 14) - 1;
                        if (extraSlots === 0) {
                            break;
                        }
                        nodeIdx += extraSlots;
                    }
                    pos += 1;
                    overshoot += 1;
                }
                if (matchIdx !== 0) {
                    var matched = decodeTree[matchIdx];
                    var valueLength = (matched & BinTrieFlags.VALUE_LENGTH) >> 14;
                    if (valueLength === 1) {
                        // Value is stored inline in the node's low bits.
                        out += String.fromCharCode(matched & ~BinTrieFlags.VALUE_LENGTH);
                    } else if (valueLength === 2) {
                        out += String.fromCharCode(decodeTree[matchIdx + 1]);
                    } else {
                        out += String.fromCharCode(decodeTree[matchIdx + 1], decodeTree[matchIdx + 2]);
                    }
                    // Resume copying right after the accepted match; anything
                    // consumed beyond it is emitted as literal text.
                    copiedUpTo = pos - overshoot + 1;
                }
            }
            pos = str.indexOf('&', pos);
        }
        return out + str.slice(copiedUpTo);
    };
}
/**
 * Finds the trie index reached from a node when consuming one character.
 *
 * @param decodeTree Indexable sequence of integers holding the trie.
 * @param current Value of the node being left; its bits describe the branches.
 * @param nodeIdx Index at which that node's branch data starts.
 * @param char Character code to follow.
 * @returns The index of the next node, or -1 when there is no branch for `char`.
 */
function determineBranch(decodeTree, current, nodeIdx, char) {
    var jumpBase = current & BinTrieFlags.JUMP_TABLE;
    var fanOut = (current & BinTrieFlags.BRANCH_LENGTH) >> 7;

    // Single branch: the low bits hold the only accepted character and the
    // child sits directly at `nodeIdx`.
    if (fanOut === 0) {
        if (jumpBase !== 0 && char === jumpBase) {
            return nodeIdx;
        }
        return -1;
    }

    // Jump table: `fanOut` consecutive slots, the first one belonging to the
    // character `jumpBase`. Slots store the target index plus one, so an
    // empty slot (0) yields -1.
    if (jumpBase) {
        var slot = char - jumpBase;
        if (slot < 0 || slot >= fanOut) {
            return -1;
        }
        return decodeTree[nodeIdx + slot] - 1;
    }

    // Dictionary: `fanOut` keys followed by `fanOut` target indices.
    // Binary search over the keys.
    var low = nodeIdx;
    var high = nodeIdx + fanOut - 1;
    while (low <= high) {
        var middle = (low + high) >>> 1;
        var key = decodeTree[middle];
        if (key < char) {
            low = middle + 1;
            continue;
        }
        if (key > char) {
            high = middle - 1;
            continue;
        }
        return decodeTree[middle + fanOut];
    }
    return -1;
}
exports.determineBranch = determineBranch;
// One decoder per entity table, built once when the module is loaded.
var htmlDecoder = getDecoder(decode_data_html_js_1.default);
var xmlDecoder = getDecoder(decode_data_xml_js_1.default);
/**
 * Decodes the entities of an HTML string in non-strict mode, i.e. entities
 * are also accepted when they are not terminated by a semi-colon.
 *
 * @param str The string to decode.
 * @returns The decoded string.
 */
function decodeHTML(str) {
    var strict = false;
    return htmlDecoder(str, strict);
}
exports.decodeHTML = decodeHTML;
/**
 * Decodes the entities of an HTML string in strict mode, i.e. only entities
 * terminated by a semi-colon are replaced.
 *
 * @param str The string to decode.
 * @returns The decoded string.
 */
function decodeHTMLStrict(str) {
    var strict = true;
    return htmlDecoder(str, strict);
}
exports.decodeHTMLStrict = decodeHTMLStrict;
/**
 * Decodes the entities of an XML string in strict mode, i.e. only entities
 * terminated by a semi-colon are replaced.
 *
 * @param str The string to decode.
 * @returns The decoded string.
 */
function decodeXML(str) {
    var strict = true;
    return xmlDecoder(str, strict);
}
exports.decodeXML = decodeXML;
- //# sourceMappingURL=decode.js.map
|