Skip to content

Commit 169fbf9

Browse files
committed
use @nodable/entities to replace entities
1 parent 5d8a891 commit 169fbf9

8 files changed

Lines changed: 51 additions & 124 deletions

File tree

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22

33
Note: Due to some last-minute changes on v4, the details of v4.5.3 & v4.5.4 are not updated here. v4.5.4x is the last tag of v4 in the GitHub repository. I'm extremely sorry for the confusion.
44

5+
**5.5.13** (not released yet)
6+
- fix: entity replacement for numeric entities
7+
- use @nodable/entities to replace entities
8+
59
**5.5.12 / 2026-04-13**
610
- Performance Improvement: update path-expression-matcher
711
- use proxy pattern rather than Proxy class

package-lock.json

Lines changed: 19 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,9 @@
8787
}
8888
],
8989
"dependencies": {
90+
"@nodable/entities": "^1.0.1",
9091
"fast-xml-builder": "^1.1.4",
9192
"path-expression-matcher": "^1.5.0",
9293
"strnum": "^2.2.3"
9394
}
94-
}
95+
}

spec/entities_security_spec.js

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ describe("XMLParser entity expansion security", function () {
7272

7373
expect(function () {
7474
parser.parse(xmlData);
75-
}).toThrowError(/Entity expansion limit exceeded/);
75+
}).toThrowError("[EntityReplacer] Entity expansion count limit exceeded: 1500 > 1000");
7676
});
7777

7878
it("should allow expansions within limit", function () {
@@ -109,7 +109,7 @@ describe("XMLParser entity expansion security", function () {
109109

110110
expect(function () {
111111
parser.parse(xmlData);
112-
}).toThrowError(/Entity expansion limit exceeded/);
112+
}).toThrowError("[EntityReplacer] Entity expansion count limit exceeded: 1200 > 1000");
113113
});
114114
});
115115

@@ -132,7 +132,7 @@ describe("XMLParser entity expansion security", function () {
132132

133133
expect(function () {
134134
parser.parse(xmlData);
135-
}).toThrowError(/Total expanded content size exceeded/);
135+
}).toThrowError("[EntityReplacer] Expanded content length limit exceeded: 149250 > 100000");
136136
});
137137

138138
it("should allow expansions within maxExpandedLength", function () {
@@ -188,7 +188,7 @@ describe("XMLParser entity expansion security", function () {
188188

189189
expect(function () {
190190
parser.parse(xmlData);
191-
}).toThrowError(/Entity expansion limit exceeded/);
191+
}).toThrowError("[EntityReplacer] Entity expansion count limit exceeded: 5000 > 1000");
192192
});
193193

194194
it("should prevent billion laughs with maxExpandedLength", function () {
@@ -205,7 +205,7 @@ describe("XMLParser entity expansion security", function () {
205205

206206
expect(function () {
207207
parser.parse(xmlData);
208-
}).toThrowError(/Total expanded content size exceeded/);
208+
}).toThrowError("[EntityReplacer] Expanded content length limit exceeded: 199000 > 100000");
209209
});
210210
});
211211

@@ -407,7 +407,7 @@ describe("XMLParser entity expansion security", function () {
407407

408408
expect(function () {
409409
parser.parse(xmlData);
410-
}).toThrowError(/Total expanded content size exceeded/);
410+
}).toThrowError("[EntityReplacer] Expanded content length limit exceeded: 19400 > 10000");
411411
});
412412
});
413413

spec/html_spec.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,6 @@ describe("XMLParser", function () {
9696
expect(function () {
9797
const result = parser.parse(xmlData);
9898
console.log(JSON.stringify(result, null, 4));
99-
}).toThrowError(/Entity expansion limit exceeded: 30 > 20/);
99+
}).toThrowError("[EntityReplacer] Entity expansion count limit exceeded: 30 > 20");
100100
});
101101
});

src/xmlparser/OrderedObjParser.js

Lines changed: 13 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import toNumber from "strnum";
88
import getIgnoreAttributesFn from "../ignoreAttributes.js";
99
import { Expression, Matcher } from 'path-expression-matcher';
1010
import { ExpressionSet } from 'path-expression-matcher';
11+
import EntityReplacer, { COMMON_HTML, NUMERIC_ENTITIES } from '@nodable/entities';
1112

1213
// const regx =
1314
// '<((!\\[CDATA\\[([\\s\\S]*?)(]]>))|((NAME:)?(NAME))([^>]*)>|((\\/)(NAME)\\s*>))([^<]*)'
@@ -72,32 +73,6 @@ export default class OrderedObjParser {
7273
this.options = options;
7374
this.currentNode = null;
7475
this.tagsNodeStack = [];
75-
this.docTypeEntities = {};
76-
this.lastEntities = {
77-
"apos": { regex: /&(apos|#39|#x27);/g, val: "'" },
78-
"gt": { regex: /&(gt|#62|#x3E);/g, val: ">" },
79-
"lt": { regex: /&(lt|#60|#x3C);/g, val: "<" },
80-
"quot": { regex: /&(quot|#34|#x22);/g, val: "\"" },
81-
};
82-
this.ampEntity = { regex: /&(amp|#38|#x26);/g, val: "&" };
83-
this.htmlEntities = {
84-
"space": { regex: /&(nbsp|#160);/g, val: " " },
85-
// "lt" : { regex: /&(lt|#60);/g, val: "<" },
86-
// "gt" : { regex: /&(gt|#62);/g, val: ">" },
87-
// "amp" : { regex: /&(amp|#38);/g, val: "&" },
88-
// "quot" : { regex: /&(quot|#34);/g, val: "\"" },
89-
// "apos" : { regex: /&(apos|#39);/g, val: "'" },
90-
"cent": { regex: /&(cent|#162);/g, val: "¢" },
91-
"pound": { regex: /&(pound|#163);/g, val: "£" },
92-
"yen": { regex: /&(yen|#165);/g, val: "¥" },
93-
"euro": { regex: /&(euro|#8364);/g, val: "€" },
94-
"copyright": { regex: /&(copy|#169);/g, val: "©" },
95-
"reg": { regex: /&(reg|#174);/g, val: "®" },
96-
"inr": { regex: /&(inr|#8377);/g, val: "₹" },
97-
"num_dec": { regex: /&#([0-9]{1,7});/g, val: (_, str) => fromCodePoint(str, 10, "&#") },
98-
"num_hex": { regex: /&#x([0-9a-fA-F]{1,6});/g, val: (_, str) => fromCodePoint(str, 16, "&#x") },
99-
};
100-
this.addExternalEntities = addExternalEntities;
10176
this.parseXml = parseXml;
10277
this.parseTextData = parseTextData;
10378
this.resolveNameSpace = resolveNameSpace;
@@ -111,6 +86,16 @@ export default class OrderedObjParser {
11186
this.entityExpansionCount = 0;
11287
this.currentExpandedLength = 0;
11388

89+
this.entityReplacer = new EntityReplacer({
90+
default: true,
91+
// amp: true,
92+
system: this.options.htmlEntities ? { ...COMMON_HTML, ...NUMERIC_ENTITIES } : {},
93+
maxTotalExpansions: this.options.processEntities.maxTotalExpansions,
94+
maxExpandedLength: this.options.processEntities.maxExpandedLength,
95+
applyLimitsTo: "all",
96+
//postCheck: resolved => resolved
97+
});
98+
11499
// Initialize path matcher for path-expression-matcher
115100
this.matcher = new Matcher();
116101

@@ -141,17 +126,6 @@ export default class OrderedObjParser {
141126

142127
}
143128

144-
function addExternalEntities(externalEntities) {
145-
const entKeys = Object.keys(externalEntities);
146-
for (let i = 0; i < entKeys.length; i++) {
147-
const ent = entKeys[i];
148-
const escaped = ent.replace(/[.\-+*:]/g, '\\.');
149-
this.lastEntities[ent] = {
150-
regex: new RegExp("&" + escaped + ";", "g"),
151-
val: externalEntities[ent]
152-
}
153-
}
154-
}
155129

156130
/**
157131
* @param {string} val
@@ -308,9 +282,6 @@ const parseXml = function (xmlData) {
308282
// Reset entity expansion counters for this document
309283
this.entityExpansionCount = 0;
310284
this.currentExpandedLength = 0;
311-
this.docTypeEntitiesKeys = [];
312-
this.lastEntitiesKeys = Object.keys(this.lastEntities);
313-
this.htmlEntitiesKeys = this.options.htmlEntities ? Object.keys(this.htmlEntities) : [];
314285
const options = this.options;
315286
const docTypeReader = new DocTypeReader(options.processEntities);
316287
const xmlLen = xmlData.length;
@@ -390,8 +361,7 @@ const parseXml = function (xmlData) {
390361
} else if (c1 === 33
391362
&& xmlData.charCodeAt(i + 2) === 68) { //'!D'
392363
const result = docTypeReader.readDocType(xmlData, i);
393-
this.docTypeEntities = result.entities;
394-
this.docTypeEntitiesKeys = Object.keys(this.docTypeEntities) || []
364+
this.entityReplacer.addInputEntities(result.entities);
395365
i = result.i;
396366
} else if (c1 === 33
397367
&& xmlData.charCodeAt(i + 2) === 91) { // '!['
@@ -632,78 +602,7 @@ function replaceEntitiesValue(val, tagName, jPath) {
632602
}
633603
}
634604

635-
// Replace DOCTYPE entities
636-
for (const entityName of this.docTypeEntitiesKeys) {
637-
const entity = this.docTypeEntities[entityName];
638-
const matches = val.match(entity.regx);
639-
640-
if (matches) {
641-
// Track expansions
642-
this.entityExpansionCount += matches.length;
643-
644-
// Check expansion limit
645-
if (entityConfig.maxTotalExpansions &&
646-
this.entityExpansionCount > entityConfig.maxTotalExpansions) {
647-
throw new Error(
648-
`Entity expansion limit exceeded: ${this.entityExpansionCount} > ${entityConfig.maxTotalExpansions}`
649-
);
650-
}
651-
652-
// Store length before replacement
653-
const lengthBefore = val.length;
654-
val = val.replace(entity.regx, entity.val);
655-
656-
// Check expanded length immediately after replacement
657-
if (entityConfig.maxExpandedLength) {
658-
this.currentExpandedLength += (val.length - lengthBefore);
659-
660-
if (this.currentExpandedLength > entityConfig.maxExpandedLength) {
661-
throw new Error(
662-
`Total expanded content size exceeded: ${this.currentExpandedLength} > ${entityConfig.maxExpandedLength}`
663-
);
664-
}
665-
}
666-
}
667-
}
668-
if (val.indexOf('&') === -1) return val;
669-
// Replace standard entities
670-
for (const entityName of this.lastEntitiesKeys) {
671-
const entity = this.lastEntities[entityName];
672-
const matches = val.match(entity.regex);
673-
if (matches) {
674-
this.entityExpansionCount += matches.length;
675-
if (entityConfig.maxTotalExpansions &&
676-
this.entityExpansionCount > entityConfig.maxTotalExpansions) {
677-
throw new Error(
678-
`Entity expansion limit exceeded: ${this.entityExpansionCount} > ${entityConfig.maxTotalExpansions}`
679-
);
680-
}
681-
}
682-
val = val.replace(entity.regex, entity.val);
683-
}
684-
if (val.indexOf('&') === -1) return val;
685-
686-
// Replace HTML entities if enabled
687-
for (const entityName of this.htmlEntitiesKeys) {
688-
const entity = this.htmlEntities[entityName];
689-
const matches = val.match(entity.regex);
690-
if (matches) {
691-
//console.log(matches);
692-
this.entityExpansionCount += matches.length;
693-
if (entityConfig.maxTotalExpansions &&
694-
this.entityExpansionCount > entityConfig.maxTotalExpansions) {
695-
throw new Error(
696-
`Entity expansion limit exceeded: ${this.entityExpansionCount} > ${entityConfig.maxTotalExpansions}`
697-
);
698-
}
699-
}
700-
val = val.replace(entity.regex, entity.val);
701-
}
702-
703-
// Replace ampersand entity last
704-
val = val.replace(this.ampEntity.regex, this.ampEntity.val);
705-
706-
return val;
605+
return this.entityReplacer.replace(val);
707606
}
708607

709608

src/xmlparser/XMLParser.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ export default class XMLParser {
3232
}
3333
}
3434
const orderedObjParser = new OrderedObjParser(this.options);
35-
orderedObjParser.addExternalEntities(this.externalEntities);
35+
orderedObjParser.entityReplacer.setExternalEntities(this.externalEntities);
3636
const orderedResult = orderedObjParser.parseXml(xmlData);
3737
if (this.options.preserveOrder || orderedResult === undefined) return orderedResult;
3838
else return prettify(orderedResult, this.options, orderedObjParser.matcher, orderedObjParser.readonlyMatcher);

yarn.lock

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1068,6 +1068,11 @@
10681068
"@jridgewell/resolve-uri" "^3.1.0"
10691069
"@jridgewell/sourcemap-codec" "^1.4.14"
10701070

1071+
"@nodable/entities@^1.0.1":
1072+
version "1.0.1"
1073+
resolved "https://registry.npmjs.org/@nodable/entities/-/entities-1.0.1.tgz"
1074+
integrity sha512-P+QVl83POYu47navqBcOSbjJzYlGQNXwzZXDAt3im6Kzs8tn3twmDKHuAEjB9zpQCF45Hc1xY4ND0lQAV12NXA==
1075+
10711076
"@nodelib/fs.scandir@2.1.5":
10721077
version "2.1.5"
10731078
resolved "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz"

0 commit comments

Comments
 (0)