@@ -8,6 +8,7 @@ import toNumber from "strnum";
88import getIgnoreAttributesFn from "../ignoreAttributes.js" ;
99import { Expression , Matcher } from 'path-expression-matcher' ;
1010import { ExpressionSet } from 'path-expression-matcher' ;
11+ import EntityReplacer , { COMMON_HTML , NUMERIC_ENTITIES } from '@nodable/entities' ;
1112
1213// const regx =
1314// '<((!\\[CDATA\\[([\\s\\S]*?)(]]>))|((NAME:)?(NAME))([^>]*)>|((\\/)(NAME)\\s*>))([^<]*)'
@@ -72,32 +73,6 @@ export default class OrderedObjParser {
7273 this . options = options ;
7374 this . currentNode = null ;
7475 this . tagsNodeStack = [ ] ;
75- this . docTypeEntities = { } ;
76- this . lastEntities = {
77- "apos" : { regex : / & ( a p o s | # 3 9 | # x 2 7 ) ; / g, val : "'" } ,
78- "gt" : { regex : / & ( g t | # 6 2 | # x 3 E ) ; / g, val : ">" } ,
79- "lt" : { regex : / & ( l t | # 6 0 | # x 3 C ) ; / g, val : "<" } ,
80- "quot" : { regex : / & ( q u o t | # 3 4 | # x 2 2 ) ; / g, val : "\"" } ,
81- } ;
82- this . ampEntity = { regex : / & ( a m p | # 3 8 | # x 2 6 ) ; / g, val : "&" } ;
83- this . htmlEntities = {
84- "space" : { regex : / & ( n b s p | # 1 6 0 ) ; / g, val : " " } ,
85- // "lt" : { regex: /&(lt|#60);/g, val: "<" },
86- // "gt" : { regex: /&(gt|#62);/g, val: ">" },
87- // "amp" : { regex: /&(amp|#38);/g, val: "&" },
88- // "quot" : { regex: /&(quot|#34);/g, val: "\"" },
89- // "apos" : { regex: /&(apos|#39);/g, val: "'" },
90- "cent" : { regex : / & ( c e n t | # 1 6 2 ) ; / g, val : "¢" } ,
91- "pound" : { regex : / & ( p o u n d | # 1 6 3 ) ; / g, val : "£" } ,
92- "yen" : { regex : / & ( y e n | # 1 6 5 ) ; / g, val : "¥" } ,
93- "euro" : { regex : / & ( e u r o | # 8 3 6 4 ) ; / g, val : "€" } ,
94- "copyright" : { regex : / & ( c o p y | # 1 6 9 ) ; / g, val : "©" } ,
95- "reg" : { regex : / & ( r e g | # 1 7 4 ) ; / g, val : "®" } ,
96- "inr" : { regex : / & ( i n r | # 8 3 7 7 ) ; / g, val : "₹" } ,
97- "num_dec" : { regex : / & # ( [ 0 - 9 ] { 1 , 7 } ) ; / g, val : ( _ , str ) => fromCodePoint ( str , 10 , "&#" ) } ,
98- "num_hex" : { regex : / & # x ( [ 0 - 9 a - f A - F ] { 1 , 6 } ) ; / g, val : ( _ , str ) => fromCodePoint ( str , 16 , "&#x" ) } ,
99- } ;
100- this . addExternalEntities = addExternalEntities ;
10176 this . parseXml = parseXml ;
10277 this . parseTextData = parseTextData ;
10378 this . resolveNameSpace = resolveNameSpace ;
@@ -111,6 +86,16 @@ export default class OrderedObjParser {
11186 this . entityExpansionCount = 0 ;
11287 this . currentExpandedLength = 0 ;
11388
89+ this . entityReplacer = new EntityReplacer ( {
90+ default : true ,
91+ // amp: true,
92+ system : this . options . htmlEntities ? { ...COMMON_HTML , ...NUMERIC_ENTITIES } : { } ,
93+ maxTotalExpansions : this . options . processEntities . maxTotalExpansions ,
94+ maxExpandedLength : this . options . processEntities . maxExpandedLength ,
95+ applyLimitsTo : "all" ,
96+ //postCheck: resolved => resolved
97+ } ) ;
98+
11499 // Initialize path matcher for path-expression-matcher
115100 this . matcher = new Matcher ( ) ;
116101
@@ -141,17 +126,6 @@ export default class OrderedObjParser {
141126
142127}
143128
144- function addExternalEntities ( externalEntities ) {
145- const entKeys = Object . keys ( externalEntities ) ;
146- for ( let i = 0 ; i < entKeys . length ; i ++ ) {
147- const ent = entKeys [ i ] ;
148- const escaped = ent . replace ( / [ . \- + * : ] / g, '\\.' ) ;
149- this . lastEntities [ ent ] = {
150- regex : new RegExp ( "&" + escaped + ";" , "g" ) ,
151- val : externalEntities [ ent ]
152- }
153- }
154- }
155129
156130/**
157131 * @param {string } val
@@ -308,9 +282,6 @@ const parseXml = function (xmlData) {
308282 // Reset entity expansion counters for this document
309283 this . entityExpansionCount = 0 ;
310284 this . currentExpandedLength = 0 ;
311- this . docTypeEntitiesKeys = [ ] ;
312- this . lastEntitiesKeys = Object . keys ( this . lastEntities ) ;
313- this . htmlEntitiesKeys = this . options . htmlEntities ? Object . keys ( this . htmlEntities ) : [ ] ;
314285 const options = this . options ;
315286 const docTypeReader = new DocTypeReader ( options . processEntities ) ;
316287 const xmlLen = xmlData . length ;
@@ -390,8 +361,7 @@ const parseXml = function (xmlData) {
390361 } else if ( c1 === 33
391362 && xmlData . charCodeAt ( i + 2 ) === 68 ) { //'!D'
392363 const result = docTypeReader . readDocType ( xmlData , i ) ;
393- this . docTypeEntities = result . entities ;
394- this . docTypeEntitiesKeys = Object . keys ( this . docTypeEntities ) || [ ]
364+ this . entityReplacer . addInputEntities ( result . entities ) ;
395365 i = result . i ;
396366 } else if ( c1 === 33
397367 && xmlData . charCodeAt ( i + 2 ) === 91 ) { // '!['
@@ -632,78 +602,7 @@ function replaceEntitiesValue(val, tagName, jPath) {
632602 }
633603 }
634604
635- // Replace DOCTYPE entities
636- for ( const entityName of this . docTypeEntitiesKeys ) {
637- const entity = this . docTypeEntities [ entityName ] ;
638- const matches = val . match ( entity . regx ) ;
639-
640- if ( matches ) {
641- // Track expansions
642- this . entityExpansionCount += matches . length ;
643-
644- // Check expansion limit
645- if ( entityConfig . maxTotalExpansions &&
646- this . entityExpansionCount > entityConfig . maxTotalExpansions ) {
647- throw new Error (
648- `Entity expansion limit exceeded: ${ this . entityExpansionCount } > ${ entityConfig . maxTotalExpansions } `
649- ) ;
650- }
651-
652- // Store length before replacement
653- const lengthBefore = val . length ;
654- val = val . replace ( entity . regx , entity . val ) ;
655-
656- // Check expanded length immediately after replacement
657- if ( entityConfig . maxExpandedLength ) {
658- this . currentExpandedLength += ( val . length - lengthBefore ) ;
659-
660- if ( this . currentExpandedLength > entityConfig . maxExpandedLength ) {
661- throw new Error (
662- `Total expanded content size exceeded: ${ this . currentExpandedLength } > ${ entityConfig . maxExpandedLength } `
663- ) ;
664- }
665- }
666- }
667- }
668- if ( val . indexOf ( '&' ) === - 1 ) return val ;
669- // Replace standard entities
670- for ( const entityName of this . lastEntitiesKeys ) {
671- const entity = this . lastEntities [ entityName ] ;
672- const matches = val . match ( entity . regex ) ;
673- if ( matches ) {
674- this . entityExpansionCount += matches . length ;
675- if ( entityConfig . maxTotalExpansions &&
676- this . entityExpansionCount > entityConfig . maxTotalExpansions ) {
677- throw new Error (
678- `Entity expansion limit exceeded: ${ this . entityExpansionCount } > ${ entityConfig . maxTotalExpansions } `
679- ) ;
680- }
681- }
682- val = val . replace ( entity . regex , entity . val ) ;
683- }
684- if ( val . indexOf ( '&' ) === - 1 ) return val ;
685-
686- // Replace HTML entities if enabled
687- for ( const entityName of this . htmlEntitiesKeys ) {
688- const entity = this . htmlEntities [ entityName ] ;
689- const matches = val . match ( entity . regex ) ;
690- if ( matches ) {
691- //console.log(matches);
692- this . entityExpansionCount += matches . length ;
693- if ( entityConfig . maxTotalExpansions &&
694- this . entityExpansionCount > entityConfig . maxTotalExpansions ) {
695- throw new Error (
696- `Entity expansion limit exceeded: ${ this . entityExpansionCount } > ${ entityConfig . maxTotalExpansions } `
697- ) ;
698- }
699- }
700- val = val . replace ( entity . regex , entity . val ) ;
701- }
702-
703- // Replace ampersand entity last
704- val = val . replace ( this . ampEntity . regex , this . ampEntity . val ) ;
705-
706- return val ;
605+ return this . entityReplacer . replace ( val ) ;
707606}
708607
709608
0 commit comments