@@ -22,6 +22,8 @@ import { XMLDocument } from "./XMLDocument";
2222import { XMLDocumentType } from "./XMLDocumentType" ;
2323import { XMLElement } from "./XMLElement" ;
2424import { XMLUtils } from "./XMLUtils" ;
25+ import { AttDecl } from "./dtd/AttDecl" ;
26+ import { AttListDecl } from "./dtd/AttListDecl" ;
2527import { DTDParser } from "./dtd/DTDParser" ;
2628import { Grammar } from "./grammar/Grammar" ;
2729import { GrammarHandler } from "./grammar/GrammarHandler" ;
@@ -41,13 +43,15 @@ export class DOMBuilder implements ContentHandler {
4143 private includeDefaultAttributes : boolean = true ;
4244 private declaredIds : Set < string > = new Set ( ) ;
4345 private pendingIdrefs : string [ ] = [ ] ;
46+ private defaultAttributeLexicalCache : Map < string , Map < string , string > > = new Map ( ) ;
4447
/**
 * Puts the builder back into a pristine state so it can build a new document.
 *
 * A new XMLDocument and an empty element stack are created, the CDATA flag is
 * lowered, the ID/IDREF bookkeeping and the default-attribute lexical cache
 * are emptied, and a new GrammarHandler is installed.
 */
initialize(): void {
    // Document under construction and the open-element stack
    this.document = new XMLDocument();
    this.stack = [];
    this.inCdData = false;

    // ID / IDREF bookkeeping collected while building
    this.declaredIds.clear();
    this.pendingIdrefs = [];

    // Lexical defaults cached per element name
    this.defaultAttributeLexicalCache.clear();

    // Every (re)initialization starts with its own GrammarHandler
    this.grammarHandler = new GrammarHandler();
}
@@ -87,17 +91,67 @@ export class DOMBuilder implements ContentHandler {
8791 const existingAttNames = new Set < string > ( ) ;
8892 const attributes = element . getAttributes ( ) ;
8993 if ( attributes ) {
90- attributes . forEach ( att => existingAttNames . add ( att . getName ( ) ) ) ;
94+ attributes . forEach ( ( att : XMLAttribute ) => existingAttNames . add ( att . getName ( ) ) ) ;
9195 }
9296
93- defaultAttrs . forEach ( ( defaultValue , attName ) => {
94- if ( ! existingAttNames . has ( attName ) ) {
95- const defaultAttr = new XMLAttribute ( attName , defaultValue ) ;
96- element . setAttribute ( defaultAttr ) ;
97+ defaultAttrs . forEach ( ( defaultValue : string , attName : string ) => {
98+ if ( existingAttNames . has ( attName ) ) {
99+ const existingAttr : XMLAttribute | undefined = element . getAttribute ( attName ) ;
100+ if ( existingAttr && ! existingAttr . isSpecified ( ) && existingAttr . getLexicalValue ( ) === undefined ) {
101+ const lexicalValueExisting : string = this . getDefaultAttributeLexical ( elementName , attName ) ?? defaultValue ;
102+ existingAttr . setLexicalValue ( lexicalValueExisting ) ;
103+ }
104+ return ;
97105 }
106+ const lexicalValue : string = this . getDefaultAttributeLexical ( elementName , attName ) ?? defaultValue ;
107+ const defaultAttr : XMLAttribute = new XMLAttribute ( attName , defaultValue , false , lexicalValue ) ;
108+ element . setAttribute ( defaultAttr ) ;
98109 } ) ;
99110 }
100111
/**
 * Returns the default value declared for `attributeName` on `elementName` by the
 * `<!ATTLIST ...>` declarations found in the internal subset of the current
 * document type.
 *
 * The internal subset is scanned at most once per element name: the resulting
 * attribute-to-default map is stored in `defaultAttributeLexicalCache` even when
 * it is empty, so repeated lookups for attributes without a declared default do
 * not re-scan and re-parse the subset. Nothing is cached while no internal
 * subset is available.
 *
 * @param elementName name of the element owning the attribute
 * @param attributeName name of the attribute to look up
 * @returns the declared default value, or `undefined` when there is no internal
 *          subset or no non-empty default is declared for the attribute
 */
private getDefaultAttributeLexical(elementName: string, attributeName: string): string | undefined {
    // A cached map (possibly empty) means this element was already scanned.
    const cachedForElement: Map<string, string> | undefined = this.defaultAttributeLexicalCache.get(elementName);
    if (cachedForElement) {
        return cachedForElement.get(attributeName);
    }

    const docType: XMLDocumentType | undefined = this.document?.getDocumentType();
    const internalSubset: string | undefined = docType?.getInternalSubset();
    if (!internalSubset) {
        return undefined;
    }

    // The element name must be followed by whitespace or '>' so that a declaration
    // for "foo-bar", "foo.bar" or "foo:bar" is not mistaken for one of "foo"
    // ('\b' would accept those, and would reject names ending in '.' or '-').
    // Quoted literals are consumed as a unit so a '>' inside a default value
    // does not end the declaration early.
    const attlistPattern: RegExp = new RegExp(
        String.raw`<!ATTLIST\s+${this.escapeRegExp(elementName)}(?=[\s>])((?:"[^"]*"|'[^']*'|[^>"'])*)>`,
        'g'
    );
    const lexicalMapForElement: Map<string, string> = new Map<string, string>();
    let match: RegExpExecArray | null;

    while ((match = attlistPattern.exec(internalSubset)) !== null) {
        const attributesText: string = match[1];
        try {
            const attList: AttListDecl = new AttListDecl(elementName, attributesText.trim());
            attList.getAttributes().forEach((attDecl: AttDecl, name: string) => {
                const defaultValue: string = attDecl.getDefaultValue();
                if (defaultValue) {
                    lexicalMapForElement.set(name, defaultValue);
                }
            });
        } catch {
            // Deliberate best effort: a malformed ATTLIST fragment in the internal
            // subset is skipped and callers fall back to the normalized default.
        }
    }

    // Cache the outcome of the scan, including "nothing found".
    this.defaultAttributeLexicalCache.set(elementName, lexicalMapForElement);

    return lexicalMapForElement.get(attributeName);
}
150+
/**
 * Escapes the regular-expression metacharacters in `value` so the result can be
 * embedded in a `RegExp` source and match the text literally.
 *
 * Each of `. * + ? ^ $ { } ( ) | [ ] \` is prefixed with a backslash.
 *
 * @param value text to be matched literally
 * @returns `value` with every metacharacter backslash-escaped
 */
private escapeRegExp(value: string): string {
    // '$&' re-inserts the matched character after the added backslash.
    return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
154+
101155 private trackIdAttributes ( elementName : string , element : XMLElement ) : void {
102156 if ( ! this . validating || ! this . grammar ) {
103157 return ;
@@ -164,6 +218,9 @@ export class DOMBuilder implements ContentHandler {
164218
/**
 * Called when the end of the document is reached.
 *
 * Runs `validateIdReferences()` and then, when both a document and a grammar
 * handler are present, stores the handler's grammar on the document.
 */
endDocument(): void {
    this.validateIdReferences();

    if (!this.document || !this.grammarHandler) {
        return;
    }
    this.document.setGrammar(this.grammarHandler.getGrammar());
}
168225
169226 xmlDeclaration ( version : string , encoding : string , standalone : string ) : void {
@@ -173,8 +230,14 @@ export class DOMBuilder implements ContentHandler {
173230
174231 startElement ( name : string , atts : XMLAttribute [ ] ) : void {
175232 let element : XMLElement = new XMLElement ( name ) ;
176- atts . forEach ( ( att ) => {
233+ atts . forEach ( ( att : XMLAttribute ) => {
177234 element . setAttribute ( att ) ;
235+ if ( ! att . isSpecified ( ) && att . getLexicalValue ( ) === undefined ) {
236+ const lexicalDefault : string | undefined = this . getDefaultAttributeLexical ( name , att . getName ( ) ) ;
237+ if ( lexicalDefault !== undefined ) {
238+ att . setLexicalValue ( lexicalDefault ) ;
239+ }
240+ }
178241 } ) ;
179242
180243 // Add default attributes when includeDefaultAttributes flag is set
0 commit comments