Skip to content

Commit 4c9bd7b

Browse files
committed
Fixed canonical form
1 parent 8dbf3ff commit 4c9bd7b

11 files changed

Lines changed: 590 additions & 91 deletions

ts/DOMBuilder.ts

Lines changed: 69 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,8 @@ import { XMLDocument } from "./XMLDocument";
2222
import { XMLDocumentType } from "./XMLDocumentType";
2323
import { XMLElement } from "./XMLElement";
2424
import { XMLUtils } from "./XMLUtils";
25+
import { AttDecl } from "./dtd/AttDecl";
26+
import { AttListDecl } from "./dtd/AttListDecl";
2527
import { DTDParser } from "./dtd/DTDParser";
2628
import { Grammar } from "./grammar/Grammar";
2729
import { GrammarHandler } from "./grammar/GrammarHandler";
@@ -41,13 +43,15 @@ export class DOMBuilder implements ContentHandler {
4143
private includeDefaultAttributes: boolean = true;
4244
private declaredIds: Set<string> = new Set();
4345
private pendingIdrefs: string[] = [];
46+
private defaultAttributeLexicalCache: Map<string, Map<string, string>> = new Map();
4447

4548
/**
 * Resets the builder's per-document state: a new XMLDocument, an empty
 * element stack, cleared ID/IDREF bookkeeping, an empty default-attribute
 * lexical cache and a new GrammarHandler.
 */
initialize(): void {
    // Fresh document and empty element stack
    this.document = new XMLDocument();
    this.stack = [];
    this.inCdData = false;

    // Drop ID / IDREF bookkeeping and cached ATTLIST defaults
    this.declaredIds.clear();
    this.pendingIdrefs = [];
    this.defaultAttributeLexicalCache.clear();

    // Create initial GrammarHandler for this ContentHandler
    this.grammarHandler = new GrammarHandler();
}
@@ -87,17 +91,67 @@ export class DOMBuilder implements ContentHandler {
8791
const existingAttNames = new Set<string>();
8892
const attributes = element.getAttributes();
8993
if (attributes) {
90-
attributes.forEach(att => existingAttNames.add(att.getName()));
94+
attributes.forEach((att: XMLAttribute) => existingAttNames.add(att.getName()));
9195
}
9296

93-
defaultAttrs.forEach((defaultValue, attName) => {
94-
if (!existingAttNames.has(attName)) {
95-
const defaultAttr = new XMLAttribute(attName, defaultValue);
96-
element.setAttribute(defaultAttr);
97+
defaultAttrs.forEach((defaultValue: string, attName: string) => {
98+
if (existingAttNames.has(attName)) {
99+
const existingAttr: XMLAttribute | undefined = element.getAttribute(attName);
100+
if (existingAttr && !existingAttr.isSpecified() && existingAttr.getLexicalValue() === undefined) {
101+
const lexicalValueExisting: string = this.getDefaultAttributeLexical(elementName, attName) ?? defaultValue;
102+
existingAttr.setLexicalValue(lexicalValueExisting);
103+
}
104+
return;
97105
}
106+
const lexicalValue: string = this.getDefaultAttributeLexical(elementName, attName) ?? defaultValue;
107+
const defaultAttr: XMLAttribute = new XMLAttribute(attName, defaultValue, false, lexicalValue);
108+
element.setAttribute(defaultAttr);
98109
});
99110
}
100111

112+
/**
 * Looks up the default value declared for an attribute in the ATTLIST
 * declarations of the document's internal DTD subset.
 *
 * The internal subset is scanned at most once per element name: the
 * defaults found (possibly none) are stored in
 * `defaultAttributeLexicalCache` and reused by later calls until the cache
 * is cleared.
 *
 * @param elementName name of the element whose ATTLIST declarations are searched
 * @param attributeName name of the attribute whose default is wanted
 * @returns the declared default value, or `undefined` when the document has
 *          no internal subset or no non-empty default is declared there
 */
private getDefaultAttributeLexical(elementName: string, attributeName: string): string | undefined {
    // A cached map is the complete result of an earlier scan, so a miss in it
    // is definitive and must not trigger another pass over the subset.
    const cachedForElement: Map<string, string> | undefined = this.defaultAttributeLexicalCache.get(elementName);
    if (cachedForElement) {
        return cachedForElement.get(attributeName);
    }

    const docType: XMLDocumentType | undefined = this.document?.getDocumentType();
    const internalSubset: string | undefined = docType?.getInternalSubset();
    if (!internalSubset) {
        // Nothing to scan; not cached so a later call can still see a subset.
        return undefined;
    }

    // The element name must be followed by whitespace or '>' rather than a
    // word boundary: '-', '.' and ':' are legal XML name characters, so '\b'
    // would let "foo" match a declaration for "foo-bar" and would never match
    // a name ending in '-' or '.'.
    // Quoted literals are consumed as a unit so that a '>' inside a default
    // value does not end the declaration early.
    const attlistPattern: RegExp = new RegExp(
        String.raw`<!ATTLIST\s+${this.escapeRegExp(elementName)}(?=[\s>])((?:"[^"]*"|'[^']*'|[^"'>])*)>`,
        'g'
    );
    const lexicalMapForElement: Map<string, string> = new Map<string, string>();
    let match: RegExpExecArray | null;

    while ((match = attlistPattern.exec(internalSubset)) !== null) {
        const attributesText: string = match[1];
        try {
            const attList: AttListDecl = new AttListDecl(elementName, attributesText.trim());
            attList.getAttributes().forEach((attDecl: AttDecl, name: string) => {
                const defaultValue: string = attDecl.getDefaultValue();
                if (defaultValue) {
                    lexicalMapForElement.set(name, defaultValue);
                }
            });
        } catch (error) {
            // Ignore parsing issues for malformed attlist fragments in internal subset
        }
    }

    // Cache even an empty result so elements without declared defaults are
    // not re-scanned on every lookup.
    this.defaultAttributeLexicalCache.set(elementName, lexicalMapForElement);

    return lexicalMapForElement.get(attributeName);
}
150+
151+
/**
 * Escapes every regular-expression metacharacter in `value` by prefixing it
 * with a backslash, so the result can be embedded in a RegExp source and
 * match the original text literally.
 *
 * @param value text to escape
 * @returns the escaped text
 */
private escapeRegExp(value: string): string {
    const metaCharacters: RegExp = /[.*+?^${}()|[\]\\]/g;
    // '$&' re-inserts the matched character after the added backslash
    return value.replace(metaCharacters, '\\$&');
}
154+
101155
private trackIdAttributes(elementName: string, element: XMLElement): void {
102156
if (!this.validating || !this.grammar) {
103157
return;
@@ -164,6 +218,9 @@ export class DOMBuilder implements ContentHandler {
164218

165219
/**
 * End-of-document callback: calls validateIdReferences() and then, when both
 * the document and the grammar handler exist, passes the handler's grammar
 * to the document via setGrammar().
 */
endDocument(): void {
    this.validateIdReferences();

    if (!this.document || !this.grammarHandler) {
        return;
    }
    this.document.setGrammar(this.grammarHandler.getGrammar());
}
168225

169226
xmlDeclaration(version: string, encoding: string, standalone: string): void {
@@ -173,8 +230,14 @@ export class DOMBuilder implements ContentHandler {
173230

174231
startElement(name: string, atts: XMLAttribute[]): void {
175232
let element: XMLElement = new XMLElement(name);
176-
atts.forEach((att) => {
233+
atts.forEach((att: XMLAttribute) => {
177234
element.setAttribute(att);
235+
if (!att.isSpecified() && att.getLexicalValue() === undefined) {
236+
const lexicalDefault: string | undefined = this.getDefaultAttributeLexical(name, att.getName());
237+
if (lexicalDefault !== undefined) {
238+
att.setLexicalValue(lexicalDefault);
239+
}
240+
}
178241
});
179242

180243
// Add default attributes when includeDefaultAttributes flag is set

0 commit comments

Comments
 (0)