Skip to content

Commit 65c02d0

Browse files
committed
Improved validation
1 parent 9628331 commit 65c02d0

7 files changed

Lines changed: 111 additions & 21 deletions

File tree

ts/SAXParser.ts

Lines changed: 22 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -421,8 +421,8 @@ export class SAXParser {
421421
let char: string = String.fromCodePoint(code);
422422
this.contentHandler!.characters(char);
423423
} else {
424-
// Look up entity in DTD using grammar interface
425-
const entityValue: string | undefined = grammar.resolveEntity(name);
424+
// Look up entity in grammar, loading external content if necessary
425+
const entityValue: string | undefined = this.grammarHandler.resolveEntityValue(name);
426426
if (entityValue !== undefined) {
427427
if (entityValue.length > 0) {
428428
if (entityValue.length === 1) {
@@ -1410,7 +1410,7 @@ export class SAXParser {
14101410
&& attributeInfo.use !== AttributeUse.REQUIRED) {
14111411
const targetName: string = this.buildAttributeNameForInfo(attributeInfo);
14121412
if (!result.has(targetName)) {
1413-
const defaultValue: string = this.normalizeAttributeByType(attributeInfo.defaultValue, attributeInfo.datatype);
1413+
const defaultValue: string = this.processDefaultAttributeValue(attributeInfo);
14141414
result.set(targetName, defaultValue);
14151415
metadata.set(targetName, {
14161416
specified: false
@@ -1435,6 +1435,24 @@ export class SAXParser {
14351435
}
14361436
}
14371437

1438+
/**
 * Computes the value reported for an attribute that was not specified and
 * falls back to its declared default.
 *
 * The declared default (empty string when absent) goes through these steps,
 * in order:
 *  1. line-break normalization, only when it contains CR or LF;
 *  2. a well-formedness check of the raw text, only when it contains `&`;
 *  3. entity expansion of the (possibly normalized) literal;
 *  4. a character-set check of the expanded text;
 *  5. normalization according to the attribute's declared datatype.
 *
 * @param attributeInfo grammar description of the attribute whose default is needed
 * @returns the expanded default value, normalized for `attributeInfo.datatype`
 */
private processDefaultAttributeValue(attributeInfo: AttributeInfo): string {
    const declared: string = attributeInfo.defaultValue ?? '';

    // Line breaks are normalized on the literal text, before any reference is expanded.
    const containsLineBreak: boolean = /[\r\n]/.test(declared);
    const literal: string = containsLineBreak
        ? this.normalizeLiteralAttributeLineBreaks(declared, declared)
        : declared;

    // References are validated against the raw declaration, not the normalized copy.
    if (declared.indexOf('&') !== -1) {
        this.validateAttributeValueWellFormedness(declared);
    }

    const expanded: string = this.expandEntities(literal);
    this.validateAttributeCharacterSet(expanded);

    return this.normalizeAttributeByType(expanded, attributeInfo.datatype);
}
1455+
14381456
private shouldPreserveLexicalWhitespace(lexicalValue: string): boolean {
14391457
const pattern: RegExp = /&#(x?[0-9A-Fa-f]+);/g;
14401458
let match: RegExpExecArray | null;
@@ -1772,7 +1790,7 @@ export class SAXParser {
17721790
}
17731791

17741792
// Look up custom entity using Grammar interface
1775-
const entityValue: string | undefined = this.grammarHandler.getGrammar().resolveEntity(entityName);
1793+
const entityValue: string | undefined = this.grammarHandler.resolveEntityValue(entityName);
17761794
if (entityValue !== undefined) {
17771795
if (entityValue !== '') {
17781796
// Mark this entity as visited for recursion detection

ts/dtd/ContentModel.ts

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,8 @@ export class ContentModel {
7171
}
7272

7373
parseSpec(modelString: string): ContentModel {
74-
let contentString: string = modelString.replaceAll("\\s+", "");
74+
// Normalize whitespace so multi-line mixed content declarations parse correctly
75+
let contentString: string = modelString.replace(/\s+/g, "");
7576
try {
7677
this.validateParentheses(contentString);
7778
} catch (e: unknown) {

ts/dtd/DTDParser.ts

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -360,14 +360,22 @@ export class DTDParser {
360360

361361
// Replaces parameter-entity references (`%name;`) in `fragment` with the value
// of the matching declaration from this.grammar, looping for as long as
// XMLUtils.hasParameterEntity(fragment) reports a reference, and returns the
// resulting text. Throws an Error when the grammar has no parameter entity
// with the extracted name.
// NOTE(review): this span is a diff hunk. Lines preceded by an `NNN-` marker
// appear to be the removed implementation and lines preceded by `NNN+` the
// added one; unmarked pairs are shared context. Confirm against the repository.
resolveEntities(fragment: string): string {
362362
while (XMLUtils.hasParameterEntity(fragment)) {
363-
let start = fragment.indexOf('%');
364-
// Searches for ';' from the beginning of the fragment, not from `start`.
let end = fragment.indexOf(';');
365-
let entityName = fragment.substring(start + '%'.length, end);
363+
let start: number = fragment.indexOf('%');
364+
// Leaves the loop if no '%' is present even though hasParameterEntity returned true.
if (start === -1) {
365+
break;
366+
}
367+
// Searches for the terminating ';' at or after the '%' that was just found.
let end: number = fragment.indexOf(';', start);
368+
if (end === -1) {
369+
throw new Error('Malformed parameter entity reference while resolving "' + fragment + '"');
370+
}
371+
// The name is the text between '%' and ';' with surrounding whitespace trimmed.
let entityName: string = fragment.substring(start + '%'.length, end).trim();
366372
let entity: EntityDecl | undefined = this.grammar.getParameterEntity(entityName);
367373
if (entity === undefined) {
368-
throw new Error('Unknown entity: ' + entityName + ' in resolveEntities');
374+
// Includes up to 80 characters of the fragment, starting at the '%', in the error message.
let context: string = fragment.substring(start, Math.min(fragment.length, start + 80));
375+
throw new Error('Unknown entity: ' + entityName + ' in resolveEntities while processing "' + context + '"');
369376
}
370-
// String-pattern replace: substitutes the first textual occurrence of the reference.
fragment = fragment.replace('%' + entityName + ';', entity.getValue());
377+
let replacement: string = entity.getValue();
378+
// Splices by index, replacing exactly the characters from `start` through `end`.
fragment = fragment.substring(0, start) + replacement + fragment.substring(end + ';'.length);
371379
}
372380
return fragment;
373381
}

ts/dtd/ElementDecl.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ export class ElementDecl implements XMLNode {
3232
return;
3333
}
3434
// Build and validate the content model using the complete parser
35-
let simplified: string = this.contentSpec.replace('\n', ' ');
35+
let simplified: string = this.contentSpec.replace(/\r?\n/g, ' ');
3636
simplified = simplified.replace(/\s+/g, '').trim();
3737
const parser: DTDContentModelParser = new DTDContentModelParser(simplified);
3838
const model: DTDContentModel = parser.parse();

ts/dtd/EntityDecl.ts

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ export class EntityDecl implements XMLNode {
1717

1818
private name: string;
1919
private parameterEntity: boolean;
20-
private value: string;
20+
private value: string;
2121
private systemId: string;
2222
private publicId: string;
2323
private ndata: string;
@@ -64,6 +64,14 @@ export class EntityDecl implements XMLNode {
6464
return this.publicId;
6565
}
6666

67+
/**
 * Returns the value stored in this declaration's `ndata` field.
 *
 * @returns the `ndata` string as held by the declaration
 */
getNotationName(): string {
    const { ndata } = this;
    return ndata;
}
70+
71+
/**
 * Reports whether this declaration carries an external identifier.
 *
 * @returns `true` when either `systemId` or `publicId` is a non-empty string
 */
isExternal(): boolean {
    if (this.systemId !== '') {
        return true;
    }
    return this.publicId !== '';
}
74+
6775
/**
 * Identifies this node as an entity declaration.
 *
 * @returns `Constants.ENTITY_DECL_NODE`
 */
getNodeType(): number {
    const nodeType: number = Constants.ENTITY_DECL_NODE;
    return nodeType;
}
@@ -82,12 +90,12 @@ export class EntityDecl implements XMLNode {
8290

8391
/**
 * Compares this declaration with another node.
 *
 * @param node the node to compare against
 * @returns `true` only when `node` is an `EntityDecl` whose name,
 *          parameter-entity flag, value, system id, public id and ndata are
 *          all strictly equal to this declaration's; `false` otherwise
 */
equals(node: XMLNode): boolean {
    if (!(node instanceof EntityDecl)) {
        return false;
    }
    const other: EntityDecl = node;
    return this.name === other.name
        && this.parameterEntity === other.parameterEntity
        && this.value === other.value
        && this.systemId === other.systemId
        && this.publicId === other.publicId
        && this.ndata === other.ndata;
}

ts/grammar/DTDComposite.ts

Lines changed: 21 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -18,14 +18,14 @@ import { AttributeInfo, Grammar, GrammarType, ValidationContext, ValidationResul
1818

1919

2020
export class DTDComposite implements Grammar {
21-
21+
2222
private static instance: DTDComposite | undefined;
2323
private validating: boolean = false;
2424
private internalDTD: DTDGrammar | undefined;
2525
private externalDTDs: DTDGrammar[] = [];
2626
private sharedParameterEntities: Map<string, EntityDecl> = new Map();
2727
private includeDefaultAttributes: boolean = true;
28-
28+
2929
private constructor() {
3030
// Initialize with predefined entities like DTDGrammar does
3131
this.addPredefinedEntities();
@@ -285,6 +285,24 @@ export class DTDComposite implements Grammar {
285285
return false;
286286
}
287287

288+
/**
 * Finds the declaration of an entity by name.
 *
 * The internal DTD, when present, is consulted first; the external DTDs are
 * then consulted in the order they are stored. The first match wins.
 *
 * @param entityName name of the entity to look up
 * @returns the first declaration found, or `undefined` when no DTD has one
 */
getEntityDeclaration(entityName: string): EntityDecl | undefined {
    const internalMatch = this.internalDTD?.getEntity(entityName);
    if (internalMatch) {
        return internalMatch;
    }

    for (const candidate of this.externalDTDs) {
        const externalMatch = candidate.getEntity(entityName);
        if (externalMatch) {
            return externalMatch;
        }
    }

    return undefined;
}
305+
288306
private notationExists(notationName: string): boolean {
289307
// Check internal DTD first
290308
if (this.internalDTD) {
@@ -382,7 +400,7 @@ export class DTDComposite implements Grammar {
382400

383401
return undefined;
384402
}
385-
403+
386404
consumeEntityReference(expandedText: string): string | undefined {
387405
if (this.internalDTD) {
388406
const ref = this.internalDTD.consumeEntityReference(expandedText);

ts/grammar/GrammarHandler.ts

Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ import { fileURLToPath } from 'url';
1616
import { Catalog } from '../Catalog';
1717
import { DTDGrammar } from '../dtd/DTDGrammar';
1818
import { DTDParser } from '../dtd/DTDParser';
19+
import { EntityDecl } from '../dtd/EntityDecl';
1920
import { XMLSchemaParser } from '../schema/XMLSchemaParser';
2021
import { CompositeGrammar } from './CompositeGrammar';
2122
import { DTDComposite } from './DTDComposite';
@@ -86,6 +87,14 @@ export class GrammarHandler {
8687
return this.compositeGrammar;
8788
}
8889

90+
/**
 * Resolves the replacement text of an entity through the active grammar.
 *
 * When the grammar returned by `getGrammar()` is a `DTDComposite`, resolution
 * is delegated to `resolveEntityValueFromDTD`; any other grammar is asked
 * directly through `resolveEntity`.
 *
 * @param entityName name of the entity being referenced
 * @returns the value produced by the chosen resolver, or `undefined` when it yields none
 */
resolveEntityValue(entityName: string): string | undefined {
    const activeGrammar: Grammar = this.getGrammar();
    return activeGrammar instanceof DTDComposite
        ? this.resolveEntityValueFromDTD(activeGrammar, entityName)
        : activeGrammar.resolveEntity(entityName);
}
97+
8998
getLoadedGrammars(): Array<{ namespace: string, type: string, elementCount?: number, typeCount?: number }> {
9099
const grammars = this.compositeGrammar.getLoadedGrammarList();
91100
if (this.dtdComposite) {
@@ -99,6 +108,34 @@ export class GrammarHandler {
99108
return grammars;
100109
}
101110

111+
/**
 * Resolves an entity's value from a DTD composite, loading external content
 * on first use.
 *
 * @param dtdComposite composite grammar that holds the entity declarations
 * @param entityName name of the entity being referenced
 * @returns the declaration's value, or `undefined` when the entity is not declared
 * @throws Error when the declaration has a non-empty notation name
 */
private resolveEntityValueFromDTD(dtdComposite: DTDComposite, entityName: string): string | undefined {
    const declaration: EntityDecl | undefined = dtdComposite.getEntityDeclaration(entityName);
    if (!declaration) {
        return undefined;
    }

    const hasNotation: boolean = declaration.getNotationName() !== '';
    if (hasNotation) {
        throw new Error(`Unparsed entity '${entityName}' cannot be referenced in parsed content`);
    }

    // Fetch external content only when it has not been loaded yet, then store it on the declaration.
    const needsLoading: boolean = declaration.isExternal() && !declaration.isExternalContentLoaded();
    if (needsLoading) {
        declaration.setValue(this.loadExternalEntityContent(declaration));
    }

    return declaration.getValue();
}
128+
129+
/**
 * Loads the content of an external entity through a fresh `DTDParser`.
 *
 * The parser is created with the directory of `currentFile` as its base, or
 * the process working directory when no current file is set. It inherits this
 * handler's validating flag and, when one is configured, its catalog.
 *
 * @param entityDecl declaration providing the public and system identifiers
 * @returns whatever `DTDParser.loadExternalEntity` returns for those identifiers
 */
private loadExternalEntityContent(entityDecl: EntityDecl): string {
    let baseDirectory: string;
    if (this.currentFile) {
        baseDirectory = dirname(this.currentFile);
    } else {
        baseDirectory = process.cwd();
    }

    const entityLoader: DTDParser = new DTDParser(undefined, baseDirectory);
    entityLoader.setValidating(this.validating);
    if (this.catalog) {
        entityLoader.setCatalog(this.catalog);
    }

    const publicId = entityDecl.getPublicId();
    const systemId = entityDecl.getSystemId();
    return entityLoader.loadExternalEntity(publicId, systemId, true);
}
138+
102139
hasGrammar(namespaceURI: string | undefined): boolean {
103140
// Check DTD grammar first (DTD has no namespace, so namespaceURI would be undefined)
104141
if (namespaceURI === undefined && this.dtdComposite) {

0 commit comments

Comments
 (0)