Skip to content

Commit c1d0483

Browse files
committed
Improved XML Schema support
1 parent 65c02d0 commit c1d0483

9 files changed

Lines changed: 1315 additions & 347 deletions

ts/SAXParser.ts

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,22 @@ export class SAXParser {
8080
this.resetNamespaceContext();
8181
}
8282

83+
/**
 * Puts the parser's per-parse bookkeeping fields back to their initial values:
 * read pointer and element depth counter to 0, name / xml:space / children
 * stacks to empty arrays, the pending character run to '', every "inside a
 * construct" flag to false, and the two clearable collections emptied.
 *
 * NOTE(review): presumably this exists so one SAXParser instance can parse
 * several documents in sequence without state leaking between them — confirm
 * against the call sites.
 */
private resetParserState(): void {
84+
this.pointer = 0;
85+
this.elementStack = 0;
86+
this.elementNameStack = [];
87+
this.xmlSpaceStack = [];
88+
this.characterRun = '';
89+
this.rootParsed = false;
90+
this.inCDATA = false;
91+
this.inComment = false;
92+
this.inDoctype = false;
93+
this.inProcessingInstruction = false;
94+
this.childrenNames = [];
95+
// These two are emptied in place with clear() rather than reassigned, so any
// existing reference to the same collection object observes the reset too.
this.lastParsedAttributeLexical.clear();
96+
this.characterRunPreservedCR.clear();
97+
}
98+
8399
setContentHandler(contentHandler: ContentHandler): void {
84100
this.contentHandler = contentHandler;
85101
this.contentHandler.setValidating(this.validating);
@@ -150,6 +166,15 @@ export class SAXParser {
150166
this.reader = new FileReader(resolvedPath, encoding);
151167
try {
152168
this.buffer = this.reader.read();
169+
170+
this.resetParserState();
171+
172+
// Reset grammar state so each document starts with a clean composite
173+
if (typeof this.grammarHandler.initialize === 'function') {
174+
this.grammarHandler.initialize();
175+
this.grammarHandler.setIncludeDefaultAttributes(this.includeDefaultAttributes);
176+
}
177+
153178
this.contentHandler.initialize();
154179
this.readDocument();
155180
} catch (error) {
@@ -314,6 +339,7 @@ export class SAXParser {
314339
!this.inComment && !this.inDoctype && !this.inProcessingInstruction) {
315340
throw new Error('Entity reference not allowed in this context');
316341
}
342+
const textForValidation: string = this.characterRun ? this.normalizeCharacterRun(this.characterRun) : '';
317343
this.cleanCharacterRun();
318344
this.pointer++; // skip '&'
319345
let name: string = '';
@@ -588,6 +614,7 @@ export class SAXParser {
588614
}
589615

590616
endElement() {
617+
const textForValidation: string = this.characterRun ? this.normalizeCharacterRun(this.characterRun) : '';
591618
this.cleanCharacterRun();
592619
this.pointer += 2; // skip '</'
593620
let name: string = '';
@@ -624,7 +651,7 @@ export class SAXParser {
624651
const elementValidationResult = this.grammarHandler.getGrammar().validateElement(name, {
625652
attributes: new Map(),
626653
childrenNames: actualChildrenNames, // Pass the real child element names
627-
textContent: this.characterRun,
654+
textContent: textForValidation,
628655
attributeOnly: false
629656
});
630657

ts/grammar/CompositeGrammar.ts

Lines changed: 324 additions & 136 deletions
Large diffs are not rendered by default.

ts/grammar/GrammarHandler.ts

Lines changed: 141 additions & 73 deletions
Large diffs are not rendered by default.

ts/schema/Element.ts

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@
1111
*******************************************************************************/
1212

1313
import { } from "../grammar/Grammar";
14+
import { XMLUtils } from "../XMLUtils";
1415
import { SchemaType } from "./SchemaType";
1516
import { XMLSchemaGrammar } from "./XMLSchemaGrammar";
16-
import { XMLUtils } from "../XMLUtils";
1717

1818
export class SchemaElementDecl {
1919
private name: string;
@@ -27,6 +27,7 @@ export class SchemaElementDecl {
2727
private defaultValue?: string;
2828
private fixedValue?: string;
2929
private form: 'qualified' | 'unqualified' = 'unqualified';
30+
private namespaceURI?: string;
3031

3132
constructor(name: string) {
3233
this.validateName(name);
@@ -133,32 +134,40 @@ export class SchemaElementDecl {
133134
return this.fixedValue || this.defaultValue;
134135
}
135136

137+
/**
 * Returns the namespace URI stored on this element declaration.
 *
 * @returns the stored namespace URI, or undefined if none has been set.
 */
getNamespaceURI(): string | undefined {
138+
return this.namespaceURI;
139+
}
140+
141+
/**
 * Stores the namespace URI for this element declaration.
 * The value is assigned as given, with no validation or normalization here.
 *
 * @param namespaceURI the namespace URI to store; passing undefined clears it.
 */
setNamespaceURI(namespaceURI: string | undefined): void {
142+
this.namespaceURI = namespaceURI;
143+
}
144+
136145
// Resolve this element's type.
// - If a type object is already attached, it is returned directly and the grammar is not consulted.
// - Otherwise, if only a type name is set, the result of grammar.getTypeDefinition(typeName)
//   is returned as-is; the looked-up type is not stored back on this declaration.
// - If neither is set, undefined is returned.
137146
resolveType(grammar: XMLSchemaGrammar): SchemaType | undefined {
138147
if (this.type) {
139148
return this.type;
140149
}
141-
150+
142151
if (this.typeName) {
143152
return grammar.getTypeDefinition(this.typeName);
144153
}
145-
154+
146155
return undefined;
147156
}
148157

149158
// Schema validation methods
150-
159+
151160
private validateName(name: string): void {
152161
if (!name) {
153162
throw new Error('Element name cannot be empty');
154163
}
155-
164+
156165
// XML Schema spec: Element names must be valid NCNames (no spaces, no colons except for qualified names)
157166
// Check for invalid characters first (like spaces)
158167
if (name.includes(' ') || name.includes('\t') || name.includes('\n') || name.includes('\r')) {
159168
throw new Error(`Element name '${name}' contains invalid whitespace characters - XML Schema element names must be valid NCNames`);
160169
}
161-
170+
162171
// Skip validation for Clark notation (expanded QNames like {namespace}localName)
163172
if (name.startsWith('{')) {
164173
const closeBrace = name.indexOf('}');
@@ -171,14 +180,14 @@ export class SchemaElementDecl {
171180
return;
172181
}
173182
}
174-
183+
175184
// Handle qualified names (prefix:localName) vs NCNames
176185
const colonIndex = name.indexOf(':');
177186
if (colonIndex !== -1) {
178187
// Qualified name - validate both prefix and local name as NCNames
179188
const prefix = name.substring(0, colonIndex);
180189
const localName = name.substring(colonIndex + 1);
181-
190+
182191
if (!XMLUtils.isValidNCName(prefix)) {
183192
throw new Error(`Element name prefix '${prefix}' is not a valid NCName`);
184193
}
@@ -214,12 +223,12 @@ export class SchemaElementDecl {
214223
this.validateName(this.name);
215224
this.validateOccurrence(this.minOccurs, this.maxOccurs);
216225
this.validateValueConstraints(this.defaultValue, this.fixedValue);
217-
226+
218227
// Schema spec: abstract elements cannot have default/fixed values
219228
if (this.abstract && (this.defaultValue !== undefined || this.fixedValue !== undefined)) {
220229
throw new Error('Abstract elements cannot have default or fixed values');
221230
}
222-
231+
223232
// Schema spec: an inline type and a type reference are mutually exclusive on an element
224233
if (this.type !== undefined && this.typeName !== undefined) {
225234
throw new Error('Element cannot have both inline type and type reference');

0 commit comments

Comments
 (0)