Skip to content

Commit a28b65f

Browse files
committed
Fixed attribute values retrieval
1 parent 94f82c5 commit a28b65f

12 files changed

Lines changed: 203 additions & 140 deletions

package.json

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,7 @@
1717
"scripts": {
1818
"build": "tsc && cp ts/grammar/*.json dist/grammar/",
1919
"testDtd": "npm run build && node dist/tests/DTDTestSuite.js",
20-
"testSchema": "npm run build && node dist/tests/XMLSchemaTestSuite.js",
21-
"testRelaxNG": "npm run build && node dist/tests/RelaxNGTestRunner.js"
20+
"testSchema": "npm run build && node dist/tests/XMLSchemaTestSuite.js"
2221
},
2322
"author": {
2423
"name": "Rodolfo M. Raya",

ts/Constants.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,5 +35,5 @@ export class Constants {
3535
static readonly XMLNS_NS: string = 'http://www.w3.org/2000/xmlns/';
3636
static readonly XML_NS: string = 'http://www.w3.org/XML/1998/namespace';
3737
static readonly XSD_NS: string = 'http://www.w3.org/2001/XMLSchema-datatypes';
38-
38+
static readonly RELAXNG_NS_URI: string = 'http://relaxng.org/ns/structure/1.0';
3939
}

ts/SAXParser.ts

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,12 @@ import { isAbsolute, join, resolve } from "node:path";
1515
import { tmpdir } from "os";
1616
import { fileURLToPath } from "url";
1717
import { Catalog } from "./Catalog";
18+
import { Constants } from "./Constants";
1819
import { ContentHandler } from "./ContentHandler";
1920
import { FileReader } from "./FileReader";
2021
import { XMLAttribute } from "./XMLAttribute";
2122
import { XMLUtils } from "./XMLUtils";
22-
import { AttributeInfo, Grammar } from "./grammar/Grammar";
23+
import { AttributeInfo, AttributeUse, Grammar } from "./grammar/Grammar";
2324
import { GrammarHandler } from "./grammar/GrammarHandler";
2425

2526
export class SAXParser {
@@ -794,7 +795,25 @@ export class SAXParser {
794795
}
795796

796797
if (target === 'xml-model') {
797-
// implement support for extracting default attributes from RelaxNG schemas
798+
// Extract default attributes from RelaxNG schemas
799+
let atts: Map<string, string> = this.parseAttributes(data);
800+
let href: string = '';
801+
let schemaType: string = '';
802+
for (let [key, value] of atts.entries()) {
803+
if (key === 'href') {
804+
href = value;
805+
}
806+
if (key === 'schematypens') {
807+
schemaType = value;
808+
}
809+
}
810+
if (href !== '' && Constants.RELAXNG_NS_URI === schemaType) {
811+
try {
812+
this.parseRelaxNG(href);
813+
} catch (e: Error | any) {
814+
// do nothing
815+
}
816+
}
798817
}
799818

800819
this.buffer = this.buffer.substring(this.pointer + 2); // skip '?>'
@@ -803,6 +822,10 @@ export class SAXParser {
803822
this.inProcessingInstruction = false;
804823
}
805824

825+
/**
 * Placeholder hook for RelaxNG support. The body is empty, so a call
 * currently has no effect and nothing is read from `href`.
 *
 * @param href schema location supplied by the caller; unused for now
 */
parseRelaxNG(href: string) {
826+
// TODO Silently ignored, not implemented yet
827+
}
828+
806829
parseDoctype() {
807830
this.cleanCharacterRun();
808831
this.inDoctype = true;
@@ -1192,7 +1215,9 @@ export class SAXParser {
11921215
const normalizedValue: string = this.normalizeAttributeByType(currentValue, attributeInfo.datatype);
11931216
result.set(matchingKey, normalizedValue);
11941217
}
1195-
} else if (this.includeDefaultAttributes && attributeInfo.defaultValue !== undefined) {
1218+
} else if (this.includeDefaultAttributes && attributeInfo.defaultValue !== undefined
1219+
&& attributeInfo.use !== AttributeUse.IMPLIED
1220+
&& attributeInfo.use !== AttributeUse.REQUIRED) {
11961221
const targetName = this.buildAttributeNameForInfo(attributeInfo);
11971222
if (!result.has(targetName)) {
11981223
const defaultValue: string = this.normalizeAttributeByType(attributeInfo.defaultValue, attributeInfo.datatype);

ts/dtd/AttListDecl.ts

Lines changed: 102 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -37,95 +37,53 @@ export class AttListDecl implements XMLNode {
3737
}
3838

3939
parseAttributes(text: string) {
40-
let parts: string[] = this.split(text);
41-
let index: number = 0;
42-
while (index < parts.length) {
43-
let name: string = parts[index++];
40+
const parts: string[] = this.split(text);
41+
const state = { index: 0 };
42+
43+
while (state.index < parts.length) {
44+
const name: string = parts[state.index++];
45+
if (!name) {
46+
continue;
47+
}
4448

45-
// Validate attribute name
4649
if (!XMLUtils.isValidXMLName(name)) {
4750
throw new Error(`Invalid attribute name in ATTLIST declaration: "${name}"`);
4851
}
4952

50-
let attType: string = parts[index++];
53+
if (state.index >= parts.length) {
54+
throw new Error(`Missing attribute type for attribute "${name}"`);
55+
}
56+
57+
let attType: string = this.readAttributeType(parts, state);
5158
let defaultDecl: string = '';
5259
let defaultValue: string = '';
5360

54-
if (AttListDecl.attTypes.includes(attType)) {
55-
// Standard attribute type
56-
if (index < parts.length) {
57-
let nextPart = parts[index++];
58-
if (nextPart === '#REQUIRED' || nextPart === '#IMPLIED') {
59-
defaultDecl = nextPart;
60-
} else if (nextPart === '#FIXED') {
61-
defaultDecl = nextPart;
62-
if (index < parts.length) {
63-
defaultValue = parts[index++];
64-
if (defaultValue.startsWith('"') && defaultValue.endsWith('"')) {
65-
defaultValue = defaultValue.substring(1, defaultValue.length - 1);
66-
}
67-
}
68-
} else if (nextPart && ((nextPart.startsWith('"') && nextPart.endsWith('"')) || (nextPart.startsWith("'") && nextPart.endsWith("'")))) {
69-
// Direct default value
70-
defaultDecl = nextPart;
71-
defaultValue = nextPart.substring(1, nextPart.length - 1); // Remove quotes
72-
} else {
73-
// Invalid: unquoted default value
74-
throw new Error(`Invalid attribute declaration: default value "${nextPart}" must be quoted`);
75-
}
76-
}
77-
} else {
78-
if (attType === 'NOTATION') {
79-
// Parse the notations in the enumeration that follows
80-
if (index < parts.length) {
81-
let notations = parts[index++]; // This should be like "(notation1|notation2|notation3)"
82-
attType = 'NOTATION ' + notations; // Store the full notation enumeration as the type
83-
if (index < parts.length) {
84-
let nextPart = parts[index++];
85-
if (nextPart === '#REQUIRED' || nextPart === '#IMPLIED') {
86-
defaultDecl = nextPart;
87-
} else if (nextPart === '#FIXED') {
88-
defaultDecl = nextPart;
89-
if (index < parts.length) {
90-
defaultValue = parts[index++];
91-
if (defaultValue.startsWith('"') && defaultValue.endsWith('"')) {
92-
defaultValue = defaultValue.substring(1, defaultValue.length - 1);
93-
}
94-
}
95-
} else if (nextPart && nextPart.startsWith('"') && nextPart.endsWith('"')) {
96-
// Direct default value
97-
defaultDecl = nextPart;
98-
defaultValue = nextPart.substring(1, nextPart.length - 1); // Remove quotes
99-
} else {
100-
defaultDecl = nextPart || '';
101-
}
102-
}
103-
}
104-
} else {
105-
// Handle other enumeration types (values in parentheses)
106-
if (index < parts.length) {
107-
let nextPart = parts[index++];
108-
if (nextPart === '#REQUIRED' || nextPart === '#IMPLIED') {
109-
defaultDecl = nextPart;
110-
} else if (nextPart === '#FIXED') {
111-
defaultDecl = nextPart;
112-
if (index < parts.length) {
113-
defaultValue = parts[index++];
114-
if (defaultValue.startsWith('"') && defaultValue.endsWith('"')) {
115-
defaultValue = defaultValue.substring(1, defaultValue.length - 1);
116-
}
117-
}
118-
} else if (nextPart && nextPart.startsWith('"') && nextPart.endsWith('"')) {
119-
// Direct default value
120-
defaultDecl = nextPart;
121-
defaultValue = nextPart.substring(1, nextPart.length - 1); // Remove quotes
61+
if (state.index < parts.length) {
62+
const nextPart = parts[state.index];
63+
if (nextPart === '#REQUIRED' || nextPart === '#IMPLIED') {
64+
defaultDecl = nextPart;
65+
state.index++;
66+
} else if (nextPart === '#FIXED') {
67+
defaultDecl = nextPart;
68+
state.index++;
69+
if (state.index < parts.length) {
70+
const valueToken = parts[state.index++];
71+
if (this.isQuotedValue(valueToken)) {
72+
defaultValue = this.trimQuotes(valueToken);
12273
} else {
123-
defaultDecl = nextPart || '';
74+
defaultValue = valueToken;
12475
}
12576
}
77+
} else if (nextPart && this.isQuotedValue(nextPart)) {
78+
defaultDecl = nextPart;
79+
defaultValue = this.trimQuotes(nextPart);
80+
state.index++;
81+
} else if (nextPart) {
82+
throw new Error(`Invalid attribute declaration: default value "${nextPart}" must be quoted`);
12683
}
12784
}
128-
let att: AttDecl = new AttDecl(name, attType, defaultDecl, defaultValue);
85+
86+
const att: AttDecl = new AttDecl(name, attType, defaultDecl, defaultValue);
12987
this.attributes.set(name, att);
13088
}
13189
}
@@ -134,19 +92,19 @@ export class AttListDecl implements XMLNode {
13492
let result: string[] = [];
13593
let word: string = '';
13694
let inQuotes: boolean = false;
95+
let quoteChar: string = '';
13796

13897
for (let i: number = 0; i < text.length; i++) {
13998
let c: string = text.charAt(i);
14099

141-
if (c === '"' && !inQuotes) {
142-
// Start of quoted string
100+
if ((c === '"' || c === "'") && !inQuotes) {
143101
inQuotes = true;
102+
quoteChar = c;
144103
word += c;
145-
} else if (c === '"' && inQuotes) {
146-
// End of quoted string
104+
} else if (inQuotes && c === quoteChar) {
147105
inQuotes = false;
106+
quoteChar = '';
148107
word += c;
149-
// Complete the quoted word
150108
if (word.length > 0) {
151109
result.push(word);
152110
word = '';
@@ -169,6 +127,69 @@ export class AttListDecl implements XMLNode {
169127
return result;
170128
}
171129

130+
/**
 * Reads one attribute type from `parts`, starting at `state.index`, and
 * advances `state.index` past every token it consumes.
 *
 * - `NOTATION` consumes the following token, completes it through
 *   `readParenthesized` and returns `'NOTATION ' + list`.
 * - A token containing `(` is completed through `readParenthesized`.
 * - Any other token is returned unchanged.
 *
 * @param parts tokens of the ATTLIST declaration
 * @param state shared cursor; mutated in place
 * @returns the attribute type text
 * @throws Error when `NOTATION` is the last available token
 */
private readAttributeType(parts: string[], state: { index: number }): string {
131+
let token: string = parts[state.index++];
132+
133+
if (token === 'NOTATION') {
134+
if (state.index >= parts.length) {
135+
throw new Error('Expected NOTATION enumeration in ATTLIST declaration');
136+
}
137+
// NOTE(review): the token is not checked for an opening parenthesis, so
// a bare word after NOTATION is accepted as the list - confirm this is intended.
let enumeration: string = parts[state.index++];
138+
enumeration = this.readParenthesized(enumeration, parts, state);
139+
return 'NOTATION ' + enumeration;
140+
}
141+
142+
// An enumerated type may be spread over several tokens; gather them all.
if (token.includes('(')) {
143+
return this.readParenthesized(token, parts, state);
144+
}
145+
146+
return token;
147+
}
148+
149+
/**
 * Completes a parenthesized list that starts in `initial`. While the running
 * total returned by `countParenthesis` stays positive, the next token from
 * `parts` is appended (separated by one space) and `state.index` advances.
 * The joined text is returned after passing through `normalizeEnumeration`.
 *
 * @param initial first token of the list
 * @param parts tokens of the ATTLIST declaration
 * @param state shared cursor; mutated in place
 * @returns the normalized list text
 * @throws Error when the tokens run out while parentheses are still open
 */
private readParenthesized(initial: string, parts: string[], state: { index: number }): string {
150+
let result = initial;
151+
let balance: number = this.countParenthesis(initial);
152+
153+
// balance > 0 means more '(' than ')' have been seen so far.
while (balance > 0) {
154+
if (state.index >= parts.length) {
155+
throw new Error('Unterminated parenthesized list in ATTLIST declaration');
156+
}
157+
const next: string = parts[state.index++];
158+
result += ' ' + next;
159+
balance += this.countParenthesis(next);
160+
}
161+
162+
return this.normalizeEnumeration(result);
163+
}
164+
165+
/**
 * Returns the number of `(` characters in `value` minus the number of `)`
 * characters. The result is negative when closing parentheses dominate.
 *
 * @param value text to scan
 * @returns the net parenthesis count
 */
private countParenthesis(value: string): number {
166+
let balance: number = 0;
167+
for (const char of value) {
168+
if (char === '(') {
169+
balance++;
170+
} else if (char === ')') {
171+
balance--;
172+
}
173+
}
174+
return balance;
175+
}
176+
177+
/**
 * Produces a compact form of a parenthesized list: whitespace runs are
 * collapsed, whitespace around `|`, after `(` and before `)` is removed,
 * and the result is trimmed.
 *
 * @param value raw list text
 * @returns the compacted text
 */
private normalizeEnumeration(value: string): string {
178+
// Collapse every whitespace run into a single space.
let normalized = value.replace(/\s+/g, ' ');
179+
// Drop whitespace on both sides of each '|' separator.
normalized = normalized.replace(/\s*\|\s*/g, '|');
180+
// Drop whitespace right after '('.
normalized = normalized.replace(/\(\s*/g, '(');
181+
// Drop whitespace right before ')'.
normalized = normalized.replace(/\s*\)/g, ')');
182+
return normalized.trim();
183+
}
184+
185+
/**
 * Tells whether `value` is wrapped in a matching pair of double or single quotes.
 *
 * At least two characters are required: in a one-character token such as `"`
 * the same character is both the first and the last one, and without the
 * length check a lone quote would be reported as a quoted value.
 *
 * @param value token to inspect
 * @returns `true` when the token starts and ends with the same quote character
 */
private isQuotedValue(value: string): boolean {
    if (value.length < 2) {
        return false;
    }
    return (value.startsWith('"') && value.endsWith('"')) || (value.startsWith("'") && value.endsWith("'"));
}
188+
189+
/**
 * Returns `value` without its first and last character when
 * `isQuotedValue(value)` is true; otherwise returns `value` unchanged.
 *
 * @param value token that may be wrapped in quotes
 * @returns the unwrapped text, or the original token
 */
private trimQuotes(value: string): string {
190+
return this.isQuotedValue(value) ? value.substring(1, value.length - 1) : value;
191+
}
192+
172193
getNodeType(): number {
173194
return Constants.ATTRIBUTE_LIST_DECL_NODE;
174195
}

ts/dtd/DTDChoiceModel.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ export class DTDChoiceModel implements DTDContentModel {
5353
// Only check name if choice is DTDElementNameParticle
5454
if (choice instanceof DTDElementNameParticle) {
5555
const name = choice.getName();
56-
if (name !== '#PCDATA' && !XMLUtils.isValidNCName(name)) {
56+
if (name !== '#PCDATA' && !XMLUtils.isValidXMLName(name)) {
5757
return false;
5858
}
5959
}

0 commit comments

Comments
 (0)