Skip to content

Commit 09b9934

Browse files
committed
Improved detection of default attribute values from RelaxNG
1 parent 2e0b2a3 commit 09b9934

4 files changed

Lines changed: 324 additions & 139 deletions

File tree

ts/Constants.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ export class Constants {
3333

3434
// RelaxNG Namespace URI
3535
static readonly RELAXNG_NS_URI: string = 'http://relaxng.org/ns/structure/1.0';
36+
static readonly RELAXNG_COMPATIBILITY_NS_URI: string = 'http://relaxng.org/ns/compatibility/annotations/1.0';
3637

3738
// XML Schema instance namespace URI
3839
static readonly XML_SCHEMA_INSTANCE_NS_URI: string = 'http://www.w3.org/2001/XMLSchema-instance';

ts/RelaxNGParser.ts

Lines changed: 223 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,13 @@ import { TextNode } from "./TextNode";
2121
import { XMLAttribute } from "./XMLAttribute";
2222
import { XMLElement } from "./XMLElement";
2323
import { XMLNode } from "./XMLNode";
24+
import { type AttributeDefault } from "./XMLSchemaParser";
25+
26+
/**
 * Pieces of a RelaxNG `<name>` as used by this parser.
 */
type NameInfo = {
    /** Name exactly as written in the schema, including any prefix. */
    lexicalName: string;
    /** Part of the name after the first ":"; equals `lexicalName` when there is no ":". */
    localName: string;
    /** Namespace URI when one is known; absent otherwise. */
    namespace?: string;
};
2431

2532
export class RelaxNGParser {
2633

@@ -31,8 +38,6 @@ export class RelaxNGParser {
3138
private defaultNamespace: string = Constants.RELAXNG_NS_URI;
3239
private definitions: Map<string, XMLElement> = new Map();
3340
private elements: XMLElement[] = [];
34-
private attributes: XMLElement[] = [];
35-
private visited: Set<string> = new Set();
3641
private divsRemoved: boolean = false;
3742

3843
constructor(schemaPath: string, catalog?: Catalog) {
@@ -74,59 +79,144 @@ export class RelaxNGParser {
7479
this.divsRemoved = false;
7580
this.removeDivs(this.root);
7681
} while (this.divsRemoved);
77-
this.nameAttribute(this.root);
82+
this.nameAttribute(this.root, new Map<string, string>());
7883
}
7984

80-
getElements(): Map<string, Map<string, string>> {
81-
const result: Map<string, Map<string, string>> = new Map();
85+
getElements(): Map<string, Map<string, AttributeDefault>> {
86+
const result: Map<string, Map<string, AttributeDefault>> = new Map<string, Map<string, AttributeDefault>>();
8287

83-
this.definitions = new Map();
88+
this.definitions = new Map<string, XMLElement>();
8489
this.harvestDefinitions(this.root);
8590

8691
this.elements = [];
8792
this.harvestElements(this.root);
8893

8994
for (const element of this.elements) {
90-
this.attributes = [];
91-
this.visited = new Set();
92-
this.getAttributes(element);
93-
94-
const defaults: Map<string, string> = new Map();
95-
for (const attribute of this.attributes) {
96-
const nameElement: XMLElement | undefined = this.findChildByLocalName(attribute, "name");
97-
if (!nameElement) {
98-
continue;
99-
}
100-
const attributeName: string = nameElement.getText().trim();
101-
if (!attributeName) {
102-
continue;
103-
}
104-
if (attributeName.indexOf(":") !== -1 && !attributeName.startsWith("xml:")) {
105-
continue;
106-
}
107-
108-
const defaultValue: string | undefined = this.findDefaultValue(attribute);
109-
if (defaultValue !== undefined) {
110-
defaults.set(attributeName, defaultValue);
111-
}
95+
const nameElement: XMLElement | undefined = this.findChildByLocalName(element, "name");
96+
if (!nameElement) {
97+
continue;
98+
}
99+
const elementInfo: NameInfo | undefined = this.extractNameInfo(nameElement);
100+
if (!elementInfo) {
101+
continue;
112102
}
113103

104+
const defaults: Map<string, AttributeDefault> = new Map<string, AttributeDefault>();
105+
const visitedRefs: Set<string> = new Set<string>();
106+
this.collectAttributeDefaultsFromPattern(element, defaults, visitedRefs, true);
114107
if (defaults.size === 0) {
115108
continue;
116109
}
117110

118-
const elementNameElement: XMLElement | undefined = this.findChildByLocalName(element, "name");
119-
if (!elementNameElement) {
111+
this.storeElementDefaults(result, elementInfo, defaults);
112+
}
113+
114+
return result;
115+
}
116+
117+
/**
 * Records the attribute defaults of one element in `result` under up to
 * three keys: its lexical name, its local name and, when a namespace is
 * known, the key produced by `buildAttributeKey`.
 *
 * Each key receives its own copy of `defaults`.
 *
 * @param result map being filled, keyed by element name
 * @param elementInfo name parts of the element the defaults belong to
 * @param defaults defaults collected for that element
 */
private storeElementDefaults(result: Map<string, Map<string, AttributeDefault>>, elementInfo: NameInfo, defaults: Map<string, AttributeDefault>): void {
    const { lexicalName, localName, namespace } = elementInfo;

    // The lexical name is always (re)written, so a later element replaces an earlier one.
    result.set(lexicalName, this.cloneAttributeDefaultMap(defaults));

    // The bare local name never overwrites an existing entry.
    if (!result.has(localName)) {
        result.set(localName, this.cloneAttributeDefaultMap(defaults));
    }

    if (!namespace) {
        return;
    }
    result.set(this.buildAttributeKey(localName, namespace), this.cloneAttributeDefaultMap(defaults));
}
127+
128+
/**
 * Builds a new map with the same keys as `source`, where every entry is a
 * fresh object carrying the four `AttributeDefault` fields of the original.
 *
 * @param source map to copy; it is not modified
 * @returns the independent copy
 */
private cloneAttributeDefaultMap(source: Map<string, AttributeDefault>): Map<string, AttributeDefault> {
    const copy: Map<string, AttributeDefault> = new Map<string, AttributeDefault>();
    source.forEach((entry: AttributeDefault, entryKey: string) => {
        const duplicate: AttributeDefault = {
            localName: entry.localName,
            namespace: entry.namespace,
            lexicalName: entry.lexicalName,
            value: entry.value
        };
        copy.set(entryKey, duplicate);
    });
    return copy;
}
140+
141+
/**
 * Walks a pattern recursively and gathers attribute defaults into `defaults`.
 *
 * - `attribute`: handed to `addAttributeDefault`; its children are not walked.
 * - `ref` / `parentRef`: the `name` attribute is looked up in
 *   `this.definitions`; each name is followed at most once per `visitedRefs`.
 * - `element`: walked only while `allowElementTraversal` is true, and its
 *   children are then walked with traversal disabled, so attributes of
 *   nested elements are not collected.
 * - anything else: children are walked with the flag unchanged.
 *
 * @param pattern pattern to inspect
 * @param defaults accumulator that receives the defaults found
 * @param visitedRefs names of references already followed
 * @param allowElementTraversal whether an `element` pattern may be entered
 */
private collectAttributeDefaultsFromPattern(pattern: XMLElement, defaults: Map<string, AttributeDefault>, visitedRefs: Set<string>, allowElementTraversal: boolean): void {
    const kind: string = this.getLocalNameFromElement(pattern);
    let descendIntoElements: boolean = allowElementTraversal;

    switch (kind) {
        case "attribute":
            this.addAttributeDefault(pattern, defaults);
            return;
        case "ref":
        case "parentRef": {
            const target: string | undefined = pattern.getAttribute("name")?.getValue();
            if (!target || visitedRefs.has(target)) {
                return;
            }
            // Mark before recursing so circular references terminate.
            visitedRefs.add(target);
            const definition: XMLElement | undefined = this.definitions.get(target);
            if (definition) {
                this.collectAttributeDefaultsFromPattern(definition, defaults, visitedRefs, allowElementTraversal);
            }
            return;
        }
        case "element":
            if (!allowElementTraversal) {
                return;
            }
            descendIntoElements = false;
            break;
        default:
            break;
    }

    for (const child of pattern.getChildren()) {
        if (child.getNodeType() === Constants.ELEMENT_NODE) {
            this.collectAttributeDefaultsFromPattern(child as XMLElement, defaults, visitedRefs, descendIntoElements);
        }
    }
}
122174

123-
const elementName: string = elementNameElement.getText().trim();
124-
if (elementName) {
125-
result.set(elementName, defaults);
126-
}
175+
/**
 * Adds the default value of one `attribute` pattern to `defaults`.
 *
 * Nothing is added when the pattern has no default value, has no `name`
 * child, or when that name yields no usable name information.
 *
 * @param attributeElement the `attribute` pattern to inspect
 * @param defaults accumulator updated through `setAttributeDefault`
 */
private addAttributeDefault(attributeElement: XMLElement, defaults: Map<string, AttributeDefault>): void {
    const value: string | undefined = this.findDefaultValue(attributeElement);
    if (value === undefined) {
        return;
    }

    const nameChild: XMLElement | undefined = this.findChildByLocalName(attributeElement, "name");
    const parsedName: NameInfo | undefined = nameChild ? this.extractNameInfo(nameChild) : undefined;
    if (!parsedName) {
        return;
    }

    this.setAttributeDefault(defaults, {
        localName: parsedName.localName,
        namespace: parsedName.namespace,
        lexicalName: parsedName.lexicalName,
        value: value
    });
}
128196

129-
return result;
197+
/**
 * Splits the text of a `name` element into lexical name, local name and
 * namespace.
 *
 * The namespace is taken from the element's `ns` attribute. If that is
 * missing or empty and the name carries the `xml` prefix, the XML
 * namespace URI is used. Other prefixes are not resolved here.
 *
 * @param nameElement the `name` element to read
 * @returns the name parts, or `undefined` when the trimmed text is empty
 */
private extractNameInfo(nameElement: XMLElement): NameInfo | undefined {
    const lexicalName: string = nameElement.getText().trim();
    if (lexicalName.length === 0) {
        return undefined;
    }

    const colonAt: number = lexicalName.indexOf(":");
    const hasPrefix: boolean = colonAt !== -1;
    const localName: string = hasPrefix ? lexicalName.substring(colonAt + 1) : lexicalName;

    let namespace: string | undefined = nameElement.getAttribute("ns")?.getValue();
    if (hasPrefix && !namespace && lexicalName.substring(0, colonAt) === "xml") {
        namespace = "http://www.w3.org/XML/1998/namespace";
    }

    return {
        lexicalName: lexicalName,
        localName: localName,
        // An empty namespace string is reported as "no namespace".
        namespace: namespace ? namespace : undefined
    };
}
131221

132222
private findDefaultValue(attribute: XMLElement): string | undefined {
@@ -135,9 +225,51 @@ export class RelaxNGParser {
135225
return attr.getValue();
136226
}
137227
}
228+
return this.findDefaultValueFromChildren(attribute);
229+
}
230+
231+
/**
 * Looks for a child element whose local name is `defaultValue` and returns
 * its trimmed text. Only the local name is compared.
 *
 * @param attribute the `attribute` pattern whose children are scanned
 * @returns text of the first matching child, or `undefined` if none matches
 */
private findDefaultValueFromChildren(attribute: XMLElement): string | undefined {
    for (const candidate of attribute.getChildren()) {
        if (this.getLocalNameFromElement(candidate) !== "defaultValue") {
            continue;
        }
        return candidate.getText().trim();
    }
    return undefined;
}
140239

240+
/**
 * Stores `value` in `target` under the key built from its local name and
 * namespace, first dropping entries it supersedes.
 *
 * An existing entry with the same local name is dropped when it is stored
 * under a different key and either
 * - has the same namespace as `value`, or
 * - has no namespace while `value` has one.
 *
 * A copy of `value` is stored, not the object passed in.
 *
 * @param target map of defaults to update
 * @param value default to store
 */
private setAttributeDefault(target: Map<string, AttributeDefault>, value: AttributeDefault): void {
    const key: string = this.buildAttributeKey(value.localName, value.namespace);

    // Keys are gathered first and deleted after the scan.
    const superseded: string[] = [];
    target.forEach((current: AttributeDefault, currentKey: string) => {
        if (current.localName !== value.localName) {
            return;
        }
        const namespacesMatch: boolean = current.namespace === value.namespace;
        if (namespacesMatch && currentKey === key) {
            // Same slot: the set() below overwrites it.
            return;
        }
        const qualifiedReplacesPlain: boolean = Boolean(value.namespace) && !current.namespace;
        if (namespacesMatch || qualifiedReplacesPlain) {
            superseded.push(currentKey);
        }
    });
    superseded.forEach((staleKey: string) => {
        target.delete(staleKey);
    });

    const stored: AttributeDefault = {
        localName: value.localName,
        namespace: value.namespace,
        lexicalName: value.lexicalName,
        value: value.value
    };
    target.set(key, stored);
}
265+
266+
/**
 * Builds a lookup key from a name and an optional namespace.
 *
 * @param name local name
 * @param namespace namespace URI; when missing or empty the key is just `name`
 * @returns `namespace|name` when a namespace is given, otherwise `name`
 */
private buildAttributeKey(name: string, namespace?: string): string {
    return namespace ? `${namespace}|${name}` : name;
}
272+
141273
private removeForeign(element: XMLElement): void {
142274
const newContent: XMLNode[] = [];
143275
for (const node of element.getContent()) {
@@ -150,6 +282,9 @@ export class RelaxNGParser {
150282
if (nodeType === Constants.ELEMENT_NODE) {
151283
const child: XMLElement = node as XMLElement;
152284
if (!this.isRelaxNGElement(child)) {
285+
if (this.isCompatibilityAnnotation(child)) {
286+
newContent.push(child);
287+
}
153288
continue;
154289
}
155290
this.removeForeign(child);
@@ -283,35 +418,12 @@ export class RelaxNGParser {
283418
}
284419
}
285420

286-
private getAttributes(element: XMLElement): void {
287-
const localName: string = this.getLocalNameFromElement(element);
288-
if (localName === "attribute") {
289-
this.attributes.push(element);
290-
return;
291-
}
292-
if (localName === "ref") {
293-
const nameAttr: XMLAttribute | undefined = element.getAttribute("name");
294-
const refName: string | undefined = nameAttr?.getValue();
295-
if (refName && !this.visited.has(refName)) {
296-
this.visited.add(refName);
297-
const definition: XMLElement | undefined = this.definitions.get(refName);
298-
if (definition) {
299-
this.getAttributes(definition);
300-
}
301-
}
302-
return;
303-
}
304-
for (const child of element.getChildren()) {
305-
if (this.getLocalNameFromElement(child) === "element") {
306-
return;
307-
}
308-
this.getAttributes(child);
309-
}
310-
}
311-
312-
private nameAttribute(element: XMLElement): void {
421+
private nameAttribute(element: XMLElement, context: Map<string, string>): void {
422+
const currentContext: Map<string, string> = this.augmentNamespaceContext(context, element);
313423
const localName: string = this.getLocalNameFromElement(element);
314-
if ((localName === "element" || localName === "attribute") && element.hasAttribute("name")) {
424+
const isElementPattern: boolean = localName === "element";
425+
const isAttributePattern: boolean = localName === "attribute";
426+
if ((isElementPattern || isAttributePattern) && element.hasAttribute("name")) {
315427
const nameValue: string = element.getAttribute("name")?.getValue() ?? "";
316428
const nameElement: XMLElement = this.createRelaxNGElement("name");
317429
nameElement.addString(nameValue);
@@ -320,17 +432,56 @@ export class RelaxNGParser {
320432
if (nsAttr) {
321433
nameElement.setAttribute(new XMLAttribute("ns", nsAttr.getValue()));
322434
element.removeAttribute("ns");
435+
} else {
436+
const resolvedNamespace: string | undefined = this.resolveNamespaceBinding(nameValue, currentContext, isElementPattern, isAttributePattern);
437+
if (resolvedNamespace) {
438+
nameElement.setAttribute(new XMLAttribute("ns", resolvedNamespace));
439+
}
323440
}
324441

325442
element.removeAttribute("name");
326443
const content: XMLNode[] = [nameElement, ...element.getContent()];
327444
element.setContent(content);
328445
}
329446
for (const child of element.getChildren()) {
330-
this.nameAttribute(child);
447+
this.nameAttribute(child, currentContext);
331448
}
332449
}
333450

451+
/**
 * Returns a copy of `baseContext` extended with the namespace declarations
 * found on `element`.
 *
 * `xmlns` is stored under the empty prefix and `xmlns:p` under `p`. The
 * `xml` prefix is bound to the XML namespace URI unless already present.
 * `baseContext` itself is left untouched.
 *
 * @param baseContext prefix-to-URI bindings inherited from the parent
 * @param element element whose attributes are scanned
 * @returns the bindings in effect for `element`
 */
private augmentNamespaceContext(baseContext: Map<string, string>, element: XMLElement): Map<string, string> {
    const scope: Map<string, string> = new Map<string, string>(baseContext);
    const declarationPrefix: string = "xmlns:";

    for (const attribute of element.getAttributes()) {
        const name: string = attribute.getName();
        if (name === "xmlns") {
            scope.set("", attribute.getValue());
        } else if (name.startsWith(declarationPrefix)) {
            scope.set(name.substring(declarationPrefix.length), attribute.getValue());
        }
    }

    if (!scope.has("xml")) {
        scope.set("xml", "http://www.w3.org/XML/1998/namespace");
    }
    return scope;
}
469+
470+
/**
 * Looks up the namespace URI for a possibly prefixed name.
 *
 * - Prefixed name: returns the binding of the prefix in `context`.
 * - Unprefixed name of an attribute pattern (and not an element pattern):
 *   returns `undefined`.
 * - Any other unprefixed name: returns the binding of the empty prefix.
 *
 * NOTE(review): the RELAX NG simplification rules take the namespace of an
 * unprefixed name from the inherited `ns` attribute rather than from
 * `xmlns`; confirm that using the default namespace here is intended.
 *
 * @param lexicalName name as written in the schema
 * @param context prefix-to-URI bindings in scope
 * @param isElementPattern whether the name belongs to an `element` pattern
 * @param isAttributePattern whether the name belongs to an `attribute` pattern
 * @returns the namespace URI, or `undefined` when none is bound
 */
private resolveNamespaceBinding(lexicalName: string, context: Map<string, string>, isElementPattern: boolean, isAttributePattern: boolean): string | undefined {
    const colonAt: number = lexicalName.indexOf(":");
    if (colonAt !== -1) {
        return context.get(lexicalName.substring(0, colonAt));
    }
    if (!isElementPattern && isAttributePattern) {
        return undefined;
    }
    return context.get("");
}
484+
334485
private resolveHref(href: string): string | undefined {
335486
if (!href) {
336487
return undefined;
@@ -409,6 +560,14 @@ export class RelaxNGParser {
409560
return index === -1 ? "" : name.substring(0, index);
410561
}
411562

563+
/**
 * Tells whether `element` should be treated as a compatibility annotation,
 * which is currently any element whose local name is `defaultValue`.
 *
 * NOTE(review): only the local name is checked; the element's namespace is
 * not compared against the RelaxNG compatibility annotations namespace.
 *
 * @param element element to test
 * @returns `true` when the local name is `defaultValue`
 */
private isCompatibilityAnnotation(element: XMLElement): boolean {
    return this.getLocalNameFromElement(element) === "defaultValue";
}
570+
412571
private isRelaxNGElement(element: XMLElement): boolean {
413572
const prefix: string = this.getPrefix(element);
414573
if (this.defaultPrefix) {

0 commit comments

Comments
 (0)