Skip to content

Commit 2e0b2a3

Browse files
committed
Improved detection of default attribute values from XML Schema
1 parent 0bec333 commit 2e0b2a3

3 files changed

Lines changed: 378 additions & 103 deletions

File tree

.gitignore

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@ node_modules/
33
dist/
44
.vscode/
55
.scannerwork/
6-
ts/test.ts
7-
ts/testRelaxNG.ts
6+
ts/test*.ts
87
test.xml
98
/tests/
109
catalog/

ts/SAXParser.ts

Lines changed: 152 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ import { StreamReader } from "./StreamReader";
2424
import { StringReader } from "./StringReader";
2525
import { XMLAttribute } from "./XMLAttribute";
2626
import { XMLUtils } from "./XMLUtils";
27-
import { XMLSchemaParser } from "./XMLSchemaParser";
27+
import { XMLSchemaParser, type AttributeDefault } from "./XMLSchemaParser";
2828
import { AttDecl } from "./dtd/AttDecl";
2929
import { DTDGrammar } from "./dtd/DTDGrammar";
3030
import { DTDParser } from "./dtd/DTDParser";
@@ -42,6 +42,8 @@ export interface StreamParseOptions extends ParseSourceOptions {
4242

4343
export type ParserInputSource = FileReader | StringReader | StreamReader;
4444

45+
type SchemaAttributeDefault = AttributeDefault;
46+
4547
export class SAXParser {
4648

4749
private static readonly SUPPORTED_ENCODINGS: Map<string, string> = new Map<string, string>([
@@ -68,6 +70,7 @@ export class SAXParser {
6870
validating: boolean = false;
6971
relaxNGDefaultAttributes: Map<string, Map<string, string>> = new Map<string, Map<string, string>>();
7072
schemaDefaultAttributes: Map<string, Map<string, string>> = new Map<string, Map<string, string>>();
73+
private schemaDefaultAttributeDetails: Map<string, Map<string, SchemaAttributeDefault>> = new Map<string, Map<string, SchemaAttributeDefault>>();
7174
processedSchemaLocations: Set<string> = new Set<string>();
7275
failedSchemaLocations: Set<string> = new Set<string>();
7376
namespaceContextStack: Array<Map<string, string>> = [];
@@ -216,6 +219,7 @@ export class SAXParser {
216219
this.currentFile = currentFilePath || fallbackVirtualPath;
217220
this.relaxNGDefaultAttributes = new Map<string, Map<string, string>>();
218221
this.schemaDefaultAttributes = new Map<string, Map<string, string>>();
222+
this.schemaDefaultAttributeDetails = new Map<string, Map<string, SchemaAttributeDefault>>();
219223
this.processedSchemaLocations = new Set<string>();
220224
this.failedSchemaLocations = new Set<string>();
221225
this.namespaceContextStack = [];
@@ -831,10 +835,20 @@ export class SAXParser {
831835
}
832836

833837
getDefaultAttributes(elementName: string, attributes: Array<XMLAttribute>): Array<XMLAttribute> {
834-
let grammar: Grammar | undefined = this.contentHandler?.getGrammar();
835-
let existingAttributes: Set<string> = new Set<string>();
838+
const grammar: Grammar | undefined = this.contentHandler?.getGrammar();
839+
const namespaceContext: Map<string, string> = this.namespaceContextStack.length > 0 ? this.namespaceContextStack[this.namespaceContextStack.length - 1] : new Map<string, string>();
840+
const existingAttributeNames: Set<string> = new Set<string>();
841+
const existingAttributeKeys: Set<string> = new Set<string>();
836842
attributes.forEach((attr: XMLAttribute) => {
837-
existingAttributes.add(attr.getName());
843+
const attributeName: string = attr.getName();
844+
existingAttributeNames.add(attributeName);
845+
const attributeParts: { prefix?: string; localName: string } = this.splitQualifiedName(attributeName);
846+
let attributeNamespaceUri: string | undefined = undefined;
847+
if (attributeParts.prefix) {
848+
attributeNamespaceUri = namespaceContext.get(attributeParts.prefix);
849+
}
850+
const attributeKey: string = this.buildSchemaAttributeKey(attributeParts.localName, attributeNamespaceUri);
851+
existingAttributeKeys.add(attributeKey);
838852
});
839853

840854
if (grammar) {
@@ -843,7 +857,9 @@ export class SAXParser {
843857
const dtdGrammar: DTDGrammar | undefined = grammar instanceof DTDGrammar ? grammar : undefined;
844858
const declarations: Map<string, AttDecl> | undefined = dtdGrammar?.getElementAttributesMap(elementName);
845859
grammarDefaults.forEach((value: string, key: string) => {
846-
if (existingAttributes.has(key)) {
860+
const grammarParts: { prefix?: string; localName: string } = this.splitQualifiedName(key);
861+
const attributeKey: string = this.buildSchemaAttributeKey(grammarParts.localName, undefined);
862+
if (existingAttributeKeys.has(attributeKey) || existingAttributeNames.has(key)) {
847863
return;
848864
}
849865
let normalizedValue: string;
@@ -855,41 +871,118 @@ export class SAXParser {
855871
normalizedValue = this.normalizeAttributeValue(value, value);
856872
}
857873
attributes.push(new XMLAttribute(key, normalizedValue));
858-
existingAttributes.add(key);
874+
existingAttributeNames.add(key);
875+
existingAttributeKeys.add(attributeKey);
859876
});
860877
}
861878
}
862879

863-
const appendExternalDefaults = (defaults: Map<string, string> | undefined): void => {
864-
if (!defaults) {
865-
return;
866-
}
867-
defaults.forEach((value: string, key: string) => {
868-
if (existingAttributes.has(key)) {
869-
return;
870-
}
871-
const normalizedValue: string = this.normalizeAttributeValue(value, value);
872-
attributes.push(new XMLAttribute(key, normalizedValue));
873-
existingAttributes.add(key);
874-
});
875-
};
876-
877880
const nameParts: { prefix?: string; localName: string } = this.splitQualifiedName(elementName);
878881
const namespaceUri: string | undefined = this.getNamespaceUriForElement(elementName);
879882
if (namespaceUri) {
880-
const namespaceKey: string = `${namespaceUri}|${nameParts.localName}`;
881-
appendExternalDefaults(this.schemaDefaultAttributes.get(namespaceKey));
883+
const namespaceKey: string = namespaceUri + "|" + nameParts.localName;
884+
this.appendSchemaDefaultsForElement(this.schemaDefaultAttributeDetails.get(namespaceKey), attributes, existingAttributeNames, existingAttributeKeys, namespaceUri, nameParts, namespaceContext);
882885
}
883-
appendExternalDefaults(this.schemaDefaultAttributes.get(nameParts.localName));
886+
this.appendSchemaDefaultsForElement(this.schemaDefaultAttributeDetails.get(nameParts.localName), attributes, existingAttributeNames, existingAttributeKeys, namespaceUri, nameParts, namespaceContext);
884887
if (nameParts.localName !== elementName) {
885-
appendExternalDefaults(this.schemaDefaultAttributes.get(elementName));
888+
this.appendSchemaDefaultsForElement(this.schemaDefaultAttributeDetails.get(elementName), attributes, existingAttributeNames, existingAttributeKeys, namespaceUri, nameParts, namespaceContext);
886889
}
887890
if (this.isRelaxNG) {
888-
appendExternalDefaults(this.relaxNGDefaultAttributes.get(elementName));
891+
this.appendRelaxNGDefaultsForElement(this.relaxNGDefaultAttributes.get(elementName), attributes, existingAttributeNames, existingAttributeKeys);
889892
}
890893
return attributes;
891894
}
892895

896+
/**
 * Appends XML Schema default attributes from one lookup result to `attributes`.
 *
 * For every entry in `defaults`, a new XMLAttribute is pushed unless the
 * attribute is already present, judged first by its namespace-aware key and
 * then by the qualified name it would be written with. Both tracking sets are
 * updated for each attribute added, so repeated calls with the same sets do
 * not add the same attribute twice.
 *
 * @param defaults Defaults registered for the element key, or undefined when there are none (no-op).
 * @param attributes Attribute list of the element; mutated in place.
 * @param existingAttributeNames Qualified names already present; mutated in place.
 * @param existingAttributeKeys Keys in the format produced by buildSchemaAttributeKey; mutated in place.
 * @param namespaceUri Namespace URI of the element, forwarded to resolveSchemaAttributeQualifiedName.
 * @param elementNameParts Prefix and local name of the element, forwarded to resolveSchemaAttributeQualifiedName.
 * @param namespaceContext Prefix-to-URI bindings, forwarded to resolveSchemaAttributeQualifiedName.
 */
private appendSchemaDefaultsForElement(defaults: Map<string, SchemaAttributeDefault> | undefined, attributes: Array<XMLAttribute>, existingAttributeNames: Set<string>, existingAttributeKeys: Set<string>, namespaceUri: string | undefined, elementNameParts: { prefix?: string; localName: string }, namespaceContext: Map<string, string>): void {
897+
if (!defaults) {
898+
return;
899+
}
900+
defaults.forEach((info: SchemaAttributeDefault) => {
// First check: the namespace-aware key, so a match does not depend on which prefix is used.
901+
const attributeKey: string = this.buildSchemaAttributeKey(info.localName, info.namespace);
902+
if (existingAttributeKeys.has(attributeKey)) {
903+
return;
904+
}
// Second check: the literal qualified name the default would be written with.
905+
const attributeName: string = this.resolveSchemaAttributeQualifiedName(info, namespaceUri, elementNameParts, namespaceContext);
906+
if (existingAttributeNames.has(attributeName)) {
907+
return;
908+
}
// NOTE(review): the same string is passed for both arguments of normalizeAttributeValue; its second parameter is not visible here - confirm.
909+
const normalizedValue: string = this.normalizeAttributeValue(info.value, info.value);
910+
attributes.push(new XMLAttribute(attributeName, normalizedValue));
911+
existingAttributeNames.add(attributeName);
912+
existingAttributeKeys.add(attributeKey);
913+
});
914+
}
915+
916+
/**
 * Appends RelaxNG default attributes to `attributes`.
 *
 * Each map entry is a name/value pair. An entry is skipped when its name is
 * already in `existingAttributeNames`, or when the key built from only the
 * local part of its name is already in `existingAttributeKeys`. Otherwise a
 * new XMLAttribute is pushed under the entry's name and both sets are updated.
 *
 * NOTE(review): the key is built with an undefined namespace, so a default
 * named "p:name" produces the key "name" and is skipped when that key is
 * already recorded - confirm that ignoring the prefix is intended.
 *
 * @param defaults Name-to-value defaults for the element, or undefined when there are none (no-op).
 * @param attributes Attribute list of the element; mutated in place.
 * @param existingAttributeNames Attribute names already present; mutated in place.
 * @param existingAttributeKeys Keys in the format produced by buildSchemaAttributeKey; mutated in place.
 */
private appendRelaxNGDefaultsForElement(defaults: Map<string, string> | undefined, attributes: Array<XMLAttribute>, existingAttributeNames: Set<string>, existingAttributeKeys: Set<string>): void {
917+
if (!defaults) {
918+
return;
919+
}
920+
defaults.forEach((value: string, key: string) => {
921+
if (existingAttributeNames.has(key)) {
922+
return;
923+
}
// Only the local part of the name is used for the key; any prefix on the default's name is dropped.
924+
const parts: { prefix?: string; localName: string } = this.splitQualifiedName(key);
925+
const attributeKey: string = this.buildSchemaAttributeKey(parts.localName, undefined);
926+
if (existingAttributeKeys.has(attributeKey)) {
927+
return;
928+
}
929+
const normalizedValue: string = this.normalizeAttributeValue(value, value);
930+
attributes.push(new XMLAttribute(key, normalizedValue));
931+
existingAttributeNames.add(key);
932+
existingAttributeKeys.add(attributeKey);
933+
});
934+
}
935+
936+
/**
 * Chooses the qualified name under which a schema default attribute is added.
 *
 * When `info.lexicalName` carries a prefix:
 *  1. the lexical name is returned unchanged if `info.namespace` is not set
 *     and the prefix has a non-empty binding in `namespaceContext`, or if the
 *     prefix is bound to exactly `info.namespace`;
 *  2. otherwise, if `info.namespace` is set and findNamespacePrefix returns a
 *     prefix for it, "prefix:localName" is returned;
 *  3. otherwise the bare `info.localName` is returned.
 *
 * When `info.lexicalName` has no prefix:
 *  1. if `info.namespace` is set and findNamespacePrefix returns a prefix for
 *     it, "prefix:localName" is returned;
 *  2. if `info.namespace` equals the element's `namespaceUri` and the element
 *     has a prefix, the element's prefix is reused;
 *  3. otherwise `info.lexicalName` is returned unchanged.
 *
 * @param info Schema default descriptor (localName, namespace, lexicalName, value).
 * @param namespaceUri Namespace URI of the owning element, if any.
 * @param elementNameParts Prefix and local name of the owning element.
 * @param namespaceContext Prefix-to-URI bindings used for lookups.
 * @returns The attribute name to use when creating the XMLAttribute.
 */
private resolveSchemaAttributeQualifiedName(info: SchemaAttributeDefault, namespaceUri: string | undefined, elementNameParts: { prefix?: string; localName: string }, namespaceContext: Map<string, string>): string {
937+
const lexicalName: string = info.lexicalName;
938+
const attributeNamespace: string | undefined = info.namespace;
939+
const parts: { prefix?: string; localName: string } = this.splitQualifiedName(lexicalName);
940+
if (parts.prefix) {
941+
const mappedNamespace: string | undefined = namespaceContext.get(parts.prefix);
// No namespace recorded for the attribute, but its prefix is bound in the context: keep the name as written.
942+
if (!attributeNamespace && mappedNamespace) {
943+
return lexicalName;
944+
}
// The prefix is bound to the attribute's own namespace: keep the name as written.
945+
if (attributeNamespace && mappedNamespace === attributeNamespace) {
946+
return lexicalName;
947+
}
// The prefix is unbound or bound to a different URI: look for another prefix bound to the attribute's namespace.
948+
if (attributeNamespace) {
949+
const prefix: string | undefined = this.findNamespacePrefix(attributeNamespace, namespaceContext);
950+
if (prefix) {
951+
return prefix + ":" + info.localName;
952+
}
953+
}
// Nothing usable was found: the prefix is dropped and only the local name is returned.
954+
return info.localName;
955+
}
956+
if (attributeNamespace) {
957+
const prefix: string | undefined = this.findNamespacePrefix(attributeNamespace, namespaceContext);
958+
if (prefix) {
959+
return prefix + ":" + info.localName;
960+
}
// Fallback: reuse the element's own prefix when the attribute shares the element's namespace.
961+
if (namespaceUri && attributeNamespace === namespaceUri && elementNameParts.prefix) {
962+
return elementNameParts.prefix + ":" + info.localName;
963+
}
964+
}
965+
return info.lexicalName;
966+
}
967+
968+
/**
 * Finds a non-empty prefix that `context` maps to `namespaceUri`.
 *
 * Entries are visited in the map's iteration order and the first match wins.
 * An entry whose key is the empty string is never returned, even when its URI
 * matches (presumably the default-namespace binding - confirm against
 * buildNamespaceContext).
 *
 * @param namespaceUri Namespace URI to look up.
 * @param context Prefix-to-URI bindings to search.
 * @returns The matching prefix, or undefined when no non-empty prefix maps to the URI.
 */
private findNamespacePrefix(namespaceUri: string, context: Map<string, string>): string | undefined {
969+
for (const entry of context.entries()) {
970+
const prefix: string = entry[0];
971+
const uri: string = entry[1];
972+
if (uri === namespaceUri && prefix !== '') {
973+
return prefix;
974+
}
975+
}
976+
return undefined;
977+
}
978+
979+
/**
 * Builds the lookup key used to detect attributes that are already present.
 *
 * @param localName Local part of the attribute name.
 * @param namespace Namespace URI of the attribute; undefined and the empty string are both treated as "no namespace".
 * @returns "namespace|localName" when a namespace is given, otherwise `localName` alone.
 */
private buildSchemaAttributeKey(localName: string, namespace?: string): string {
980+
if (namespace) {
981+
return namespace + "|" + localName;
982+
}
983+
return localName;
984+
}
985+
893986
private buildNamespaceContext(attributes: Map<string, string>, previousContext?: Map<string, string>): Map<string, string> {
894987
const context: Map<string, string> = previousContext ? new Map<string, string>(previousContext) : new Map<string, string>();
895988
attributes.forEach((value: string, key: string) => {
@@ -1034,8 +1127,22 @@ export class SAXParser {
10341127
}
10351128
try {
10361129
const parser: XMLSchemaParser = XMLSchemaParser.getInstance(this.catalog);
1037-
const defaults: Map<string, Map<string, string>> = parser.collectDefaultAttributes(resolvedPath);
1038-
this.mergeSchemaDefaults(defaults);
1130+
const rawDefaults: Map<string, Map<string, SchemaAttributeDefault>> = parser.collectDefaultAttributes(resolvedPath);
1131+
const convertedDefaults: Map<string, Map<string, SchemaAttributeDefault>> = new Map<string, Map<string, SchemaAttributeDefault>>();
1132+
rawDefaults.forEach((attributeMap: Map<string, SchemaAttributeDefault>, elementKey: string) => {
1133+
const converted: Map<string, SchemaAttributeDefault> = new Map<string, SchemaAttributeDefault>();
1134+
attributeMap.forEach((info: SchemaAttributeDefault, attributeKey: string) => {
1135+
const copy: SchemaAttributeDefault = {
1136+
localName: info.localName,
1137+
namespace: info.namespace,
1138+
lexicalName: info.lexicalName,
1139+
value: info.value
1140+
};
1141+
converted.set(attributeKey, copy);
1142+
});
1143+
convertedDefaults.set(elementKey, converted);
1144+
});
1145+
this.mergeSchemaDefaults(convertedDefaults);
10391146
this.processedSchemaLocations.add(resolvedPath);
10401147
this.processedSchemaLocations.add(identifier);
10411148
return true;
@@ -1099,19 +1206,28 @@ export class SAXParser {
10991206
return undefined;
11001207
}
11011208

1102-
/**
 * Merges defaults collected from one schema into the parser-wide maps.
 *
 * This span is a diff hunk: lines after a gutter ending in "-" belong to the
 * removed implementation, lines after a gutter ending in "+" to the added one.
 *
 * Added implementation: for every element key with at least one default, each
 * entry is copied field by field into `schemaDefaultAttributeDetails`, and its
 * plain value is stored in `schemaDefaultAttributes`.
 *
 * NOTE(review): the removed implementation stored a value only when the
 * attribute name was not yet present (`!target.has(attributeName)`). The added
 * one calls `set` unconditionally, so a later call replaces an earlier value
 * for the same element and attribute - confirm the change in precedence is
 * intended.
 *
 * @param defaults Element key to (attribute key to default) map; element keys with empty maps are skipped.
 */
private mergeSchemaDefaults(defaults: Map<string, Map<string, string>>): void {
1103-
defaults.forEach((attributeMap: Map<string, string>, elementName: string) => {
1209+
private mergeSchemaDefaults(defaults: Map<string, Map<string, SchemaAttributeDefault>>): void {
1210+
defaults.forEach((attributeMap: Map<string, SchemaAttributeDefault>, elementName: string) => {
11041211
if (attributeMap.size === 0) {
11051212
return;
11061213
}
1107-
const target: Map<string, string> = this.schemaDefaultAttributes.get(elementName) ?? new Map<string, string>();
1108-
attributeMap.forEach((value: string, attributeName: string) => {
1109-
if (!target.has(attributeName)) {
1110-
target.set(attributeName, value);
1111-
}
// Added version: reuse the maps already registered for this element key, or start new ones.
1214+
const detailTarget: Map<string, SchemaAttributeDefault> = this.schemaDefaultAttributeDetails.get(elementName) ?? new Map<string, SchemaAttributeDefault>();
1215+
const plainTarget: Map<string, string> = this.schemaDefaultAttributes.get(elementName) ?? new Map<string, string>();
1216+
attributeMap.forEach((info: SchemaAttributeDefault, attributeName: string) => {
// A fresh object is stored, so the parser-wide map does not share references with the input map.
1217+
const copy: SchemaAttributeDefault = {
1218+
localName: info.localName,
1219+
namespace: info.namespace,
1220+
lexicalName: info.lexicalName,
1221+
value: info.value
1222+
};
1223+
detailTarget.set(attributeName, copy);
1224+
plainTarget.set(attributeName, info.value);
11121225
});
1113-
if (target.size > 0) {
1114-
this.schemaDefaultAttributes.set(elementName, target);
1226+
if (detailTarget.size > 0) {
1227+
this.schemaDefaultAttributeDetails.set(elementName, detailTarget);
1228+
}
1229+
if (plainTarget.size > 0) {
1230+
this.schemaDefaultAttributes.set(elementName, plainTarget);
11151231
}
11161232
});
11171233
}

0 commit comments

Comments
 (0)