Skip to content

Commit 0ab46c8

Browse files
committed
MLE-26427 Namespace support for exclusions
gitflow-feature-stash: namespacaes
1 parent e609381 commit 0ab46c8

File tree

6 files changed

+201
-41
lines changed

6 files changed

+201
-41
lines changed

marklogic-client-api/src/main/java/com/marklogic/client/datamovement/filter/ContentExclusionUtil.java

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import org.w3c.dom.Node;
1616
import org.w3c.dom.NodeList;
1717

18+
import javax.xml.namespace.NamespaceContext;
1819
import javax.xml.namespace.QName;
1920
import javax.xml.parsers.DocumentBuilder;
2021
import javax.xml.transform.OutputKeys;
@@ -29,6 +30,8 @@
2930
import java.io.ByteArrayInputStream;
3031
import java.io.StringWriter;
3132
import java.nio.charset.StandardCharsets;
33+
import java.util.Iterator;
34+
import java.util.Map;
3235

3336
/**
3437
* Utility class for applying content exclusions to documents before hash calculation.
@@ -99,23 +102,28 @@ private static void removeNodeAtPointer(String uri, JsonNode rootNode, String js
99102
*
100103
* @param uri the document URI (used for logging purposes)
101104
* @param xmlContent the XML content as a string
105+
* @param namespaces a map of namespace prefixes to URIs for use in XPath expressions, or null
102106
* @param xpathExpressions array of XPath expressions identifying elements to exclude
103107
* @return the modified XML content with specified elements removed
104108
* @throws Exception if the XML content cannot be parsed or serialized
105109
*/
106-
static String applyXmlExclusions(String uri, String xmlContent, String... xpathExpressions) throws Exception {
110+
static String applyXmlExclusions(String uri, String xmlContent, Map<String, String> namespaces, String... xpathExpressions) throws Exception {
107111
if (xpathExpressions == null || xpathExpressions.length == 0) {
108112
return xmlContent;
109113
}
110114

111115
DocumentBuilder builder = XmlFactories.getDocumentBuilderFactory().newDocumentBuilder();
112116
Document document = builder.parse(new ByteArrayInputStream(xmlContent.getBytes(StandardCharsets.UTF_8)));
113-
applyXmlExclusions(uri, document, xpathExpressions);
117+
applyXmlExclusions(uri, document, namespaces, xpathExpressions);
114118
return serializeDocument(document);
115119
}
116120

117-
private static void applyXmlExclusions(String uri, Document document, String[] xpathExpressions) {
121+
private static void applyXmlExclusions(String uri, Document document, Map<String, String> namespaces, String[] xpathExpressions) {
118122
final XPath xpath = XmlFactories.getXPathFactory().newXPath();
123+
if (namespaces != null && !namespaces.isEmpty()) {
124+
xpath.setNamespaceContext(new SimpleNamespaceContext(namespaces));
125+
}
126+
119127
for (String xpathExpression : xpathExpressions) {
120128
try {
121129
XPathExpression expr = xpath.compile(xpathExpression);

marklogic-client-api/src/main/java/com/marklogic/client/datamovement/filter/IncrementalWriteEvalFilter.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import com.marklogic.client.document.DocumentWriteSet;
1313
import com.marklogic.client.io.JacksonHandle;
1414

15+
import java.util.Map;
1516
import java.util.function.Consumer;
1617

1718
/**
@@ -31,8 +32,8 @@ class IncrementalWriteEvalFilter extends IncrementalWriteFilter {
3132
""";
3233

3334
IncrementalWriteEvalFilter(String hashKeyName, String timestampKeyName, boolean canonicalizeJson,
34-
Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer, String[] jsonExclusions, String[] xmlExclusions) {
35-
super(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions, xmlExclusions);
35+
Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer, String[] jsonExclusions, String[] xmlExclusions, Map<String, String> xmlNamespaces) {
36+
super(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions, xmlExclusions, xmlNamespaces);
3637
}
3738

3839
@Override

marklogic-client-api/src/main/java/com/marklogic/client/datamovement/filter/IncrementalWriteFilter.java

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import java.time.Instant;
2626
import java.util.ArrayList;
2727
import java.util.List;
28+
import java.util.Map;
2829
import java.util.function.Consumer;
2930
import java.util.function.Function;
3031

@@ -51,6 +52,7 @@ public static class Builder {
5152
private Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer;
5253
private String[] jsonExclusions;
5354
private String[] xmlExclusions;
55+
private Map<String, String> xmlNamespaces;
5456

5557
/**
5658
* @param keyName the name of the MarkLogic metadata key that will hold the hash value; defaults to "incrementalWriteHash".
@@ -117,13 +119,22 @@ public Builder xmlExclusions(String... xpathExpressions) {
117119
return this;
118120
}
119121

122+
/**
123+
* @param namespaces a map of namespace prefixes to URIs for use in XPath exclusion expressions.
124+
* For example, Map.of("ns", "http://example.com/ns") allows XPath like "//ns:timestamp".
125+
*/
126+
public Builder xmlNamespaces(Map<String, String> namespaces) {
127+
this.xmlNamespaces = namespaces;
128+
return this;
129+
}
130+
120131
public IncrementalWriteFilter build() {
121132
validateJsonExclusions();
122133
validateXmlExclusions();
123134
if (useEvalQuery) {
124-
return new IncrementalWriteEvalFilter(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions, xmlExclusions);
135+
return new IncrementalWriteEvalFilter(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions, xmlExclusions, xmlNamespaces);
125136
}
126-
return new IncrementalWriteOpticFilter(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions, xmlExclusions);
137+
return new IncrementalWriteOpticFilter(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions, xmlExclusions, xmlNamespaces);
127138
}
128139

129140
private void validateJsonExclusions() {
@@ -151,6 +162,9 @@ private void validateXmlExclusions() {
151162
return;
152163
}
153164
XPath xpath = XmlFactories.getXPathFactory().newXPath();
165+
if (xmlNamespaces != null && !xmlNamespaces.isEmpty()) {
166+
xpath.setNamespaceContext(new SimpleNamespaceContext(xmlNamespaces));
167+
}
154168
for (String xpathExpression : xmlExclusions) {
155169
if (xpathExpression == null || xpathExpression.trim().isEmpty()) {
156170
throw new IllegalArgumentException(
@@ -173,18 +187,20 @@ private void validateXmlExclusions() {
173187
private final Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer;
174188
private final String[] jsonExclusions;
175189
private final String[] xmlExclusions;
190+
private final Map<String, String> xmlNamespaces;
176191

177192
// Hardcoding this for now, with a good general purpose hashing function.
178193
// See https://xxhash.com for benchmarks.
179194
private final LongHashFunction hashFunction = LongHashFunction.xx3();
180195

181-
public IncrementalWriteFilter(String hashKeyName, String timestampKeyName, boolean canonicalizeJson, Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer, String[] jsonExclusions, String[] xmlExclusions) {
196+
public IncrementalWriteFilter(String hashKeyName, String timestampKeyName, boolean canonicalizeJson, Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer, String[] jsonExclusions, String[] xmlExclusions, Map<String, String> xmlNamespaces) {
182197
this.hashKeyName = hashKeyName;
183198
this.timestampKeyName = timestampKeyName;
184199
this.canonicalizeJson = canonicalizeJson;
185200
this.skippedDocumentsConsumer = skippedDocumentsConsumer;
186201
this.jsonExclusions = jsonExclusions;
187202
this.xmlExclusions = xmlExclusions;
203+
this.xmlNamespaces = xmlNamespaces;
188204
}
189205

190206
protected final DocumentWriteSet filterDocuments(Context context, Function<String, String> hashRetriever) {
@@ -260,7 +276,7 @@ private String serializeContent(DocumentWriteOperation doc) {
260276
}
261277
} else if (xmlExclusions != null && xmlExclusions.length > 0) {
262278
try {
263-
content = ContentExclusionUtil.applyXmlExclusions(doc.getUri(), content, xmlExclusions);
279+
content = ContentExclusionUtil.applyXmlExclusions(doc.getUri(), content, xmlNamespaces, xmlExclusions);
264280
} catch (Exception e) {
265281
logger.warn("Unable to apply XML exclusions for URI {}, using original content for hashing; cause: {}",
266282
doc.getUri(), e.getMessage());

marklogic-client-api/src/main/java/com/marklogic/client/datamovement/filter/IncrementalWriteOpticFilter.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@
2020
class IncrementalWriteOpticFilter extends IncrementalWriteFilter {
2121

2222
IncrementalWriteOpticFilter(String hashKeyName, String timestampKeyName, boolean canonicalizeJson,
23-
Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer, String[] jsonExclusions, String[] xmlExclusions) {
24-
super(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions, xmlExclusions);
23+
Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer, String[] jsonExclusions, String[] xmlExclusions, Map<String, String> xmlNamespaces) {
24+
super(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions, xmlExclusions, xmlNamespaces);
2525
}
2626

2727
@Override
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/*
2+
* Copyright (c) 2010-2026 Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved.
3+
*/
4+
package com.marklogic.client.datamovement.filter;
5+
6+
import javax.xml.namespace.NamespaceContext;
7+
import java.util.Iterator;
8+
import java.util.Map;
9+
10+
/**
 * A simple implementation of {@link NamespaceContext} backed by a Map of prefix to namespace URI mappings.
 * Used for XPath evaluation with namespace-qualified expressions.
 * <p>
 * Lookups follow the rules documented on {@link NamespaceContext}: a null argument is rejected with an
 * {@link IllegalArgumentException}, the reserved {@code xml} and {@code xmlns} prefixes always resolve to
 * their fixed namespace URIs, and an unbound prefix resolves to the empty string
 * ({@code XMLConstants.NULL_NS_URI}) rather than null.
 *
 * @since 8.1.0
 */
class SimpleNamespaceContext implements NamespaceContext {

    private final Map<String, String> prefixToNamespaceUri;

    /**
     * @param prefixToNamespaceUri prefix to namespace URI bindings; the map is used as-is (not copied).
     *                             A null map is treated as having no bindings.
     */
    SimpleNamespaceContext(Map<String, String> prefixToNamespaceUri) {
        this.prefixToNamespaceUri = prefixToNamespaceUri != null ? prefixToNamespaceUri : Map.of();
    }

    /**
     * @param prefix the prefix to resolve
     * @return the bound namespace URI, or the empty string if the prefix is unbound
     * @throws IllegalArgumentException if prefix is null
     */
    @Override
    public String getNamespaceURI(String prefix) {
        if (prefix == null) {
            throw new IllegalArgumentException("Namespace prefix must not be null");
        }
        String reservedUri = reservedNamespaceUri(prefix);
        if (reservedUri != null) {
            return reservedUri;
        }
        String boundUri = prefixToNamespaceUri.get(prefix);
        return boundUri != null ? boundUri : javax.xml.XMLConstants.NULL_NS_URI;
    }

    /**
     * @param namespaceURI the namespace URI to look up
     * @return a prefix bound to the URI, or null if no prefix is bound to it
     * @throws IllegalArgumentException if namespaceURI is null
     */
    @Override
    public String getPrefix(String namespaceURI) {
        if (namespaceURI == null) {
            throw new IllegalArgumentException("Namespace URI must not be null");
        }
        String reserved = reservedPrefix(namespaceURI);
        if (reserved != null) {
            return reserved;
        }
        for (Map.Entry<String, String> entry : prefixToNamespaceUri.entrySet()) {
            // Compare from the non-null argument so that a null map value cannot cause an NPE.
            if (namespaceURI.equals(entry.getValue())) {
                return entry.getKey();
            }
        }
        return null;
    }

    /**
     * @param namespaceURI the namespace URI to look up
     * @return an iterator over every prefix bound to the URI; empty if none is bound
     * @throws IllegalArgumentException if namespaceURI is null
     */
    @Override
    public Iterator<String> getPrefixes(String namespaceURI) {
        if (namespaceURI == null) {
            throw new IllegalArgumentException("Namespace URI must not be null");
        }
        String reserved = reservedPrefix(namespaceURI);
        if (reserved != null) {
            return java.util.Collections.singleton(reserved).iterator();
        }
        return prefixToNamespaceUri.entrySet().stream()
            .filter(entry -> namespaceURI.equals(entry.getValue()))
            .map(Map.Entry::getKey)
            .iterator();
    }

    /** Returns the fixed namespace URI for the reserved "xml" and "xmlns" prefixes, or null for any other prefix. */
    private static String reservedNamespaceUri(String prefix) {
        if (javax.xml.XMLConstants.XML_NS_PREFIX.equals(prefix)) {
            return javax.xml.XMLConstants.XML_NS_URI;
        }
        if (javax.xml.XMLConstants.XMLNS_ATTRIBUTE.equals(prefix)) {
            return javax.xml.XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
        }
        return null;
    }

    /** Returns the reserved prefix ("xml" or "xmlns") for its fixed namespace URI, or null for any other URI. */
    private static String reservedPrefix(String namespaceURI) {
        if (javax.xml.XMLConstants.XML_NS_URI.equals(namespaceURI)) {
            return javax.xml.XMLConstants.XML_NS_PREFIX;
        }
        if (javax.xml.XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI)) {
            return javax.xml.XMLConstants.XMLNS_ATTRIBUTE;
        }
        return null;
    }
}

0 commit comments

Comments
 (0)