Skip to content

Commit 130ba68

Browse files
committed
MLE-26918 Added fromView support for incremental write
The plan is to dump eval support and just offer fromLexicons and fromView, but need to test out fromView a bit first. Did some refactoring too because the constructors had gotten so ugly - there's now an IncrementalWriteConfig class that holds all the inputs from the Builder, so that filter constructors only need that as an arg.
1 parent 06d2f80 commit 130ba68

File tree

7 files changed

+271
-39
lines changed

7 files changed

+271
-39
lines changed
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/*
2+
* Copyright (c) 2010-2026 Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved.
3+
*/
4+
package com.marklogic.client.datamovement.filter;

import com.marklogic.client.document.DocumentWriteOperation;

import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.function.Consumer;
11+
12+
/**
13+
* Configuration for incremental write filtering.
14+
*
15+
* @since 8.1.0
16+
*/
17+
public class IncrementalWriteConfig {
18+
19+
private final String hashKeyName;
20+
private final String timestampKeyName;
21+
private final boolean canonicalizeJson;
22+
private final Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer;
23+
private final String[] jsonExclusions;
24+
private final String[] xmlExclusions;
25+
private final Map<String, String> xmlNamespaces;
26+
private final String schemaName;
27+
private final String viewName;
28+
29+
public IncrementalWriteConfig(String hashKeyName, String timestampKeyName, boolean canonicalizeJson,
30+
Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer,
31+
String[] jsonExclusions, String[] xmlExclusions, Map<String, String> xmlNamespaces,
32+
String schemaName, String viewName) {
33+
this.hashKeyName = hashKeyName;
34+
this.timestampKeyName = timestampKeyName;
35+
this.canonicalizeJson = canonicalizeJson;
36+
this.skippedDocumentsConsumer = skippedDocumentsConsumer;
37+
this.jsonExclusions = jsonExclusions;
38+
this.xmlExclusions = xmlExclusions;
39+
this.xmlNamespaces = xmlNamespaces != null ? Collections.unmodifiableMap(xmlNamespaces) : null;
40+
this.schemaName = schemaName;
41+
this.viewName = viewName;
42+
}
43+
44+
public String getHashKeyName() {
45+
return hashKeyName;
46+
}
47+
48+
public String getTimestampKeyName() {
49+
return timestampKeyName;
50+
}
51+
52+
public boolean isCanonicalizeJson() {
53+
return canonicalizeJson;
54+
}
55+
56+
public Consumer<DocumentWriteOperation[]> getSkippedDocumentsConsumer() {
57+
return skippedDocumentsConsumer;
58+
}
59+
60+
public String[] getJsonExclusions() {
61+
return jsonExclusions;
62+
}
63+
64+
public String[] getXmlExclusions() {
65+
return xmlExclusions;
66+
}
67+
68+
public Map<String, String> getXmlNamespaces() {
69+
return xmlNamespaces != null ? xmlNamespaces : Collections.emptyMap();
70+
}
71+
72+
public String getSchemaName() {
73+
return schemaName;
74+
}
75+
76+
public String getViewName() {
77+
return viewName;
78+
}
79+
}

marklogic-client-api/src/main/java/com/marklogic/client/datamovement/filter/IncrementalWriteEvalFilter.java

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,6 @@
1212
import com.marklogic.client.document.DocumentWriteSet;
1313
import com.marklogic.client.io.JacksonHandle;
1414

15-
import java.util.Map;
16-
import java.util.function.Consumer;
17-
1815
/**
1916
* Uses server-side JavaScript code to get the existing hash values for a set of URIs.
2017
*
@@ -31,9 +28,8 @@ class IncrementalWriteEvalFilter extends IncrementalWriteFilter {
3128
response
3229
""";
3330

34-
IncrementalWriteEvalFilter(String hashKeyName, String timestampKeyName, boolean canonicalizeJson,
35-
Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer, String[] jsonExclusions, String[] xmlExclusions, Map<String, String> xmlNamespaces) {
36-
super(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions, xmlExclusions, xmlNamespaces);
31+
IncrementalWriteEvalFilter(IncrementalWriteConfig config) {
32+
super(config);
3733
}
3834

3935
@Override
@@ -47,7 +43,7 @@ public DocumentWriteSet apply(DocumentWriteSetFilter.Context context) {
4743

4844
try {
4945
JsonNode response = context.getDatabaseClient().newServerEval().javascript(EVAL_SCRIPT)
50-
.addVariable("hashKeyName", hashKeyName)
46+
.addVariable("hashKeyName", getConfig().getHashKeyName())
5147
.addVariable("uris", new JacksonHandle(uris))
5248
.evalAs(JsonNode.class);
5349

marklogic-client-api/src/main/java/com/marklogic/client/datamovement/filter/IncrementalWriteFilter.java

Lines changed: 53 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ public static class Builder {
5353
private String[] jsonExclusions;
5454
private String[] xmlExclusions;
5555
private Map<String, String> xmlNamespaces;
56+
private String schemaName;
57+
private String viewName;
5658

5759
/**
5860
* @param keyName the name of the MarkLogic metadata key that will hold the hash value; defaults to "incrementalWriteHash".
@@ -128,13 +130,43 @@ public Builder xmlNamespaces(Map<String, String> namespaces) {
128130
return this;
129131
}
130132

133+
/**
134+
* Configures the filter to use a TDE view for retrieving hash values instead of field range indexes.
135+
* This approach requires a TDE template to be deployed that extracts the URI and hash metadata.
136+
*
137+
* @param schemaName the schema name of the TDE view
138+
* @param viewName the view name of the TDE view
139+
* @return this builder
140+
*/
141+
public Builder fromView(String schemaName, String viewName) {
142+
boolean schemaEmpty = schemaName == null || schemaName.trim().isEmpty();
143+
boolean viewEmpty = viewName == null || viewName.trim().isEmpty();
144+
145+
if (schemaEmpty && !viewEmpty) {
146+
throw new IllegalArgumentException("Schema name cannot be null or empty when view name is provided");
147+
}
148+
if (!schemaEmpty && viewEmpty) {
149+
throw new IllegalArgumentException("View name cannot be null or empty when schema name is provided");
150+
}
151+
152+
this.schemaName = schemaName;
153+
this.viewName = viewName;
154+
return this;
155+
}
156+
131157
public IncrementalWriteFilter build() {
132158
validateJsonExclusions();
133159
validateXmlExclusions();
160+
IncrementalWriteConfig config = new IncrementalWriteConfig(hashKeyName, timestampKeyName, canonicalizeJson,
161+
skippedDocumentsConsumer, jsonExclusions, xmlExclusions, xmlNamespaces, schemaName, viewName);
162+
163+
if (schemaName != null && viewName != null) {
164+
return new IncrementalWriteViewFilter(config);
165+
}
134166
if (useEvalQuery) {
135-
return new IncrementalWriteEvalFilter(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions, xmlExclusions, xmlNamespaces);
167+
return new IncrementalWriteEvalFilter(config);
136168
}
137-
return new IncrementalWriteOpticFilter(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions, xmlExclusions, xmlNamespaces);
169+
return new IncrementalWriteOpticFilter(config);
138170
}
139171

140172
private void validateJsonExclusions() {
@@ -181,26 +213,18 @@ private void validateXmlExclusions() {
181213
}
182214
}
183215

184-
protected final String hashKeyName;
185-
private final String timestampKeyName;
186-
private final boolean canonicalizeJson;
187-
private final Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer;
188-
private final String[] jsonExclusions;
189-
private final String[] xmlExclusions;
190-
private final Map<String, String> xmlNamespaces;
216+
private final IncrementalWriteConfig config;
191217

192218
// Hardcoding this for now, with a good general purpose hashing function.
193219
// See https://xxhash.com for benchmarks.
194220
private final LongHashFunction hashFunction = LongHashFunction.xx3();
195221

196-
public IncrementalWriteFilter(String hashKeyName, String timestampKeyName, boolean canonicalizeJson, Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer, String[] jsonExclusions, String[] xmlExclusions, Map<String, String> xmlNamespaces) {
197-
this.hashKeyName = hashKeyName;
198-
this.timestampKeyName = timestampKeyName;
199-
this.canonicalizeJson = canonicalizeJson;
200-
this.skippedDocumentsConsumer = skippedDocumentsConsumer;
201-
this.jsonExclusions = jsonExclusions;
202-
this.xmlExclusions = xmlExclusions;
203-
this.xmlNamespaces = xmlNamespaces;
222+
public IncrementalWriteFilter(IncrementalWriteConfig config) {
223+
this.config = config;
224+
}
225+
226+
public IncrementalWriteConfig getConfig() {
227+
return config;
204228
}
205229

206230
protected final DocumentWriteSet filterDocuments(Context context, Function<String, String> hashRetriever) {
@@ -230,19 +254,19 @@ protected final DocumentWriteSet filterDocuments(Context context, Function<Strin
230254

231255
if (existingHash != null) {
232256
if (!existingHash.equals(contentHash)) {
233-
newWriteSet.add(addHashToMetadata(doc, hashKeyName, contentHash, timestampKeyName, timestamp));
234-
} else if (skippedDocumentsConsumer != null) {
257+
newWriteSet.add(addHashToMetadata(doc, config.getHashKeyName(), contentHash, config.getTimestampKeyName(), timestamp));
258+
} else if (config.getSkippedDocumentsConsumer() != null) {
235259
skippedDocuments.add(doc);
236260
} else {
237261
// No consumer, so skip the document silently.
238262
}
239263
} else {
240-
newWriteSet.add(addHashToMetadata(doc, hashKeyName, contentHash, timestampKeyName, timestamp));
264+
newWriteSet.add(addHashToMetadata(doc, config.getHashKeyName(), contentHash, config.getTimestampKeyName(), timestamp));
241265
}
242266
}
243267

244-
if (!skippedDocuments.isEmpty() && skippedDocumentsConsumer != null) {
245-
skippedDocumentsConsumer.accept(skippedDocuments.toArray(new DocumentWriteOperation[0]));
268+
if (!skippedDocuments.isEmpty() && config.getSkippedDocumentsConsumer() != null) {
269+
config.getSkippedDocumentsConsumer().accept(skippedDocuments.toArray(new DocumentWriteOperation[0]));
246270
}
247271

248272
return newWriteSet;
@@ -259,11 +283,11 @@ private String serializeContent(DocumentWriteOperation doc) {
259283
format = baseHandle.getFormat();
260284
}
261285

262-
if (canonicalizeJson && (Format.JSON.equals(format) || isPossiblyJsonContent(content))) {
286+
if (config.isCanonicalizeJson() && (Format.JSON.equals(format) || isPossiblyJsonContent(content))) {
263287
JsonCanonicalizer jc;
264288
try {
265-
if (jsonExclusions != null && jsonExclusions.length > 0) {
266-
content = ContentExclusionUtil.applyJsonExclusions(doc.getUri(), content, jsonExclusions);
289+
if (config.getJsonExclusions() != null && config.getJsonExclusions().length > 0) {
290+
content = ContentExclusionUtil.applyJsonExclusions(doc.getUri(), content, config.getJsonExclusions());
267291
}
268292
jc = new JsonCanonicalizer(content);
269293
return jc.getEncodedString();
@@ -274,9 +298,9 @@ private String serializeContent(DocumentWriteOperation doc) {
274298
logger.warn("Unable to canonicalize JSON content for URI {}, using original content for hashing; cause: {}",
275299
doc.getUri(), e.getMessage());
276300
}
277-
} else if (xmlExclusions != null && xmlExclusions.length > 0) {
301+
} else if (config.getXmlExclusions() != null && config.getXmlExclusions().length > 0) {
278302
try {
279-
content = ContentExclusionUtil.applyXmlExclusions(doc.getUri(), content, xmlNamespaces, xmlExclusions);
303+
content = ContentExclusionUtil.applyXmlExclusions(doc.getUri(), content, config.getXmlNamespaces(), config.getXmlExclusions());
280304
} catch (Exception e) {
281305
logger.warn("Unable to apply XML exclusions for URI {}, using original content for hashing; cause: {}",
282306
doc.getUri(), e.getMessage());
@@ -316,4 +340,6 @@ protected static DocumentWriteOperation addHashToMetadata(DocumentWriteOperation
316340

317341
return new DocumentWriteOperationImpl(op.getUri(), newMetadata, op.getContent(), op.getTemporalDocumentURI());
318342
}
343+
344+
319345
}

marklogic-client-api/src/main/java/com/marklogic/client/datamovement/filter/IncrementalWriteOpticFilter.java

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010

1111
import java.util.HashMap;
1212
import java.util.Map;
13-
import java.util.function.Consumer;
1413

1514
/**
1615
* Uses an Optic query to get the existing hash values for a set of URIs.
@@ -19,9 +18,8 @@
1918
*/
2019
class IncrementalWriteOpticFilter extends IncrementalWriteFilter {
2120

22-
IncrementalWriteOpticFilter(String hashKeyName, String timestampKeyName, boolean canonicalizeJson,
23-
Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer, String[] jsonExclusions, String[] xmlExclusions, Map<String, String> xmlNamespaces) {
24-
super(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions, xmlExclusions, xmlNamespaces);
21+
IncrementalWriteOpticFilter(IncrementalWriteConfig config) {
22+
super(config);
2523
}
2624

2725
@Override
@@ -39,7 +37,7 @@ public DocumentWriteSet apply(Context context) {
3937
Map<String, String> existingHashes = rowTemplate.query(op ->
4038
op.fromLexicons(Map.of(
4139
"uri", op.cts.uriReference(),
42-
"hash", op.cts.fieldReference(super.hashKeyName)
40+
"hash", op.cts.fieldReference(getConfig().getHashKeyName())
4341
)).where(
4442
op.cts.documentQuery(op.xs.stringSeq(uris))
4543
),
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/*
2+
* Copyright (c) 2010-2026 Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved.
3+
*/
4+
package com.marklogic.client.datamovement.filter;
5+
6+
import com.marklogic.client.FailedRequestException;
7+
import com.marklogic.client.document.DocumentWriteOperation;
8+
import com.marklogic.client.document.DocumentWriteSet;
9+
import com.marklogic.client.row.RowTemplate;
10+
11+
import java.util.HashMap;
12+
import java.util.Map;
13+
14+
/**
15+
* Uses an Optic query with fromView to get the existing hash values for a set of URIs from a TDE view.
16+
* This implementation requires a TDE template to be deployed that extracts the URI and hash metadata.
17+
*
18+
* @since 8.1.0
19+
*/
20+
class IncrementalWriteViewFilter extends IncrementalWriteFilter {
21+
22+
IncrementalWriteViewFilter(IncrementalWriteConfig config) {
23+
super(config);
24+
}
25+
26+
@Override
27+
public DocumentWriteSet apply(Context context) {
28+
final String[] uris = context.getDocumentWriteSet().stream()
29+
.filter(op -> DocumentWriteOperation.OperationType.DOCUMENT_WRITE.equals(op.getOperationType()))
30+
.map(DocumentWriteOperation::getUri)
31+
.toArray(String[]::new);
32+
33+
RowTemplate rowTemplate = new RowTemplate(context.getDatabaseClient());
34+
35+
try {
36+
Map<String, String> existingHashes = rowTemplate.query(op ->
37+
op.fromView(getConfig().getSchemaName(), getConfig().getViewName())
38+
.where(op.in(op.col("uri"), op.xs.stringSeq(uris))),
39+
40+
rows -> {
41+
Map<String, String> map = new HashMap<>();
42+
rows.forEach(row -> {
43+
String uri = row.getString("uri");
44+
String existingHash = row.getString("hash");
45+
map.put(uri, existingHash);
46+
});
47+
return map;
48+
}
49+
);
50+
51+
return filterDocuments(context, uri -> existingHashes.get(uri));
52+
} catch (FailedRequestException e) {
53+
String message = "Unable to query for existing incremental write hashes from view " + getConfig().getSchemaName() + "." + getConfig().getViewName() + "; cause: " + e.getMessage();
54+
throw new FailedRequestException(message, e.getFailedRequest());
55+
}
56+
}
57+
}

0 commit comments

Comments
 (0)