Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/*
* Copyright (c) 2010-2026 Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved.
*/
package com.marklogic.client.datamovement.filter;

import com.fasterxml.jackson.core.JsonPointer;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Utility class for applying content exclusions to documents before hash calculation.
 * Supports removing specific paths from JSON and XML documents using JSON Pointer and XPath expressions.
 *
 * @since 8.1.0
 */
public class ContentExclusionUtil {

    private static final Logger logger = LoggerFactory.getLogger(ContentExclusionUtil.class);

    // Cached as a constant; ObjectMapper is expensive to build and safe to reuse across threads.
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    private ContentExclusionUtil() {
        // Utility class; prevents instantiation.
    }

    /**
     * Applies JSON Pointer exclusions to JSON content by removing the specified paths.
     *
     * @param uri the document URI (used for logging purposes)
     * @param jsonContent the JSON content as a string
     * @param jsonPointers array of RFC 6901 JSON Pointer expressions identifying properties to exclude
     * @return the modified JSON content with specified paths removed; returned as-is when no pointers are given
     * @throws JsonProcessingException if the JSON content cannot be parsed or serialized
     */
    public static String applyJsonExclusions(String uri, String jsonContent, String[] jsonPointers) throws JsonProcessingException {
        if (jsonPointers == null || jsonPointers.length == 0) {
            return jsonContent;
        }

        JsonNode rootNode = OBJECT_MAPPER.readTree(jsonContent);
        for (String jsonPointer : jsonPointers) {
            removeNodeAtPointer(uri, rootNode, jsonPointer);
        }
        return OBJECT_MAPPER.writeValueAsString(rootNode);
    }

    /**
     * Removes a node at the specified JSON Pointer path from the given root node.
     * Pointers that do not resolve to a node are skipped; pointers whose parent is an
     * array are logged and skipped, matching the documented "not supported" behavior.
     *
     * @param uri the document URI (used for logging purposes)
     * @param rootNode the root JSON node
     * @param jsonPointer the JSON Pointer expression identifying the node to remove
     */
    private static void removeNodeAtPointer(String uri, JsonNode rootNode, String jsonPointer) {
        JsonPointer pointer = JsonPointer.compile(jsonPointer);

        // Guard: the empty pointer ("") addresses the document root, which cannot be removed.
        // Without this check, pointer.head() and pointer.last() below would return null and NPE.
        if (pointer.matches()) {
            logger.warn("Cannot remove document root for JSONPointer '{}' in document {}, skipping", jsonPointer, uri);
            return;
        }

        JsonNode targetNode = rootNode.at(pointer);

        if (targetNode.isMissingNode()) {
            logger.debug("JSONPointer '{}' does not exist in document {}, skipping", jsonPointer, uri);
            return;
        }

        // Use Jackson's JsonPointer API to get parent and field name
        JsonPointer parentPointer = pointer.head();
        JsonNode parentNode = rootNode.at(parentPointer);

        if (parentNode.isObject()) {
            String fieldName = pointer.last().getMatchingProperty();
            ((ObjectNode) parentNode).remove(fieldName);
        } else if (parentNode.isArray()) {
            logger.warn("Array element exclusion not supported for JSONPointer '{}'. " +
                "Consider excluding the entire array property instead.", jsonPointer);
        }
    }

    // Future method for XML exclusions
    // public static String applyXmlExclusions(String xmlContent, String[] xpaths) { ... }
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2010-2025 Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved.
* Copyright (c) 2010-2026 Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved.
*/
package com.marklogic.client.datamovement.filter;

Expand Down Expand Up @@ -31,8 +31,8 @@ class IncrementalWriteEvalFilter extends IncrementalWriteFilter {
""";

IncrementalWriteEvalFilter(String hashKeyName, String timestampKeyName, boolean canonicalizeJson,
Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer) {
super(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer);
Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer, String[] jsonExclusions) {
super(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ public static class Builder {
private boolean canonicalizeJson = true;
private boolean useEvalQuery = false;
private Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer;
private String[] jsonExclusions;

/**
* @param keyName the name of the MarkLogic metadata key that will hold the hash value; defaults to "incrementalWriteHash".
Expand Down Expand Up @@ -93,28 +94,39 @@ public Builder onDocumentsSkipped(Consumer<DocumentWriteOperation[]> skippedDocu
return this;
}

/**
 * Configures JSON properties to exclude from the serialized content before its hash is
 * calculated, so that changes to only those properties do not cause a document to be re-written.
 *
 * @param jsonPointers JSON Pointer expressions (RFC 6901) identifying JSON properties to exclude from hash calculation.
 * For example, "/metadata/timestamp" or "/user/lastModified".
 * @return this builder, allowing calls to be chained
 */
public Builder jsonExclusions(String... jsonPointers) {
    this.jsonExclusions = jsonPointers;
    return this;
}

public IncrementalWriteFilter build() {
if (useEvalQuery) {
return new IncrementalWriteEvalFilter(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer);
return new IncrementalWriteEvalFilter(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions);
}
return new IncrementalWriteOpticFilter(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer);
return new IncrementalWriteOpticFilter(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions);
}
}

protected final String hashKeyName;
private final String timestampKeyName;
private final boolean canonicalizeJson;
private final Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer;
private final String[] jsonExclusions;

// Hardcoding this for now, with a good general purpose hashing function.
// See https://xxhash.com for benchmarks.
private final LongHashFunction hashFunction = LongHashFunction.xx3();

public IncrementalWriteFilter(String hashKeyName, String timestampKeyName, boolean canonicalizeJson, Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer) {
public IncrementalWriteFilter(String hashKeyName, String timestampKeyName, boolean canonicalizeJson, Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer, String[] jsonExclusions) {
this.hashKeyName = hashKeyName;
this.timestampKeyName = timestampKeyName;
this.canonicalizeJson = canonicalizeJson;
this.skippedDocumentsConsumer = skippedDocumentsConsumer;
this.jsonExclusions = jsonExclusions;
}

protected final DocumentWriteSet filterDocuments(Context context, Function<String, String> hashRetriever) {
Expand Down Expand Up @@ -165,6 +177,10 @@ private String serializeContent(DocumentWriteOperation doc) {
if (canonicalizeJson && (Format.JSON.equals(format) || isPossiblyJsonContent(content))) {
JsonCanonicalizer jc;
try {
if (jsonExclusions != null && jsonExclusions.length > 0) {
// TBD on error handling here, want to get XML supported first.
content = ContentExclusionUtil.applyJsonExclusions(doc.getUri(), content, jsonExclusions);
}
jc = new JsonCanonicalizer(content);
return jc.getEncodedString();
} catch (IOException e) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2010-2025 Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved.
* Copyright (c) 2010-2026 Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved.
*/
package com.marklogic.client.datamovement.filter;

Expand All @@ -20,8 +20,8 @@
class IncrementalWriteOpticFilter extends IncrementalWriteFilter {

IncrementalWriteOpticFilter(String hashKeyName, String timestampKeyName, boolean canonicalizeJson,
Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer) {
super(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer);
Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer, String[] jsonExclusions) {
super(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,65 @@ void nullIsIgnoredForKeyNames() {
assertNotNull(metadata.getMetadataValues().get("incrementalWriteTimestamp"));
}

@Test
void jsonExclusions() {
    filter = IncrementalWriteFilter.newBuilder()
        .jsonExclusions("/timestamp", "/metadata/lastModified")
        .onDocumentsSkipped(skipped -> skippedCount.addAndGet(skipped.length))
        .build();

    // Pass 1: brand-new documents, so every one should be written.
    docs = new ArrayList<>();
    for (int docNum = 1; docNum <= 5; docNum++) {
        ObjectNode content = objectMapper.createObjectNode();
        content.put("id", docNum);
        content.put("name", "Document " + docNum);
        content.put("timestamp", "2025-01-01T10:00:00Z");
        ObjectNode meta = content.putObject("metadata");
        meta.put("lastModified", "2025-01-01T10:00:00Z");
        meta.put("author", "Test User");
        String uri = "/incremental/test/json-doc-" + docNum + ".json";
        docs.add(new DocumentWriteOperationImpl(uri, METADATA, new JacksonHandle(content)));
    }

    writeDocs(docs);
    assertEquals(5, writtenCount.get());
    assertEquals(0, skippedCount.get());

    // Pass 2: only the excluded fields differ, so every document should be skipped.
    docs = new ArrayList<>();
    for (int docNum = 1; docNum <= 5; docNum++) {
        ObjectNode content = objectMapper.createObjectNode();
        content.put("id", docNum);
        content.put("name", "Document " + docNum);
        content.put("timestamp", "2026-01-02T15:30:00Z");
        ObjectNode meta = content.putObject("metadata");
        meta.put("lastModified", "2026-01-02T15:30:00Z");
        meta.put("author", "Test User");
        String uri = "/incremental/test/json-doc-" + docNum + ".json";
        docs.add(new DocumentWriteOperationImpl(uri, METADATA, new JacksonHandle(content)));
    }

    writeDocs(docs);
    assertEquals(5, writtenCount.get(), "Documents should be skipped since only excluded fields changed");
    assertEquals(5, skippedCount.get());

    // Pass 3: a non-excluded field ("name") changed, so every document should be written again.
    docs = new ArrayList<>();
    for (int docNum = 1; docNum <= 5; docNum++) {
        ObjectNode content = objectMapper.createObjectNode();
        content.put("id", docNum);
        content.put("name", "Modified Document " + docNum);
        content.put("timestamp", "2026-01-02T16:00:00Z");
        ObjectNode meta = content.putObject("metadata");
        meta.put("lastModified", "2026-01-02T16:00:00Z");
        meta.put("author", "Test User");
        String uri = "/incremental/test/json-doc-" + docNum + ".json";
        docs.add(new DocumentWriteOperationImpl(uri, METADATA, new JacksonHandle(content)));
    }

    writeDocs(docs);
    assertEquals(10, writtenCount.get(), "Documents should be written since non-excluded content changed");
    assertEquals(5, skippedCount.get(), "Skip count should remain at 5");
}

private void verifyIncrementalWriteWorks() {
writeTenDocuments();
verifyDocumentsHasHashInMetadataKey();
Expand Down