Skip to content

Commit

Permalink
Indexer ES document id (#1028)
Browse files Browse the repository at this point in the history
* ES document id can be a random UUID

Signed-off-by: Mathieu Bret <[email protected]>

* Remove the random option.

Signed-off-by: Mathieu Bret <[email protected]>

* Apply getDocumentID to all AbstractIndexerBolt implementations.

Signed-off-by: Mathieu Bret <[email protected]>

* Rollback SOLR and SQL.

Signed-off-by: Mathieu Bret <[email protected]>

Signed-off-by: Mathieu Bret <[email protected]>
  • Loading branch information
Mikwiss authored Jan 10, 2023
1 parent a575df5 commit 9bc2be9
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,17 @@ protected Map<String, String[]> filterMetadata(Metadata meta) {
return fieldVals;
}

/**
 * Computes the identifier under which a document is indexed.
 *
 * <p>This implementation does not read {@code metadata}; the identifier is derived from the
 * normalised URL alone. The method is {@code protected} and non-final, so an indexer can
 * override it to build the identifier differently.
 *
 * @param metadata the {@link Metadata} of the document (unused by this implementation)
 * @param normalisedUrl the normalised URL of the document
 * @return the SHA-256 digest of {@code normalisedUrl}, encoded as a hexadecimal string
 */
protected String getDocumentID(Metadata metadata, String normalisedUrl) {
    final String urlDigest =
            org.apache.commons.codec.digest.DigestUtils.sha256Hex(normalisedUrl);
    return urlDigest;
}

/**
* Returns the value to be used as the URL for indexing purposes; if present, the canonical value
* is used instead
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,11 @@ public void prepare(
client.setEndpoint(endpoint);
}

/**
 * Builds the document id by delegating to {@code CloudSearchUtils.getID}.
 *
 * @param metadata the {@link Metadata} of the document (not read here)
 * @param normalisedUrl the normalised URL of the document
 * @return the id returned by {@code CloudSearchUtils.getID} for {@code normalisedUrl}
 */
@Override
protected String getDocumentID(Metadata metadata, String normalisedUrl) {
    final String cloudSearchId = CloudSearchUtils.getID(normalisedUrl);
    return cloudSearchId;
}

@Override
public void execute(Tuple tuple) {

Expand Down Expand Up @@ -204,7 +209,7 @@ public void execute(Tuple tuple) {
doc_builder.put("type", "add");

// generate the id from the normalised url
String ID = CloudSearchUtils.getID(normalisedurl);
String ID = getDocumentID(metadata, normalisedurl);
doc_builder.put("id", ID);

ObjectNode fields = objectMapper.createObjectNode();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ public void execute(Tuple tuple) {
return;
}

String docID = org.apache.commons.codec.digest.DigestUtils.sha256Hex(normalisedurl);
String docID = getDocumentID(metadata, normalisedurl);

try {
XContentBuilder builder = jsonBuilder().startObject();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ public void execute(Tuple tuple) {
return;
}

final String docID = org.apache.commons.codec.digest.DigestUtils.sha256Hex(normalisedurl);
final String docID = getDocumentID(metadata, normalisedurl);

try {
final XContentBuilder builder = jsonBuilder().startObject();
Expand Down

0 comments on commit 9bc2be9

Please sign in to comment.