Support Lucene index scrubbing of missing entries #3009

Merged (20 commits) on Feb 12, 2025
docs/ReleaseNotes.md (2 changes: 1 addition & 1 deletion)
@@ -33,7 +33,7 @@ Users performing online updates are encouraged to update from [4.0.559.4](#40559
* **Feature** Add enum column support to relational server [(Issue #3073)](https://github.com/FoundationDB/fdb-record-layer/issues/3073)
* **Feature** Feature 3 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)
* **Feature** Feature 4 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)
* **Feature** Feature 5 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)
* **Feature** Support Lucene index scrubbing [(Issue #3008)](https://github.com/FoundationDB/fdb-record-layer/issues/3008)
* **Breaking change** Change 1 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)
* **Breaking change** Change 2 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)
* **Breaking change** Change 3 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)
@@ -126,6 +126,7 @@ public enum LogMessageKeys {
PRIMARY_INDEX,
VALUE_KEY,
PRIMARY_KEY,
GROUPING_KEY,
VALUE,
INDEX_OPERATION("operation"),
INITIAL_PREFIX,
@@ -142,7 +142,6 @@ private CompletableFuture<Boolean> indexScrubRangeOnly(@Nonnull FDBRecordStore s
throw new UnsupportedOperationException("This index does not support scrubbing type " + scrubbingType);
}


return indexScrubRangeOnly(store, recordsScanned, index, tools, maintainer.isIdempotent());
}

@@ -130,7 +130,7 @@ private CompletableFuture<List<Tuple>> getMissingIndexKeys(FDBRecordStore store,
}

@Nonnull
private RecordCursor<IndexEntry> indexEntriesForRecord(@Nonnull FDBRecordStore store, @Nonnull FDBStoredRecord<Message> rec) {
protected RecordCursor<IndexEntry> indexEntriesForRecord(@Nonnull FDBRecordStore store, @Nonnull FDBStoredRecord<Message> rec) {
final IndexMaintainer maintainer = store.getIndexMaintainer(index);
if (isSynthetic) {
final RecordQueryPlanner queryPlanner =
@@ -58,6 +58,7 @@
import com.apple.foundationdb.record.provider.foundationdb.IndexOperation;
import com.apple.foundationdb.record.provider.foundationdb.IndexOperationResult;
import com.apple.foundationdb.record.provider.foundationdb.IndexScanBounds;
import com.apple.foundationdb.record.provider.foundationdb.IndexScrubbingTools;
import com.apple.foundationdb.record.provider.foundationdb.indexes.InvalidIndexEntry;
import com.apple.foundationdb.record.provider.foundationdb.indexes.StandardIndexMaintainer;
import com.apple.foundationdb.record.query.QueryToKeyMatcher;
@@ -111,6 +112,7 @@
public class LuceneIndexMaintainer extends StandardIndexMaintainer {
private static final Logger LOG = LoggerFactory.getLogger(LuceneIndexMaintainer.class);

@Nonnull
private final FDBDirectoryManager directoryManager;
private final LuceneAnalyzerCombinationProvider indexAnalyzerSelector;
private final LuceneAnalyzerCombinationProvider autoCompleteAnalyzerSelector;
@@ -750,4 +752,20 @@ private void logSerializationError(String format, Object ... arguments) {
}
}
}

@Nullable
@Override
public IndexScrubbingTools<?> getIndexScrubbingTools(final IndexScrubbingTools.ScrubbingType type) {
switch (type) {
case MISSING:
final Map<String, String> options = state.index.getOptions();
if (Boolean.parseBoolean(options.get(LuceneIndexOptions.PRIMARY_KEY_SEGMENT_INDEX_ENABLED)) ||
Boolean.parseBoolean(options.get(LuceneIndexOptions.PRIMARY_KEY_SEGMENT_INDEX_V2_ENABLED))) {
return new LuceneIndexScrubbingToolsMissing(partitioner, directoryManager, indexAnalyzerSelector);
}
return null;
default:
return null;
}
}
}
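The getIndexScrubbingTools override above only hands back a missing-entry tool when the index keeps a Lucene primary key segment index. Below is a minimal sketch of an index definition that satisfies that check; the index name and the "text" field are illustrative assumptions, not taken from this PR.

```java
// Sketch only: an index definition for which getIndexScrubbingTools(ScrubbingType.MISSING)
// would return a non-null tool. The index name and the "text" field are assumptions.
import com.apple.foundationdb.record.lucene.LuceneFunctionNames;
import com.apple.foundationdb.record.lucene.LuceneIndexOptions;
import com.apple.foundationdb.record.lucene.LuceneIndexTypes;
import com.apple.foundationdb.record.metadata.Index;
import com.apple.foundationdb.record.metadata.Key;

import java.util.Map;

public final class ScrubbableLuceneIndexExample {
    private ScrubbableLuceneIndexExample() {
    }

    public static Index scrubbableLuceneIndex() {
        return new Index(
                "MyLuceneIndex",
                Key.Expressions.function(LuceneFunctionNames.LUCENE_TEXT, Key.Expressions.field("text")),
                LuceneIndexTypes.LUCENE,
                // Without one of the primary-key-segment-index options set to "true",
                // getIndexScrubbingTools(MISSING) returns null and scrubbing is unavailable.
                Map.of(LuceneIndexOptions.PRIMARY_KEY_SEGMENT_INDEX_V2_ENABLED, "true"));
    }
}
```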
LuceneIndexScrubbingToolsMissing.java (new file)
@@ -0,0 +1,218 @@
/*
* LuceneIndexScrubbingToolsMissing.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2015-2025 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.apple.foundationdb.record.lucene;

import com.apple.foundationdb.async.AsyncUtil;
import com.apple.foundationdb.record.RecordCursor;
import com.apple.foundationdb.record.RecordCursorResult;
import com.apple.foundationdb.record.logging.KeyValueLogMessage;
import com.apple.foundationdb.record.logging.LogMessageKeys;
import com.apple.foundationdb.record.lucene.directory.FDBDirectoryManager;
import com.apple.foundationdb.record.metadata.Index;
import com.apple.foundationdb.record.metadata.RecordType;
import com.apple.foundationdb.record.metadata.expressions.KeyExpression;
import com.apple.foundationdb.record.provider.foundationdb.FDBIndexableRecord;
import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStore;
import com.apple.foundationdb.record.provider.foundationdb.FDBStoreTimer;
import com.apple.foundationdb.record.provider.foundationdb.FDBStoredRecord;
import com.apple.foundationdb.record.provider.foundationdb.FDBSyntheticRecord;
import com.apple.foundationdb.record.provider.foundationdb.indexes.ValueIndexScrubbingToolsMissing;
import com.apple.foundationdb.record.query.plan.RecordQueryPlanner;
import com.apple.foundationdb.record.query.plan.synthetic.SyntheticRecordFromStoredRecordPlan;
import com.apple.foundationdb.record.query.plan.synthetic.SyntheticRecordPlanner;
import com.apple.foundationdb.record.util.pair.Pair;
import com.apple.foundationdb.tuple.Tuple;
import com.google.protobuf.Message;
import org.apache.lucene.index.DirectoryReader;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;

/**
* Index Scrubbing Toolbox for a Lucene index maintainer. Scrub missing value index entries - i.e. detect record(s) that should
* have been indexed, but cannot be found in the segment index.
*/
public class LuceneIndexScrubbingToolsMissing extends ValueIndexScrubbingToolsMissing {
Collaborator:

I think I agree with your comment that extending ValueIndexScrubbingToolsMissing feels wrong. Perhaps a BaseScrubbingToolsMissing would make sense, although I think the only method that actually needs to be shared is getCursor, so a utility class would probably make more sense.

Leaving it as you have it until we have a third Missing implementation also seems reasonable, as it might align with better abstracting synthetic records in general for use across scrubbing, indexing, and IndexMaintenance.
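A rough sketch of the utility-class alternative mentioned above; the class name, the getCursor signature, and the scan setup are hypothetical (the actual getCursor in ValueIndexScrubbingToolsMissing is not shown in this diff):

```java
// Hypothetical helper, not part of this PR: both the value-index and Lucene "missing"
// tools could delegate to a shared cursor factory instead of one extending the other.
import com.apple.foundationdb.record.ExecuteProperties;
import com.apple.foundationdb.record.IsolationLevel;
import com.apple.foundationdb.record.RecordCursor;
import com.apple.foundationdb.record.ScanProperties;
import com.apple.foundationdb.record.TupleRange;
import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStore;
import com.apple.foundationdb.record.provider.foundationdb.FDBStoredRecord;
import com.google.protobuf.Message;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

final class ScrubbingToolsMissingHelper {
    private ScrubbingToolsMissingHelper() {
    }

    // Shared cursor over the stored records that a "missing" scrub should visit.
    static RecordCursor<FDBStoredRecord<Message>> getCursor(@Nonnull FDBRecordStore store,
                                                            @Nonnull TupleRange range,
                                                            @Nullable byte[] continuation,
                                                            int limit) {
        final ScanProperties scanProperties = new ScanProperties(
                ExecuteProperties.newBuilder()
                        .setReturnedRowLimit(limit)
                        .setIsolationLevel(IsolationLevel.SNAPSHOT)
                        .build());
        return store.scanRecords(range, continuation, scanProperties);
    }
}
```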

private Collection<RecordType> recordTypes = null;
private Index index;
private boolean isSynthetic;

@Nonnull
private final LucenePartitioner partitioner;
@Nonnull
private final FDBDirectoryManager directoryManager;
@Nonnull
private final LuceneAnalyzerCombinationProvider indexAnalyzerSelector;

public LuceneIndexScrubbingToolsMissing(@Nonnull LucenePartitioner partitioner, @Nonnull FDBDirectoryManager directoryManager,
@Nonnull LuceneAnalyzerCombinationProvider indexAnalyzerSelector) {
this.partitioner = partitioner;
this.directoryManager = directoryManager;
this.indexAnalyzerSelector = indexAnalyzerSelector;
}


@Override
public void presetCommonParams(Index index, boolean allowRepair, boolean isSynthetic, Collection<RecordType> types) {
this.recordTypes = types;
this.index = index;
this.isSynthetic = isSynthetic;
// call super, but force allowRepair as false
super.presetCommonParams(index, false, isSynthetic, types);
}

/**
* Provide a Lucene-specific reason for detecting a "missing" index entry.
*/
public enum MissingIndexReason {
NOT_IN_PARTITION,
NOT_IN_PK_SEGMENT_INDEX,
EMPTY_RECORDS_FIELDS,
}

@Override
@Nullable
public CompletableFuture<Issue> handleOneItem(final FDBRecordStore store, final RecordCursorResult<FDBStoredRecord<Message>> result) {
if (recordTypes == null || index == null) {
throw new IllegalStateException("presetParams was not called appropriately for this scrubbing tool");
}

final FDBStoredRecord<Message> rec = result.get();
if (rec == null || !recordTypes.contains(rec.getRecordType())) {
return CompletableFuture.completedFuture(null);
}

return detectMissingIndexKeys(store, rec)
.thenApply(missingIndexesKeys -> {
if (missingIndexesKeys == null) {
return null;
}
// Here: Oh, No! an index entry is missing!!
// (Maybe) report an error
return new Issue(
KeyValueLogMessage.build("Scrubber: missing index entry",
LogMessageKeys.KEY, rec.getPrimaryKey(),
LogMessageKeys.GROUPING_KEY, missingIndexesKeys.getValue(),
LogMessageKeys.REASON, missingIndexesKeys.getKey()),
FDBStoreTimer.Counts.INDEX_SCRUBBER_MISSING_ENTRIES,
null);
});
}

@SuppressWarnings("PMD.CloseResource")
private CompletableFuture<Pair<MissingIndexReason, Tuple>> detectMissingIndexKeys(final FDBRecordStore store, FDBStoredRecord<Message> rec) {
Collaborator:

I didn't notice until I went to look at the similarity between this and ValueIndexScrubbingToolsMissing, but you don't check the index filter before saving. I think that means you will get false negatives if any index entries are filtered. It's probably worth adding a boolean field to MyParentRecord, adding an additional parameter to saveRecords that saves filtered-out records, and changing the index definition to filter out anything with that field set to true (maybe something like isHidden).
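For context, the kind of filter check being discussed would look roughly like the sketch below; the IndexMaintenanceFilter accessor and method names are assumptions about the existing API, not code from this PR:

```java
// Hedged sketch, not verified against the current API: skip records that the store's
// index filter deliberately excludes before reporting them as missing.
final IndexMaintenanceFilter indexFilter = store.getIndexMaintenanceFilter();
if (indexFilter.indexThisRecord(index, rec) == IndexMaintenanceFilter.IndexValues.NONE) {
    // Intentionally unindexed record: not a scrubbing issue.
    return CompletableFuture.completedFuture(null);
}
```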

// Generate synthetic record (if applicable) and return the first detected missing (if any).
final AtomicReference<Pair<MissingIndexReason, Tuple>> issue = new AtomicReference<>();

if (!isSynthetic) {
return checkMissingIndexKey(rec, issue).thenApply(ignore -> issue.get());
}
final RecordQueryPlanner queryPlanner =
new RecordQueryPlanner(store.getRecordMetaData(), store.getRecordStoreState().withWriteOnlyIndexes(Collections.singletonList(index.getName())));
final SyntheticRecordPlanner syntheticPlanner = new SyntheticRecordPlanner(store, queryPlanner);
SyntheticRecordFromStoredRecordPlan syntheticPlan = syntheticPlanner.forIndex(index);
final RecordCursor<FDBSyntheticRecord> recordCursor = syntheticPlan.execute(store, rec);

return AsyncUtil.whenAll(
recordCursor.asStream().map(syntheticRecord -> checkMissingIndexKey(syntheticRecord, issue))
.collect(Collectors.toList()))
.whenComplete((ret, e) -> recordCursor.close())
.thenApply(ignore -> issue.get());

}

private CompletableFuture<Void> checkMissingIndexKey(FDBIndexableRecord<Message> rec,
AtomicReference<Pair<MissingIndexReason, Tuple>> issue) {
// Iterate grouping keys (if any) and detect missing index entry (if any)
final KeyExpression root = index.getRootExpression();
final Map<Tuple, List<LuceneDocumentFromRecord.DocumentField>> recordFields = LuceneDocumentFromRecord.getRecordFields(root, rec);
if (recordFields.isEmpty()) {
// recordFields should not be an empty map
issue.compareAndSet(null, Pair.of(MissingIndexReason.EMPTY_RECORDS_FIELDS, null));
return AsyncUtil.DONE;
}
if (recordFields.size() == 1) {
// A single grouping key, simple check.
return checkMissingIndexKey(rec, recordFields.keySet().iterator().next(), issue);
}

// Here: more than one grouping key, declare an issue if at least one of them is missing
return AsyncUtil.whenAll( recordFields.keySet().stream().map(groupingKey ->
checkMissingIndexKey(rec, groupingKey, issue)
).collect(Collectors.toList()))
.thenApply(ignore -> null);
}

private CompletableFuture<Void> checkMissingIndexKey(FDBIndexableRecord<Message> rec, Tuple groupingKey, AtomicReference<Pair<MissingIndexReason, Tuple>> issue) {
// Get partition (if applicable) and detect missing index entry (if any)
if (!partitioner.isPartitioningEnabled()) {
if (isMissingIndexKey(rec, null, groupingKey)) {
issue.compareAndSet(null, Pair.of(MissingIndexReason.NOT_IN_PK_SEGMENT_INDEX, null));
}
return AsyncUtil.DONE;
}
return partitioner.tryGetPartitionInfo(rec, groupingKey).thenApply(partitionInfo -> {
if (partitionInfo == null) {
issue.compareAndSet(null, Pair.of(MissingIndexReason.NOT_IN_PARTITION, groupingKey));
} else if (isMissingIndexKey(rec, partitionInfo.getId(), groupingKey)) {
issue.compareAndSet(null, Pair.of(MissingIndexReason.NOT_IN_PK_SEGMENT_INDEX, groupingKey));
}
return null;
});
}

@SuppressWarnings("PMD.CloseResource")
private boolean isMissingIndexKey(FDBIndexableRecord<Message> rec, Integer partitionId, Tuple groupingKey) {
@Nullable final LucenePrimaryKeySegmentIndex segmentIndex = directoryManager.getDirectory(groupingKey, partitionId).getPrimaryKeySegmentIndex();
if (segmentIndex == null) {
// Here: internal error, getIndexScrubbingTools should have indicated that scrub missing is not supported.
throw new IllegalStateException("LucneIndexScrubbingToolsMissing without a LucenePrimaryKeySegmentIndex");
}

try {
// TODO: this is called to initialize the writer, else we get an exception at getDirectoryReader. Should it really be done for a RO operation?
directoryManager.getIndexWriter(groupingKey, partitionId, indexAnalyzerSelector.provideIndexAnalyzer(""));
} catch (IOException e) {
throw LuceneExceptions.toRecordCoreException("failed getIndexWriter", e);
}
try {
DirectoryReader directoryReader = directoryManager.getDirectoryReader(groupingKey, partitionId);
final LucenePrimaryKeySegmentIndex.DocumentIndexEntry documentIndexEntry = segmentIndex.findDocument(directoryReader, rec.getPrimaryKey());
if (documentIndexEntry == null) {
// Here: the document had not been found in the PK segment index
return true;
}
} catch (IOException ex) {
// Here: an unexpected exception. Unwrap and rethrow.
throw LuceneExceptions.toRecordCoreException("Error while finding document", ex);
}
return false;
}

}
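Putting the pieces together, the new tools plug into the scrubbing flow roughly as sketched below. The sketch only uses methods visible in this diff (getIndexScrubbingTools, presetCommonParams, handleOneItem); the driver loop, the cursor setup, and the unchecked cast are illustrative assumptions, since in practice the existing index scrubber machinery drives these calls.

```java
// Sketch only: how the Lucene missing-entry tools would be exercised directly.
import com.apple.foundationdb.record.RecordCursor;
import com.apple.foundationdb.record.RecordCursorResult;
import com.apple.foundationdb.record.ScanProperties;
import com.apple.foundationdb.record.metadata.Index;
import com.apple.foundationdb.record.metadata.RecordType;
import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStore;
import com.apple.foundationdb.record.provider.foundationdb.FDBStoredRecord;
import com.apple.foundationdb.record.provider.foundationdb.IndexScrubbingTools;
import com.google.protobuf.Message;

import java.util.Collection;

final class LuceneMissingScrubSketch {
    private LuceneMissingScrubSketch() {
    }

    @SuppressWarnings("unchecked")
    static void scrubMissing(FDBRecordStore store, Index index, Collection<RecordType> recordTypes) {
        final IndexScrubbingTools<FDBStoredRecord<Message>> tools =
                (IndexScrubbingTools<FDBStoredRecord<Message>>) store.getIndexMaintainer(index)
                        .getIndexScrubbingTools(IndexScrubbingTools.ScrubbingType.MISSING);
        if (tools == null) {
            return; // unsupported for this index, e.g. no Lucene primary key segment index
        }
        // Repair is forced off by LuceneIndexScrubbingToolsMissing; isSynthetic depends on the index.
        tools.presetCommonParams(index, false, false, recordTypes);
        try (RecordCursor<FDBStoredRecord<Message>> cursor =
                     store.scanRecords(null, ScanProperties.FORWARD_SCAN)) {
            RecordCursorResult<FDBStoredRecord<Message>> result = cursor.getNext();
            while (result.hasNext()) {
                final IndexScrubbingTools.Issue issue = tools.handleOneItem(store, result).join();
                if (issue != null) {
                    // A record that should have been indexed is not in the PK segment index.
                }
                result = cursor.getNext();
            }
        }
    }
}
```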
@@ -24,7 +24,6 @@
import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStore;
import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStoreTestBase;
import com.apple.foundationdb.record.provider.foundationdb.IndexOperationResult;
import com.apple.foundationdb.record.provider.foundationdb.OnlineIndexer;
import com.apple.foundationdb.tuple.Tuple;
import com.google.protobuf.ByteString;
import org.hamcrest.Matchers;
@@ -37,7 +36,6 @@
import javax.annotation.Nullable;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@@ -102,7 +100,9 @@ void getMetadataPartitioned(boolean justPartitionInfo, boolean isGrouped) {
dataModel.saveRecords(10, context, i / 3);
commit(context);
}
explicitMergeIndex(dataModel);
try (final FDBRecordContext context = openContext()) {
dataModel.explicitMergeIndex(context, timer);
}
}

final Set<Tuple> groupingKeys = isGrouped ? dataModel.groupingKeys() : Set.of(Tuple.from());
@@ -146,7 +146,9 @@ void getMetadataAfterDelete() {
dataModel.saveRecords(10, context, i / 3);
commit(context);
}
explicitMergeIndex(dataModel);
try (final FDBRecordContext context = openContext()) {
dataModel.explicitMergeIndex(context, timer);
}
}

final Tuple groupingKey = Tuple.from();
@@ -225,17 +227,4 @@ private static void assertLessThan(final ByteString lesserOne, final ByteString
private static int segmentCountToFileCount(final int segmentCount) {
return segmentCount * 4 + 1;
}

private void explicitMergeIndex(LuceneIndexTestDataModel dataModel) {
try (FDBRecordContext context = openContext()) {
FDBRecordStore recordStore = Objects.requireNonNull(dataModel.schemaSetup.apply(context));
try (OnlineIndexer indexBuilder = OnlineIndexer.newBuilder()
.setRecordStore(recordStore)
.setIndex(dataModel.index)
.setTimer(timer)
.build()) {
indexBuilder.mergeIndex();
}
}
}
}