Skip to content

Commit 5254499

Browse files
authored
Resolve #3008: Support Lucene index scrubbing (#3009)
* Resolve #3008: Support Lucene index scrubbing To validate Lucene index validity, support "Report Only" scrubbing for: Dangling Lucene index entries: Iterate "all entries" (similar to LuceneScanAllEntriesTest), validate that all pointers lead to existing records. Missing Lucene index entries: iterate all records, validate that their primary keys are represented in the “primary key to Lucene segment” map, and that the Lucene segment exists * scrub missing * Add negative test (scrub index with missing entries) * elaborate shouldFailWithoutException * Suppress Warnings "PMD.CloseResource" * Convert pseudo exception to a flag * Update release notes (after rebase..) * Temporarily remove LuceneIndexScrubbingToolsDangling (TBD in another PR) * Implement Scott's requested changes * Apply Scott's requested changes (Still missing - check index filter) * Adjust test and comments * tests: Reduce high water mark * Adjust dataModel.saveRecords * Add some variation in records dates + delete samples * Delete records before explicit merge * some cleanup * Apply Scott's requested changes
1 parent 11e4826 commit 5254499

File tree

11 files changed

+568
-20
lines changed

11 files changed

+568
-20
lines changed

docs/ReleaseNotes.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ Users performing online updates are encouraged to update from [4.0.559.4](#40559
3333
* **Feature** Add enum column support to relational server [(Issue #3073)](https://github.com/FoundationDB/fdb-record-layer/issues/3073)
3434
* **Feature** Feature 3 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)
3535
* **Feature** Feature 4 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)
36-
* **Feature** Feature 5 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)
36+
* **Feature** Support Lucene index scrubbing [(Issue #3008)](https://github.com/FoundationDB/fdb-record-layer/issues/3008)
3737
* **Breaking change** Change 1 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)
3838
* **Breaking change** Change 2 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)
3939
* **Breaking change** Change 3 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)

fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/logging/LogMessageKeys.java

+1
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ public enum LogMessageKeys {
126126
PRIMARY_INDEX,
127127
VALUE_KEY,
128128
PRIMARY_KEY,
129+
GROUPING_KEY,
129130
VALUE,
130131
INDEX_OPERATION("operation"),
131132
INITIAL_PREFIX,

fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/IndexScrubbing.java

-1
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,6 @@ private CompletableFuture<Boolean> indexScrubRangeOnly(@Nonnull FDBRecordStore s
142142
throw new UnsupportedOperationException("This index does not support scrubbing type " + scrubbingType);
143143
}
144144

145-
146145
return indexScrubRangeOnly(store, recordsScanned, index, tools, maintainer.isIdempotent());
147146
}
148147

fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/indexes/ValueIndexScrubbingToolsMissing.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ private CompletableFuture<List<Tuple>> getMissingIndexKeys(FDBRecordStore store,
130130
}
131131

132132
@Nonnull
133-
private RecordCursor<IndexEntry> indexEntriesForRecord(@Nonnull FDBRecordStore store, @Nonnull FDBStoredRecord<Message> rec) {
133+
protected RecordCursor<IndexEntry> indexEntriesForRecord(@Nonnull FDBRecordStore store, @Nonnull FDBStoredRecord<Message> rec) {
134134
final IndexMaintainer maintainer = store.getIndexMaintainer(index);
135135
if (isSynthetic) {
136136
final RecordQueryPlanner queryPlanner =

fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneIndexMaintainer.java

+18
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
import com.apple.foundationdb.record.provider.foundationdb.IndexOperation;
5959
import com.apple.foundationdb.record.provider.foundationdb.IndexOperationResult;
6060
import com.apple.foundationdb.record.provider.foundationdb.IndexScanBounds;
61+
import com.apple.foundationdb.record.provider.foundationdb.IndexScrubbingTools;
6162
import com.apple.foundationdb.record.provider.foundationdb.indexes.InvalidIndexEntry;
6263
import com.apple.foundationdb.record.provider.foundationdb.indexes.StandardIndexMaintainer;
6364
import com.apple.foundationdb.record.query.QueryToKeyMatcher;
@@ -111,6 +112,7 @@
111112
public class LuceneIndexMaintainer extends StandardIndexMaintainer {
112113
private static final Logger LOG = LoggerFactory.getLogger(LuceneIndexMaintainer.class);
113114

115+
@Nonnull
114116
private final FDBDirectoryManager directoryManager;
115117
private final LuceneAnalyzerCombinationProvider indexAnalyzerSelector;
116118
private final LuceneAnalyzerCombinationProvider autoCompleteAnalyzerSelector;
@@ -750,4 +752,20 @@ private void logSerializationError(String format, Object ... arguments) {
750752
}
751753
}
752754
}
755+
756+
@Nullable
757+
@Override
758+
public IndexScrubbingTools<?> getIndexScrubbingTools(final IndexScrubbingTools.ScrubbingType type) {
759+
switch (type) {
760+
case MISSING:
761+
final Map<String, String> options = state.index.getOptions();
762+
if (Boolean.parseBoolean(options.get(LuceneIndexOptions.PRIMARY_KEY_SEGMENT_INDEX_ENABLED)) ||
763+
Boolean.parseBoolean(options.get(LuceneIndexOptions.PRIMARY_KEY_SEGMENT_INDEX_V2_ENABLED))) {
764+
return new LuceneIndexScrubbingToolsMissing(partitioner, directoryManager, indexAnalyzerSelector);
765+
}
766+
return null;
767+
default:
768+
return null;
769+
}
770+
}
753771
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
/*
2+
* LuceneIndexScrubbingToolsMissing.java
3+
*
4+
* This source file is part of the FoundationDB open source project
5+
*
6+
* Copyright 2015-2025 Apple Inc. and the FoundationDB project authors
7+
*
8+
* Licensed under the Apache License, Version 2.0 (the "License");
9+
* you may not use this file except in compliance with the License.
10+
* You may obtain a copy of the License at
11+
*
12+
* http://www.apache.org/licenses/LICENSE-2.0
13+
*
14+
* Unless required by applicable law or agreed to in writing, software
15+
* distributed under the License is distributed on an "AS IS" BASIS,
16+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17+
* See the License for the specific language governing permissions and
18+
* limitations under the License.
19+
*/
20+
21+
package com.apple.foundationdb.record.lucene;
22+
23+
import com.apple.foundationdb.async.AsyncUtil;
24+
import com.apple.foundationdb.record.RecordCursor;
25+
import com.apple.foundationdb.record.RecordCursorResult;
26+
import com.apple.foundationdb.record.logging.KeyValueLogMessage;
27+
import com.apple.foundationdb.record.logging.LogMessageKeys;
28+
import com.apple.foundationdb.record.lucene.directory.FDBDirectoryManager;
29+
import com.apple.foundationdb.record.metadata.Index;
30+
import com.apple.foundationdb.record.metadata.RecordType;
31+
import com.apple.foundationdb.record.metadata.expressions.KeyExpression;
32+
import com.apple.foundationdb.record.provider.foundationdb.FDBIndexableRecord;
33+
import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStore;
34+
import com.apple.foundationdb.record.provider.foundationdb.FDBStoreTimer;
35+
import com.apple.foundationdb.record.provider.foundationdb.FDBStoredRecord;
36+
import com.apple.foundationdb.record.provider.foundationdb.FDBSyntheticRecord;
37+
import com.apple.foundationdb.record.provider.foundationdb.indexes.ValueIndexScrubbingToolsMissing;
38+
import com.apple.foundationdb.record.query.plan.RecordQueryPlanner;
39+
import com.apple.foundationdb.record.query.plan.synthetic.SyntheticRecordFromStoredRecordPlan;
40+
import com.apple.foundationdb.record.query.plan.synthetic.SyntheticRecordPlanner;
41+
import com.apple.foundationdb.record.util.pair.Pair;
42+
import com.apple.foundationdb.tuple.Tuple;
43+
import com.google.protobuf.Message;
44+
import org.apache.lucene.index.DirectoryReader;
45+
46+
import javax.annotation.Nonnull;
47+
import javax.annotation.Nullable;
48+
import java.io.IOException;
49+
import java.util.Collection;
50+
import java.util.Collections;
51+
import java.util.List;
52+
import java.util.Map;
53+
import java.util.concurrent.CompletableFuture;
54+
import java.util.concurrent.atomic.AtomicReference;
55+
import java.util.stream.Collectors;
56+
57+
/**
58+
* Index Scrubbing Toolbox for a Lucene index maintainer. Scrub missing value index entries - i.e. detect record(s) that should
59+
* have been indexed, but cannot be found in the segment index.
60+
*/
61+
public class LuceneIndexScrubbingToolsMissing extends ValueIndexScrubbingToolsMissing {
62+
private Collection<RecordType> recordTypes = null;
63+
private Index index;
64+
private boolean isSynthetic;
65+
66+
@Nonnull
67+
private final LucenePartitioner partitioner;
68+
@Nonnull
69+
private final FDBDirectoryManager directoryManager;
70+
@Nonnull
71+
private final LuceneAnalyzerCombinationProvider indexAnalyzerSelector;
72+
73+
public LuceneIndexScrubbingToolsMissing(@Nonnull LucenePartitioner partitioner, @Nonnull FDBDirectoryManager directoryManager,
74+
@Nonnull LuceneAnalyzerCombinationProvider indexAnalyzerSelector) {
75+
this.partitioner = partitioner;
76+
this.directoryManager = directoryManager;
77+
this.indexAnalyzerSelector = indexAnalyzerSelector;
78+
}
79+
80+
81+
@Override
82+
public void presetCommonParams(Index index, boolean allowRepair, boolean isSynthetic, Collection<RecordType> types) {
83+
this.recordTypes = types;
84+
this.index = index;
85+
this.isSynthetic = isSynthetic;
86+
// call super, but force allowRepair as false
87+
super.presetCommonParams(index, false, isSynthetic, types);
88+
}
89+
90+
/**
91+
* Provide a lucene specific reason for detecting a "missing" index entry.
92+
*/
93+
public enum MissingIndexReason {
94+
NOT_IN_PARTITION,
95+
NOT_IN_PK_SEGMENT_INDEX,
96+
EMPTY_RECORDS_FIELDS,
97+
}
98+
99+
@Override
100+
@Nullable
101+
public CompletableFuture<Issue> handleOneItem(final FDBRecordStore store, final RecordCursorResult<FDBStoredRecord<Message>> result) {
102+
if (recordTypes == null || index == null) {
103+
throw new IllegalStateException("presetParams was not called appropriately for this scrubbing tool");
104+
}
105+
106+
final FDBStoredRecord<Message> rec = result.get();
107+
if (rec == null || !recordTypes.contains(rec.getRecordType())) {
108+
return CompletableFuture.completedFuture(null);
109+
}
110+
111+
return detectMissingIndexKeys(store, rec)
112+
.thenApply(missingIndexesKeys -> {
113+
if (missingIndexesKeys == null) {
114+
return null;
115+
}
116+
// Here: Oh, No! an index entry is missing!!
117+
// (Maybe) report an error
118+
return new Issue(
119+
KeyValueLogMessage.build("Scrubber: missing index entry",
120+
LogMessageKeys.KEY, rec.getPrimaryKey(),
121+
LogMessageKeys.GROUPING_KEY, missingIndexesKeys.getValue(),
122+
LogMessageKeys.REASON, missingIndexesKeys.getKey()),
123+
FDBStoreTimer.Counts.INDEX_SCRUBBER_MISSING_ENTRIES,
124+
null);
125+
});
126+
}
127+
128+
@SuppressWarnings("PMD.CloseResource")
129+
private CompletableFuture<Pair<MissingIndexReason, Tuple>> detectMissingIndexKeys(final FDBRecordStore store, FDBStoredRecord<Message> rec) {
130+
// Generate synthetic record (if applicable) and return the first detected missing (if any).
131+
final AtomicReference<Pair<MissingIndexReason, Tuple>> issue = new AtomicReference<>();
132+
133+
if (!isSynthetic) {
134+
return checkMissingIndexKey(rec, issue).thenApply(ignore -> issue.get());
135+
}
136+
final RecordQueryPlanner queryPlanner =
137+
new RecordQueryPlanner(store.getRecordMetaData(), store.getRecordStoreState().withWriteOnlyIndexes(Collections.singletonList(index.getName())));
138+
final SyntheticRecordPlanner syntheticPlanner = new SyntheticRecordPlanner(store, queryPlanner);
139+
SyntheticRecordFromStoredRecordPlan syntheticPlan = syntheticPlanner.forIndex(index);
140+
final RecordCursor<FDBSyntheticRecord> recordCursor = syntheticPlan.execute(store, rec);
141+
142+
return AsyncUtil.whenAll(
143+
recordCursor.asStream().map(syntheticRecord -> checkMissingIndexKey(syntheticRecord, issue))
144+
.collect(Collectors.toList()))
145+
.whenComplete((ret, e) -> recordCursor.close())
146+
.thenApply(ignore -> issue.get());
147+
148+
}
149+
150+
private CompletableFuture<Void> checkMissingIndexKey(FDBIndexableRecord<Message> rec,
151+
AtomicReference<Pair<MissingIndexReason, Tuple>> issue) {
152+
// Iterate grouping keys (if any) and detect missing index entry (if any)
153+
final KeyExpression root = index.getRootExpression();
154+
final Map<Tuple, List<LuceneDocumentFromRecord.DocumentField>> recordFields = LuceneDocumentFromRecord.getRecordFields(root, rec);
155+
if (recordFields.isEmpty()) {
156+
// recordFields should not be an empty map
157+
issue.compareAndSet(null, Pair.of(MissingIndexReason.EMPTY_RECORDS_FIELDS, null));
158+
return AsyncUtil.DONE;
159+
}
160+
if (recordFields.size() == 1) {
161+
// A single grouping key, simple check.
162+
return checkMissingIndexKey(rec, recordFields.keySet().iterator().next(), issue);
163+
}
164+
165+
// Here: more than one grouping key, declare an issue if at least one of them is missing
166+
return AsyncUtil.whenAll( recordFields.keySet().stream().map(groupingKey ->
167+
checkMissingIndexKey(rec, groupingKey, issue)
168+
).collect(Collectors.toList()))
169+
.thenApply(ignore -> null);
170+
}
171+
172+
private CompletableFuture<Void> checkMissingIndexKey(FDBIndexableRecord<Message> rec, Tuple groupingKey, AtomicReference<Pair<MissingIndexReason, Tuple>> issue) {
173+
// Get partition (if applicable) and detect missing index entry (if any)
174+
if (!partitioner.isPartitioningEnabled()) {
175+
if (isMissingIndexKey(rec, null, groupingKey)) {
176+
issue.compareAndSet(null, Pair.of(MissingIndexReason.NOT_IN_PK_SEGMENT_INDEX, null));
177+
}
178+
return AsyncUtil.DONE;
179+
}
180+
return partitioner.tryGetPartitionInfo(rec, groupingKey).thenApply(partitionInfo -> {
181+
if (partitionInfo == null) {
182+
issue.compareAndSet(null, Pair.of(MissingIndexReason.NOT_IN_PARTITION, groupingKey));
183+
} else if (isMissingIndexKey(rec, partitionInfo.getId(), groupingKey)) {
184+
issue.compareAndSet(null, Pair.of(MissingIndexReason.NOT_IN_PK_SEGMENT_INDEX, groupingKey));
185+
}
186+
return null;
187+
});
188+
}
189+
190+
@SuppressWarnings("PMD.CloseResource")
191+
private boolean isMissingIndexKey(FDBIndexableRecord<Message> rec, Integer partitionId, Tuple groupingKey) {
192+
@Nullable final LucenePrimaryKeySegmentIndex segmentIndex = directoryManager.getDirectory(groupingKey, partitionId).getPrimaryKeySegmentIndex();
193+
if (segmentIndex == null) {
194+
// Here: internal error, getIndexScrubbingTools should have indicated that scrub missing is not supported.
195+
throw new IllegalStateException("LucneIndexScrubbingToolsMissing without a LucenePrimaryKeySegmentIndex");
196+
}
197+
198+
try {
199+
// TODO: this is called to initialize the writer, else we get an exception at getDirectoryReader. Should it really be done for a RO operation?
200+
directoryManager.getIndexWriter(groupingKey, partitionId, indexAnalyzerSelector.provideIndexAnalyzer(""));
201+
} catch (IOException e) {
202+
throw LuceneExceptions.toRecordCoreException("failed getIndexWriter", e);
203+
}
204+
try {
205+
DirectoryReader directoryReader = directoryManager.getDirectoryReader(groupingKey, partitionId);
206+
final LucenePrimaryKeySegmentIndex.DocumentIndexEntry documentIndexEntry = segmentIndex.findDocument(directoryReader, rec.getPrimaryKey());
207+
if (documentIndexEntry == null) {
208+
// Here: the document had not been found in the PK segment index
209+
return true;
210+
}
211+
} catch (IOException ex) {
212+
// Here: an unexpected exception. Unwrap and rethrow.
213+
throw LuceneExceptions.toRecordCoreException("Error while finding document", ex);
214+
}
215+
return false;
216+
}
217+
218+
}

fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexGetMetadataInfoTest.java

+6-17
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStore;
2525
import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStoreTestBase;
2626
import com.apple.foundationdb.record.provider.foundationdb.IndexOperationResult;
27-
import com.apple.foundationdb.record.provider.foundationdb.OnlineIndexer;
2827
import com.apple.foundationdb.tuple.Tuple;
2928
import com.google.protobuf.ByteString;
3029
import org.hamcrest.Matchers;
@@ -37,7 +36,6 @@
3736
import javax.annotation.Nullable;
3837
import java.util.List;
3938
import java.util.Map;
40-
import java.util.Objects;
4139
import java.util.Set;
4240
import java.util.stream.Collectors;
4341
import java.util.stream.Stream;
@@ -102,7 +100,9 @@ void getMetadataPartitioned(boolean justPartitionInfo, boolean isGrouped) {
102100
dataModel.saveRecords(10, context, i / 3);
103101
commit(context);
104102
}
105-
explicitMergeIndex(dataModel);
103+
try (final FDBRecordContext context = openContext()) {
104+
dataModel.explicitMergeIndex(context, timer);
105+
}
106106
}
107107

108108
final Set<Tuple> groupingKeys = isGrouped ? dataModel.groupingKeys() : Set.of(Tuple.from());
@@ -146,7 +146,9 @@ void getMetadataAfterDelete() {
146146
dataModel.saveRecords(10, context, i / 3);
147147
commit(context);
148148
}
149-
explicitMergeIndex(dataModel);
149+
try (final FDBRecordContext context = openContext()) {
150+
dataModel.explicitMergeIndex(context, timer);
151+
}
150152
}
151153

152154
final Tuple groupingKey = Tuple.from();
@@ -225,17 +227,4 @@ private static void assertLessThan(final ByteString lesserOne, final ByteString
225227
private static int segmentCountToFileCount(final int segmentCount) {
226228
return segmentCount * 4 + 1;
227229
}
228-
229-
private void explicitMergeIndex(LuceneIndexTestDataModel dataModel) {
230-
try (FDBRecordContext context = openContext()) {
231-
FDBRecordStore recordStore = Objects.requireNonNull(dataModel.schemaSetup.apply(context));
232-
try (OnlineIndexer indexBuilder = OnlineIndexer.newBuilder()
233-
.setRecordStore(recordStore)
234-
.setIndex(dataModel.index)
235-
.setTimer(timer)
236-
.build()) {
237-
indexBuilder.mergeIndex();
238-
}
239-
}
240-
}
241230
}

0 commit comments

Comments
 (0)