Skip to content

Commit 4b17834

Browse files
committed
Implement Scott's requested changes
1 parent 0e049d9 commit 4b17834

File tree

8 files changed

+215
-69
lines changed

8 files changed

+215
-69
lines changed

docs/ReleaseNotes.md

+1-1
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ Our API stability annotations have been updated to reflect greater API instabili
3333
* **Feature** Feature 2 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)
3434
* **Feature** Feature 3 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)
3535
* **Feature** Feature 4 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)
36-
* **Feature** Support Lucene index scrubbing [(Issue #3008)](https://github.com/FoundationDB/fdb-record-layer/issues/3008)
36+
* **Feature** Support Lucene index scrubbing of missing entries [(Issue #3008)](https://github.com/FoundationDB/fdb-record-layer/issues/3008)
3737
* **Breaking change** Change 1 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)
3838
* **Breaking change** Change 2 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)
3939
* **Breaking change** Change 3 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)

fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/indexes/ValueIndexScrubbingToolsMissing.java

+1-1
Original file line number | Diff line number | Diff line change
@@ -130,7 +130,7 @@ private CompletableFuture<List<Tuple>> getMissingIndexKeys(FDBRecordStore store,
130130
}
131131

132132
@Nonnull
133-
private RecordCursor<IndexEntry> indexEntriesForRecord(@Nonnull FDBRecordStore store, @Nonnull FDBStoredRecord<Message> rec) {
133+
protected RecordCursor<IndexEntry> indexEntriesForRecord(@Nonnull FDBRecordStore store, @Nonnull FDBStoredRecord<Message> rec) {
134134
final IndexMaintainer maintainer = store.getIndexMaintainer(index);
135135
if (isSynthetic) {
136136
final RecordQueryPlanner queryPlanner =

fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneIndexScrubbingToolsMissing.java

+64-57
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
*
44
* This source file is part of the FoundationDB open source project
55
*
6-
* Copyright 2015-2024 Apple Inc. and the FoundationDB project authors
6+
* Copyright 2015-2025 Apple Inc. and the FoundationDB project authors
77
*
88
* Licensed under the Apache License, Version 2.0 (the "License");
99
* you may not use this file except in compliance with the License.
@@ -21,22 +21,23 @@
2121
package com.apple.foundationdb.record.lucene;
2222

2323
import com.apple.foundationdb.async.AsyncUtil;
24-
import com.apple.foundationdb.record.ExecuteProperties;
25-
import com.apple.foundationdb.record.IsolationLevel;
2624
import com.apple.foundationdb.record.RecordCursor;
2725
import com.apple.foundationdb.record.RecordCursorResult;
28-
import com.apple.foundationdb.record.ScanProperties;
29-
import com.apple.foundationdb.record.TupleRange;
3026
import com.apple.foundationdb.record.logging.KeyValueLogMessage;
3127
import com.apple.foundationdb.record.logging.LogMessageKeys;
3228
import com.apple.foundationdb.record.lucene.directory.FDBDirectoryManager;
3329
import com.apple.foundationdb.record.metadata.Index;
3430
import com.apple.foundationdb.record.metadata.RecordType;
3531
import com.apple.foundationdb.record.metadata.expressions.KeyExpression;
32+
import com.apple.foundationdb.record.provider.foundationdb.FDBIndexableRecord;
3633
import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStore;
3734
import com.apple.foundationdb.record.provider.foundationdb.FDBStoreTimer;
3835
import com.apple.foundationdb.record.provider.foundationdb.FDBStoredRecord;
39-
import com.apple.foundationdb.record.provider.foundationdb.IndexScrubbingTools;
36+
import com.apple.foundationdb.record.provider.foundationdb.FDBSyntheticRecord;
37+
import com.apple.foundationdb.record.provider.foundationdb.indexes.ValueIndexScrubbingToolsMissing;
38+
import com.apple.foundationdb.record.query.plan.RecordQueryPlanner;
39+
import com.apple.foundationdb.record.query.plan.synthetic.SyntheticRecordFromStoredRecordPlan;
40+
import com.apple.foundationdb.record.query.plan.synthetic.SyntheticRecordPlanner;
4041
import com.apple.foundationdb.record.util.pair.Pair;
4142
import com.apple.foundationdb.tuple.Tuple;
4243
import com.google.protobuf.Message;
@@ -47,20 +48,20 @@
4748
import java.io.IOException;
4849
import java.util.Collection;
4950
import java.util.Collections;
50-
import java.util.HashMap;
5151
import java.util.List;
5252
import java.util.Map;
53-
import java.util.Optional;
5453
import java.util.concurrent.CompletableFuture;
54+
import java.util.concurrent.atomic.AtomicReference;
5555
import java.util.stream.Collectors;
5656

5757
/**
5858
* Index Scrubbing Toolbox for a Lucene index maintainer. Scrub missing value index entries - i.e. detect record(s) that should
59-
* cannot be found in the segment index.
59+
* have been indexed, but cannot be found in the segment index.
6060
*/
61-
public class LuceneIndexScrubbingToolsMissing implements IndexScrubbingTools<FDBStoredRecord<Message>> {
61+
public class LuceneIndexScrubbingToolsMissing extends ValueIndexScrubbingToolsMissing {
6262
private Collection<RecordType> recordTypes = null;
6363
private Index index;
64+
private boolean isSynthetic;
6465

6566
@Nonnull
6667
private final LucenePartitioner partitioner;
@@ -81,23 +82,9 @@ public LuceneIndexScrubbingToolsMissing(@Nonnull LucenePartitioner partitioner,
8182
public void presetCommonParams(Index index, boolean allowRepair, boolean isSynthetic, Collection<RecordType> types) {
8283
this.recordTypes = types;
8384
this.index = index;
84-
}
85-
86-
@Override
87-
public RecordCursor<FDBStoredRecord<Message>> getCursor(final TupleRange range, final FDBRecordStore store, final int limit) {
88-
final IsolationLevel isolationLevel = IsolationLevel.SNAPSHOT;
89-
final ExecuteProperties.Builder executeProperties = ExecuteProperties.newBuilder()
90-
.setIsolationLevel(isolationLevel)
91-
.setReturnedRowLimit(limit);
92-
93-
final ScanProperties scanProperties = new ScanProperties(executeProperties.build(), false);
94-
return store.scanRecords(range, null, scanProperties);
95-
}
96-
97-
@Override
98-
public Tuple getKeyFromCursorResult(final RecordCursorResult<FDBStoredRecord<Message>> result) {
99-
final FDBStoredRecord<Message> storedRecord = result.get();
100-
return storedRecord == null ? null : storedRecord.getPrimaryKey();
85+
this.isSynthetic = isSynthetic;
86+
// call super, but force allowRepair as false
87+
super.presetCommonParams(index, false, isSynthetic, types);
10188
}
10289

10390
/**
@@ -110,6 +97,7 @@ public enum MissingIndexReason {
11097
}
11198

11299
@Override
100+
@Nullable
113101
public CompletableFuture<Issue> handleOneItem(final FDBRecordStore store, final RecordCursorResult<FDBStoredRecord<Message>> result) {
114102
if (recordTypes == null || index == null) {
115103
throw new IllegalStateException("presetParams was not called appropriately for this scrubbing tool");
@@ -120,12 +108,12 @@ public CompletableFuture<Issue> handleOneItem(final FDBRecordStore store, final
120108
return CompletableFuture.completedFuture(null);
121109
}
122110

123-
return detectMissingIndexKeys(rec)
111+
return detectMissingIndexKeys(store, rec)
124112
.thenApply(missingIndexesKeys -> {
125113
if (missingIndexesKeys == null) {
126114
return null;
127115
}
128-
// Here: Oh, No! the index is missing!!
116+
// Here: Oh, No! an index entry is missing!!
129117
// (Maybe) report an error
130118
return new Issue(
131119
KeyValueLogMessage.build("Scrubber: missing index entry",
@@ -137,59 +125,78 @@ public CompletableFuture<Issue> handleOneItem(final FDBRecordStore store, final
137125
});
138126
}
139127

140-
public CompletableFuture<Pair<MissingIndexReason, Tuple>> detectMissingIndexKeys(FDBStoredRecord<Message> rec) {
141-
// return the first missing (if any).
128+
@SuppressWarnings("PMD.CloseResource")
129+
private CompletableFuture<Pair<MissingIndexReason, Tuple>> detectMissingIndexKeys(final FDBRecordStore store, FDBStoredRecord<Message> rec) {
130+
// Generate synthetic record (if applicable) and return the first detected missing (if any).
131+
final AtomicReference<Pair<MissingIndexReason, Tuple>> issue = new AtomicReference<>();
132+
133+
if (!isSynthetic) {
134+
return checkMissingIndexKey(rec, issue).thenApply(ignore -> issue.get());
135+
}
136+
final RecordQueryPlanner queryPlanner =
137+
new RecordQueryPlanner(store.getRecordMetaData(), store.getRecordStoreState().withWriteOnlyIndexes(Collections.singletonList(index.getName())));
138+
final SyntheticRecordPlanner syntheticPlanner = new SyntheticRecordPlanner(store, queryPlanner);
139+
SyntheticRecordFromStoredRecordPlan syntheticPlan = syntheticPlanner.forIndex(index);
140+
final RecordCursor<FDBSyntheticRecord> recordCursor = syntheticPlan.execute(store, rec);
141+
142+
return AsyncUtil.whenAll(
143+
recordCursor.asStream().map(syntheticRecord -> checkMissingIndexKey(syntheticRecord, issue))
144+
.collect(Collectors.toList()))
145+
.whenComplete((ret, e) -> recordCursor.close())
146+
.thenApply(ignore -> issue.get());
147+
148+
}
149+
150+
private CompletableFuture<Void> checkMissingIndexKey(FDBIndexableRecord<Message> rec,
151+
AtomicReference<Pair<MissingIndexReason, Tuple>> issue) {
152+
// Iterate grouping keys (if any) and detect missing index entry (if any)
142153
final KeyExpression root = index.getRootExpression();
143154
final Map<Tuple, List<LuceneDocumentFromRecord.DocumentField>> recordFields = LuceneDocumentFromRecord.getRecordFields(root, rec);
144155
if (recordFields.isEmpty()) {
145-
// Could recordFields be an empty map?
146-
return CompletableFuture.completedFuture(Pair.of(MissingIndexReason.EMPTY_RECORDS_FIELDS, null));
156+
// recordFields should not be an empty map
157+
issue.compareAndSet(null, Pair.of(MissingIndexReason.EMPTY_RECORDS_FIELDS, null));
158+
return AsyncUtil.DONE;
147159
}
148160
if (recordFields.size() == 1) {
149-
// A single grouping key
150-
return checkMissingIndexKey(rec, recordFields.keySet().stream().findFirst().get());
161+
// A single grouping key, simple check.
162+
return checkMissingIndexKey(rec, recordFields.keySet().stream().iterator().next(), issue);
151163
}
152164

153-
// Here: more than one grouping key
154-
final Map<Tuple, MissingIndexReason> keys = Collections.synchronizedMap(new HashMap<>());
165+
// Here: more than one grouping key, declare an issue if at least one of them is missing
155166
return AsyncUtil.whenAll( recordFields.keySet().stream().map(groupingKey ->
156-
checkMissingIndexKey(rec, groupingKey)
157-
.thenApply(missing -> keys.put(missing.getValue(), missing.getKey()))
167+
checkMissingIndexKey(rec, groupingKey, issue)
158168
).collect(Collectors.toList()))
159-
.thenApply(ignore -> {
160-
final Optional<Map.Entry<Tuple, MissingIndexReason>> first = keys.entrySet().stream().findFirst();
161-
return first.map(tupleStringEntry -> Pair.of(tupleStringEntry.getValue(), tupleStringEntry.getKey())).orElse(null);
162-
});
169+
.thenApply(ignore -> null);
163170
}
164171

165-
private CompletableFuture<Pair<MissingIndexReason, Tuple>> checkMissingIndexKey(FDBStoredRecord<Message> rec, Tuple groupingKey) {
172+
private CompletableFuture<Void> checkMissingIndexKey(FDBIndexableRecord<Message> rec, Tuple groupingKey, AtomicReference<Pair<MissingIndexReason, Tuple>> issue) {
173+
// Get partition (if applicable) and detect missing index entry (if any)
166174
if (!partitioner.isPartitioningEnabled()) {
167-
return CompletableFuture.completedFuture(
168-
isMissingIndexKey(rec, null, groupingKey) ?
169-
Pair.of(MissingIndexReason.NOT_IN_PK_SEGMENT_INDEX, null) :
170-
null);
175+
if (isMissingIndexKey(rec, null, groupingKey)) {
176+
issue.compareAndSet(null, Pair.of(MissingIndexReason.NOT_IN_PK_SEGMENT_INDEX, null));
177+
}
178+
return AsyncUtil.DONE;
171179
}
172180
return partitioner.tryGetPartitionInfo(rec, groupingKey).thenApply(partitionInfo -> {
173181
if (partitionInfo == null) {
174-
return Pair.of(MissingIndexReason.NOT_IN_PARTITION, groupingKey);
175-
}
176-
if (isMissingIndexKey(rec, partitionInfo.getId(), groupingKey)) {
177-
return Pair.of(MissingIndexReason.NOT_IN_PK_SEGMENT_INDEX, groupingKey);
182+
issue.compareAndSet(null, Pair.of(MissingIndexReason.NOT_IN_PARTITION, groupingKey));
183+
} else if (isMissingIndexKey(rec, partitionInfo.getId(), groupingKey)) {
184+
issue.compareAndSet(null, Pair.of(MissingIndexReason.NOT_IN_PK_SEGMENT_INDEX, groupingKey));
178185
}
179186
return null;
180187
});
181188
}
182189

183190
@SuppressWarnings("PMD.CloseResource")
184-
private boolean isMissingIndexKey(FDBStoredRecord<Message> rec, Integer partitionId, Tuple groupingKey) {
191+
private boolean isMissingIndexKey(FDBIndexableRecord<Message> rec, Integer partitionId, Tuple groupingKey) {
185192
@Nullable final LucenePrimaryKeySegmentIndex segmentIndex = directoryManager.getDirectory(groupingKey, partitionId).getPrimaryKeySegmentIndex();
186193
if (segmentIndex == null) {
187-
// Here: iternal error, getIndexScrubbingTools should have indicated that scrub missing is not supported.
188-
throw new IllegalStateException("This scrubber should not have been used");
194+
// Here: internal error, getIndexScrubbingTools should have indicated that scrub missing is not supported.
195+
throw new IllegalStateException("LucneIndexScrubbingToolsMissing without a LucenePrimaryKeySegmentIndex");
189196
}
190197

191198
try {
192-
// TODO: this is called to initilize the writer, else we get an exception at getDirectoryReader. Should it really be done for a RO operation?
199+
// TODO: this is called to initialize the writer, else we get an exception at getDirectoryReader. Should it really be done for a RO operation?
193200
directoryManager.getIndexWriter(groupingKey, partitionId, indexAnalyzerSelector.provideIndexAnalyzer(""));
194201
} catch (IOException e) {
195202
throw LuceneExceptions.toRecordCoreException("failed getIndexWriter", e);
@@ -202,7 +209,7 @@ private boolean isMissingIndexKey(FDBStoredRecord<Message> rec, Integer partitio
202209
return true;
203210
}
204211
} catch (IOException ex) {
205-
// Here: probably an fdb exception. Unwrap and rethrow.
212+
// Here: an unexpected exception. Unwrap and rethrow.
206213
throw LuceneExceptions.toRecordCoreException("Error while finding document", ex);
207214
}
208215
return false;

fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/codec/PrimaryKeyAndStoredFieldsWriter.java

+1-1
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,7 @@ public void finishDocument() throws IOException {
5656
public void writeField(FieldInfo info, IndexableField field) throws IOException {
5757
super.writeField(info, field);
5858
try {
59-
if (LuceneIndexMaintainer.PRIMARY_KEY_FIELD_NAME.equals(info.name) && lucenePrimaryKeySegmentIndex != null) {
59+
if (LuceneIndexMaintainer.PRIMARY_KEY_FIELD_NAME.equals(info.name)) {
6060
final byte[] primaryKey = field.binaryValue().bytes;
6161
lucenePrimaryKeySegmentIndex.addOrDeletePrimaryKeyEntry(primaryKey, segmentId, documentId, true, info.name);
6262
// TODO we store this twice, but we'll probably want to optimize and only store this once

0 commit comments

Comments
 (0)