Skip to content

Commit 9d67856

Browse files
sjudd authored and Convex, Inc. committed
Implement upload_new_segment for text indexes (#26017)
GitOrigin-RevId: b16a4838854541bbc6ab0389cfad685d5fbb8217
1 parent 54b4523 commit 9d67856

File tree

8 files changed: +68 -16 lines changed

8 files changed: +68 -16 lines changed

crates/database/src/text_index_worker/text_meta.rs

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,7 @@ use futures::{
3232
};
3333
use search::{
3434
build_new_segment,
35+
disk_index::upload_text_segment,
3536
PreviousTextSegments,
3637
TantivySearchIndexSchema,
3738
TextSegmentPaths,
@@ -166,11 +167,11 @@ impl SearchIndex for TextSearchIndex {
166167
}
167168

168169
async fn upload_new_segment<RT: Runtime>(
169-
_rt: &RT,
170-
_storage: Arc<dyn Storage>,
171-
_new_segment: Self::NewSegment,
170+
rt: &RT,
171+
storage: Arc<dyn Storage>,
172+
new_segment: Self::NewSegment,
172173
) -> anyhow::Result<Self::Segment> {
173-
anyhow::bail!("Not implemented")
174+
upload_text_segment(rt, storage, new_segment).await
174175
}
175176

176177
fn segment_id(segment: &Self::Segment) -> String {

crates/database/src/vector_index_worker/vector_meta.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ use futures::{
3030
TryStreamExt,
3131
};
3232
use search::{
33-
disk_index::upload_segment,
33+
disk_index::upload_vector_segment,
3434
fragmented_segment::MutableFragmentedSegmentMetadata,
3535
};
3636
use storage::Storage;
@@ -162,7 +162,7 @@ impl SearchIndex for VectorSearchIndex {
162162
storage: Arc<dyn Storage>,
163163
new_segment: Self::NewSegment,
164164
) -> anyhow::Result<Self::Segment> {
165-
upload_segment(rt, storage, new_segment).await
165+
upload_vector_segment(rt, storage, new_segment).await
166166
}
167167

168168
fn segment_id(segment: &Self::Segment) -> String {

crates/search/src/archive/cache.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,7 @@ fn is_immutable(search_file_type: SearchFileType) -> bool {
378378
// Text indexes do not appear to be read in readonly mode.
379379
SearchFileType::Text => false,
380380
SearchFileType::TextIdTracker => true,
381-
SearchFileType::TextDeletedBitset => true,
381+
SearchFileType::TextAliveBitset => true,
382382
SearchFileType::TextDeletedTerms => true,
383383
}
384384
}

crates/search/src/disk_index.rs

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,10 @@ use common::{
2323
FuturesAsyncWriteCompatExt,
2424
TokioAsyncWriteCompatExt,
2525
},
26-
bootstrap_model::index::vector_index::FragmentedVectorSegment,
26+
bootstrap_model::index::{
27+
text_index::FragmentedTextSegment,
28+
vector_index::FragmentedVectorSegment,
29+
},
2730
runtime::Runtime,
2831
types::ObjectKey,
2932
};
@@ -66,6 +69,7 @@ use crate::{
6669
},
6770
SearchFileType,
6871
TantivySearchIndexSchema,
72+
TextSegmentPaths,
6973
};
7074

7175
static SEARCH_INDEXING_MEMORY_ARENA_BYTES: LazyLock<usize> =
@@ -162,7 +166,54 @@ pub async fn download_single_file_zip<P: AsRef<Path>>(
162166
Ok(())
163167
}
164168

165-
pub async fn upload_segment<RT: Runtime>(
169+
pub async fn upload_text_segment<RT: Runtime>(
170+
rt: &RT,
171+
storage: Arc<dyn Storage>,
172+
new_segment: TextSegmentPaths,
173+
) -> anyhow::Result<FragmentedTextSegment> {
174+
let TextSegmentPaths {
175+
index_path,
176+
id_tracker_path,
177+
alive_bit_set_path,
178+
deleted_terms_path,
179+
} = new_segment;
180+
let upload_index =
181+
upload_index_archive_from_path(index_path, storage.clone(), SearchFileType::Text);
182+
let upload_id_tracker = upload_single_file_from_path(
183+
id_tracker_path,
184+
storage.clone(),
185+
SearchFileType::TextIdTracker,
186+
);
187+
let upload_bitset = upload_single_file_from_path(
188+
alive_bit_set_path,
189+
storage.clone(),
190+
SearchFileType::TextAliveBitset,
191+
);
192+
let upload_deleted_terms = upload_single_file_from_path(
193+
deleted_terms_path,
194+
storage.clone(),
195+
SearchFileType::TextDeletedTerms,
196+
);
197+
let result = futures::try_join!(
198+
upload_index,
199+
upload_id_tracker,
200+
upload_bitset,
201+
upload_deleted_terms
202+
)?;
203+
let (segment_key, id_tracker_key, alive_bitset_key, deleted_terms_table_key) = result;
204+
Ok(FragmentedTextSegment {
205+
segment_key,
206+
id_tracker_key,
207+
deleted_terms_table_key,
208+
alive_bitset_key,
209+
// TODO(sam): Wrap TextSegmentPaths in another struct with some metadata and pass it through
210+
// here.
211+
num_indexed_documents: 0,
212+
id: rt.new_uuid_v4().to_string(),
213+
})
214+
}
215+
216+
pub async fn upload_vector_segment<RT: Runtime>(
166217
rt: &RT,
167218
storage: Arc<dyn Storage>,
168219
new_segment: VectorDiskSegmentValues,

crates/search/src/fragmented_segment.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,8 @@ use crate::{
4646
archive::cache::ArchiveCacheManager,
4747
disk_index::{
4848
download_single_file_zip,
49-
upload_segment,
5049
upload_single_file,
50+
upload_vector_segment,
5151
},
5252
metrics::{
5353
log_compacted_segment_size_bytes,
@@ -242,7 +242,7 @@ impl<RT: Runtime> FragmentedSegmentCompactor<RT> {
242242
})
243243
.await??;
244244

245-
let result = upload_segment(&self.rt, search_storage, new_segment).await?;
245+
let result = upload_vector_segment(&self.rt, search_storage, new_segment).await?;
246246
// Ensure we own the temp dir through the entire upload
247247
drop(tmp_dir);
248248
tracing::debug!("Compacted {} segments", total_segments);

crates/search/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -892,7 +892,7 @@ pub enum SearchFileType {
892892
VectorIdTracker,
893893
Text,
894894
TextIdTracker,
895-
TextDeletedBitset,
895+
TextAliveBitset,
896896
TextDeletedTerms,
897897
}
898898

crates/search/src/metrics.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -426,7 +426,7 @@ impl SearchFileType {
426426
SearchFileType::VectorIdTracker => "vector_id_tracker",
427427
SearchFileType::Text => "text",
428428
SearchFileType::TextIdTracker => "text_id_tracker",
429-
SearchFileType::TextDeletedBitset => "text_deleted_bitset",
429+
SearchFileType::TextAliveBitset => "text_alive_bitset",
430430
SearchFileType::TextDeletedTerms => "text_deleted_terms",
431431
SearchFileType::FragmentedVectorSegment => "fragmented_vector_segment",
432432
};

crates/search/src/searcher/searcher.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -373,7 +373,7 @@ impl<RT: Runtime> Searcher for SearcherImpl<RT> {
373373
self.archive_cache.get(
374374
search_storage.clone(),
375375
&storage_keys.segment,
376-
SearchFileType::TextDeletedBitset
376+
SearchFileType::TextAliveBitset
377377
),
378378
self.archive_cache.get(
379379
search_storage.clone(),
@@ -409,7 +409,7 @@ impl<RT: Runtime> Searcher for SearcherImpl<RT> {
409409
self.archive_cache.get(
410410
search_storage.clone(),
411411
&storage_keys.segment,
412-
SearchFileType::TextDeletedBitset
412+
SearchFileType::TextAliveBitset
413413
),
414414
self.archive_cache.get(
415415
search_storage.clone(),
@@ -450,7 +450,7 @@ impl<RT: Runtime> Searcher for SearcherImpl<RT> {
450450
self.archive_cache.get(
451451
search_storage.clone(),
452452
&storage_keys.segment,
453-
SearchFileType::TextDeletedBitset
453+
SearchFileType::TextAliveBitset
454454
),
455455
self.archive_cache.get(
456456
search_storage.clone(),

0 commit comments

Comments
 (0)