Skip to content

Commit b28ceff

Browse files
sjudd authored and Convex, Inc. committed
Extract generic types for needs_backfill in flusher (#25417)
GitOrigin-RevId: 9f426ae02f5596d4c092d4270c999bd737c4fd6d
1 parent 6091975 commit b28ceff

File tree

6 files changed

+265
-71
lines changed

6 files changed

+265
-71
lines changed

crates/common/src/bootstrap_model/index/search_index/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ pub use self::{
88
SerializedDeveloperSearchIndexConfig,
99
},
1010
index_snapshot::{
11+
FragmentedSearchSegment,
1112
SearchIndexSnapshot,
1213
SearchIndexSnapshotData,
1314
SearchSnapshotVersion,
Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
use std::collections::BTreeMap;
2+
3+
use common::{
4+
bootstrap_model::index::{
5+
search_index::{
6+
DeveloperSearchIndexConfig,
7+
FragmentedSearchSegment,
8+
SearchIndexSnapshot,
9+
SearchIndexSnapshotData,
10+
SearchIndexState,
11+
},
12+
vector_index::{
13+
DeveloperVectorIndexConfig,
14+
FragmentedVectorSegment,
15+
VectorIndexBackfillState,
16+
VectorIndexSnapshot,
17+
VectorIndexSnapshotData,
18+
VectorIndexState,
19+
},
20+
IndexConfig,
21+
},
22+
types::IndexId,
23+
};
24+
use sync_types::Timestamp;
25+
use value::InternalId;
26+
27+
use crate::Snapshot;
28+
29+
pub trait SearchIndexConfigParser {
30+
type IndexType: SearchIndex;
31+
32+
/// Returns the generalized `SearchIndexConfig` if it matches the type of
33+
/// the parser (e.g. Text vs Vector) and `None` otherwise.
34+
fn get_config(config: IndexConfig) -> Option<SearchIndexConfig<Self::IndexType>>;
35+
}
36+
37+
pub struct VectorIndexConfigParser;
38+
39+
impl SearchIndexConfigParser for VectorIndexConfigParser {
40+
type IndexType = VectorSearchIndex;
41+
42+
fn get_config(config: IndexConfig) -> Option<SearchIndexConfig<Self::IndexType>> {
43+
let IndexConfig::Vector {
44+
on_disk_state,
45+
developer_config,
46+
} = config
47+
else {
48+
return None;
49+
};
50+
Some(SearchIndexConfig {
51+
developer_config,
52+
on_disk_state: match on_disk_state {
53+
VectorIndexState::Backfilling(backfill_state) => {
54+
SearchOnDiskState::Backfilling(backfill_state.into())
55+
},
56+
VectorIndexState::Backfilled(snapshot) => {
57+
SearchOnDiskState::Backfilled(snapshot.into())
58+
},
59+
VectorIndexState::SnapshottedAt(snapshot) => {
60+
SearchOnDiskState::SnapshottedAt(snapshot.into())
61+
},
62+
},
63+
})
64+
}
65+
}
66+
67+
pub struct TextIndexConfigParser;
68+
69+
impl SearchIndexConfigParser for TextIndexConfigParser {
70+
type IndexType = TextSearchIndex;
71+
72+
fn get_config(config: IndexConfig) -> Option<SearchIndexConfig<Self::IndexType>> {
73+
let IndexConfig::Search {
74+
on_disk_state,
75+
developer_config,
76+
} = config
77+
else {
78+
return None;
79+
};
80+
Some(SearchIndexConfig {
81+
developer_config,
82+
on_disk_state: match on_disk_state {
83+
SearchIndexState::Backfilling => {
84+
// TODO(sam): Add support for a backfilling partial state to text search
85+
SearchOnDiskState::Backfilling(BackfillState {
86+
segments: vec![],
87+
cursor: None,
88+
backfill_snapshot_ts: None,
89+
})
90+
},
91+
SearchIndexState::Backfilled(snapshot) => {
92+
SearchOnDiskState::Backfilled(snapshot.into())
93+
},
94+
SearchIndexState::SnapshottedAt(snapshot) => {
95+
SearchOnDiskState::SnapshottedAt(snapshot.into())
96+
},
97+
},
98+
})
99+
}
100+
}
101+
102+
pub trait SearchIndex {
103+
type DeveloperConfig;
104+
type SnapshotData;
105+
type Segment;
106+
107+
fn get_index_sizes(snapshot: Snapshot) -> anyhow::Result<BTreeMap<IndexId, usize>>;
108+
109+
fn is_version_current(data: &SearchSnapshot<Self>) -> bool
110+
where
111+
Self: Sized;
112+
}
113+
114+
pub struct TextSearchIndex;
115+
impl SearchIndex for TextSearchIndex {
116+
type DeveloperConfig = DeveloperSearchIndexConfig;
117+
type Segment = FragmentedSearchSegment;
118+
type SnapshotData = SearchIndexSnapshotData;
119+
120+
fn get_index_sizes(snapshot: Snapshot) -> anyhow::Result<BTreeMap<IndexId, usize>> {
121+
Ok(snapshot
122+
.search_indexes
123+
.backfilled_and_enabled_index_sizes()?
124+
.collect())
125+
}
126+
127+
fn is_version_current(snapshot: &SearchSnapshot<Self>) -> bool {
128+
// TODO(sam): This doesn't match the current persistence version based check,
129+
// but it's closer to what vector search does.
130+
matches!(snapshot.data, SearchIndexSnapshotData::SingleSegment(_))
131+
}
132+
}
133+
134+
pub struct VectorSearchIndex;
135+
136+
impl SearchIndex for VectorSearchIndex {
137+
type DeveloperConfig = DeveloperVectorIndexConfig;
138+
type Segment = FragmentedVectorSegment;
139+
type SnapshotData = VectorIndexSnapshotData;
140+
141+
fn get_index_sizes(snapshot: Snapshot) -> anyhow::Result<BTreeMap<IndexId, usize>> {
142+
Ok(snapshot
143+
.vector_indexes
144+
.backfilled_and_enabled_index_sizes()?
145+
.collect())
146+
}
147+
148+
fn is_version_current(snapshot: &SearchSnapshot<Self>) -> bool {
149+
snapshot.data.is_version_current()
150+
}
151+
}
152+
pub struct SearchIndexConfig<T: SearchIndex> {
153+
pub developer_config: T::DeveloperConfig,
154+
pub on_disk_state: SearchOnDiskState<T>,
155+
}
156+
157+
pub struct SearchSnapshot<T: SearchIndex> {
158+
pub ts: Timestamp,
159+
pub data: T::SnapshotData,
160+
}
161+
162+
pub struct BackfillState<T: SearchIndex> {
163+
pub segments: Vec<T::Segment>,
164+
pub cursor: Option<InternalId>,
165+
pub backfill_snapshot_ts: Option<Timestamp>,
166+
}
167+
168+
impl From<VectorIndexBackfillState> for BackfillState<VectorSearchIndex> {
169+
fn from(value: VectorIndexBackfillState) -> Self {
170+
Self {
171+
segments: value.segments,
172+
cursor: value.cursor,
173+
backfill_snapshot_ts: value.backfill_snapshot_ts,
174+
}
175+
}
176+
}
177+
178+
pub enum SearchOnDiskState<T: SearchIndex> {
179+
Backfilling(BackfillState<T>),
180+
Backfilled(SearchSnapshot<T>),
181+
SnapshottedAt(SearchSnapshot<T>),
182+
}
183+
184+
impl From<VectorIndexSnapshot> for SearchSnapshot<VectorSearchIndex> {
185+
fn from(snapshot: VectorIndexSnapshot) -> Self {
186+
Self {
187+
ts: snapshot.ts,
188+
data: snapshot.data,
189+
}
190+
}
191+
}
192+
193+
impl From<SearchIndexSnapshot> for SearchSnapshot<TextSearchIndex> {
194+
fn from(snapshot: SearchIndexSnapshot) -> Self {
195+
Self {
196+
ts: snapshot.ts,
197+
data: snapshot.data,
198+
}
199+
}
200+
}

crates/database/src/index_workers/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
pub mod fast_forward;
2+
pub mod index_meta;
23
pub mod retriable_worker;
34
pub mod search_worker;
45

0 commit comments

Comments
 (0)