@@ -44,7 +44,10 @@ use vector::{
44
44
QdrantSchema ,
45
45
} ;
46
46
47
- use crate :: Snapshot ;
47
+ use crate :: {
48
+ metrics:: vector:: log_documents_per_segment,
49
+ Snapshot ,
50
+ } ;
48
51
49
52
pub trait SearchIndexConfigParser {
50
53
type IndexType : SearchIndex ;
@@ -127,10 +130,11 @@ pub trait SearchIndex {
127
130
128
131
type PreviousSegments ;
129
132
133
+ type Statistics : SegmentStatistics ;
134
+
130
135
type Schema : Send + Sync ;
131
- // TODO(sam): Convert these to something more like segment statistics
132
- fn num_vectors ( segment : & Self :: Segment ) -> u32 ;
133
- fn non_deleted_vectors ( segment : & Self :: Segment ) -> anyhow:: Result < u64 > ;
136
+
137
+ fn statistics ( segment : & Self :: Segment ) -> anyhow:: Result < Self :: Statistics > ;
134
138
135
139
async fn upload_new_segment < RT : Runtime > (
136
140
rt : & RT ,
@@ -170,6 +174,11 @@ pub trait SearchIndex {
170
174
) -> anyhow:: Result < Vec < Self :: Segment > > ;
171
175
}
172
176
177
/// Statistics describing one or more index segments.
///
/// Implementors must provide a `Default` value and a way to combine two
/// values, which lets callers aggregate statistics across segments.
+ pub trait SegmentStatistics : Default {
178
/// Combines two statistics values into one.
///
/// Both operands and the result are `anyhow::Result`s; how an `Err` operand
/// is treated is decided by each implementation.
+ fn add ( lhs : anyhow:: Result < Self > , rhs : anyhow:: Result < Self > ) -> anyhow:: Result < Self > ;
179
/// Reports these statistics. What is reported, and where, is decided by
/// each implementation.
+ fn log ( & self ) ;
180
+ }
181
+
173
182
pub struct TextSearchIndex ;
174
183
#[ async_trait]
175
184
impl SearchIndex for TextSearchIndex {
@@ -178,6 +187,7 @@ impl SearchIndex for TextSearchIndex {
178
187
type PreviousSegments = ( ) ;
179
188
type Schema = ( ) ;
180
189
type Segment = FragmentedSearchSegment ;
190
+ type Statistics = TextStatistics ;
181
191
182
192
fn get_index_sizes ( snapshot : Snapshot ) -> anyhow:: Result < BTreeMap < IndexId , usize > > {
183
193
Ok ( snapshot
@@ -230,19 +240,26 @@ impl SearchIndex for TextSearchIndex {
230
240
anyhow:: bail!( "Not implemented" )
231
241
}
232
242
233
- fn num_vectors ( _segment : & Self :: Segment ) -> u32 {
234
- 0
235
- }
236
-
237
243
fn segment_id ( _segment : & Self :: Segment ) -> String {
238
244
"" . to_string ( )
239
245
}
240
246
241
- fn non_deleted_vectors ( _segment : & Self :: Segment ) -> anyhow:: Result < u64 > {
242
- anyhow :: bail! ( "Not implemented" )
247
+ fn statistics ( _segment : & Self :: Segment ) -> anyhow:: Result < Self :: Statistics > {
248
+ Ok ( TextStatistics )
243
249
}
244
250
}
245
251
252
+ #[ derive( Default ) ]
253
+ pub struct TextStatistics ;
254
+
255
+ impl SegmentStatistics for TextStatistics {
256
+ fn add ( _: anyhow:: Result < Self > , _: anyhow:: Result < Self > ) -> anyhow:: Result < Self > {
257
+ Ok ( Self )
258
+ }
259
+
260
+ fn log ( & self ) { }
261
+ }
262
+
246
263
#[ derive( Debug ) ]
247
264
pub struct VectorSearchIndex ;
248
265
@@ -253,6 +270,7 @@ impl SearchIndex for VectorSearchIndex {
253
270
type PreviousSegments = Vec < MutableFragmentedSegmentMetadata > ;
254
271
type Schema = QdrantSchema ;
255
272
type Segment = FragmentedVectorSegment ;
273
+ type Statistics = VectorStatistics ;
256
274
257
275
fn get_index_sizes ( snapshot : Snapshot ) -> anyhow:: Result < BTreeMap < IndexId , usize > > {
258
276
Ok ( snapshot
@@ -322,16 +340,16 @@ impl SearchIndex for VectorSearchIndex {
322
340
upload_segment ( rt, storage, new_segment) . await
323
341
}
324
342
325
- fn num_vectors ( segment : & Self :: Segment ) -> u32 {
326
- segment. num_vectors
327
- }
328
-
329
343
fn segment_id ( segment : & Self :: Segment ) -> String {
330
344
segment. id . clone ( )
331
345
}
332
346
333
- fn non_deleted_vectors ( segment : & Self :: Segment ) -> anyhow:: Result < u64 > {
334
- segment. non_deleted_vectors ( )
347
+ fn statistics ( segment : & Self :: Segment ) -> anyhow:: Result < Self :: Statistics > {
348
+ let non_deleted_vectors = segment. non_deleted_vectors ( ) ?;
349
+ Ok ( VectorStatistics {
350
+ non_deleted_vectors,
351
+ num_vectors : segment. num_vectors ,
352
+ } )
335
353
}
336
354
}
337
355
pub struct SearchIndexConfig < T : SearchIndex > {
@@ -350,6 +368,27 @@ pub struct BackfillState<T: SearchIndex> {
350
368
pub backfill_snapshot_ts : Option < Timestamp > ,
351
369
}
352
370
371
+ #[ derive( Debug , Default ) ]
372
+ pub struct VectorStatistics {
373
+ pub num_vectors : u32 ,
374
+ pub non_deleted_vectors : u64 ,
375
+ }
376
+
377
+ impl SegmentStatistics for VectorStatistics {
378
+ fn add ( lhs : anyhow:: Result < Self > , rhs : anyhow:: Result < Self > ) -> anyhow:: Result < Self > {
379
+ let rhs = rhs?;
380
+ let lhs = lhs?;
381
+ Ok ( Self {
382
+ num_vectors : lhs. num_vectors + rhs. num_vectors ,
383
+ non_deleted_vectors : lhs. non_deleted_vectors + rhs. non_deleted_vectors ,
384
+ } )
385
+ }
386
+
387
+ fn log ( & self ) {
388
+ log_documents_per_segment ( self . non_deleted_vectors ) ;
389
+ }
390
+ }
391
+
353
392
impl From < VectorIndexBackfillState > for BackfillState < VectorSearchIndex > {
354
393
fn from ( value : VectorIndexBackfillState ) -> Self {
355
394
Self {
0 commit comments