File tree 2 files changed +33
-4
lines changed
2 files changed +33
-4
lines changed Original file line number Diff line number Diff line change @@ -755,6 +755,26 @@ async fn test_filtering_match_updates(rt: TestRuntime) -> anyhow::Result<()> {
755
755
anyhow:: Ok ( ( ) )
756
756
}
757
757
758
+ #[ convex_macro:: test_runtime]
759
+ async fn test_bm25_stats_no_underflow ( rt : TestRuntime ) -> anyhow:: Result < ( ) > {
760
+ let mut scenario = Scenario :: new ( rt) . await ?;
761
+ scenario
762
+ . patch ( TestKey :: C , vec ! [ TestValue :: D ] , TestValue :: A )
763
+ . await ?;
764
+ scenario. execute ( TestAction :: Backfill ) . await ?;
765
+ scenario. execute ( TestAction :: Delete ( TestKey :: C ) ) . await ?;
766
+ // This query doesn't use the filter field, so the BM25 stats will not include
767
+ // the filter field, while the commit statistics in the memory index (from the
768
+ // delete) will.
769
+ scenario
770
+ . execute ( TestAction :: QueryAndCheckScores ( TestQuery {
771
+ search : vec ! [ TestValue :: D ] ,
772
+ filter : None ,
773
+ } ) )
774
+ . await ?;
775
+ anyhow:: Ok ( ( ) )
776
+ }
777
+
758
778
// Regression test: We had a bug where we were computing the index of a matching
759
779
// union term incorrectly.
760
780
//
Original file line number Diff line number Diff line change @@ -464,10 +464,19 @@ impl MemorySearchIndex {
464
464
. checked_add_signed ( commit_stats. total_docs_diff as i64 )
465
465
. context ( "num_documents underflow" ) ?;
466
466
for ( field, total_term_diff) in & commit_stats. total_term_diff_by_field {
467
- let term_diff = stats. num_terms_by_field . entry ( * field) . or_insert ( 0 ) ;
468
- * term_diff = term_diff
469
- . checked_add_signed ( * total_term_diff as i64 )
470
- . context ( "num_terms underflow" ) ?;
467
+ // It's possible some fields are present in the commit statistics but not the
468
+ // Bm25Stats because the Bm25Stats are query-specific, and the commit statistics
469
+ // are not; e.g. a filter field that isn't in the query might not appear in the
470
+ // Bm25Stats. We only need to update the fields that are in the Bm25Stats.
471
+ if let Some ( term_diff) = stats. num_terms_by_field . get_mut ( field) {
472
+ * term_diff = term_diff
473
+ . checked_add_signed ( * total_term_diff as i64 )
474
+ . context ( "num_terms underflow" ) ?;
475
+ } else if field == & Field :: from_field_id ( SEARCH_FIELD_ID ) {
476
+ stats
477
+ . num_terms_by_field
478
+ . insert ( * field, ( * total_term_diff as i64 ) . try_into ( ) ?) ;
479
+ }
471
480
}
472
481
for ( term, term_id) in & term_ids {
473
482
let Some ( & increment) = commit_stats. term_freq_diffs . get ( term_id) else {
You can’t perform that action at this time.
0 commit comments