@@ -198,3 +198,99 @@ message StorageType {
198
198
LocalStorage local = 2 ;
199
199
}
200
200
}
201
+
202
+ service IncrementalSearchlight { // Exposes three RPCs: token lookup, BM25 statistics, and posting-list scoring.
203
+ // Query a set of tokens against the term dictionary, optionally allowing
204
+ // for fuzzy matching and prefix matching. Take the top `K` results with
205
+ // respect to `(edit distance, term)` lexicographical order.
206
+ rpc QueryTokens (QueryTokensRequest ) returns (QueryTokensResponse );
207
+
208
+ // For the given index, compute the total number of documents and terms
209
+ // in the index. Also, given a list of pointers to terms within the index,
210
+ // compute the document frequency of each term.
211
+ rpc QueryBm25Stats (QueryBm25StatsRequest ) returns (QueryBm25StatsResponse );
212
+
213
+ // Given an AND + OR query of term pointers and BM25 statistics for the OR
214
+ // terms, return the top `K` results with respect to BM25 score.
215
+ rpc QueryPostingLists (QueryPostingListsRequest ) returns (QueryPostingListsResponse );
216
+ }
217
+
218
+ message QueryTokensRequest { // Request message for the `QueryTokens` RPC.
219
+ StorageType storage_type = 1 ;
220
+ FragmentedTextSegmentPaths segment = 2 ;
221
+ SearchIndexConfig index_config = 3 ; // NOTE(review): `SearchIndexConfig` is declared outside this section.
222
+ repeated TokenQuery token_queries = 4 ; // `TokenMatch.token_ord` is an offset into this list.
223
+ uint32 max_results = 5 ; // Presumably the `K` in "top `K` results" from the RPC comment - TODO confirm.
224
+ }
225
+
226
+ message FragmentedTextSegmentPaths { // Groups three storage keys; used as the `segment` field of every request in this service.
227
+ StorageKey segment = 1 ; // NOTE(review): `StorageKey` is declared outside this section.
228
+ StorageKey id_tracker = 2 ;
229
+ StorageKey deletions = 3 ;
230
+ }
231
+
232
+ message TokenQuery { // One entry of `QueryTokensRequest.token_queries`.
233
+ convex_token.FieldPath field_path = 1 ; // Type comes from the `convex_token` package, declared outside this section.
234
+ bytes token = 2 ;
235
+ uint32 max_distance = 3 ; // Presumably the maximum edit distance allowed for fuzzy matching - TODO confirm.
236
+ bool prefix = 4 ; // Presumably enables prefix matching for this token - TODO confirm.
237
+ }
238
+
239
+ message QueryTokensResponse { // Response message for the `QueryTokens` RPC.
240
+ repeated TokenMatch token_matches = 2 ; // NOTE(review): field number 1 is unused in this message - confirm the gap is intentional.
241
+ }
242
+
243
+ message TokenMatch { // One entry of `QueryTokensResponse.token_matches`.
244
+ uint32 distance = 1 ; // Presumably the edit distance of this match - TODO confirm.
245
+ bool prefix = 2 ; // Presumably whether this was a prefix match - TODO confirm.
246
+ bytes tantivy_bytes = 3 ; // NOTE(review): looks like the matched term in tantivy's byte encoding - confirm.
247
+ // Offset into `QueryTokensRequest.token_queries`.
248
+ uint32 token_ord = 4 ;
249
+ }
250
+
251
+ message QueryBm25StatsRequest { // Request message for the `QueryBm25Stats` RPC.
252
+ StorageType storage_type = 1 ;
253
+ FragmentedTextSegmentPaths segment = 2 ;
254
+ repeated bytes terms = 3 ; // Per the RPC comment: pointers to terms within the index whose document frequency is requested.
255
+ }
256
+
257
+ message QueryBm25StatsResponse { // Response message for the `QueryBm25Stats` RPC.
258
+ uint64 num_terms = 1 ; // Per the RPC comment: total number of terms in the index.
259
+ uint64 num_documents = 2 ; // Per the RPC comment: total number of documents in the index.
260
+ repeated DocFrequency doc_frequencies = 3 ; // Per the RPC comment: document frequency of each requested term.
261
+ }
262
+
263
+ message DocFrequency { // Pairs a term with a frequency; used by `QueryBm25StatsResponse` and `PostingListQuery`.
264
+ bytes term = 1 ;
265
+ uint64 frequency = 2 ; // Presumably the number of documents containing `term` - TODO confirm.
266
+ }
267
+
268
+ message QueryPostingListsRequest { // Request message for the `QueryPostingLists` RPC.
269
+ StorageType storage_type = 1 ;
270
+ FragmentedTextSegmentPaths segment = 2 ;
271
+ PostingListQuery query = 3 ;
272
+ }
273
+
274
+ message PostingListQuery { // Carried in `QueryPostingListsRequest.query`; holds the AND + OR terms and BM25 statistics.
275
+ repeated bytes deleted_internal_ids = 1 ; // Presumably internal IDs to exclude from the results - TODO confirm.
276
+
277
+ repeated bytes or_terms = 2 ; // OR terms of the query (term pointers, per the RPC comment).
278
+ repeated bytes and_terms = 3 ; // AND terms of the query (term pointers, per the RPC comment).
279
+
280
+ uint64 num_terms = 4 ; // Same field name and type as `QueryBm25StatsResponse.num_terms`.
281
+ uint64 num_documents = 5 ; // Same field name and type as `QueryBm25StatsResponse.num_documents`.
282
+ repeated DocFrequency doc_frequencies = 6 ; // Per the RPC comment, BM25 statistics are supplied for the OR terms.
283
+
284
+ uint32 max_results = 7 ; // Presumably the `K` in "top `K` results" from the RPC comment - TODO confirm.
285
+ }
286
+
287
+ message QueryPostingListsResponse { // Response message for the `QueryPostingLists` RPC.
288
+ repeated PostingListMatch matches = 1 ; // Per the RPC comment: the top `K` results with respect to BM25 score.
289
+ }
290
+
291
+ message PostingListMatch { // One entry of `QueryPostingListsResponse.matches`.
292
+ bytes internal_id = 1 ;
293
+ uint64 ts = 2 ; // NOTE(review): units and epoch are not specified here - confirm.
294
+ double creation_time = 3 ; // NOTE(review): units and epoch are not specified here - confirm.
295
+ float bm25_score = 4 ; // BM25 score by which results are ranked, per the RPC comment.
296
+ }
0 commit comments