@@ -10,6 +10,7 @@ import 'package:collection/collection.dart';
10
10
import 'package:logging/logging.dart' ;
11
11
import 'package:meta/meta.dart' ;
12
12
import 'package:pub_dev/service/topics/models.dart' ;
13
+ import 'package:pub_dev/third_party/bit_array/bit_array.dart' ;
13
14
14
15
import '../shared/utils.dart' show boundedList;
15
16
import 'models.dart' ;
@@ -30,10 +31,9 @@ class InMemoryPackageIndex {
30
31
late final TokenIndex <IndexedApiDocPage > _apiSymbolIndex;
31
32
late final _scorePool = ScorePool (_packageNameIndex._packageNames);
32
33
33
- /// Maps the tag strings to a list of document index values
34
- /// (`PackageDocument doc.tags -> List<_documents.indexOf(doc)>` ).
35
- final _tagDocumentIndices = < String , List <int >> {};
36
- final _documentTagIds = < List <int >> [];
34
+ /// Maps the tag strings to a list of document index values using bit arrays.
35
+ /// - (`PackageDocument doc.tags -> BitArray(List<_documents.indexOf(doc)>)` ).
36
+ final _tagBitArrays = < String , BitArray > {};
37
37
38
38
/// Adjusted score takes the overall score and transforms
39
39
/// it linearly into the [0.4-1.0] range.
@@ -66,12 +66,11 @@ class InMemoryPackageIndex {
66
66
_nameToIndex[doc.package] = i;
67
67
68
68
// transform tags into numerical IDs
69
- final tagIds = < int > [];
70
69
for (final tag in doc.tags) {
71
- _tagDocumentIndices.putIfAbsent (tag, () => []).add (i);
70
+ _tagBitArrays
71
+ .putIfAbsent (tag, () => BitArray (_documents.length))
72
+ .setBit (i);
72
73
}
73
- tagIds.sort ();
74
- _documentTagIds.add (tagIds);
75
74
76
75
final apiDocPages = doc.apiDocPages;
77
76
if (apiDocPages != null ) {
@@ -138,66 +137,58 @@ class InMemoryPackageIndex {
138
137
139
138
PackageSearchResult search (ServiceSearchQuery query) {
140
139
// prevent any work if offset is outside of the range
141
- if ((query.offset ?? 0 ) > _documents.length) {
140
+ if ((query.offset ?? 0 ) >= _documents.length) {
142
141
return PackageSearchResult .empty ();
143
142
}
144
143
return _scorePool.withScore (
145
- value: 1 .0 ,
144
+ value: 0 .0 ,
146
145
fn: (score) {
147
146
return _search (query, score);
148
147
},
149
148
);
150
149
}
151
150
152
151
PackageSearchResult _search (
153
- ServiceSearchQuery query, IndexedScore <String > packageScores) {
154
- // filter on package prefix
155
- if (query.parsedQuery.packagePrefix != null ) {
156
- final String prefix = query.parsedQuery.packagePrefix! .toLowerCase ();
157
- packageScores.retainWhere (
158
- (i, _) => _documents[i].packageNameLowerCased.startsWith (prefix),
159
- );
160
- }
152
+ ServiceSearchQuery query,
153
+ IndexedScore <String > packageScores,
154
+ ) {
155
+ // TODO: implement pooling of this object similarly to [ScorePool].
156
+ final packages = BitArray (_documents.length)
157
+ ..setRange (0 , _documents.length);
161
158
162
159
// filter on tags
163
160
final combinedTagsPredicate =
164
161
query.tagsPredicate.appendPredicate (query.parsedQuery.tagsPredicate);
165
162
if (combinedTagsPredicate.isNotEmpty) {
166
163
for (final entry in combinedTagsPredicate.entries) {
167
- final docIndexes = _tagDocumentIndices[entry.key];
168
-
164
+ final tagBits = _tagBitArrays[entry.key];
169
165
if (entry.value) {
170
- // predicate is required, zeroing the gaps between index values
171
- if (docIndexes == null ) {
172
- // the predicate is required, no document will match it
166
+ if (tagBits == null ) {
167
+ // the predicate is not matched by any document
173
168
return PackageSearchResult .empty ();
174
169
}
175
-
176
- for (var i = 0 ; i < docIndexes.length; i++ ) {
177
- if (i == 0 ) {
178
- packageScores.fillRange (0 , docIndexes[i], 0.0 );
179
- continue ;
180
- }
181
- packageScores.fillRange (docIndexes[i - 1 ] + 1 , docIndexes[i], 0.0 );
182
- }
183
- packageScores.fillRange (docIndexes.last + 1 , _documents.length, 0.0 );
170
+ packages.and (tagBits);
184
171
} else {
185
- // predicate is prohibited, zeroing the values
186
-
187
- if (docIndexes == null ) {
188
- // the predicate is prohibited, no document has it, always a match
172
+ if (tagBits == null ) {
173
+ // negative predicate without index means all documents are matched
189
174
continue ;
190
175
}
191
- for (final i in docIndexes) {
192
- packageScores.setValue (i, 0.0 );
193
- }
176
+ packages.andNot (tagBits);
194
177
}
195
178
}
196
179
}
197
180
181
+ // filter on package prefix
182
+ if (query.parsedQuery.packagePrefix != null ) {
183
+ final prefix = query.parsedQuery.packagePrefix! .toLowerCase ();
184
+ packages.clearWhere (
185
+ (i) => ! _documents[i].packageNameLowerCased.startsWith (prefix),
186
+ );
187
+ }
188
+
198
189
// filter on dependency
199
190
if (query.parsedQuery.hasAnyDependency) {
200
- packageScores. removeWhere ((i, _ ) {
191
+ packages. clearWhere ((i) {
201
192
final doc = _documents[i];
202
193
if (doc.dependencies.isEmpty) return true ;
203
194
for (final dependency in query.parsedQuery.allDependencies) {
@@ -213,22 +204,29 @@ class InMemoryPackageIndex {
213
204
214
205
// filter on points
215
206
if (query.minPoints != null && query.minPoints! > 0 ) {
216
- packageScores. removeWhere (
217
- (i, _ ) => _documents[i].grantedPoints < query.minPoints! );
207
+ packages
208
+ . clearWhere ((i ) => _documents[i].grantedPoints < query.minPoints! );
218
209
}
219
210
220
211
// filter on updatedDuration
221
212
final updatedDuration = query.parsedQuery.updatedDuration;
222
213
if (updatedDuration != null && updatedDuration > Duration .zero) {
223
214
final now = clock.now ();
224
- packageScores.removeWhere (
225
- (i, _) => now.difference (_documents[i].updated) > updatedDuration);
215
+ packages.clearWhere (
216
+ (i) => now.difference (_documents[i].updated) > updatedDuration);
217
+ }
218
+
219
+ // TODO: find a better way to handle predicate-only filtering and scoring
220
+ for (final index in packages.asIntIterable ()) {
221
+ if (index >= _documents.length) break ;
222
+ packageScores.setValue (index, 1.0 );
226
223
}
227
224
228
225
// do text matching
229
226
final parsedQueryText = query.parsedQuery.text;
230
227
final textResults = _searchText (
231
228
packageScores,
229
+ packages,
232
230
parsedQueryText,
233
231
textMatchExtent: query.textMatchExtent ?? TextMatchExtent .api,
234
232
);
@@ -362,6 +360,7 @@ class InMemoryPackageIndex {
362
360
363
361
_TextResults ? _searchText (
364
362
IndexedScore <String > packageScores,
363
+ BitArray packages,
365
364
String ? text, {
366
365
required TextMatchExtent textMatchExtent,
367
366
}) {
@@ -372,12 +371,14 @@ class InMemoryPackageIndex {
372
371
final sw = Stopwatch ()..start ();
373
372
final words = splitForQuery (text);
374
373
if (words.isEmpty) {
374
+ // packages.clearAll();
375
375
packageScores.fillRange (0 , packageScores.length, 0 );
376
376
return _TextResults .empty ();
377
377
}
378
378
379
379
final matchName = textMatchExtent.shouldMatchName ();
380
380
if (! matchName) {
381
+ // packages.clearAll();
381
382
packageScores.fillRange (0 , packageScores.length, 0 );
382
383
return _TextResults .empty (
383
384
errorMessage:
@@ -394,12 +395,6 @@ class InMemoryPackageIndex {
394
395
return aborted;
395
396
}
396
397
397
- // Multiple words are scored separately, and then the individual scores
398
- // are multiplied. We can use a package filter that is applied after each
399
- // word to reduce the scope of the later words based on the previous results.
400
- /// However, API docs search should be filtered on the original list.
401
- final indexedPositiveList = packageScores.toIndexedPositiveList ();
402
-
403
398
final matchDescription = textMatchExtent.shouldMatchDescription ();
404
399
final matchReadme = textMatchExtent.shouldMatchReadme ();
405
400
final matchApi = textMatchExtent.shouldMatchApi ();
@@ -435,7 +430,7 @@ class InMemoryPackageIndex {
435
430
if (value < 0.01 ) continue ;
436
431
437
432
final doc = symbolPages.keys[i];
438
- if (! indexedPositiveList [doc.index]) continue ;
433
+ if (! packages [doc.index]) continue ;
439
434
440
435
// skip if the previously found pages are better than the current one
441
436
final pages =
0 commit comments