@@ -281,40 +281,27 @@ fn get_search_results(
281
281
releases.release_time,
282
282
releases.rustdoc_status,
283
283
crates.github_stars,
284
- -- Get the total number of results, disregarding the limit
285
284
COUNT(*) OVER() as total
286
- FROM releases
287
- INNER JOIN crates on releases.crate_id = crates.id
288
- WHERE
289
- -- Only select the newest release by release time
290
- releases.id = (
291
- SELECT releases.id
285
+ FROM crates
286
+ INNER JOIN (
287
+ SELECT releases.id, releases.crate_id
288
+ FROM (
289
+ SELECT
290
+ releases.id,
291
+ releases.crate_id,
292
+ rank() OVER (PARTITION BY crate_id ORDER BY release_time DESC) as rank
292
293
FROM releases
293
- -- Filter unbuilt/failing builds and yanked releases
294
- WHERE
295
- releases.crate_id = crates.id
296
- AND releases.rustdoc_status
297
- AND NOT releases.yanked
298
- -- Only select releases/crates that pass our criteria:
299
- -- - Levenshtein distance between the name and query is acceptable
300
- -- - The query sandwiched between wildcards matches the crate's name
301
- -- - The query matches the release's description
302
- AND (
303
- -- Turn the levenshtein distance into a percentage using `distance / max(query.len(), crates.name.len())`
304
- -- this percentage is normalized and allows us to empirically compare the 'sameness' of different names
305
- ((char_length($1)::float - levenshtein(crates.name, $1)::float) / char_length($1)::float) >= 0.65
306
- OR crates.name ILIKE CONCAT('%', $1, '%')
307
- OR plainto_tsquery($1) @@ to_tsvector(releases.description)
308
- )
309
- ORDER BY releases.release_time DESC
310
- LIMIT 1
311
- )
294
+ WHERE releases.rustdoc_status AND NOT releases.yanked
295
+ ) AS releases
296
+ WHERE releases.rank = 1
297
+ ) AS latest_release ON latest_release.crate_id = crates.id
298
+ INNER JOIN releases ON latest_release.id = releases.id
299
+ WHERE
300
+ ((char_length($1)::float - levenshtein(crates.name, $1)::float) / char_length($1)::float) >= 0.65
301
+ OR crates.name ILIKE CONCAT('%', $1, '%')
302
+ OR plainto_tsquery($1) @@ to_tsvector(releases.description)
312
303
GROUP BY crates.id, releases.id
313
- -- Order by the levenshtein distance of the name, the text search ranking of the description
314
- -- and finally the number of downloads
315
304
ORDER BY
316
- -- Order the levenshtein matches by their literal distance, so that `fo` matches `foo` more closely than `fooo`,
317
- -- because their normalized distances will be the same
318
305
levenshtein(crates.name, $1) ASC,
319
306
crates.name ILIKE CONCAT('%', $1, '%'),
320
307
ts_rank_cd(to_tsvector(releases.description), plainto_tsquery($1), 32) DESC,
@@ -738,16 +725,17 @@ mod tests {
738
725
wrapper ( |env| {
739
726
let db = env. db ( ) ;
740
727
741
- let releases = [ "regex" , "reg3x" , " regex-", "regex-syntax" ] ;
728
+ let releases = [ "regex" , "regex-" , "regex-syntax" ] ;
742
729
for release in releases. iter ( ) {
743
730
db. fake_release ( ) . name ( release) . version ( "0.0.0" ) . create ( ) ?;
744
731
}
745
732
746
733
let near_matches = [ "Regex" , "rEgex" , "reGex" , "regEx" , "regeX" ] ;
747
734
748
735
for name in near_matches. iter ( ) {
749
- let ( num_results, mut results) = get_search_results ( & db. conn ( ) , * name, 1 , 100 ) ;
750
- assert_eq ! ( num_results, 4 ) ;
736
+ let ( num_results, mut results) =
737
+ dbg ! ( get_search_results( & db. conn( ) , * name, 1 , 100 ) ) ;
738
+ assert_eq ! ( num_results, 3 ) ;
751
739
752
740
for name in releases. iter ( ) {
753
741
assert_eq ! ( results. remove( 0 ) . name, * name) ;
0 commit comments