@@ -272,57 +272,61 @@ fn get_search_results(
272
272
query = query. trim ( ) ;
273
273
let offset = ( page - 1 ) * limit;
274
274
275
- let statement =
276
- "SELECT crates.name,
277
- -- NOTE: this selects the latest alphanumeric version, which may not be the latest semver
278
- MAX(releases.version) AS version,
279
- MAX(releases.description) AS description,
280
- MAX(releases.target_name) AS target_name,
281
- MAX(releases.release_time) AS release_time,
282
- -- Cast the boolean into an integer and then cast it into a boolean.
283
- -- Posgres moves in mysterious ways, don't question it
284
- CAST(MAX(releases.rustdoc_status::integer) AS boolean) as rustdoc_status,
285
- crates.github_stars,
286
- crates.downloads_total as downloads,
287
-
288
- -- The levenshtein distance between the search query and the crate's name
289
- levenshtein_less_equal($1, crates.name, 3) as distance,
290
- -- The similarity of the tokens of the search vs the tokens of `crates.content`.
291
- -- The `32` normalizes the number by using `rank / (rank + 1)`
292
- ts_rank_cd(crates.content, to_tsquery($2), 32) as content_rank
293
- FROM releases INNER JOIN crates on releases.crate_id = crates.id
294
-
295
- -- Filter crates that haven't been built and crates that have been yanked
296
- WHERE releases.rustdoc_status = true
297
- AND releases.yanked = false
298
- AND (
299
- -- Crates names that match the query sandwiched between wildcards will pass
300
- crates.name ILIKE CONCAT('%', $1, '%')
301
- -- Crate names with which the levenshtein distance is closer or equal to 3 will pass
302
- OR levenshtein_less_equal($1, crates.name, 3) <= 3
303
- -- Crates where their content matches the query will pass
304
- OR plainto_tsquery($1) @@ crates.content
305
- )
306
-
307
- GROUP BY crates.id, releases.id
308
-
309
- -- Ordering is prioritized by how closely the query matches the name, how closely the
310
- -- query matches the description, and finally how many downloads the crate has
311
- -- NOTE: this means that exact matches will be shown first
312
- ORDER BY distance DESC,
313
- content_rank DESC,
314
- downloads_total DESC
315
-
316
- -- Allows pagination
317
- LIMIT $2 OFFSET $3" ;
275
+ let statement = "
276
+ SELECT
277
+ crates.name,
278
+ releases.version,
279
+ releases.description,
280
+ releases.target_name,
281
+ releases.release_time,
282
+ releases.rustdoc_status,
283
+ SUM(crates.github_stars),
284
+ COUNT(*) OVER() as total
285
+ FROM releases
286
+ INNER JOIN crates on releases.crate_id = crates.id
287
+ WHERE
288
+ -- Only select the newest release by release time
289
+ releases.id = (
290
+ SELECT releases.id
291
+ FROM releases
292
+ -- Filter unbuilt/failing builds and yanked releases
293
+ WHERE
294
+ releases.crate_id = crates.id
295
+ AND releases.rustdoc_status
296
+ AND NOT releases.yanked
297
+ ORDER BY releases.release_time DESC
298
+ LIMIT 1
299
+ )
300
+ -- Only select releases/crates that pass our criteria:
301
+ -- - Levenshtein distance if the name and query is greater than three
302
+ -- - The query sandwiched between wildcards matches the crate's name
303
+ -- - The query matches the release's description
304
+ AND (
305
+ levenshtein_less_equal($1, crates.name, 3) <= 3
306
+ OR crates.name ILIKE CONCAT('%', $1, '%')
307
+ OR plainto_tsquery($1) @@ to_tsvector(releases.description)
308
+ )
309
+ GROUP BY crates.id, releases.id
310
+ -- Order by the levenshtein distance of the name, the text search ranking of the description
311
+ -- and finally the number of downloads
312
+ ORDER BY
313
+ levenshtein_less_equal($1, crates.name, 3) ASC,
314
+ ts_rank_cd(to_tsvector(releases.description), plainto_tsquery($1), 32) DESC,
315
+ releases.downloads DESC
316
+ LIMIT $2 OFFSET $3" ;
318
317
319
318
let rows = if let Ok ( rows) = conn. query ( statement, & [ & query, & limit, & offset] ) {
320
319
rows
321
320
} else {
322
321
return ( 0 , Vec :: new ( ) ) ;
323
322
} ;
324
323
325
- let total_results = rows. iter ( ) . map ( |row| row. get :: < _ , i64 > ( 8 ) ) . sum ( ) ;
324
+ // Each row contains the total number of possible/valid results, just get it once
325
+ let total_results = rows
326
+ . iter ( )
327
+ . next ( )
328
+ . map ( |row| row. get :: < _ , i64 > ( 7 ) )
329
+ . unwrap_or_default ( ) ;
326
330
let packages: Vec < Release > = rows
327
331
. into_iter ( )
328
332
. map ( |row| Release {
@@ -332,7 +336,7 @@ fn get_search_results(
332
336
target_name : row. get ( 3 ) ,
333
337
release_time : row. get ( 4 ) ,
334
338
rustdoc_status : row. get ( 5 ) ,
335
- stars : row. get ( 6 ) ,
339
+ stars : row. get :: < _ , i64 > ( 6 ) as i32 ,
336
340
} )
337
341
. collect ( ) ;
338
342
@@ -706,10 +710,10 @@ mod tests {
706
710
. create ( ) ?;
707
711
708
712
let ( num_results, results) = get_search_results ( & db. conn ( ) , "foo" , 1 , 100 ) ;
709
- let mut results = results. into_iter ( ) ;
710
-
711
713
assert_eq ! ( num_results, 4 ) ;
712
714
715
+ let mut results = results. into_iter ( ) ;
716
+
713
717
let expected = [ "foo" , "fo0" , "bar-foo" , "foo-bar" ] ;
714
718
for expected in expected. iter ( ) {
715
719
assert_eq ! ( expected, & results. next( ) . unwrap( ) . name) ;
@@ -751,15 +755,145 @@ mod tests {
751
755
non_exact ( & db) ?;
752
756
753
757
let ( num_results, results) = get_search_results ( & db. conn ( ) , "regex" , 1 , 100 ) ;
754
- let mut results = results. into_iter ( ) ;
755
-
756
758
assert_eq ! ( num_results, 4 ) ;
757
759
758
- assert_eq ! ( & results. next( ) . unwrap( ) . name, "regex" ) ;
760
+ let mut results = results. into_iter ( ) ;
761
+ assert_eq ! ( results. next( ) . unwrap( ) . name, * name) ;
759
762
rest_non_exact ( results. collect ( ) ) ;
760
763
761
764
Ok ( ( ) )
762
765
} )
763
766
}
764
767
}
768
+
769
/// A crate whose only release is flagged as an unsuccessful build must not
/// be returned by the search, neither in the total nor in the result list.
#[test]
fn unsuccessful_not_shown() {
    wrapper(|env| {
        let database = env.db();

        // Seed exactly one release, marked as a failed build.
        database
            .fake_release()
            .name("regex")
            .version("0.0.0")
            .build_result_successful(false)
            .create()?;

        let (total, hits) = get_search_results(&database.conn(), "regex", 1, 100);

        // Both the reported total and the returned page must be empty.
        assert_eq!(total, 0);
        assert_eq!(hits.into_iter().count(), 0);

        Ok(())
    })
}
788
+
789
/// A crate whose only release is marked as yanked must not be returned by
/// the search, neither in the total nor in the result list.
#[test]
fn yanked_not_shown() {
    wrapper(|env| {
        let database = env.db();

        // Seed exactly one release, marked as yanked.
        database
            .fake_release()
            .name("regex")
            .version("0.0.0")
            .cratesio_data_yanked(true)
            .create()?;

        let (total, hits) = get_search_results(&database.conn(), "regex", 1, 100);

        // Both the reported total and the returned page must be empty.
        assert_eq!(total, 0);
        assert_eq!(hits.into_iter().count(), 0);

        Ok(())
    })
}
808
+
809
/// A query that differs from the crate name by a single character
/// ("redex" vs. "regex") must still find that crate, and nothing else.
#[test]
fn fuzzily_match() {
    wrapper(|env| {
        let database = env.db();
        database
            .fake_release()
            .name("regex")
            .version("0.0.0")
            .create()?;

        let (total, hits) = get_search_results(&database.conn(), "redex", 1, 100);
        assert_eq!(total, 1);

        // Exactly one hit is expected, and it must be the misspelled crate.
        let names: Vec<_> = hits.into_iter().map(|release| release.name).collect();
        assert_eq!(names, ["regex"]);

        Ok(())
    })
}
825
+
826
/// A query that has nothing in common with the crate name must still find
/// the crate when it matches the release description (here the query is the
/// lower-cased form of the description).
#[test]
fn search_descriptions() {
    wrapper(|env| {
        let database = env.db();
        database
            .fake_release()
            .name("something_completely_unrelated")
            .description("Supercalifragilisticexpialidocious")
            .create()?;

        let (total, hits) = get_search_results(
            &database.conn(),
            "supercalifragilisticexpialidocious",
            1,
            100,
        );
        assert_eq!(total, 1);

        // Exactly one hit is expected: the crate carrying that description.
        let names: Vec<_> = hits.into_iter().map(|release| release.name).collect();
        assert_eq!(names, ["something_completely_unrelated"]);

        Ok(())
    })
}
849
+
850
/// Requesting page 1 with a limit of 2 must return only two releases while
/// the reported total still counts every matching crate (4).
#[test]
fn search_limits() {
    wrapper(|env| {
        let database = env.db();

        database.fake_release().name("something_magical").create()?;
        database.fake_release().name("something_sinister").create()?;
        database
            .fake_release()
            .name("something_fantastical")
            .create()?;
        database
            .fake_release()
            .name("something_completely_unrelated")
            .create()?;

        let (total, hits) = get_search_results(&database.conn(), "something", 1, 2);

        // The total reflects all matches, not just the current page.
        assert_eq!(total, 4);

        // NOTE(review): the expected order equals the insertion order above;
        // presumably these fixtures tie on the query's sort keys - confirm the
        // ordering is actually deterministic.
        let names: Vec<_> = hits.into_iter().map(|release| release.name).collect();
        assert_eq!(names, ["something_magical", "something_sinister"]);

        Ok(())
    })
}
873
+
874
/// Requesting page 2 with a limit of 2 must skip the first two matches and
/// return the remaining two, while the reported total still counts all 4.
#[test]
fn search_offsets() {
    wrapper(|env| {
        let database = env.db();

        database.fake_release().name("something_magical").create()?;
        database.fake_release().name("something_sinister").create()?;
        database
            .fake_release()
            .name("something_fantastical")
            .create()?;
        database
            .fake_release()
            .name("something_completely_unrelated")
            .create()?;

        let (total, hits) = get_search_results(&database.conn(), "something", 2, 2);

        // The total reflects all matches, not just the current page.
        assert_eq!(total, 4);

        // NOTE(review): the expected order equals the insertion order above;
        // presumably these fixtures tie on the query's sort keys - confirm the
        // ordering is actually deterministic.
        let names: Vec<_> = hits.into_iter().map(|release| release.name).collect();
        assert_eq!(
            names,
            ["something_fantastical", "something_completely_unrelated"]
        );

        Ok(())
    })
}
765
899
}
0 commit comments