@@ -265,7 +265,7 @@ fn get_releases_by_owner(
265
265
///
266
266
fn get_search_results (
267
267
conn : & Connection ,
268
- query : & str ,
268
+ mut query : & str ,
269
269
page : i64 ,
270
270
limit : i64 ,
271
271
) -> ( i64 , Vec < Release > ) {
@@ -280,7 +280,8 @@ fn get_search_results(
280
280
releases.target_name,
281
281
releases.release_time,
282
282
releases.rustdoc_status,
283
- SUM(crates.github_stars),
283
+ crates.github_stars,
284
+ -- Get the total number of results, disregarding the limit
284
285
COUNT(*) OVER() as total
285
286
FROM releases
286
287
INNER JOIN crates on releases.crate_id = crates.id
@@ -294,28 +295,36 @@ fn get_search_results(
294
295
releases.crate_id = crates.id
295
296
AND releases.rustdoc_status
296
297
AND NOT releases.yanked
298
+ -- Only select releases/crates that pass our criteria:
299
+ -- - Levenshtein distance between the name and query is acceptable
300
+ -- - The query sandwiched between wildcards matches the crate's name
301
+ -- - The query matches the release's description
302
+ AND (
303
+ -- Turn the levenshtein distance into a percentage using `distance / max(query.len(), crates.name.len())`
304
+ -- this percentage is normalized and allows us to empirically compare the 'sameness' of different names
305
+ ((char_length($1)::float - levenshtein(crates.name, $1)::float) / char_length($1)::float) >= 0.65
306
+ OR crates.name ILIKE CONCAT('%', $1, '%')
307
+ OR plainto_tsquery($1) @@ to_tsvector(releases.description)
308
+ )
297
309
ORDER BY releases.release_time DESC
298
310
LIMIT 1
299
311
)
300
- -- Only select releases/crates that pass our criteria:
301
- -- - Levenshtein distance if the name and query is greater than three
302
- -- - The query sandwiched between wildcards matches the crate's name
303
- -- - The query matches the release's description
304
- AND (
305
- levenshtein_less_equal($1, crates.name, 3) <= 3
306
- OR crates.name ILIKE CONCAT('%', $1, '%')
307
- OR plainto_tsquery($1) @@ to_tsvector(releases.description)
308
- )
309
312
GROUP BY crates.id, releases.id
310
313
-- Order by the levenshtein distance of the name, the text search ranking of the description
311
314
-- and finally the number of downloads
312
315
ORDER BY
313
- levenshtein_less_equal($1, crates.name, 3) ASC,
316
+ -- Order the levenshtein matches by their literal distance, so that `fo` matches `foo` more closely than `fooo`,
317
+ -- because their normalized distances will be the same
318
+ levenshtein(crates.name, $1) ASC,
319
+ crates.name ILIKE CONCAT('%', $1, '%'),
314
320
ts_rank_cd(to_tsvector(releases.description), plainto_tsquery($1), 32) DESC,
315
321
releases.downloads DESC
316
322
LIMIT $2 OFFSET $3" ;
317
323
318
- let rows = if let Ok ( rows) = conn. query ( statement, & [ & query, & limit, & offset] ) {
324
+ let rows = if let Ok ( rows) = conn
325
+ . query ( statement, & [ & query, & limit, & offset] )
326
+ . map_err ( |err| dbg ! ( err) )
327
+ {
319
328
rows
320
329
} else {
321
330
return ( 0 , Vec :: new ( ) ) ;
@@ -336,7 +345,7 @@ fn get_search_results(
336
345
target_name : row. get ( 3 ) ,
337
346
release_time : row. get ( 4 ) ,
338
347
rustdoc_status : row. get ( 5 ) ,
339
- stars : row. get :: < _ , i64 > ( 6 ) as i32 ,
348
+ stars : row. get :: < _ , i32 > ( 6 ) ,
340
349
} )
341
350
. collect ( ) ;
342
351
@@ -678,7 +687,7 @@ pub fn build_queue_handler(req: &mut Request) -> IronResult<Response> {
678
687
#[ cfg( test) ]
679
688
mod tests {
680
689
use super :: * ;
681
- use crate :: test:: { wrapper, TestDatabase } ;
690
+ use crate :: test:: wrapper;
682
691
683
692
#[ test]
684
693
fn database_search ( ) {
@@ -724,46 +733,30 @@ mod tests {
724
733
} )
725
734
}
726
735
727
- fn non_exact ( db : & TestDatabase ) -> Result < ( ) , crate :: error:: Error > {
728
- db. fake_release ( ) . name ( "reg3x" ) . version ( "0.0.0" ) . create ( ) ?;
729
- db. fake_release ( ) . name ( "regex-" ) . version ( "0.0.0" ) . create ( ) ?;
730
- db. fake_release ( )
731
- . name ( "regex-syntax" )
732
- . version ( "0.0.0" )
733
- . create ( ) ?;
734
-
735
- Ok ( ( ) )
736
- }
737
-
738
- fn rest_non_exact ( mut rest : Vec < Release > ) {
739
- for name in [ "reg3x" , "regex-" , "regex-syntax" ] . iter ( ) {
740
- assert_eq ! ( rest. remove( 0 ) . name, * name) ;
741
- }
742
-
743
- assert ! ( rest. is_empty( ) ) ;
744
- }
745
-
746
736
/// Searching for any single-letter capitalisation variant of `regex` must return the same
/// four crates, in the same order, as every other variant.
#[test]
fn exacts_dont_care() {
    wrapper(|env| {
        let db = env.db();

        // Listed in the order the search is expected to return them.
        let expected = ["regex", "reg3x", "regex-", "regex-syntax"];
        for crate_name in expected.iter() {
            db.fake_release()
                .name(crate_name)
                .version("0.0.0")
                .create()?;
        }

        let queries = ["Regex", "rEgex", "reGex", "regEx", "regeX"];
        for query in queries.iter() {
            let (num_results, results) = get_search_results(&db.conn(), *query, 1, 100);
            assert_eq!(num_results, 4);

            // Walk the results front to back without shifting the vector on every step.
            let mut results = results.into_iter();
            for crate_name in expected.iter() {
                assert_eq!(results.next().unwrap().name, *crate_name);
            }
            assert_eq!(results.count(), 0);
        }

        Ok(())
    })
}
768
761
769
762
#[ test]
@@ -889,11 +882,138 @@ mod tests {
889
882
assert_eq ! ( results. next( ) . unwrap( ) . name, "something_fantastical" ) ;
890
883
assert_eq ! (
891
884
results. next( ) . unwrap( ) . name,
892
- "something_completely_unrelated"
885
+ "something_completely_unrelated" ,
886
+ ) ;
887
+ assert_eq ! ( results. count( ) , 0 ) ;
888
+
889
+ Ok ( ( ) )
890
+ } )
891
+ }
892
+
893
/// A crate with four releases must appear once in the results, represented by the release
/// with the greatest `release_time`.
#[test]
fn release_dates() {
    wrapper(|env| {
        let db = env.db();

        // (release time in seconds, version, description), newest first.
        let fixtures = [
            (1000, "0.3.0", "this is the correct choice"),
            (100, "0.2.0", "second"),
            (10, "0.1.0", "third"),
            (1, "0.0.0", "fourth"),
        ];
        for &(seconds, version, description) in fixtures.iter() {
            db.fake_release()
                .name("somethang")
                .release_time(time::Timespec::new(seconds, 0))
                .version(version)
                .description(description)
                .create()?;
        }

        let (num_results, results) = get_search_results(&db.conn(), "somethang", 1, 100);
        assert_eq!(num_results, 1);

        let mut hits = results.into_iter();
        let newest = hits.next().unwrap();
        assert_eq!(
            newest.description,
            Some("this is the correct choice".into()),
        );
        assert_eq!(hits.count(), 0);

        Ok(())
    })
}
935
+
936
/// A crate whose name equals the query must rank above a crate that only mentions the query
/// in its description; a crate matching neither must not be returned.
#[test]
fn fuzzy_over_description() {
    wrapper(|env| {
        let query = "name_better_than_description";

        let db = env.db();
        db.fake_release()
            .name(query)
            .description("this is the correct choice")
            .create()?;
        db.fake_release()
            .name("im_completely_unrelated")
            .description(query)
            .create()?;
        db.fake_release()
            .name("i_have_zero_relation_whatsoever")
            .create()?;

        let (num_results, results) = get_search_results(&db.conn(), query, 1, 100);
        assert_eq!(num_results, 2);

        // (name, description) pairs in the expected ranking order.
        let expected = [
            ("name_better_than_description", "this is the correct choice"),
            ("im_completely_unrelated", "name_better_than_description"),
        ];

        let mut hits = results.into_iter();
        for &(name, description) in expected.iter() {
            let hit = hits.next().unwrap();
            assert_eq!(hit.name, name);
            assert_eq!(hit.description, Some(description.into()));
        }
        assert_eq!(hits.count(), 0);

        Ok(())
    })
}
974
+
975
/// Searching for `match` must return the three crates whose names contain it, in order,
/// and must leave out the crate with an unrelated name.
#[test]
fn dont_return_unrelated() {
    wrapper(|env| {
        let db = env.db();

        let related = ["match", "matcher", "matchest"];
        for crate_name in related.iter() {
            db.fake_release().name(crate_name).create()?;
        }
        db.fake_release()
            .name("i_am_useless_and_mean_nothing")
            .create()?;

        let (num_results, results) = get_search_results(&db.conn(), "match", 1, 100);
        assert_eq!(num_results, 3);

        let mut hits = results.into_iter();
        for crate_name in related.iter() {
            assert_eq!(hits.next().unwrap().name, *crate_name);
        }
        assert_eq!(hits.count(), 0);

        Ok(())
    })
}
998
+
999
/// Three crates whose names each differ from the query `match` only in the last letter
/// must be returned in descending order of downloads.
#[test]
fn order_by_downloads() {
    wrapper(|env| {
        let db = env.db();

        // (crate name, download count), already in the expected result order.
        let fixtures = [("matca", 100), ("matcb", 10), ("matcc", 1)];
        for &(crate_name, downloads) in fixtures.iter() {
            db.fake_release()
                .name(crate_name)
                .downloads(downloads)
                .create()?;
        }

        let (num_results, results) = get_search_results(&db.conn(), "match", 1, 100);
        assert_eq!(num_results, 3);

        let mut hits = results.into_iter();
        for &(crate_name, _) in fixtures.iter() {
            assert_eq!(hits.next().unwrap().name, crate_name);
        }
        assert_eq!(hits.count(), 0);

        Ok(())
    })
}
899
1019
}
0 commit comments