Skip to content

Commit eaf558b

Browse files
committed
Added extension creation to CI and worked on CI
1 parent 53de395 commit eaf558b

File tree

4 files changed

+182
-61
lines changed

4 files changed

+182
-61
lines changed

.github/workflows/ci.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
---
2-
32
on: [push, pull_request]
43
name: CI
54

@@ -59,6 +58,10 @@ jobs:
5958
sleep 5
6059
# Make sure the database is actually working
6160
psql "${CRATESFYI_DATABASE_URL}"
61+
sudo -u postgres psql -c "CREATE EXTENSION IF NOT EXISTS fuzzystrmatch"
62+
63+
- name: Build docs.rs
64+
run: cargo build --locked
6265

6366
- name: Run rustfmt
6467
run: cargo fmt -- --check
@@ -79,7 +82,6 @@ jobs:
7982
name: Docker
8083
runs-on: ubuntu-latest
8184
steps:
82-
8385
- uses: actions/checkout@master
8486
with:
8587
fetch-depth: 2

src/db/migrate.rs

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -325,17 +325,6 @@ pub fn migrate(version: Option<Version>, conn: &Connection) -> CratesfyiResult<(
325325
ALTER TABLE releases ALTER COLUMN doc_targets DROP NOT NULL;
326326
"
327327
),
328-
migration!(
329-
context,
330-
// version
331-
13,
332-
// description
333-
"Add fuzzy string searching",
334-
// upgrade query
335-
"CREATE EXTENSION IF NOT EXISTS fuzzystrmatch",
336-
// downgrade query
337-
"DROP EXTENSION IF EXISTS fuzzystrmatch;",
338-
),
339328
];
340329

341330
for migration in migrations {

src/test/fakes.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,21 @@ impl<'a> FakeRelease<'a> {
6464
}
6565
}
6666

67+
pub(crate) fn downloads(mut self, downloads: i32) -> Self {
68+
self.cratesio_data.downloads = downloads;
69+
self
70+
}
71+
6772
pub(crate) fn description(mut self, new: impl Into<String>) -> Self {
6873
self.package.description = Some(new.into());
6974
self
7075
}
7176

77+
pub(crate) fn release_time(mut self, new: time::Timespec) -> Self {
78+
self.cratesio_data.release_time = new;
79+
self
80+
}
81+
7282
pub(crate) fn name(mut self, new: &str) -> Self {
7383
self.package.name = new.into();
7484
self.package.id = format!("{}-id", new);

src/web/releases.rs

Lines changed: 168 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ fn get_releases_by_owner(
265265
///
266266
fn get_search_results(
267267
conn: &Connection,
268-
query: &str,
268+
mut query: &str,
269269
page: i64,
270270
limit: i64,
271271
) -> (i64, Vec<Release>) {
@@ -280,7 +280,8 @@ fn get_search_results(
280280
releases.target_name,
281281
releases.release_time,
282282
releases.rustdoc_status,
283-
SUM(crates.github_stars),
283+
crates.github_stars,
284+
-- Get the total number of results, disregarding the limit
284285
COUNT(*) OVER() as total
285286
FROM releases
286287
INNER JOIN crates on releases.crate_id = crates.id
@@ -294,28 +295,36 @@ fn get_search_results(
294295
releases.crate_id = crates.id
295296
AND releases.rustdoc_status
296297
AND NOT releases.yanked
298+
-- Only select releases/crates that pass our criteria:
299+
-- - Levenshtein distance between the name and query is acceptable
300+
-- - The query sandwiched between wildcards matches the crate's name
301+
-- - The query matches the release's description
302+
AND (
303+
-- Turn the levenshtein distance into a similarity percentage using `(query.len() - distance) / query.len()`,
304+
-- this percentage is normalized and allows us to empirically compare the 'sameness' of different names
305+
((char_length($1)::float - levenshtein(crates.name, $1)::float) / char_length($1)::float) >= 0.65
306+
OR crates.name ILIKE CONCAT('%', $1, '%')
307+
OR plainto_tsquery($1) @@ to_tsvector(releases.description)
308+
)
297309
ORDER BY releases.release_time DESC
298310
LIMIT 1
299311
)
300-
-- Only select releases/crates that pass our criteria:
301-
-- - Levenshtein distance if the name and query is greater than three
302-
-- - The query sandwiched between wildcards matches the crate's name
303-
-- - The query matches the release's description
304-
AND (
305-
levenshtein_less_equal($1, crates.name, 3) <= 3
306-
OR crates.name ILIKE CONCAT('%', $1, '%')
307-
OR plainto_tsquery($1) @@ to_tsvector(releases.description)
308-
)
309312
GROUP BY crates.id, releases.id
310313
-- Order by the levenshtein distance of the name, the text search ranking of the description
311314
-- and finally the number of downloads
312315
ORDER BY
313-
levenshtein_less_equal($1, crates.name, 3) ASC,
316+
-- Order the levenshtein matches by their literal distance, so that `fo` matches `foo` more closely than `fooo`,
317+
-- because their normalized distances will be the same
318+
levenshtein(crates.name, $1) ASC,
319+
crates.name ILIKE CONCAT('%', $1, '%'),
314320
ts_rank_cd(to_tsvector(releases.description), plainto_tsquery($1), 32) DESC,
315321
releases.downloads DESC
316322
LIMIT $2 OFFSET $3";
317323

318-
let rows = if let Ok(rows) = conn.query(statement, &[&query, &limit, &offset]) {
324+
let rows = if let Ok(rows) = conn
325+
.query(statement, &[&query, &limit, &offset])
326+
.map_err(|err| dbg!(err))
327+
{
319328
rows
320329
} else {
321330
return (0, Vec::new());
@@ -336,7 +345,7 @@ fn get_search_results(
336345
target_name: row.get(3),
337346
release_time: row.get(4),
338347
rustdoc_status: row.get(5),
339-
stars: row.get::<_, i64>(6) as i32,
348+
stars: row.get::<_, i32>(6),
340349
})
341350
.collect();
342351

@@ -678,7 +687,7 @@ pub fn build_queue_handler(req: &mut Request) -> IronResult<Response> {
678687
#[cfg(test)]
679688
mod tests {
680689
use super::*;
681-
use crate::test::{wrapper, TestDatabase};
690+
use crate::test::wrapper;
682691

683692
#[test]
684693
fn database_search() {
@@ -724,46 +733,30 @@ mod tests {
724733
})
725734
}
726735

727-
fn non_exact(db: &TestDatabase) -> Result<(), crate::error::Error> {
728-
db.fake_release().name("reg3x").version("0.0.0").create()?;
729-
db.fake_release().name("regex-").version("0.0.0").create()?;
730-
db.fake_release()
731-
.name("regex-syntax")
732-
.version("0.0.0")
733-
.create()?;
734-
735-
Ok(())
736-
}
737-
738-
fn rest_non_exact(mut rest: Vec<Release>) {
739-
for name in ["reg3x", "regex-", "regex-syntax"].iter() {
740-
assert_eq!(rest.remove(0).name, *name);
741-
}
742-
743-
assert!(rest.is_empty());
744-
}
745-
746736
#[test]
747737
fn exacts_dont_care() {
748-
let near_matches = ["regex", "Regex", "rEgex", "reGex", "regEx", "regeX"];
738+
wrapper(|env| {
739+
let db = env.db();
749740

750-
for name in near_matches.iter() {
751-
wrapper(|env| {
752-
let db = env.db();
753-
db.fake_release().name(name).version("0.0.0").create()?;
741+
let releases = ["regex", "reg3x", "regex-", "regex-syntax"];
742+
for release in releases.iter() {
743+
db.fake_release().name(release).version("0.0.0").create()?;
744+
}
754745

755-
non_exact(&db)?;
746+
let near_matches = ["Regex", "rEgex", "reGex", "regEx", "regeX"];
756747

757-
let (num_results, results) = get_search_results(&db.conn(), "regex", 1, 100);
748+
for name in near_matches.iter() {
749+
let (num_results, mut results) = get_search_results(&db.conn(), *name, 1, 100);
758750
assert_eq!(num_results, 4);
759751

760-
let mut results = results.into_iter();
761-
assert_eq!(results.next().unwrap().name, *name);
762-
rest_non_exact(results.collect());
752+
for name in releases.iter() {
753+
assert_eq!(results.remove(0).name, *name);
754+
}
755+
assert!(results.is_empty());
756+
}
763757

764-
Ok(())
765-
})
766-
}
758+
Ok(())
759+
})
767760
}
768761

769762
#[test]
@@ -889,11 +882,138 @@ mod tests {
889882
assert_eq!(results.next().unwrap().name, "something_fantastical");
890883
assert_eq!(
891884
results.next().unwrap().name,
892-
"something_completely_unrelated"
885+
"something_completely_unrelated",
886+
);
887+
assert_eq!(results.count(), 0);
888+
889+
Ok(())
890+
})
891+
}
892+
893+
#[test]
894+
fn release_dates() {
895+
wrapper(|env| {
896+
let db = env.db();
897+
db.fake_release()
898+
.name("somethang")
899+
.release_time(time::Timespec::new(1000, 0))
900+
.version("0.3.0")
901+
.description("this is the correct choice")
902+
.create()?;
903+
db.fake_release()
904+
.name("somethang")
905+
.release_time(time::Timespec::new(100, 0))
906+
.description("second")
907+
.version("0.2.0")
908+
.create()?;
909+
db.fake_release()
910+
.name("somethang")
911+
.release_time(time::Timespec::new(10, 0))
912+
.description("third")
913+
.version("0.1.0")
914+
.create()?;
915+
db.fake_release()
916+
.name("somethang")
917+
.release_time(time::Timespec::new(1, 0))
918+
.description("fourth")
919+
.version("0.0.0")
920+
.create()?;
921+
922+
let (num_results, results) = get_search_results(&db.conn(), "somethang", 1, 100);
923+
assert_eq!(num_results, 1);
924+
925+
let mut results = results.into_iter();
926+
assert_eq!(
927+
results.next().unwrap().description,
928+
Some("this is the correct choice".into()),
893929
);
894930
assert_eq!(results.count(), 0);
895931

896932
Ok(())
897933
})
898934
}
935+
936+
#[test]
937+
fn fuzzy_over_description() {
938+
wrapper(|env| {
939+
let db = env.db();
940+
db.fake_release()
941+
.name("name_better_than_description")
942+
.description("this is the correct choice")
943+
.create()?;
944+
db.fake_release()
945+
.name("im_completely_unrelated")
946+
.description("name_better_than_description")
947+
.create()?;
948+
db.fake_release()
949+
.name("i_have_zero_relation_whatsoever")
950+
.create()?;
951+
952+
let (num_results, results) =
953+
get_search_results(&db.conn(), "name_better_than_description", 1, 100);
954+
assert_eq!(num_results, 2);
955+
956+
let mut results = results.into_iter();
957+
958+
let next = results.next().unwrap();
959+
assert_eq!(next.name, "name_better_than_description");
960+
assert_eq!(next.description, Some("this is the correct choice".into()));
961+
962+
let next = results.next().unwrap();
963+
assert_eq!(next.name, "im_completely_unrelated");
964+
assert_eq!(
965+
next.description,
966+
Some("name_better_than_description".into())
967+
);
968+
969+
assert_eq!(results.count(), 0);
970+
971+
Ok(())
972+
})
973+
}
974+
975+
#[test]
976+
fn dont_return_unrelated() {
977+
wrapper(|env| {
978+
let db = env.db();
979+
db.fake_release().name("match").create()?;
980+
db.fake_release().name("matcher").create()?;
981+
db.fake_release().name("matchest").create()?;
982+
db.fake_release()
983+
.name("i_am_useless_and_mean_nothing")
984+
.create()?;
985+
986+
let (num_results, results) = get_search_results(&db.conn(), "match", 1, 100);
987+
assert_eq!(num_results, 3);
988+
989+
let mut results = results.into_iter();
990+
assert_eq!(results.next().unwrap().name, "match");
991+
assert_eq!(results.next().unwrap().name, "matcher");
992+
assert_eq!(results.next().unwrap().name, "matchest");
993+
assert_eq!(results.count(), 0);
994+
995+
Ok(())
996+
})
997+
}
998+
999+
#[test]
1000+
fn order_by_downloads() {
1001+
wrapper(|env| {
1002+
let db = env.db();
1003+
db.fake_release().name("matca").downloads(100).create()?;
1004+
db.fake_release().name("matcb").downloads(10).create()?;
1005+
db.fake_release().name("matcc").downloads(1).create()?;
1006+
1007+
let (num_results, results) = get_search_results(&db.conn(), "match", 1, 100);
1008+
assert_eq!(num_results, 3);
1009+
1010+
let mut results = results.into_iter();
1011+
assert_eq!(results.next().unwrap().name, "matca");
1012+
assert_eq!(results.next().unwrap().name, "matcb");
1013+
assert_eq!(results.next().unwrap().name, "matcc");
1014+
assert_eq!(results.count(), 0);
1015+
1016+
Ok(())
1017+
})
1018+
}
8991019
}

0 commit comments

Comments
 (0)