Skip to content

Commit 6f56965

Browse files
KixironJoshua Nelson
authored and
Joshua Nelson
committed
Searching works now + testing
1 parent f3f5aa5 commit 6f56965

File tree

2 files changed

+189
-50
lines changed

2 files changed

+189
-50
lines changed

src/test/fakes.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,11 @@ impl<'a> FakeRelease<'a> {
6464
}
6565
}
6666

67+
pub(crate) fn description(mut self, new: impl Into<String>) -> Self {
68+
self.package.description = Some(new.into());
69+
self
70+
}
71+
6772
pub(crate) fn name(mut self, new: &str) -> Self {
6873
self.package.name = new.into();
6974
self.package.id = format!("{}-id", new);

src/web/releases.rs

Lines changed: 184 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -272,57 +272,61 @@ fn get_search_results(
272272
query = query.trim();
273273
let offset = (page - 1) * limit;
274274

275-
let statement =
276-
"SELECT crates.name,
277-
-- NOTE: this selects the latest alphanumeric version, which may not be the latest semver
278-
MAX(releases.version) AS version,
279-
MAX(releases.description) AS description,
280-
MAX(releases.target_name) AS target_name,
281-
MAX(releases.release_time) AS release_time,
282-
-- Cast the boolean into an integer and then cast it into a boolean.
283-
-- Postgres moves in mysterious ways, don't question it
284-
CAST(MAX(releases.rustdoc_status::integer) AS boolean) as rustdoc_status,
285-
crates.github_stars,
286-
crates.downloads_total as downloads,
287-
288-
-- The levenshtein distance between the search query and the crate's name
289-
levenshtein_less_equal($1, crates.name, 3) as distance,
290-
-- The similarity of the tokens of the search vs the tokens of `crates.content`.
291-
-- The `32` normalizes the number by using `rank / (rank + 1)`
292-
ts_rank_cd(crates.content, to_tsquery($2), 32) as content_rank
293-
FROM releases INNER JOIN crates on releases.crate_id = crates.id
294-
295-
-- Filter crates that haven't been built and crates that have been yanked
296-
WHERE releases.rustdoc_status = true
297-
AND releases.yanked = false
298-
AND (
299-
-- Crate names that match the query sandwiched between wildcards will pass
300-
crates.name ILIKE CONCAT('%', $1, '%')
301-
-- Crate names whose levenshtein distance from the query is less than or equal to 3 will pass
302-
OR levenshtein_less_equal($1, crates.name, 3) <= 3
303-
-- Crates where their content matches the query will pass
304-
OR plainto_tsquery($1) @@ crates.content
305-
)
306-
307-
GROUP BY crates.id, releases.id
308-
309-
-- Ordering is prioritized by how closely the query matches the name, how closely the
310-
-- query matches the description, and finally how many downloads the crate has
311-
-- NOTE: this means that exact matches will be shown first
312-
ORDER BY distance DESC,
313-
content_rank DESC,
314-
downloads_total DESC
315-
316-
-- Allows pagination
317-
LIMIT $2 OFFSET $3";
275+
let statement = "
276+
SELECT
277+
crates.name,
278+
releases.version,
279+
releases.description,
280+
releases.target_name,
281+
releases.release_time,
282+
releases.rustdoc_status,
283+
SUM(crates.github_stars),
284+
COUNT(*) OVER() as total
285+
FROM releases
286+
INNER JOIN crates on releases.crate_id = crates.id
287+
WHERE
288+
-- Only select the newest release by release time
289+
releases.id = (
290+
SELECT releases.id
291+
FROM releases
292+
-- Filter unbuilt/failing builds and yanked releases
293+
WHERE
294+
releases.crate_id = crates.id
295+
AND releases.rustdoc_status
296+
AND NOT releases.yanked
297+
ORDER BY releases.release_time DESC
298+
LIMIT 1
299+
)
300+
-- Only select releases/crates that pass our criteria:
301+
-- - Levenshtein distance between the name and the query is less than or equal to three
302+
-- - The query sandwiched between wildcards matches the crate's name
303+
-- - The query matches the release's description
304+
AND (
305+
levenshtein_less_equal($1, crates.name, 3) <= 3
306+
OR crates.name ILIKE CONCAT('%', $1, '%')
307+
OR plainto_tsquery($1) @@ to_tsvector(releases.description)
308+
)
309+
GROUP BY crates.id, releases.id
310+
-- Order by the levenshtein distance of the name, the text search ranking of the description
311+
-- and finally the number of downloads
312+
ORDER BY
313+
levenshtein_less_equal($1, crates.name, 3) ASC,
314+
ts_rank_cd(to_tsvector(releases.description), plainto_tsquery($1), 32) DESC,
315+
releases.downloads DESC
316+
LIMIT $2 OFFSET $3";
318317

319318
let rows = if let Ok(rows) = conn.query(statement, &[&query, &limit, &offset]) {
320319
rows
321320
} else {
322321
return (0, Vec::new());
323322
};
324323

325-
let total_results = rows.iter().map(|row| row.get::<_, i64>(8)).sum();
324+
// Each row contains the total number of possible/valid results, just get it once
325+
let total_results = rows
326+
.iter()
327+
.next()
328+
.map(|row| row.get::<_, i64>(7))
329+
.unwrap_or_default();
326330
let packages: Vec<Release> = rows
327331
.into_iter()
328332
.map(|row| Release {
@@ -332,7 +336,7 @@ fn get_search_results(
332336
target_name: row.get(3),
333337
release_time: row.get(4),
334338
rustdoc_status: row.get(5),
335-
stars: row.get(6),
339+
stars: row.get::<_, i64>(6) as i32,
336340
})
337341
.collect();
338342

@@ -706,10 +710,10 @@ mod tests {
706710
.create()?;
707711

708712
let (num_results, results) = get_search_results(&db.conn(), "foo", 1, 100);
709-
let mut results = results.into_iter();
710-
711713
assert_eq!(num_results, 4);
712714

715+
let mut results = results.into_iter();
716+
713717
let expected = ["foo", "fo0", "bar-foo", "foo-bar"];
714718
for expected in expected.iter() {
715719
assert_eq!(expected, &results.next().unwrap().name);
@@ -751,15 +755,145 @@ mod tests {
751755
non_exact(&db)?;
752756

753757
let (num_results, results) = get_search_results(&db.conn(), "regex", 1, 100);
754-
let mut results = results.into_iter();
755-
756758
assert_eq!(num_results, 4);
757759

758-
assert_eq!(&results.next().unwrap().name, "regex");
760+
let mut results = results.into_iter();
761+
assert_eq!(results.next().unwrap().name, *name);
759762
rest_non_exact(results.collect());
760763

761764
Ok(())
762765
})
763766
}
764767
}
768+
769+
#[test]
770+
fn unsuccessful_not_shown() {
771+
wrapper(|env| {
772+
let db = env.db();
773+
db.fake_release()
774+
.name("regex")
775+
.version("0.0.0")
776+
.build_result_successful(false)
777+
.create()?;
778+
779+
let (num_results, results) = get_search_results(&db.conn(), "regex", 1, 100);
780+
assert_eq!(num_results, 0);
781+
782+
let results = results.into_iter();
783+
assert_eq!(results.count(), 0);
784+
785+
Ok(())
786+
})
787+
}
788+
789+
#[test]
790+
fn yanked_not_shown() {
791+
wrapper(|env| {
792+
let db = env.db();
793+
db.fake_release()
794+
.name("regex")
795+
.version("0.0.0")
796+
.cratesio_data_yanked(true)
797+
.create()?;
798+
799+
let (num_results, results) = get_search_results(&db.conn(), "regex", 1, 100);
800+
assert_eq!(num_results, 0);
801+
802+
let results = results.into_iter();
803+
assert_eq!(results.count(), 0);
804+
805+
Ok(())
806+
})
807+
}
808+
809+
#[test]
810+
fn fuzzily_match() {
811+
wrapper(|env| {
812+
let db = env.db();
813+
db.fake_release().name("regex").version("0.0.0").create()?;
814+
815+
let (num_results, results) = get_search_results(&db.conn(), "redex", 1, 100);
816+
assert_eq!(num_results, 1);
817+
818+
let mut results = results.into_iter();
819+
assert_eq!(results.next().unwrap().name, "regex");
820+
assert_eq!(results.count(), 0);
821+
822+
Ok(())
823+
})
824+
}
825+
826+
#[test]
827+
fn search_descriptions() {
828+
wrapper(|env| {
829+
let db = env.db();
830+
db.fake_release()
831+
.name("something_completely_unrelated")
832+
.description("Supercalifragilisticexpialidocious")
833+
.create()?;
834+
835+
let (num_results, results) =
836+
get_search_results(&db.conn(), "supercalifragilisticexpialidocious", 1, 100);
837+
assert_eq!(num_results, 1);
838+
839+
let mut results = results.into_iter();
840+
assert_eq!(
841+
results.next().unwrap().name,
842+
"something_completely_unrelated"
843+
);
844+
assert_eq!(results.count(), 0);
845+
846+
Ok(())
847+
})
848+
}
849+
850+
#[test]
851+
fn search_limits() {
852+
wrapper(|env| {
853+
let db = env.db();
854+
855+
db.fake_release().name("something_magical").create()?;
856+
db.fake_release().name("something_sinister").create()?;
857+
db.fake_release().name("something_fantastical").create()?;
858+
db.fake_release()
859+
.name("something_completely_unrelated")
860+
.create()?;
861+
862+
let (num_results, results) = get_search_results(&db.conn(), "something", 1, 2);
863+
assert_eq!(num_results, 4);
864+
865+
let mut results = results.into_iter();
866+
assert_eq!(results.next().unwrap().name, "something_magical");
867+
assert_eq!(results.next().unwrap().name, "something_sinister");
868+
assert_eq!(results.count(), 0);
869+
870+
Ok(())
871+
})
872+
}
873+
874+
#[test]
875+
fn search_offsets() {
876+
wrapper(|env| {
877+
let db = env.db();
878+
db.fake_release().name("something_magical").create()?;
879+
db.fake_release().name("something_sinister").create()?;
880+
db.fake_release().name("something_fantastical").create()?;
881+
db.fake_release()
882+
.name("something_completely_unrelated")
883+
.create()?;
884+
885+
let (num_results, results) = get_search_results(&db.conn(), "something", 2, 2);
886+
assert_eq!(num_results, 4);
887+
888+
let mut results = results.into_iter();
889+
assert_eq!(results.next().unwrap().name, "something_fantastical");
890+
assert_eq!(
891+
results.next().unwrap().name,
892+
"something_completely_unrelated"
893+
);
894+
assert_eq!(results.count(), 0);
895+
896+
Ok(())
897+
})
898+
}
765899
}

0 commit comments

Comments
 (0)