Skip to content

Commit f3f5aa5

Browse files
KixironJoshua Nelson
and
Joshua Nelson
committed
Code review
Co-Authored-By: Joshua Nelson <[email protected]>
1 parent a70a0fe commit f3f5aa5

File tree

2 files changed

+33
-63
lines changed

2 files changed

+33
-63
lines changed

src/db/migrate.rs

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -330,19 +330,11 @@ pub fn migrate(version: Option<Version>, conn: &Connection) -> CratesfyiResult<(
330330
// version
331331
13,
332332
// description
333-
"Add string searching",
333+
"Add fuzzy string searching",
334334
// upgrade query
335-
"DO $$ BEGIN
336-
IF (SELECT COUNT(*) FROM pg_extension WHERE extname = 'fuzzystrmatch') = 0 THEN
337-
CREATE EXTENSION fuzzystrmatch;
338-
END IF;
339-
END $$;",
335+
"CREATE EXTENSION IF NOT EXISTS fuzzystrmatch",
340336
// downgrade query
341-
"DO $$ BEGIN
342-
IF (SELECT COUNT(*) FROM pg_extension WHERE extname = 'fuzzystrmatch') > 0 THEN
343-
DROP EXTENSION fuzzystrmatch;
344-
END IF;
345-
END $$;",
337+
"DROP EXTENSION IF EXISTS fuzzystrmatch;",
346338
),
347339
];
348340

src/web/releases.rs

Lines changed: 30 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -251,69 +251,50 @@ fn get_releases_by_owner(
251251
(author_name, packages)
252252
}
253253

254-
/// Get the search results for a search query
254+
/// Get the search results for a crate search query
255255
///
256256
/// Retrieves crates which names have a levenshtein distance of less than or equal to 3,
257-
/// crates who fit into or otherwise are made up of the query or crates who's descriptions
257+
/// crates who fit into or otherwise are made up of the query or crates whose descriptions
258258
/// match the search query.
259259
///
260260
/// * `query`: The query string, unfiltered
261261
/// * `page`: The page of results to show (1-indexed)
262262
/// * `limit`: The number of results to return
263263
///
264-
/// Returns `None` if no results are found and `Some` with the total number of results and the
265-
/// currently requested results
264+
/// Returns 0 and an empty Vec when no results are found or if a database error occurs
266265
///
267266
fn get_search_results(
268267
conn: &Connection,
269268
query: &str,
270269
page: i64,
271270
limit: i64,
272271
) -> (i64, Vec<Release>) {
273-
let query = query.trim().to_lowercase();
272+
query = query.trim();
274273
let offset = (page - 1) * limit;
275274

276-
let statement = "SELECT
277-
crates.name,
278-
latest_release.version AS version,
279-
latest_release.description AS description,
280-
latest_release.target_name AS target_name,
281-
latest_release.release_time AS release_time,
282-
latest_release.rustdoc_status AS rustdoc_status,
275+
let statement =
276+
"SELECT crates.name,
277+
-- NOTE: this selects the latest alphanumeric version, which may not be the latest semver
278+
MAX(releases.version) AS version,
279+
MAX(releases.description) AS description,
280+
MAX(releases.target_name) AS target_name,
281+
MAX(releases.release_time) AS release_time,
282+
-- Cast the boolean into an integer and then cast it into a boolean.
283+
-- Posgres moves in mysterious ways, don't question it
284+
CAST(MAX(releases.rustdoc_status::integer) AS boolean) as rustdoc_status,
283285
crates.github_stars,
284-
SUM(releases.downloads) AS downloads,
285-
-- Get the total number of results, disregarding the limit
286-
COUNT(*) OVER() as total,
286+
crates.downloads_total as downloads,
287287
288288
-- The levenshtein distance between the search query and the crate's name
289-
levenshtein_less_equal(CAST($1 AS TEXT), CAST(crates.name AS TEXT), 3) as distance,
289+
levenshtein_less_equal($1, crates.name, 3) as distance,
290290
-- The similarity of the tokens of the search vs the tokens of `crates.content`.
291291
-- The `32` normalizes the number by using `rank / (rank + 1)`
292-
ts_rank_cd(crates.content, plainto_tsquery($1), 32) as content_rank
293-
FROM
294-
crates
295-
INNER JOIN releases ON releases.crate_id = crates.id
296-
INNER JOIN (
297-
SELECT DISTINCT ON (crate_id)
298-
crate_id,
299-
version,
300-
description,
301-
target_name,
302-
release_time,
303-
rustdoc_status,
304-
yanked
305-
FROM
306-
releases
307-
ORDER BY
308-
crate_id,
309-
release_time DESC
310-
) AS latest_release ON latest_release.crate_id = crates.id
311-
312-
-- Filter crates that haven't been built and crates that have been yanked and
313-
-- crates that don't match the query closely enough
314-
WHERE
315-
latest_release.rustdoc_status
316-
AND NOT latest_release.yanked
292+
ts_rank_cd(crates.content, to_tsquery($2), 32) as content_rank
293+
FROM releases INNER JOIN crates on releases.crate_id = crates.id
294+
295+
-- Filter crates that haven't been built and crates that have been yanked
296+
WHERE releases.rustdoc_status = true
297+
AND releases.yanked = false
317298
AND (
318299
-- Crates names that match the query sandwiched between wildcards will pass
319300
crates.name ILIKE CONCAT('%', $1, '%')
@@ -326,25 +307,22 @@ fn get_search_results(
326307
GROUP BY crates.id, releases.id
327308
328309
-- Ordering is prioritized by how closely the query matches the name, how closely the
329-
-- query matches the description finally how many downloads the crate has
330-
ORDER BY
331-
distance ASC,
310+
-- query matches the description, and finally how many downloads the crate has
311+
-- NOTE: this means that exact matches will be shown first
312+
ORDER BY distance DESC,
332313
content_rank DESC,
333314
downloads_total DESC
334315
335316
-- Allows pagination
336317
LIMIT $2 OFFSET $3";
337318

338-
let rows = if let Ok(rows) = conn
339-
.query(statement, &[&query, &limit, &offset])
340-
.map_err(|err| dbg!(err))
341-
{
319+
let rows = if let Ok(rows) = conn.query(statement, &[&query, &limit, &offset]) {
342320
rows
343321
} else {
344322
return (0, Vec::new());
345323
};
346324

347-
let total_results: i64 = rows.iter().next().map(|row| row.get(8)).unwrap_or_default();
325+
let total_results = rows.iter().map(|row| row.get::<_, i64>(8)).sum();
348326
let packages: Vec<Release> = rows
349327
.into_iter()
350328
.map(|row| Release {
@@ -736,7 +714,7 @@ mod tests {
736714
for expected in expected.iter() {
737715
assert_eq!(expected, &results.next().unwrap().name);
738716
}
739-
assert!(results.collect::<Vec<_>>().is_empty());
717+
assert_eq!(results.count(), 0);
740718

741719
Ok(())
742720
})
@@ -763,7 +741,7 @@ mod tests {
763741

764742
#[test]
765743
fn exacts_dont_care() {
766-
let near_matches = ["Regex", "rEgex", "reGex", "regEx", "regeX"];
744+
let near_matches = ["regex", "Regex", "rEgex", "reGex", "regEx", "regeX"];
767745

768746
for name in near_matches.iter() {
769747
wrapper(|env| {
@@ -772,7 +750,7 @@ mod tests {
772750

773751
non_exact(&db)?;
774752

775-
let (num_results, results) = get_search_results(&db.conn(), "foo", 1, 100);
753+
let (num_results, results) = get_search_results(&db.conn(), "regex", 1, 100);
776754
let mut results = results.into_iter();
777755

778756
assert_eq!(num_results, 4);

0 commit comments

Comments
 (0)