Skip to content

Commit a8c4fcc

Browse files
KixironJoshua Nelson
authored and committed
Search, and you shall find
1 parent 4b54b52 commit a8c4fcc

File tree

2 files changed

+103
-44
lines changed

2 files changed

+103
-44
lines changed

src/db/migrate.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,25 @@ pub fn migrate(version: Option<Version>, conn: &Connection) -> CratesfyiResult<(
325325
ALTER TABLE releases ALTER COLUMN doc_targets DROP NOT NULL;
326326
"
327327
),
328+
migration!(
329+
context,
330+
// version
331+
13,
332+
// description
333+
"Add string searching",
334+
// upgrade query
335+
"DO $$ BEGIN
336+
IF (SELECT COUNT(*) FROM pg_extension WHERE extname = 'fuzzystrmatch') = 0 THEN
337+
CREATE EXTENSION fuzzystrmatch;
338+
END IF;
339+
END $$;",
340+
// downgrade query
341+
"DO $$ BEGIN
342+
IF (SELECT COUNT(*) FROM pg_extension WHERE extname = 'fuzzystrmatch') > 0 THEN
343+
DROP EXTENSION fuzzystrmatch;
344+
END IF;
345+
END $$;",
346+
),
328347
];
329348

330349
for migration in migrations {

src/web/releases.rs

Lines changed: 84 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -244,57 +244,94 @@ fn get_releases_by_owner(
244244
(author_name, packages)
245245
}
246246

247+
/// Get the search results for a search query
248+
///
249+
/// Retrieves crates whose names are within a Levenshtein distance of 3 of the query,
250+
/// crates whose names contain the query, or crates whose descriptions
251+
/// match the search query.
252+
///
253+
/// * `query`: The query string, unfiltered
254+
/// * `page`: The page of results to show (1-indexed)
255+
/// * `limit`: The number of results to return
256+
///
257+
/// Returns `None` if the query fails or finds nothing, else `Some` with the total count and the
258+
/// currently requested results
259+
///
247260
fn get_search_results(
248261
conn: &Connection,
249-
query: &str,
262+
mut query: &str,
250263
page: i64,
251264
limit: i64,
252265
) -> Option<(i64, Vec<Release>)> {
266+
query = query.trim();
267+
let split_query = query.replace(' ', " & ");
253268
let offset = (page - 1) * limit;
254-
let mut packages = Vec::new();
255269

256-
let rows = match conn.query(
257-
"SELECT crates.name,
258-
releases.version,
259-
releases.description,
260-
releases.target_name,
261-
releases.release_time,
262-
releases.rustdoc_status,
263-
ts_rank_cd(crates.content, to_tsquery($1)) AS rank
264-
FROM crates
265-
INNER JOIN releases ON crates.latest_version_id = releases.id
266-
WHERE crates.name LIKE concat('%', $1, '%')
267-
OR crates.content @@ to_tsquery($1)
268-
ORDER BY crates.name = $1 DESC,
269-
crates.name LIKE concat('%', $1, '%') DESC,
270-
rank DESC
271-
LIMIT $2 OFFSET $3",
272-
&[&query, &limit, &offset],
273-
) {
274-
Ok(r) => r,
275-
Err(_) => return None,
276-
};
270+
let rows = conn
271+
.query(
272+
"SELECT crates.name,
273+
MAX(releases.version) AS version,
274+
MAX(releases.description) AS description,
275+
MAX(releases.target_name) AS target_name,
276+
MAX(releases.release_time) AS release_time,
277+
-- Cast the boolean into an integer and then cast it into a boolean.
278+
-- Posgres moves in mysterious ways, don't question it
279+
CAST(MAX(releases.rustdoc_status::integer) AS boolean) as rustdoc_status,
280+
crates.github_stars,
281+
SUM(releases.downloads) AS downloads,
282+
283+
-- The levenshtein distance between the search query and the crate's name
284+
levenshtein_less_equal($1, crates.name, 3) as distance,
285+
-- The similarity of the tokens of the search vs the tokens of `crates.content`.
286+
-- The `32` normalizes the number by using `rank / (rank + 1)`
287+
ts_rank_cd(crates.content, to_tsquery($2), 32) as content_rank
288+
FROM releases INNER JOIN crates on releases.crate_id = crates.id
289+
290+
-- Filter crates that haven't been built and crates that have been yanked
291+
WHERE releases.rustdoc_status = true
292+
AND releases.yanked = false
293+
AND (
294+
-- Crates names that match the query sandwiched between wildcards will pass
295+
crates.name ILIKE CONCAT('%', $1, '%')
296+
-- Crate names with which the levenshtein distance is closer or equal to 3 will pass
297+
OR levenshtein_less_equal($1, crates.name, 3) <= 3
298+
-- Crates where their content matches the query will pass
299+
OR to_tsquery($2) @@ crates.content
300+
)
301+
GROUP BY crates.id
302+
-- Ordering is prioritized by how closely the query matches the name, how closely the
303+
-- query matches the description finally how many downloads the crate has
304+
ORDER BY distance DESC,
305+
content_rank DESC,
306+
SUM(downloads) DESC
307+
-- Allows pagination
308+
LIMIT $3 OFFSET $4",
309+
&[&query, &split_query, &limit, &offset],
310+
)
311+
.ok()?;
277312

278-
for row in &rows {
279-
let package = Release {
313+
let packages: Vec<Release> = rows
314+
.into_iter()
315+
.map(|row| Release {
280316
name: row.get(0),
281317
version: row.get(1),
282318
description: row.get(2),
283319
target_name: row.get(3),
284320
release_time: row.get(4),
285321
rustdoc_status: row.get(5),
286-
..Release::default()
287-
};
288-
289-
packages.push(package);
290-
}
322+
stars: row.get(6),
323+
})
324+
.collect();
291325

292326
if !packages.is_empty() {
293-
// get count of total results
327+
// Get the total number of results that the query matches
294328
let rows = conn
295329
.query(
296-
"SELECT COUNT(*) FROM crates WHERE content @@ to_tsquery($1)",
297-
&[&query],
330+
"SELECT COUNT(*) FROM crates
331+
WHERE crates.name ILIKE CONCAT('%', CAST($1 AS TEXT), '%')
332+
OR levenshtein_less_equal(CAST($1 AS TEXT), crates.name, 3) <= 3
333+
OR crates.content @@ to_tsquery(CAST($2 AS TEXT))",
334+
&[&(query as &str), &(&split_query as &str)],
298335
)
299336
.unwrap();
300337

@@ -570,17 +607,20 @@ pub fn search_handler(req: &mut Request) -> IronResult<Response> {
570607
}
571608
}
572609

573-
let search_query = query.replace(" ", " & ");
574-
#[allow(clippy::or_fun_call)]
575-
get_search_results(&conn, &search_query, 1, RELEASES_IN_RELEASES)
576-
.ok_or_else(|| IronError::new(Nope::NoResults, status::NotFound))
577-
.and_then(|(_, results)| {
578-
// FIXME: There is no pagination
579-
Page::new(results)
580-
.set("search_query", &query)
581-
.title(&format!("Search results for '{}'", query))
582-
.to_resp("releases")
583-
})
610+
if let Some((_, results)) = get_search_results(&conn, &query, 1, RELEASES_IN_RELEASES) {
611+
// FIXME: There is no pagination
612+
Page::new(results)
613+
.set("search_query", &query)
614+
.title(&format!("Search results for '{}'", query))
615+
.to_resp("releases")
616+
} else {
617+
// Return an empty page with an error message and an intact query so that
618+
// the user can edit it
619+
Page::new("".to_string())
620+
.set("search_query", &query)
621+
.title(&format!("No results found for '{}'", query))
622+
.to_resp("releases")
623+
}
584624
} else {
585625
Err(IronError::new(Nope::NoResults, status::NotFound))
586626
}

0 commit comments

Comments
 (0)