Skip to content

Commit 8925edb

Browse files
Kixiron authored and Joshua Nelson committed
New query, thanks to @Nemo157
1 parent e4e9c14 commit 8925edb

File tree

2 files changed

+22
-34
lines changed

2 files changed

+22
-34
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@ jobs:
5858
sleep 5
5959
# Make sure the database is actually working
6060
psql "${CRATESFYI_DATABASE_URL}"
61-
sudo -u postgres psql -c "CREATE EXTENSION IF NOT EXISTS fuzzystrsearch"
61+
psql "${CRATESFYI_DATABASE_URL}" -c "CREATE EXTENSION IF NOT EXISTS fuzzystrsearch"
6262
6363
- name: Build docs.rs
6464
run: cargo build --locked

src/web/releases.rs

Lines changed: 21 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -281,40 +281,27 @@ fn get_search_results(
281281
releases.release_time,
282282
releases.rustdoc_status,
283283
crates.github_stars,
284-
-- Get the total number of results, disregarding the limit
285284
COUNT(*) OVER() as total
286-
FROM releases
287-
INNER JOIN crates on releases.crate_id = crates.id
288-
WHERE
289-
-- Only select the newest release by release time
290-
releases.id = (
291-
SELECT releases.id
285+
FROM crates
286+
INNER JOIN (
287+
SELECT releases.id, releases.crate_id
288+
FROM (
289+
SELECT
290+
releases.id,
291+
releases.crate_id,
292+
rank() OVER (PARTITION BY crate_id ORDER BY release_time DESC) as rank
292293
FROM releases
293-
-- Filter unbuilt/failing builds and yanked releases
294-
WHERE
295-
releases.crate_id = crates.id
296-
AND releases.rustdoc_status
297-
AND NOT releases.yanked
298-
-- Only select releases/crates that pass our criteria:
299-
-- - Levenshtein distance between the name and query is acceptable
300-
-- - The query sandwiched between wildcards matches the crate's name
301-
-- - The query matches the release's description
302-
AND (
303-
-- Turn the levenshtein distance into a percentage using `distance / max(query.len(), crates.name.len())`
304-
-- this percentage is normalized and allows us to empirically compare the 'sameness' of different names
305-
((char_length($1)::float - levenshtein(crates.name, $1)::float) / char_length($1)::float) >= 0.65
306-
OR crates.name ILIKE CONCAT('%', $1, '%')
307-
OR plainto_tsquery($1) @@ to_tsvector(releases.description)
308-
)
309-
ORDER BY releases.release_time DESC
310-
LIMIT 1
311-
)
294+
WHERE releases.rustdoc_status AND NOT releases.yanked
295+
) AS releases
296+
WHERE releases.rank = 1
297+
) AS latest_release ON latest_release.crate_id = crates.id
298+
INNER JOIN releases ON latest_release.id = releases.id
299+
WHERE
300+
((char_length($1)::float - levenshtein(crates.name, $1)::float) / char_length($1)::float) >= 0.65
301+
OR crates.name ILIKE CONCAT('%', $1, '%')
302+
OR plainto_tsquery($1) @@ to_tsvector(releases.description)
312303
GROUP BY crates.id, releases.id
313-
-- Order by the levenshtein distance of the name, the text search ranking of the description
314-
-- and finally the number of downloads
315304
ORDER BY
316-
-- Order the levenshtein matches by their literal distance, so that `fo` matches `foo` more closely than `fooo`,
317-
-- because their normalized distances will be the same
318305
levenshtein(crates.name, $1) ASC,
319306
crates.name ILIKE CONCAT('%', $1, '%'),
320307
ts_rank_cd(to_tsvector(releases.description), plainto_tsquery($1), 32) DESC,
@@ -738,16 +725,17 @@ mod tests {
738725
wrapper(|env| {
739726
let db = env.db();
740727

741-
let releases = ["regex", "reg3x", "regex-", "regex-syntax"];
728+
let releases = ["regex", "regex-", "regex-syntax"];
742729
for release in releases.iter() {
743730
db.fake_release().name(release).version("0.0.0").create()?;
744731
}
745732

746733
let near_matches = ["Regex", "rEgex", "reGex", "regEx", "regeX"];
747734

748735
for name in near_matches.iter() {
749-
let (num_results, mut results) = get_search_results(&db.conn(), *name, 1, 100);
750-
assert_eq!(num_results, 4);
736+
let (num_results, mut results) =
737+
dbg!(get_search_results(&db.conn(), *name, 1, 100));
738+
assert_eq!(num_results, 3);
751739

752740
for name in releases.iter() {
753741
assert_eq!(results.remove(0).name, *name);

0 commit comments

Comments
 (0)