Skip to content

Commit a70a0fe

Browse files
KixironJoshua Nelson
authored and
Joshua Nelson
committed
Worked on search functions a bunch, worked on query, changed search api and added tests
1 parent a8c4fcc commit a70a0fe

File tree

3 files changed

+172
-60
lines changed

3 files changed

+172
-60
lines changed

src/test/fakes.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ impl<'a> FakeRelease<'a> {
8282
}
8383

8484
pub(crate) fn build_result_successful(mut self, new: bool) -> Self {
85+
self.has_docs = new;
8586
self.build_result.successful = new;
8687
self
8788
}

src/test/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ impl TestDatabase {
139139
conn.batch_execute(&format!(
140140
"
141141
CREATE SCHEMA {0};
142-
SET search_path TO {0};
142+
SET search_path TO {0}, public;
143143
",
144144
schema
145145
))?;

src/web/releases.rs

Lines changed: 170 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ const RELEASES_IN_RELEASES: i64 = 30;
1818
/// Releases in recent releases feed
1919
const RELEASES_IN_FEED: i64 = 150;
2020

21-
#[derive(Debug, Clone)]
21+
#[derive(Debug, Clone, PartialEq, Eq)]
2222
pub struct Release {
2323
name: String,
2424
version: String,
@@ -64,13 +64,20 @@ impl ToJson for Release {
6464
}
6565
}
6666

67+
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
6768
enum Order {
6869
ReleaseTime, // this is default order
6970
GithubStars,
7071
RecentFailures,
7172
FailuresByGithubStars,
7273
}
7374

75+
impl Default for Order {
76+
fn default() -> Self {
77+
Self::ReleaseTime
78+
}
79+
}
80+
7481
fn get_releases(conn: &Connection, page: i64, limit: i64, order: Order) -> Vec<Release> {
7582
let offset = (page - 1) * limit;
7683

@@ -259,57 +266,85 @@ fn get_releases_by_owner(
259266
///
260267
fn get_search_results(
261268
conn: &Connection,
262-
mut query: &str,
269+
query: &str,
263270
page: i64,
264271
limit: i64,
265-
) -> Option<(i64, Vec<Release>)> {
266-
query = query.trim();
267-
let split_query = query.replace(' ', " & ");
272+
) -> (i64, Vec<Release>) {
273+
let query = query.trim().to_lowercase();
268274
let offset = (page - 1) * limit;
269275

270-
let rows = conn
271-
.query(
272-
"SELECT crates.name,
273-
MAX(releases.version) AS version,
274-
MAX(releases.description) AS description,
275-
MAX(releases.target_name) AS target_name,
276-
MAX(releases.release_time) AS release_time,
277-
-- Cast the boolean into an integer and then cast it into a boolean.
278-
-- Posgres moves in mysterious ways, don't question it
279-
CAST(MAX(releases.rustdoc_status::integer) AS boolean) as rustdoc_status,
276+
let statement = "SELECT
277+
crates.name,
278+
latest_release.version AS version,
279+
latest_release.description AS description,
280+
latest_release.target_name AS target_name,
281+
latest_release.release_time AS release_time,
282+
latest_release.rustdoc_status AS rustdoc_status,
280283
crates.github_stars,
281284
SUM(releases.downloads) AS downloads,
285+
-- Get the total number of results, disregarding the limit
286+
COUNT(*) OVER() as total,
282287
283288
-- The levenshtein distance between the search query and the crate's name
284-
levenshtein_less_equal($1, crates.name, 3) as distance,
289+
levenshtein_less_equal(CAST($1 AS TEXT), CAST(crates.name AS TEXT), 3) as distance,
285290
-- The similarity of the tokens of the search vs the tokens of `crates.content`.
286291
-- The `32` normalizes the number by using `rank / (rank + 1)`
287-
ts_rank_cd(crates.content, to_tsquery($2), 32) as content_rank
288-
FROM releases INNER JOIN crates on releases.crate_id = crates.id
289-
290-
-- Filter crates that haven't been built and crates that have been yanked
291-
WHERE releases.rustdoc_status = true
292-
AND releases.yanked = false
292+
ts_rank_cd(crates.content, plainto_tsquery($1), 32) as content_rank
293+
FROM
294+
crates
295+
INNER JOIN releases ON releases.crate_id = crates.id
296+
INNER JOIN (
297+
SELECT DISTINCT ON (crate_id)
298+
crate_id,
299+
version,
300+
description,
301+
target_name,
302+
release_time,
303+
rustdoc_status,
304+
yanked
305+
FROM
306+
releases
307+
ORDER BY
308+
crate_id,
309+
release_time DESC
310+
) AS latest_release ON latest_release.crate_id = crates.id
311+
312+
-- Filter crates that haven't been built and crates that have been yanked and
313+
-- crates that don't match the query closely enough
314+
WHERE
315+
latest_release.rustdoc_status
316+
AND NOT latest_release.yanked
293317
AND (
294318
-- Crates names that match the query sandwiched between wildcards will pass
295319
crates.name ILIKE CONCAT('%', $1, '%')
296320
-- Crate names with which the levenshtein distance is closer or equal to 3 will pass
297321
OR levenshtein_less_equal($1, crates.name, 3) <= 3
298322
-- Crates where their content matches the query will pass
299-
OR to_tsquery($2) @@ crates.content
323+
OR plainto_tsquery($1) @@ crates.content
300324
)
301-
GROUP BY crates.id
325+
326+
GROUP BY crates.id, releases.id
327+
302328
-- Ordering is prioritized by how closely the query matches the name, how closely the
303329
-- query matches the description finally how many downloads the crate has
304-
ORDER BY distance DESC,
330+
ORDER BY
331+
distance ASC,
305332
content_rank DESC,
306-
SUM(downloads) DESC
333+
downloads_total DESC
334+
307335
-- Allows pagination
308-
LIMIT $3 OFFSET $4",
309-
&[&query, &split_query, &limit, &offset],
310-
)
311-
.ok()?;
336+
LIMIT $2 OFFSET $3";
312337

338+
let rows = if let Ok(rows) = conn
339+
.query(statement, &[&query, &limit, &offset])
340+
.map_err(|err| dbg!(err))
341+
{
342+
rows
343+
} else {
344+
return (0, Vec::new());
345+
};
346+
347+
let total_results: i64 = rows.iter().next().map(|row| row.get(8)).unwrap_or_default();
313348
let packages: Vec<Release> = rows
314349
.into_iter()
315350
.map(|row| Release {
@@ -323,22 +358,7 @@ fn get_search_results(
323358
})
324359
.collect();
325360

326-
if !packages.is_empty() {
327-
// Get the total number of results that the query matches
328-
let rows = conn
329-
.query(
330-
"SELECT COUNT(*) FROM crates
331-
WHERE crates.name ILIKE CONCAT('%', CAST($1 AS TEXT), '%')
332-
OR levenshtein_less_equal(CAST($1 AS TEXT), crates.name, 3) <= 3
333-
OR crates.content @@ to_tsquery(CAST($2 AS TEXT))",
334-
&[&(query as &str), &(&split_query as &str)],
335-
)
336-
.unwrap();
337-
338-
Some((rows.get(0).get(0), packages))
339-
} else {
340-
None
341-
}
361+
(total_results, packages)
342362
}
343363

344364
pub fn home_page(req: &mut Request) -> IronResult<Response> {
@@ -607,20 +627,18 @@ pub fn search_handler(req: &mut Request) -> IronResult<Response> {
607627
}
608628
}
609629

610-
if let Some((_, results)) = get_search_results(&conn, &query, 1, RELEASES_IN_RELEASES) {
611-
// FIXME: There is no pagination
612-
Page::new(results)
613-
.set("search_query", &query)
614-
.title(&format!("Search results for '{}'", query))
615-
.to_resp("releases")
630+
let (_, results) = get_search_results(&conn, &query, 1, RELEASES_IN_RELEASES);
631+
let title = if results.is_empty() {
632+
format!("No results found for '{}'", query)
616633
} else {
617-
// Return an empty page with an error message and an intact query so that
618-
// the user can edit it
619-
Page::new("".to_string())
620-
.set("search_query", &query)
621-
.title(&format!("No results found for '{}'", query))
622-
.to_resp("releases")
623-
}
634+
format!("Search results for '{}'", query)
635+
};
636+
637+
// FIXME: There is no pagination
638+
Page::new(results)
639+
.set("search_query", &query)
640+
.title(&title)
641+
.to_resp("releases")
624642
} else {
625643
Err(IronError::new(Nope::NoResults, status::NotFound))
626644
}
@@ -674,3 +692,96 @@ pub fn build_queue_handler(req: &mut Request) -> IronResult<Response> {
674692
.set_true("releases_queue_tab")
675693
.to_resp("releases_queue")
676694
}
695+
696+
#[cfg(test)]
697+
mod tests {
698+
use super::*;
699+
use crate::test::{wrapper, TestDatabase};
700+
701+
#[test]
702+
fn database_search() {
703+
wrapper(|env| {
704+
let db = env.db();
705+
706+
db.fake_release().name("foo").version("0.0.0").create()?;
707+
db.fake_release()
708+
.name("bar-foo")
709+
.version("0.0.0")
710+
.create()?;
711+
db.fake_release()
712+
.name("foo-bar")
713+
.version("0.0.1")
714+
.create()?;
715+
db.fake_release().name("fo0").version("0.0.0").create()?;
716+
db.fake_release()
717+
.name("fool")
718+
.version("0.0.0")
719+
.build_result_successful(false)
720+
.create()?;
721+
db.fake_release()
722+
.name("freakin")
723+
.version("0.0.0")
724+
.create()?;
725+
db.fake_release()
726+
.name("something unreleated")
727+
.version("0.0.0")
728+
.create()?;
729+
730+
let (num_results, results) = get_search_results(&db.conn(), "foo", 1, 100);
731+
let mut results = results.into_iter();
732+
733+
assert_eq!(num_results, 4);
734+
735+
let expected = ["foo", "fo0", "bar-foo", "foo-bar"];
736+
for expected in expected.iter() {
737+
assert_eq!(expected, &results.next().unwrap().name);
738+
}
739+
assert!(results.collect::<Vec<_>>().is_empty());
740+
741+
Ok(())
742+
})
743+
}
744+
745+
fn non_exact(db: &TestDatabase) -> Result<(), crate::error::Error> {
746+
db.fake_release().name("reg3x").version("0.0.0").create()?;
747+
db.fake_release().name("regex-").version("0.0.0").create()?;
748+
db.fake_release()
749+
.name("regex-syntax")
750+
.version("0.0.0")
751+
.create()?;
752+
753+
Ok(())
754+
}
755+
756+
fn rest_non_exact(mut rest: Vec<Release>) {
757+
for name in ["reg3x", "regex-", "regex-syntax"].iter() {
758+
assert_eq!(rest.remove(0).name, *name);
759+
}
760+
761+
assert!(rest.is_empty());
762+
}
763+
764+
#[test]
765+
fn exacts_dont_care() {
766+
let near_matches = ["Regex", "rEgex", "reGex", "regEx", "regeX"];
767+
768+
for name in near_matches.iter() {
769+
wrapper(|env| {
770+
let db = env.db();
771+
db.fake_release().name(name).version("0.0.0").create()?;
772+
773+
non_exact(&db)?;
774+
775+
let (num_results, results) = get_search_results(&db.conn(), "foo", 1, 100);
776+
let mut results = results.into_iter();
777+
778+
assert_eq!(num_results, 4);
779+
780+
assert_eq!(&results.next().unwrap().name, "regex");
781+
rest_non_exact(results.collect());
782+
783+
Ok(())
784+
})
785+
}
786+
}
787+
}

0 commit comments

Comments
 (0)