Skip to content

Commit 1ae58eb

Browse files
committed
Use trigram search for single word queries
1 parent 120ae61 commit 1ae58eb

File tree

4 files changed

+195
-23
lines changed

4 files changed

+195
-23
lines changed

src/Share/Postgres/Search/DefinitionSearch/Queries.hs

Lines changed: 46 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,9 @@ module Share.Postgres.Search.DefinitionSearch.Queries
66
claimUnsyncedRelease,
77
insertDefinitionDocuments,
88
cleanIndexForRelease,
9-
defNameInfixSearch,
10-
definitionSearch,
9+
defNameCompletionSearch,
10+
definitionTokenSearch,
11+
definitionNameSearch,
1112
DefnNameSearchFilter (..),
1213
-- Exported for logging/debugging
1314
searchTokensToTsQuery,
@@ -268,9 +269,9 @@ data DefnNameSearchFilter
268269
| ReleaseFilter ReleaseId
269270
| UserFilter UserId
270271

271-
-- | Find definitions whose name contains the query.
272-
defNameInfixSearch :: Maybe UserId -> Maybe DefnNameSearchFilter -> Query -> Limit -> Transaction e [(ProjectId, ReleaseId, Name, TermOrTypeTag)]
273-
defNameInfixSearch mayCaller mayFilter (Query query) limit = do
272+
-- | Find names which would be valid completions for the given query.
273+
defNameCompletionSearch :: Maybe UserId -> Maybe DefnNameSearchFilter -> Query -> Limit -> Transaction e [(ProjectId, ReleaseId, Name, TermOrTypeTag)]
274+
defNameCompletionSearch mayCaller mayFilter (Query query) limit = do
274275
let filters = case mayFilter of
275276
Just (ProjectFilter projId) -> [sql| AND doc.project_id = #{projId} |]
276277
Just (ReleaseFilter relId) -> [sql| AND doc.release_id = #{relId} |]
@@ -309,8 +310,9 @@ defNameInfixSearch mayCaller mayFilter (Query query) limit = do
309310
-- Names are stored in absolute form, but we usually work with them in relative form.
310311
<&> over (traversed . _3) Name.makeRelative
311312

312-
definitionSearch :: Maybe UserId -> Maybe DefnNameSearchFilter -> Limit -> Set (DefnSearchToken (Either Name ShortHash)) -> Maybe Arity -> Transaction e [(ProjectId, ReleaseId, Name, TermOrTypeSummary)]
313-
definitionSearch mayCaller mayFilter limit searchTokens preferredArity = do
313+
-- | Perform a type search for the given tokens.
314+
definitionTokenSearch :: Maybe UserId -> Maybe DefnNameSearchFilter -> Limit -> Set (DefnSearchToken (Either Name ShortHash)) -> Maybe Arity -> Transaction e [(ProjectId, ReleaseId, Name, TermOrTypeSummary)]
315+
definitionTokenSearch mayCaller mayFilter limit searchTokens preferredArity = do
314316
let filters = case mayFilter of
315317
Just (ProjectFilter projId) -> [sql| AND doc.project_id = #{projId} |]
316318
Just (ReleaseFilter relId) -> [sql| AND doc.release_id = #{relId} |]
@@ -355,3 +357,40 @@ definitionSearch mayCaller mayFilter limit searchTokens preferredArity = do
355357
Aeson.Error err -> unrecoverableError $ FailedToDecodeMetadata v (Text.pack err)
356358
Aeson.Success summary -> pure summary
357359
)
360+
361+
-- | Perform a fuzzy trigram search on definition names.
--
-- Returns at most @limit@ rows of @(project, release, name, summary)@ for definitions
-- whose stored name has a @word_similarity@ of at least 0.5 with the query text,
-- ordered from the most similar name to the least similar one.
--
-- * Private projects are only included when @mayCaller@ is set and that user has a row
--   for the project in @accessible_private_projects@.
-- * @mayFilter@ optionally narrows the search to a single project, a single release, or
--   projects owned by a single user.
-- * Matches are de-duplicated per @(project, name)@ before they are ranked.
-- * Returned names are converted with 'Name.makeRelative'.
-- * Metadata that cannot be decoded is reported through 'unrecoverableError' as
--   'FailedToDecodeMetadata'.
--
-- NOTE(review): the de-duplication step sorts release versions ascending, so
-- @DISTINCT ON@ keeps the row from the lowest-versioned release for each name.
-- Confirm whether the newest release was intended instead.
definitionNameSearch :: Maybe UserId -> Maybe DefnNameSearchFilter -> Limit -> Query -> Transaction e [(ProjectId, ReleaseId, Name, TermOrTypeSummary)]
definitionNameSearch mayCaller mayFilter limit (Query query) = do
  let filters = case mayFilter of
        Just (ProjectFilter projId) -> [sql| AND doc.project_id = #{projId} |]
        Just (ReleaseFilter relId) -> [sql| AND doc.release_id = #{relId} |]
        Just (UserFilter userId) -> [sql| AND p.owner_user_id = #{userId} |]
        Nothing -> mempty
  rows <-
    queryListRows @(ProjectId, ReleaseId, Name, Hasql.Jsonb)
      [sql|
      WITH matches_deduped_by_project(project_id, release_id, name, metadata) AS (
        SELECT DISTINCT ON (doc.project_id, doc.name) doc.project_id, doc.release_id, doc.name, doc.metadata FROM global_definition_search_docs doc
          JOIN projects p ON p.id = doc.project_id
          JOIN project_releases r ON r.id = doc.release_id
        WHERE
          -- Adjust the similarity threshold as needed (between 0 and 1)
          word_similarity(#{query}, doc.name) >= 0.5
          AND (NOT p.private OR (#{mayCaller} IS NOT NULL AND EXISTS (SELECT FROM accessible_private_projects pp WHERE pp.user_id = #{mayCaller} AND pp.project_id = p.id)))
          ^{filters}
        ORDER BY doc.project_id, doc.name, r.major_version, r.minor_version, r.patch_version
      ) SELECT m.project_id, m.release_id, m.name, m.metadata
          FROM matches_deduped_by_project m
          -- Score matches by name similarity.
          -- Ties are broken by name and then project so that equally scored rows are
          -- returned (and cut off by the limit) in a stable order.
          ORDER BY word_similarity(#{query}, m.name) DESC, m.name, m.project_id
          LIMIT #{limit}
      |]
  rows
    -- Names are stored in absolute form, but we usually work with them in relative form.
    & over (traversed . _3) Name.makeRelative
    & traverseOf
      (traversed . _4)
      ( \(Hasql.Jsonb v) ->
          case fromJSON v of
            Aeson.Error err -> unrecoverableError $ FailedToDecodeMetadata v (Text.pack err)
            Aeson.Success summary -> pure summary
      )

src/Share/Web/Share/Impl.hs

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -383,7 +383,7 @@ searchDefinitionNamesEndpoint ::
383383
searchDefinitionNamesEndpoint callerUserId query@(Query queryText) mayLimit userFilter projectFilter releaseFilter = do
384384
filter <- runMaybeT $ resolveProjectAndReleaseFilter projectFilter releaseFilter <|> resolveUserFilter (IDs.unPrefix <$> userFilter)
385385
matches <-
386-
(PG.runTransaction $ DDQ.defNameInfixSearch callerUserId filter query limit)
386+
(PG.runTransaction $ DDQ.defNameCompletionSearch callerUserId filter query limit)
387387
<&> ordNubOn (view _3) . (mapMaybe $ traverseOf _3 (rewriteMatches queryText))
388388
let response = matches <&> \(_projId, _releaseId, name, tag) -> DefinitionNameSearchResult name tag
389389
pure response
@@ -438,27 +438,39 @@ searchDefinitionsEndpoint ::
438438
searchDefinitionsEndpoint callerUserId (Query query) mayLimit userFilter projectFilter releaseFilter = do
439439
Logging.logInfoText $ "definition-search-query: " <> query
440440
filter <- runMaybeT $ resolveProjectAndReleaseFilter projectFilter releaseFilter <|> resolveUserFilter (IDs.unPrefix <$> userFilter)
441-
case DefinitionSearch.queryToTokens query of
442-
Left _err -> do
443-
Logging.logErrorText $ "Failed to parse query: " <> query
444-
pure $ DefinitionSearchResults []
445-
Right (searchTokens, mayArity) -> do
446-
Logging.logInfoText $ "definition-search-tokens: " <> DSQ.searchTokensToTsQuery searchTokens
447-
matches <-
448-
PG.runTransactionMode PG.ReadCommitted PG.Read $
449-
DDQ.definitionSearch callerUserId filter limit searchTokens mayArity
450-
>>= PQ.expectProjectShortHandsOf (traversed . _1)
451-
>>= RQ.expectReleaseVersionsOf (traversed . _2)
452-
<&> over (traversed . _2) IDs.ReleaseShortHand
453-
let results =
454-
matches <&> \(project, release, fqn, summary) ->
441+
matches <- case Text.words query of
442+
[] -> pure $ []
443+
[":"] -> pure $ []
444+
-- If the query is a single word, and it doesn't contain "->", we treat it as a name search
445+
[name]
446+
| not (Text.isInfixOf "->" name) ->
447+
PG.runTransactionMode PG.ReadCommitted PG.Read $
448+
DDQ.definitionNameSearch callerUserId filter limit (Query name)
449+
_ -> do
450+
case DefinitionSearch.queryToTokens query of
451+
Left _err -> do
452+
Logging.logErrorText $ "Failed to parse query: " <> query
453+
pure $ []
454+
Right (searchTokens, mayArity) -> do
455+
Logging.logInfoText $ "definition-search-tokens: " <> DSQ.searchTokensToTsQuery searchTokens
456+
PG.runTransactionMode PG.ReadCommitted PG.Read $
457+
DDQ.definitionTokenSearch callerUserId filter limit searchTokens mayArity
458+
results <-
459+
PG.runTransactionMode PG.ReadCommitted PG.Read $
460+
do
461+
PQ.expectProjectShortHandsOf (traversed . _1) matches
462+
>>= RQ.expectReleaseVersionsOf (traversed . _2)
463+
<&> over (traversed . _2) IDs.ReleaseShortHand
464+
<&> fmap
465+
( \(project, release, fqn, summary) ->
455466
DefinitionSearchResult
456467
{ fqn,
457468
summary,
458469
project,
459470
release
460471
}
461-
pure $ DefinitionSearchResults results
472+
)
473+
pure $ DefinitionSearchResults results
462474
where
463475
limit = fromMaybe (Limit 20) mayLimit
464476

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
{
2+
"body": {
3+
"results": [
4+
{
5+
"branchRef": "releases/1.2.3",
6+
"definition": {
7+
"displayName": "data.Text",
8+
"hash": "##Text",
9+
"summary": {
10+
"contents": [
11+
{
12+
"annotation": null,
13+
"segment": "data.Text"
14+
}
15+
],
16+
"tag": "BuiltinObject"
17+
},
18+
"tag": "Data"
19+
},
20+
"fqn": "data.Text",
21+
"kind": "type",
22+
"projectRef": "@transcripts/search"
23+
},
24+
{
25+
"branchRef": "releases/1.2.3",
26+
"definition": {
27+
"displayName": "Nat.fromText",
28+
"hash": "#he2h9hm6c2obcmnvp2g17dcg1l65h9t0q5mamjlrl2lk0flmjioc5j350l0386r88ho1u1ncqklh31qdqgrmbltrincivfcjcsmdeo0",
29+
"summary": {
30+
"contents": [
31+
{
32+
"annotation": {
33+
"contents": "##Text",
34+
"tag": "TypeReference"
35+
},
36+
"segment": "Text"
37+
},
38+
{
39+
"annotation": null,
40+
"segment": " "
41+
},
42+
{
43+
"annotation": {
44+
"tag": "TypeOperator"
45+
},
46+
"segment": "->"
47+
},
48+
{
49+
"annotation": null,
50+
"segment": " "
51+
},
52+
{
53+
"annotation": {
54+
"contents": "##Nat",
55+
"tag": "TypeReference"
56+
},
57+
"segment": "Nat"
58+
}
59+
],
60+
"tag": "UserObject"
61+
},
62+
"tag": "Plain"
63+
},
64+
"fqn": "Nat.fromText",
65+
"kind": "term",
66+
"projectRef": "@transcripts/search"
67+
},
68+
{
69+
"branchRef": "releases/1.2.3",
70+
"definition": {
71+
"displayName": "Nat.toText",
72+
"hash": "#9da94h7t7rhg55vdur11jle4b1r8tn8ptpqc4m4hg8tve9vb57g64av66ghc0dpoc4r5e4f7ippu9qja7nqcq1c73cvd5fuvl0o6efo",
73+
"summary": {
74+
"contents": [
75+
{
76+
"annotation": {
77+
"contents": "##Nat",
78+
"tag": "TypeReference"
79+
},
80+
"segment": "Nat"
81+
},
82+
{
83+
"annotation": null,
84+
"segment": " "
85+
},
86+
{
87+
"annotation": {
88+
"tag": "TypeOperator"
89+
},
90+
"segment": "->"
91+
},
92+
{
93+
"annotation": null,
94+
"segment": " "
95+
},
96+
{
97+
"annotation": {
98+
"contents": "##Text",
99+
"tag": "TypeReference"
100+
},
101+
"segment": "Text"
102+
}
103+
],
104+
"tag": "UserObject"
105+
},
106+
"tag": "Plain"
107+
},
108+
"fqn": "Nat.toText",
109+
"kind": "term",
110+
"projectRef": "@transcripts/search"
111+
}
112+
]
113+
},
114+
"status": [
115+
{
116+
"status_code": 200
117+
}
118+
]
119+
}

transcripts/share-apis/search/run.zsh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ fetch "$transcript_user" GET 'name-search-release-filter-sad' '/search-names?que
5555
# "b -> a -> a"
5656
fetch "$transcript_user" GET 'type-var-search' '/search-definitions?query=b%20-%3E%20a%20-%3E%20a'
5757

58+
# Fuzzy name search (single-word query), should find 'data.Text', 'Nat.fromText' and 'Nat.toText'
59+
fetch "$transcript_user" GET 'defn-search-fuzzy' '/search-definitions?query=Text'
5860

5961
# (a -> b) -> List a -> List b
6062
fetch "$transcript_user" GET 'complex-type-mention-search' '/search-definitions?query=(a%20-%3E%20b)%20-%3E%20List%20a%20-%3E%20List%20b'

0 commit comments

Comments
 (0)