Skip to content

Commit 99f86e2

Browse files
committed
Merge in project-search fixes
2 parents 10416b9 + 158b64c commit 99f86e2

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

60 files changed

+2796
-300
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,6 @@ docker/tmp
1515
*.prof
1616
*.prof.html
1717
prelude.output.md
18+
19+
# Scratch files
20+
*.u

app/Env.hs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import Share.Utils.Logging qualified as Logging
2323
import Share.Utils.Servant.Cookies qualified as Cookies
2424
import Share.Web.Authentication (cookieSessionTTL)
2525
import Hasql.Pool qualified as Pool
26+
import Hasql.Pool.Config qualified as Pool
2627
import Network.URI (parseURI)
2728
import Servant.API qualified as Servant
2829
import System.Environment (lookupEnv)
@@ -97,11 +98,11 @@ withEnv action = do
9798
let pgConnectionMaxIdleTime = Time.secondsToDiffTime (60 * 5) -- 5 minutes
9899
-- Limiting max lifetime helps cycle connections which may have accumulated memory cruft.
99100
let pgConnectionMaxLifetime = Time.secondsToDiffTime (60 * 60) -- 1 hour
100-
pgConnectionPool <-
101-
Pool.acquire postgresConnMax pgConnectionAcquisitionTimeout pgConnectionMaxLifetime pgConnectionMaxIdleTime (Text.encodeUtf8 postgresConfig)
101+
let pgSettings = Pool.settings [Pool.staticConnectionSettings (Text.encodeUtf8 postgresConfig), Pool.size postgresConnMax, Pool.acquisitionTimeout pgConnectionAcquisitionTimeout, Pool.idlenessTimeout pgConnectionMaxIdleTime, Pool.agingTimeout pgConnectionMaxLifetime]
102+
pgConnectionPool <- Pool.acquire pgSettings
102103
timeCache <- FL.newTimeCache FL.simpleTimeFormat -- E.g. 05/Sep/2023:13:23:56 -0700
103104
sandboxedRuntime <- RT.startRuntime True RT.Persistent "share"
104-
let requestCtx = ()
105+
let ctx = ()
105106
-- We use a zero-width-space to separate log-lines on ingestion, this allows us to use newlines for
106107
-- formatting, but without affecting log-grouping.
107108
let zeroWidthSpace = "\x200B"

docker/docker-compose.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@ services:
1717
POSTGRES_PASSWORD: sekrit
1818
volumes:
1919
- ../sql:/docker-entrypoint-initdb.d
20-
# # Optionally persist the data between container invocations
21-
# - postgresVolume:/var/lib/postgresql/data
20+
# Persist the data between container invocations (comment this out, and the top-level volumes entry, to disable)
21+
- postgresVolume:/var/lib/postgresql/data
2222
- ./postgresql.conf:/etc/postgresql/postgresql.conf
23-
command: postgres -c config_file=/etc/postgresql/postgresql.conf # -c log_statement=all
23+
command: postgres -c config_file=/etc/postgresql/postgresql.conf # -c log_statement=all
2424

2525

2626
redis:
@@ -85,5 +85,5 @@ services:
8585
- redis
8686
- postgres
8787

88-
# volumes:
89-
# postgresVolume:
88+
volumes:
89+
postgresVolume:

package.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,17 +62,20 @@ dependencies:
6262
- http-media
6363
- http-types
6464
- jose
65+
- ki-unlifted
6566
- lens
6667
- megaparsec
6768
- memory
6869
- mmorph
6970
- monad-validate
71+
- monoidal-containers
7072
- mtl
7173
- network
7274
- network-simple
7375
- network-uri
7476
- nonempty-containers
7577
- parallel
78+
- parser-combinators
7679
- pem
7780
- hasql
7881
- hasql-pool
@@ -159,6 +162,7 @@ default-extensions:
159162
- BlockArguments
160163
- QuasiQuotes
161164
- ImportQualifiedPost
165+
- OverloadedRecordDot
162166

163167
library:
164168
source-dirs: src

share-api.cabal

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,12 @@ library
2727
Share
2828
Share.App
2929
Share.Backend
30+
Share.BackgroundJobs
31+
Share.BackgroundJobs.Errors
32+
Share.BackgroundJobs.Monad
33+
Share.BackgroundJobs.Search.DefinitionSync
34+
Share.BackgroundJobs.Search.DefinitionSync.Types
35+
Share.BackgroundJobs.Workers
3036
Share.Branch
3137
Share.Codebase
3238
Share.Codebase.Types
@@ -45,6 +51,7 @@ library
4551
Share.Postgres.Causal.Types
4652
Share.Postgres.Comments.Queries
4753
Share.Postgres.Contributions.Queries
54+
Share.Postgres.Cursors
4855
Share.Postgres.Definitions.Queries
4956
Share.Postgres.Definitions.Types
5057
Share.Postgres.Hashes.Queries
@@ -63,6 +70,8 @@ library
6370
Share.Postgres.Projects.Queries
6471
Share.Postgres.Queries
6572
Share.Postgres.Refs.Types
73+
Share.Postgres.Releases.Queries
74+
Share.Postgres.Search.DefinitionSearch.Queries
6675
Share.Postgres.Serialization
6776
Share.Postgres.Sync.Conversions
6877
Share.Postgres.Sync.Queries
@@ -118,6 +127,7 @@ library
118127
Share.Web.Share.Contributions.Impl
119128
Share.Web.Share.Contributions.MergeDetection
120129
Share.Web.Share.Contributions.Types
130+
Share.Web.Share.DefinitionSearch
121131
Share.Web.Share.Diffs.Impl
122132
Share.Web.Share.Diffs.Types
123133
Share.Web.Share.Impl
@@ -144,6 +154,7 @@ library
144154
Unison.Server.NameSearch.Postgres
145155
Unison.Server.Share.Definitions
146156
Unison.Server.Share.DefinitionSummary
157+
Unison.Server.Share.DefinitionSummary.Types
147158
Unison.Server.Share.Docs
148159
Unison.Server.Share.FuzzyFind
149160
Unison.Server.Share.NamespaceDetails
@@ -180,6 +191,7 @@ library
180191
BlockArguments
181192
QuasiQuotes
182193
ImportQualifiedPost
194+
OverloadedRecordDot
183195
ghc-options: -Wall -Werror -Wname-shadowing -Wno-type-defaults -Wno-missing-pattern-synonym-signatures -fprint-expanded-synonyms -fwrite-ide-info -O2 -funbox-strict-fields
184196
build-depends:
185197
Diff
@@ -217,17 +229,20 @@ library
217229
, http-media
218230
, http-types
219231
, jose
232+
, ki-unlifted
220233
, lens
221234
, megaparsec
222235
, memory
223236
, mmorph
224237
, monad-validate
238+
, monoidal-containers
225239
, mtl
226240
, network
227241
, network-simple
228242
, network-uri
229243
, nonempty-containers
230244
, parallel
245+
, parser-combinators
231246
, pem
232247
, prometheus-client
233248
, prometheus-metrics-ghc
@@ -319,6 +334,7 @@ executable share-api
319334
BlockArguments
320335
QuasiQuotes
321336
ImportQualifiedPost
337+
OverloadedRecordDot
322338
ghc-options: -Wall -Werror -Wname-shadowing -Wno-type-defaults -Wno-missing-pattern-synonym-signatures -fprint-expanded-synonyms -fwrite-ide-info -O2 -funbox-strict-fields -threaded -rtsopts "-with-rtsopts=-N -A32m -qn2 -T"
323339
build-depends:
324340
Diff
@@ -356,17 +372,20 @@ executable share-api
356372
, http-media
357373
, http-types
358374
, jose
375+
, ki-unlifted
359376
, lens
360377
, megaparsec
361378
, memory
362379
, mmorph
363380
, monad-validate
381+
, monoidal-containers
364382
, mtl
365383
, network
366384
, network-simple
367385
, network-uri
368386
, nonempty-containers
369387
, parallel
388+
, parser-combinators
370389
, pem
371390
, prometheus-client
372391
, prometheus-metrics-ghc

share-utils/package.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ dependencies:
7171
- jose
7272
- memory
7373
- network-uri
74+
- pretty-simple
7475
- random
7576
- servant-auth
7677
- servant-server

share-utils/share-utils.cabal

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
cabal-version: 1.12
22

3-
-- This file has been generated from package.yaml by hpack version 0.35.2.
3+
-- This file has been generated from package.yaml by hpack version 0.36.0.
44
--
55
-- see: https://github.com/sol/hpack
66

@@ -24,6 +24,7 @@ source-repository head
2424

2525
library
2626
exposed-modules:
27+
Share.Debug
2728
Share.Utils.Binary
2829
Share.Utils.Deployment
2930
Share.Utils.IDs
@@ -83,6 +84,7 @@ library
8384
, lens
8485
, memory
8586
, network-uri
87+
, pretty-simple
8688
, random
8789
, servant-auth
8890
, servant-server

share-utils/src/Share/Debug.hs

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
{-# LANGUAGE OverloadedStrings #-}
2+
-- pTrace is marked deprecated so you get warnings when you use it; this module wraps it deliberately, so silence that warning here.
3+
{-# OPTIONS_GHC -Wno-deprecations #-}
4+
5+
module Share.Debug
6+
( debug,
7+
debugM,
8+
whenDebug,
9+
debugLog,
10+
debugLogM,
11+
shouldDebug,
12+
DebugFlag (..),
13+
)
14+
where
15+
16+
import Control.Monad
17+
import Data.Set (Set)
18+
import Data.Set qualified as Set
19+
import Data.Text qualified as Text
20+
import Debug.Pretty.Simple (pTrace, pTraceM)
21+
import Debug.Trace
22+
import System.IO.Unsafe (unsafePerformIO)
23+
import Text.Pretty.Simple (pShow)
24+
import UnliftIO.Environment (lookupEnv)
25+
import Witch (into)
26+
27+
data DebugFlag
28+
= Timing
29+
| Queries
30+
deriving (Eq, Ord, Show, Bounded, Enum)
31+
32+
debugFlags :: Set DebugFlag
33+
debugFlags = case (unsafePerformIO (lookupEnv "SHARE_DEBUG")) of
34+
Nothing -> Set.empty
35+
Just "" -> Set.fromList [minBound .. maxBound]
36+
Just s -> Set.fromList $ do
37+
w <- (Text.splitOn "," . Text.pack $ s)
38+
case Text.toUpper . Text.strip $ w of
39+
"TIMING" -> pure Timing
40+
"QUERIES" -> pure Queries
41+
_ -> mempty
42+
{-# NOINLINE debugFlags #-}
43+
44+
debugTiming :: Bool
45+
debugTiming = Timing `Set.member` debugFlags
46+
{-# NOINLINE debugTiming #-}
47+
48+
debugQueries :: Bool
49+
debugQueries = Queries `Set.member` debugFlags
50+
{-# NOINLINE debugQueries #-}
51+
52+
-- | Use for trace-style selective debugging.
53+
-- E.g. 1 + (debug Timing "The second number" 2)
54+
--
55+
-- Or, use in pattern matching to view arguments.
56+
-- E.g.
57+
-- myFunc (debug Timing "argA" -> argA) = ...
58+
debug :: (Show a) => DebugFlag -> String -> a -> a
59+
debug flag msg a =
60+
if shouldDebug flag
61+
then (trace (msg <> ":\n" <> into @String (pShow a)) a)
62+
else a
63+
64+
-- | Use for selective debug logging in monadic contexts.
65+
-- E.g.
66+
-- do
67+
-- debugM Timing "source repo" srcRepo
68+
-- ...
69+
debugM :: (Show a, Monad m) => DebugFlag -> String -> a -> m ()
70+
debugM flag msg a =
71+
whenDebug flag do
72+
traceM (msg <> ":\n" <> into @String (pShow a))
73+
74+
debugLog :: DebugFlag -> String -> a -> a
75+
debugLog flag msg =
76+
if shouldDebug flag
77+
then pTrace msg
78+
else id
79+
80+
debugLogM :: (Monad m) => DebugFlag -> String -> m ()
81+
debugLogM flag msg =
82+
whenDebug flag $ pTraceM msg
83+
84+
-- | A 'when' block which is triggered if the given flag is being debugged.
85+
whenDebug :: (Monad m) => DebugFlag -> m () -> m ()
86+
whenDebug flag action = do
87+
when (shouldDebug flag) action
88+
89+
shouldDebug :: DebugFlag -> Bool
90+
shouldDebug = \case
91+
Timing -> debugTiming
92+
Queries -> debugQueries

share-utils/src/Share/Utils/IDs.hs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,9 @@ instance (IsID a, Typeable a) => Hasql.DecodeValue (UsingID a) where
122122
Hasql.decodeValue
123123
& Decoder.refine \txt -> fromText txt <&> UsingID
124124

125+
instance (IsID a) => From (UsingID a) Text where
126+
from = toText
127+
125128
-- | CI doesn't expose its internal constructor so we can't derive via without adding our own
126129
-- instances.
127130
newtype CaseInsensitiveID = CaseInsensitiveID (CI Text)

sql/2024-07-18-00-00_defn_search.sql

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
-- Allows us to create composite indexes over traditionally non-GIN indexable types.
2+
-- In this case it allows us to include the project_id and release_id in the GIN index for search tokens.
3+
CREATE EXTENSION IF NOT EXISTS btree_gin;
4+
5+
-- Allows us to create trigram indexes for fuzzy searching.
6+
CREATE EXTENSION IF NOT EXISTS pg_trgm;
7+
8+
-- New table for coordinating the background job which syncs global definitions for search.
9+
10+
-- Table of all releases which have been published, but not yet synced to the global definition search index.
11+
CREATE TABLE global_definition_search_release_queue (
12+
release_id UUID PRIMARY KEY REFERENCES project_releases(id) ON DELETE CASCADE,
13+
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP
14+
);
15+
16+
-- Every defn fits into one of these categories.
17+
CREATE TYPE definition_tag AS ENUM ('doc', 'test', 'plain', 'data', 'ability', 'data-constructor', 'ability-constructor');
18+
19+
CREATE TABLE global_definition_search_docs (
20+
project_id UUID NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
21+
release_id UUID NOT NULL REFERENCES project_releases(id) ON DELETE CASCADE,
22+
-- Fully qualified name
23+
name TEXT NOT NULL,
24+
search_tokens TSVECTOR NOT NULL,
25+
-- Number of arguments. 0 for values.
26+
arity INT NOT NULL,
27+
tag definition_tag NOT NULL,
28+
29+
-- Contains the rendered type signature, type, hash, etc.
30+
-- so we don't need to look up types for hundreds of search results on the fly.
31+
metadata JSONB NOT NULL,
32+
33+
-- Ostensibly there's the possibility of name conflicts,
34+
-- but those are rare enough that we don't care; we just insert with ON CONFLICT DO NOTHING.
35+
PRIMARY KEY (project_id, release_id, name)
36+
);
37+
38+
-- Index for searching global definitions by 'search token', with an optional project/release filter.
39+
-- P.s. there's a search token type for name, so we don't need to index that separately.
40+
CREATE INDEX global_definition_search_tokens ON global_definition_search_docs USING GIN(search_tokens, tag, project_id, release_id);
41+
42+
-- Index for fuzzy-searching on the fully qualified name.
43+
CREATE INDEX global_definition_search_name_trigram ON global_definition_search_docs USING GIST (name gist_trgm_ops);

0 commit comments

Comments
 (0)