Skip to content

Commit

Permalink
Merge in project-search fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
ChrisPenner committed Jul 25, 2024
2 parents 10416b9 + 158b64c commit 99f86e2
Show file tree
Hide file tree
Showing 60 changed files with 2,796 additions and 300 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,6 @@ docker/tmp
*.prof
*.prof.html
prelude.output.md

# Scratch files
*.u
7 changes: 4 additions & 3 deletions app/Env.hs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import Share.Utils.Logging qualified as Logging
import Share.Utils.Servant.Cookies qualified as Cookies
import Share.Web.Authentication (cookieSessionTTL)
import Hasql.Pool qualified as Pool
import Hasql.Pool.Config qualified as Pool
import Network.URI (parseURI)
import Servant.API qualified as Servant
import System.Environment (lookupEnv)
Expand Down Expand Up @@ -97,11 +98,11 @@ withEnv action = do
let pgConnectionMaxIdleTime = Time.secondsToDiffTime (60 * 5) -- 5 minutes
-- Limiting max lifetime helps cycle connections which may have accumulated memory cruft.
let pgConnectionMaxLifetime = Time.secondsToDiffTime (60 * 60) -- 1 hour
pgConnectionPool <-
Pool.acquire postgresConnMax pgConnectionAcquisitionTimeout pgConnectionMaxLifetime pgConnectionMaxIdleTime (Text.encodeUtf8 postgresConfig)
let pgSettings = Pool.settings [Pool.staticConnectionSettings (Text.encodeUtf8 postgresConfig), Pool.size postgresConnMax, Pool.acquisitionTimeout pgConnectionAcquisitionTimeout, Pool.idlenessTimeout pgConnectionMaxIdleTime, Pool.agingTimeout pgConnectionMaxLifetime]
pgConnectionPool <- Pool.acquire pgSettings
timeCache <- FL.newTimeCache FL.simpleTimeFormat -- E.g. 05/Sep/2023:13:23:56 -0700
sandboxedRuntime <- RT.startRuntime True RT.Persistent "share"
let requestCtx = ()
let ctx = ()
-- We use a zero-width-space to separate log-lines on ingestion, this allows us to use newlines for
-- formatting, but without affecting log-grouping.
let zeroWidthSpace = "\x200B"
Expand Down
10 changes: 5 additions & 5 deletions docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ services:
POSTGRES_PASSWORD: sekrit
volumes:
- ../sql:/docker-entrypoint-initdb.d
# # Optionally persist the data between container invocations
# - postgresVolume:/var/lib/postgresql/data
# Optionally persist the data between container invocations
- postgresVolume:/var/lib/postgresql/data
- ./postgresql.conf:/etc/postgresql/postgresql.conf
command: postgres -c config_file=/etc/postgresql/postgresql.conf # -c log_statement=all
command: postgres -c config_file=/etc/postgresql/postgresql.conf # -c log_statement=all


redis:
Expand Down Expand Up @@ -85,5 +85,5 @@ services:
- redis
- postgres

# volumes:
# postgresVolume:
volumes:
postgresVolume:
4 changes: 4 additions & 0 deletions package.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,17 +62,20 @@ dependencies:
- http-media
- http-types
- jose
- ki-unlifted
- lens
- megaparsec
- memory
- mmorph
- monad-validate
- monoidal-containers
- mtl
- network
- network-simple
- network-uri
- nonempty-containers
- parallel
- parser-combinators
- pem
- hasql
- hasql-pool
Expand Down Expand Up @@ -159,6 +162,7 @@ default-extensions:
- BlockArguments
- QuasiQuotes
- ImportQualifiedPost
- OverloadedRecordDot

library:
source-dirs: src
Expand Down
19 changes: 19 additions & 0 deletions share-api.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ library
Share
Share.App
Share.Backend
Share.BackgroundJobs
Share.BackgroundJobs.Errors
Share.BackgroundJobs.Monad
Share.BackgroundJobs.Search.DefinitionSync
Share.BackgroundJobs.Search.DefinitionSync.Types
Share.BackgroundJobs.Workers
Share.Branch
Share.Codebase
Share.Codebase.Types
Expand All @@ -45,6 +51,7 @@ library
Share.Postgres.Causal.Types
Share.Postgres.Comments.Queries
Share.Postgres.Contributions.Queries
Share.Postgres.Cursors
Share.Postgres.Definitions.Queries
Share.Postgres.Definitions.Types
Share.Postgres.Hashes.Queries
Expand All @@ -63,6 +70,8 @@ library
Share.Postgres.Projects.Queries
Share.Postgres.Queries
Share.Postgres.Refs.Types
Share.Postgres.Releases.Queries
Share.Postgres.Search.DefinitionSearch.Queries
Share.Postgres.Serialization
Share.Postgres.Sync.Conversions
Share.Postgres.Sync.Queries
Expand Down Expand Up @@ -118,6 +127,7 @@ library
Share.Web.Share.Contributions.Impl
Share.Web.Share.Contributions.MergeDetection
Share.Web.Share.Contributions.Types
Share.Web.Share.DefinitionSearch
Share.Web.Share.Diffs.Impl
Share.Web.Share.Diffs.Types
Share.Web.Share.Impl
Expand All @@ -144,6 +154,7 @@ library
Unison.Server.NameSearch.Postgres
Unison.Server.Share.Definitions
Unison.Server.Share.DefinitionSummary
Unison.Server.Share.DefinitionSummary.Types
Unison.Server.Share.Docs
Unison.Server.Share.FuzzyFind
Unison.Server.Share.NamespaceDetails
Expand Down Expand Up @@ -180,6 +191,7 @@ library
BlockArguments
QuasiQuotes
ImportQualifiedPost
OverloadedRecordDot
ghc-options: -Wall -Werror -Wname-shadowing -Wno-type-defaults -Wno-missing-pattern-synonym-signatures -fprint-expanded-synonyms -fwrite-ide-info -O2 -funbox-strict-fields
build-depends:
Diff
Expand Down Expand Up @@ -217,17 +229,20 @@ library
, http-media
, http-types
, jose
, ki-unlifted
, lens
, megaparsec
, memory
, mmorph
, monad-validate
, monoidal-containers
, mtl
, network
, network-simple
, network-uri
, nonempty-containers
, parallel
, parser-combinators
, pem
, prometheus-client
, prometheus-metrics-ghc
Expand Down Expand Up @@ -319,6 +334,7 @@ executable share-api
BlockArguments
QuasiQuotes
ImportQualifiedPost
OverloadedRecordDot
ghc-options: -Wall -Werror -Wname-shadowing -Wno-type-defaults -Wno-missing-pattern-synonym-signatures -fprint-expanded-synonyms -fwrite-ide-info -O2 -funbox-strict-fields -threaded -rtsopts "-with-rtsopts=-N -A32m -qn2 -T"
build-depends:
Diff
Expand Down Expand Up @@ -356,17 +372,20 @@ executable share-api
, http-media
, http-types
, jose
, ki-unlifted
, lens
, megaparsec
, memory
, mmorph
, monad-validate
, monoidal-containers
, mtl
, network
, network-simple
, network-uri
, nonempty-containers
, parallel
, parser-combinators
, pem
, prometheus-client
, prometheus-metrics-ghc
Expand Down
1 change: 1 addition & 0 deletions share-utils/package.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ dependencies:
- jose
- memory
- network-uri
- pretty-simple
- random
- servant-auth
- servant-server
Expand Down
4 changes: 3 additions & 1 deletion share-utils/share-utils.cabal
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cabal-version: 1.12

-- This file has been generated from package.yaml by hpack version 0.35.2.
-- This file has been generated from package.yaml by hpack version 0.36.0.
--
-- see: https://github.com/sol/hpack

Expand All @@ -24,6 +24,7 @@ source-repository head

library
exposed-modules:
Share.Debug
Share.Utils.Binary
Share.Utils.Deployment
Share.Utils.IDs
Expand Down Expand Up @@ -83,6 +84,7 @@ library
, lens
, memory
, network-uri
, pretty-simple
, random
, servant-auth
, servant-server
Expand Down
92 changes: 92 additions & 0 deletions share-utils/src/Share/Debug.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
{-# LANGUAGE OverloadedStrings #-}
-- pTrace is marked deprecated so you get warnings when you use it.
{-# OPTIONS_GHC -Wno-deprecations #-}

module Share.Debug
( debug,
debugM,
whenDebug,
debugLog,
debugLogM,
shouldDebug,
DebugFlag (..),
)
where

import Control.Monad
import Data.Set (Set)
import Data.Set qualified as Set
import Data.Text qualified as Text
import Debug.Pretty.Simple (pTrace, pTraceM)
import Debug.Trace
import System.IO.Unsafe (unsafePerformIO)
import Text.Pretty.Simple (pShow)
import UnliftIO.Environment (lookupEnv)
import Witch (into)

data DebugFlag
= Timing
| Queries
deriving (Eq, Ord, Show, Bounded, Enum)

debugFlags :: Set DebugFlag
debugFlags = case (unsafePerformIO (lookupEnv "SHARE_DEBUG")) of
Nothing -> Set.empty
Just "" -> Set.fromList [minBound .. maxBound]
Just s -> Set.fromList $ do
w <- (Text.splitOn "," . Text.pack $ s)
case Text.toUpper . Text.strip $ w of
"TIMING" -> pure Timing
"QUERIES" -> pure Queries
_ -> mempty
{-# NOINLINE debugFlags #-}

debugTiming :: Bool
debugTiming = Timing `Set.member` debugFlags
{-# NOINLINE debugTiming #-}

debugQueries :: Bool
debugQueries = Queries `Set.member` debugFlags
{-# NOINLINE debugQueries #-}

-- | Use for trace-style selective debugging.
-- E.g. 1 + (debug Sync "The second number" 2)
--
-- Or, use in pattern matching to view arguments.
-- E.g.
-- myFunc (debug Sync "argA" -> argA) = ...
debug :: (Show a) => DebugFlag -> String -> a -> a
debug flag msg a =
if shouldDebug flag
then (trace (msg <> ":\n" <> into @String (pShow a)) a)
else a

-- | Use for selective debug logging in monadic contexts.
-- E.g.
-- do
-- debugM Sync "source repo" srcRepo
-- ...
debugM :: (Show a, Monad m) => DebugFlag -> String -> a -> m ()
debugM flag msg a =
whenDebug flag do
traceM (msg <> ":\n" <> into @String (pShow a))

debugLog :: DebugFlag -> String -> a -> a
debugLog flag msg =
if shouldDebug flag
then pTrace msg
else id

debugLogM :: (Monad m) => DebugFlag -> String -> m ()
debugLogM flag msg =
whenDebug flag $ pTraceM msg

-- | A 'when' block which is triggered if the given flag is being debugged.
whenDebug :: (Monad m) => DebugFlag -> m () -> m ()
whenDebug flag action = do
when (shouldDebug flag) action

shouldDebug :: DebugFlag -> Bool
shouldDebug = \case
Timing -> debugTiming
Queries -> debugQueries
3 changes: 3 additions & 0 deletions share-utils/src/Share/Utils/IDs.hs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,9 @@ instance (IsID a, Typeable a) => Hasql.DecodeValue (UsingID a) where
Hasql.decodeValue
& Decoder.refine \txt -> fromText txt <&> UsingID

instance (IsID a) => From (UsingID a) Text where
from = toText

-- | CI doesnt' expose its internal constructor so we can't derive via without adding our own
-- instances.
newtype CaseInsensitiveID = CaseInsensitiveID (CI Text)
Expand Down
43 changes: 43 additions & 0 deletions sql/2024-07-18-00-00_defn_search.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
-- Allows us to create composite indexes over traditionally non-GIN indexable types.
-- In this case it allows us to include the project_id and release_id in the GIN index for search tokens.
CREATE EXTENSION IF NOT EXISTS btree_gin;

-- Allows us to create trigram indexes for fuzzy searching.
CREATE EXTENSION IF NOT EXISTS pg_trgm;

-- New table for coordinating background job for syncing global definitions for search.

-- Table of all releases which have been published, but not yet synced to the global definition search index.
CREATE TABLE global_definition_search_release_queue (
release_id UUID PRIMARY KEY REFERENCES project_releases(id) ON DELETE CASCADE,
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP
);

-- Every defn fits into one of these categories.
CREATE TYPE definition_tag AS ENUM ('doc', 'test', 'plain', 'data', 'ability', 'data-constructor', 'ability-constructor');

CREATE TABLE global_definition_search_docs (
project_id UUID NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
release_id UUID NOT NULL REFERENCES project_releases(id) ON DELETE CASCADE,
-- Fully qualified name
name TEXT NOT NULL,
search_tokens TSVECTOR NOT NULL,
-- Number of arguments. 0 for values.
arity INT NOT NULL,
tag definition_tag NOT NULL,

-- Contains the rendered type signature, type, hash, etc.
-- so we don't need to look up types for hundreds of search results on the fly.
metadata JSONB NOT NULL,

-- Ostensibly there's the possibility of name conflicts,
-- but those are rare enough we don't care, we just insert with ON CONFLICT DO NOTHING.
PRIMARY KEY (project_id, release_id, name)
);

-- Index for searching global definitions by 'search token', with an optional project/release filter.
-- P.s. there's a search token type for name, so we don't need to index that separately.
CREATE INDEX global_definition_search_tokens ON global_definition_search_docs USING GIN(search_tokens, tag, project_id, release_id);

-- Index for fuzzy-searching on the fully qualified name.
CREATE INDEX global_definition_search_name_trigram ON global_definition_search_docs USING GIST (name gist_trgm_ops);
Loading

0 comments on commit 99f86e2

Please sign in to comment.