From 56462dd79865877564dd784b00f713a9f8d64d5c Mon Sep 17 00:00:00 2001
From: Chris Penner
Date: Tue, 7 May 2024 15:07:45 -0700
Subject: [PATCH 01/48] Add pg cursors helpers for streaming

---
 share-api.cabal               |  1 +
 src/Share/Postgres.hs         | 14 ++++-----
 src/Share/Postgres/Cursors.hs | 53 +++++++++++++++++++++++++++++++++++
 src/Share/Prelude/Orphans.hs  |  7 +++++
 4 files changed, 68 insertions(+), 7 deletions(-)
 create mode 100644 src/Share/Postgres/Cursors.hs

diff --git a/share-api.cabal b/share-api.cabal
index 4fae0bb..0a10282 100644
--- a/share-api.cabal
+++ b/share-api.cabal
@@ -45,6 +45,7 @@ library
     Share.Postgres.Causal.Types
     Share.Postgres.Comments.Queries
     Share.Postgres.Contributions.Queries
+    Share.Postgres.Cursors
     Share.Postgres.Definitions.Queries
     Share.Postgres.Definitions.Types
     Share.Postgres.Hashes.Queries
diff --git a/src/Share/Postgres.hs b/src/Share/Postgres.hs
index 68b1680..06b1e51 100644
--- a/src/Share/Postgres.hs
+++ b/src/Share/Postgres.hs
@@ -71,6 +71,13 @@ import Data.Map qualified as Map
 import Data.Maybe
 import Data.Text qualified as Text
 import Data.Void
+import Hasql.Decoders qualified as Decoders
+import Hasql.Encoders qualified as Encoders
+import Hasql.Interpolate qualified as Interp
+import Hasql.Pool qualified as Pool
+import Hasql.Session qualified as Hasql
+import Hasql.Session qualified as Session
+import Hasql.Statement qualified as Hasql
 import Share.App
 import Share.Env qualified as Env
 import Share.Postgres.Orphans ()
@@ -79,13 +86,6 @@ import Share.Utils.Logging (Loggable (..))
 import Share.Utils.Logging qualified as Logging
 import Share.Web.App
 import Share.Web.Errors (ErrorID (..), SomeServerError, ToServerError (..), internalServerError, respondError, someServerError)
-import Hasql.Decoders qualified as Decoders
-import Hasql.Encoders qualified as Encoders
-import Hasql.Interpolate qualified as Interp
-import Hasql.Pool qualified as Pool
-import Hasql.Session qualified as Hasql
-import Hasql.Session qualified as Session
-import Hasql.Statement qualified as Hasql
 
 debug :: Bool
 debug = False
diff --git a/src/Share/Postgres/Cursors.hs b/src/Share/Postgres/Cursors.hs
new file mode 100644
index 0000000..acadcd1
--- /dev/null
+++ b/src/Share/Postgres/Cursors.hs
@@ -0,0 +1,53 @@
+-- | Helpers for streamable cursors
+module Share.Postgres.Cursors
+  ( newCursor,
+    fetchNRows,
+    fetchNCols,
+    PGCursor,
+  )
+where
+
+import Data.List.NonEmpty qualified as NEL
+import Data.UUID (UUID)
+import Share.Postgres
+import Share.Prelude
+import System.Random (randomIO)
+
+data PGCursor result = PGCursor Text
+
+-- | Create a new cursor. The name is only for debugging purposes since it will be munged with
+-- a random UUID.
+--
+-- This cursor will be closed when the transaction ends, and must not be used outside of the 
+-- transaction in which it was created.
+newCursor :: Text -> Sql -> Transaction e (PGCursor r)
+newCursor namePrefix query = do
+  uuid <- transactionUnsafeIO $ randomIO @UUID
+  let cursorName = namePrefix <> "_" <> into @Text uuid
+  execute_
+    [sql|
+    DECLARE #{cursorName}
+      NO SCROLL
+      CURSOR
+      WITHOUT HOLD
+      FOR ^{query}
+    |]
+  pure $ PGCursor cursorName
+
+-- | Fetch UP TO the next N rows from the cursor. If there are no more rows, returns Nothing.
+fetchNRows :: DecodeRow r => PGCursor r -> Int32 -> Transaction e (Maybe (NonEmpty r))
+fetchNRows (PGCursor cursorName) n = do
+  rows <-
+    queryListRows
+      [sql| FETCH FORWARD #{n} FROM #{cursorName}
+      |]
+  pure $ NEL.nonEmpty rows
+
+-- | Fetch UP TO the next N single-column rows from the cursor. 
If there are no more rows, returns Nothing. +fetchNCols :: DecodeField r => PGCursor r -> Int32 -> Transaction e (Maybe (NonEmpty r)) +fetchNCols (PGCursor cursorName) n = do + rows <- + queryListCol + [sql| FETCH FORWARD #{n} FROM #{cursorName} + |] + pure $ NEL.nonEmpty rows diff --git a/src/Share/Prelude/Orphans.hs b/src/Share/Prelude/Orphans.hs index 397713b..16a7e2a 100644 --- a/src/Share/Prelude/Orphans.hs +++ b/src/Share/Prelude/Orphans.hs @@ -7,10 +7,14 @@ module Share.Prelude.Orphans () where import Control.Comonad.Cofree (Cofree (..)) import Data.Align (Semialign (..)) +import Data.Text (Text) import Data.These (These (..)) +import Data.UUID (UUID) +import Data.UUID qualified as UUID import GHC.TypeLits qualified as TypeError import Hasql.Interpolate qualified as Interp import Unison.Server.Orphans () +import Witch instance {-# OVERLAPPING #-} TypeError.TypeError ('TypeError.Text "A String will be encoded as char[], Did you mean to use Text instead?") => Interp.EncodeValue String where encodeValue = error "unpossible" @@ -29,3 +33,6 @@ instance Semialign f => Semialign (Cofree f) where This x -> This <$> x That y -> That <$> y These x y -> align x y + +instance From UUID Text where + from = UUID.toText From c88df611a5e02c7e58d31fff99edd2165121f0b2 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 8 May 2024 12:23:23 -0700 Subject: [PATCH 02/48] Make PGCursor a functor --- src/Share/Postgres/Cursors.hs | 51 ++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/src/Share/Postgres/Cursors.hs b/src/Share/Postgres/Cursors.hs index acadcd1..0f25716 100644 --- a/src/Share/Postgres/Cursors.hs +++ b/src/Share/Postgres/Cursors.hs @@ -1,8 +1,10 @@ +{-# LANGUAGE GADTs #-} + -- | Helpers for streamable cursors module Share.Postgres.Cursors - ( newCursor, - fetchNRows, - fetchNCols, + ( newRowCursor, + newColCursor, + fetchN, PGCursor, ) where @@ -13,15 +15,31 @@ import Share.Postgres import Share.Prelude import System.Random (randomIO) -data PGCursor result = PGCursor Text +-- | A cursor that can be used to fetch rows from the database. +-- Includes a mapper (CoYoneda) to allow the type to be a functor. +data PGCursor result where + PGCursor :: + forall row result. + DecodeRow row {- decoder for original row -} => + Text {- cursor name -} -> + (row -> result {- mapper for Functor instance -}) -> + PGCursor result + +instance Functor PGCursor where + fmap f (PGCursor name g) = PGCursor name (f . g) + +newColCursor :: DecodeField a => Text -> Sql -> Transaction e (PGCursor a) +newColCursor namePrefix query = do + newRowCursor namePrefix query + <&> fmap fromOnly -- | Create a new cursor. The name is only for debugging purposes since it will be munged with -- a random UUID. -- --- This cursor will be closed when the transaction ends, and must not be used outside of the +-- This cursor will be closed when the transaction ends, and must not be used outside of the -- transaction in which it was created. -newCursor :: Text -> Sql -> Transaction e (PGCursor r) -newCursor namePrefix query = do +newRowCursor :: DecodeRow r => Text -> Sql -> Transaction e (PGCursor r) +newRowCursor namePrefix query = do uuid <- transactionUnsafeIO $ randomIO @UUID let cursorName = namePrefix <> "_" <> into @Text uuid execute_ @@ -32,22 +50,13 @@ newCursor namePrefix query = do WITHOUT HOLD FOR ^{query} |] - pure $ PGCursor cursorName + pure $ PGCursor cursorName id --- | Fetch UP TO the next N rows from the cursor. If there are no more rows, returns Nothing. 
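A typical consumer of these cursor helpers (a hypothetical sketch, not part of the patch) drains a cursor in fixed-size batches, using the fetchN introduced by this patch, until FETCH FORWARD comes back empty:

-- Hypothetical helper: stream a PGCursor batch by batch, feeding each
-- non-empty batch to a callback until the cursor is exhausted.
drainCursor :: PGCursor r -> Int32 -> (NonEmpty r -> Transaction e ()) -> Transaction e ()
drainCursor cursor batchSize callback =
  fetchN cursor batchSize >>= \case
    Nothing -> pure ()
    Just batch -> do
      callback batch
      drainCursor cursor batchSize callback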
-fetchNRows :: DecodeRow r => PGCursor r -> Int32 -> Transaction e (Maybe (NonEmpty r)) -fetchNRows (PGCursor cursorName) n = do +-- | Fetch UP TO the next N results from the cursor. If there are no more rows, returns Nothing. +fetchN :: PGCursor r -> Int32 -> Transaction e (Maybe (NonEmpty r)) +fetchN (PGCursor cursorName f) n = do rows <- queryListRows [sql| FETCH FORWARD #{n} FROM #{cursorName} |] - pure $ NEL.nonEmpty rows - --- | Fetch UP TO the next N single-column rows from the cursor. If there are no more rows, returns Nothing. -fetchNCols :: DecodeField r => PGCursor r -> Int32 -> Transaction e (Maybe (NonEmpty r)) -fetchNCols (PGCursor cursorName) n = do - rows <- - queryListCol - [sql| FETCH FORWARD #{n} FROM #{cursorName} - |] - pure $ NEL.nonEmpty rows + pure $ NEL.nonEmpty (f <$> rows) From ba09724cab0809ccb41c325dfa48bc636a397a47 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Mon, 6 May 2024 11:17:52 -0700 Subject: [PATCH 03/48] Add background sync worker abstraction --- app/Env.hs | 2 +- package.yaml | 1 + share-api.cabal | 6 +++++ src/Share.hs | 38 ++++++++++++++++------------- src/Share/App.hs | 16 +++++++----- src/Share/BackgroundJobs.hs | 12 +++++++++ src/Share/BackgroundJobs/Errors.hs | 27 ++++++++++++++++++++ src/Share/BackgroundJobs/Monad.hs | 23 +++++++++++++++++ src/Share/BackgroundJobs/Workers.hs | 31 +++++++++++++++++++++++ src/Share/Env.hs | 10 ++++---- src/Share/Web/App.hs | 8 +++--- src/Share/Web/Errors.hs | 8 +++--- 12 files changed, 145 insertions(+), 37 deletions(-) create mode 100644 src/Share/BackgroundJobs.hs create mode 100644 src/Share/BackgroundJobs/Errors.hs create mode 100644 src/Share/BackgroundJobs/Monad.hs create mode 100644 src/Share/BackgroundJobs/Workers.hs diff --git a/app/Env.hs b/app/Env.hs index 6cf8c90..f9b84cf 100644 --- a/app/Env.hs +++ b/app/Env.hs @@ -94,7 +94,7 @@ withEnv action = do Pool.acquire postgresConnMax Nothing (Text.encodeUtf8 postgresConfig) timeCache <- FL.newTimeCache FL.simpleTimeFormat -- E.g. 05/Sep/2023:13:23:56 -0700 sandboxedRuntime <- RT.startRuntime True RT.Persistent "share" - let requestCtx = () + let ctx = () -- We use a zero-width-space to separate log-lines on ingestion, this allows us to use newlines for -- formatting, but without affecting log-grouping. 
let zeroWidthSpace = "\x200B" diff --git a/package.yaml b/package.yaml index 708f1bc..51bdac4 100644 --- a/package.yaml +++ b/package.yaml @@ -62,6 +62,7 @@ dependencies: - http-media - http-types - jose +- ki-unlifted - lens - megaparsec - memory diff --git a/share-api.cabal b/share-api.cabal index 0a10282..a72936a 100644 --- a/share-api.cabal +++ b/share-api.cabal @@ -27,6 +27,10 @@ library Share Share.App Share.Backend + Share.BackgroundJobs + Share.BackgroundJobs.Errors + Share.BackgroundJobs.Monad + Share.BackgroundJobs.Workers Share.Branch Share.Codebase Share.Codebase.Types @@ -218,6 +222,7 @@ library , http-media , http-types , jose + , ki-unlifted , lens , megaparsec , memory @@ -357,6 +362,7 @@ executable share-api , http-media , http-types , jose + , ki-unlifted , lens , megaparsec , memory diff --git a/src/Share.hs b/src/Share.hs index 8a41f9e..7bb6681 100644 --- a/src/Share.hs +++ b/src/Share.hs @@ -21,7 +21,20 @@ import Data.Time (NominalDiffTime, diffUTCTime, getCurrentTime) import Data.Typeable qualified as Typeable import Data.UUID (UUID) import Data.Vault.Lazy as Vault +import Ki.Unlifted qualified as Ki +import Network.HTTP.Types (HeaderName, statusCode) +import Network.HTTP.Types qualified as HTTP +import Network.Wai +import Network.Wai qualified as Wai +import Network.Wai.Handler.Warp (run) +import Network.Wai.Internal qualified as Wai +import Network.Wai.Middleware.Cors +import Network.Wai.Middleware.Gzip qualified as Gzip +import Network.Wai.Middleware.RequestLogger (logStdoutDev) +import Network.Wai.Middleware.Routed (routedMiddleware) +import Servant import Share.App +import Share.BackgroundJobs qualified as BackgroundJobs import Share.Env qualified as Env import Share.IDs (RequestId, UserId) import Share.IDs qualified as IDs @@ -41,17 +54,6 @@ import Share.Web.App (WebApp, localRequestCtx) import Share.Web.App qualified as WebApp import Share.Web.Errors import Share.Web.Impl qualified as Web -import Network.HTTP.Types (HeaderName, statusCode) -import Network.HTTP.Types qualified as HTTP -import Network.Wai -import Network.Wai qualified as Wai -import Network.Wai.Handler.Warp (run) -import Network.Wai.Internal qualified as Wai -import Network.Wai.Middleware.Cors -import Network.Wai.Middleware.Gzip qualified as Gzip -import Network.Wai.Middleware.RequestLogger (logStdoutDev) -import Network.Wai.Middleware.Routed (routedMiddleware) -import Servant import System.Log.FastLogger (FastLogger, FormattedTime, LogStr) import System.Log.Raven qualified as Sentry import System.Log.Raven.Types qualified as Sentry @@ -60,8 +62,10 @@ import UnliftIO.STM qualified as STM startApp :: Env.Env () -> IO () startApp env = do - app <- mkApp env - run (Env.serverPort env) app + app <- mkShareServer env + Ki.scoped \scope -> do + Ki.fork_ scope $ runAppM env $ BackgroundJobs.worker scope + run (Env.serverPort env) app newtype UncaughtException err = UncaughtException err deriving stock (Show) @@ -99,13 +103,13 @@ toServantHandler env appM = in Handler . ExceptT $ do -- fresh request ctx for each request. 
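The Ki.scoped block in startApp above is ki's structured concurrency: any thread forked inside a scope is killed when the scope's callback returns, so the background workers cannot outlive the server loop. A minimal standalone sketch of the pattern (the delays are hypothetical stand-ins for the worker and server bodies):

demoMain :: IO ()
demoMain = Ki.scoped \scope -> do
  -- stand-in background worker; lives only as long as the scope
  Ki.fork_ scope (forever (threadDelay 1000000))
  -- stand-in server loop; when it returns, the scope closes and the
  -- forked worker is killed
  threadDelay 5000000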
reqCtx <- WebApp.freshRequestCtx - runReaderT (unAppM $ catchErrors appM) (env {Env.requestCtx = reqCtx}) + runAppM (env {Env.ctx = reqCtx}) $ catchErrors appM -- | Uses context from the request to set up an appropriate RequestCtx type WrapperAPI = (RawRequest :> Header "X-NO-CACHE" Text :> Cookies.Cookie "NO-CACHE" Text :> Header "X-RequestID" RequestId :> MaybeAuthenticatedUserId :> Web.API) -mkApp :: Env.Env () -> IO Application -mkApp env = do +mkShareServer :: Env.Env () -> IO Application +mkShareServer env = do reqTagsKey <- Vault.newKey let reqLoggerMiddleware = mkReqLogger reqTagsKey (Env.timeCache env) (Env.logger env) metricsMiddleware <- serveMetricsMiddleware env @@ -158,7 +162,7 @@ mkApp env = do } ) do - reqTagsVar <- asks (WebApp.reqTagsVar . Env.requestCtx) + reqTagsVar <- asks (WebApp.reqTagsVar . Env.ctx) STM.atomically $ STM.modifyTVar' reqTagsVar (<> reqTags) m diff --git a/src/Share/App.hs b/src/Share/App.hs index 9e12f59..acc62a5 100644 --- a/src/Share/App.hs +++ b/src/Share/App.hs @@ -1,6 +1,12 @@ {-# LANGUAGE InstanceSigs #-} -module Share.App where +module Share.App + ( AppM, + runAppM, + shareIssuer, + shareAud, + ) +where import Control.Monad.Except import Control.Monad.Random.Strict @@ -15,10 +21,11 @@ import Share.Env qualified as Env import Share.Prelude import Share.Utils.Logging qualified as Logging -newtype AppM reqCtx a = AppM {unAppM :: ReaderT (Env reqCtx) IO a} +newtype AppM reqCtx a = AppM {_unAppM :: ReaderT (Env reqCtx) IO a} deriving newtype (Functor, Applicative, Monad, MonadReader (Env reqCtx), MonadRandom, MonadIO, MonadUnliftIO) -type CloudApp = AppM () +runAppM :: Env reqCtx -> AppM reqCtx a -> IO a +runAppM env (AppM m) = runReaderT m env instance Logging.MonadLogger (AppM ()) where logMsg msg = do @@ -32,9 +39,6 @@ instance Cryptonite.MonadRandom (AppM reqCtx) where getRandomBytes = liftIO . Cryptonite.getRandomBytes -runAppM :: Env reqCtx -> AppM reqCtx a -> IO a -runAppM env (AppM m) = runReaderT m env - instance R.MonadRedis (AppM reqCtx) where liftRedis m = do redis <- asks Env.redisConnection diff --git a/src/Share/BackgroundJobs.hs b/src/Share/BackgroundJobs.hs new file mode 100644 index 0000000..0a0f8f9 --- /dev/null +++ b/src/Share/BackgroundJobs.hs @@ -0,0 +1,12 @@ +module Share.BackgroundJobs (startWorkers) where + +import Data.Void (Void) +import Ki.Unlifted qualified as Ki +import Share.App +import Share.BackgroundJobs.Monad (Background) +import Share.Prelude + +-- | Kicks off all background workers. +startWorkers :: Ki.Scope -> Background () +startWorkers scope = do + _ diff --git a/src/Share/BackgroundJobs/Errors.hs b/src/Share/BackgroundJobs/Errors.hs new file mode 100644 index 0000000..e0bf413 --- /dev/null +++ b/src/Share/BackgroundJobs/Errors.hs @@ -0,0 +1,27 @@ +module Share.BackgroundJobs.Errors () where + +import Control.Exception (SomeException) +import Data.Typeable qualified as Typeable +import Share.BackgroundJobs.Monad +import Share.Monitoring qualified as Monitoring +import Share.Prelude +import Share.Utils.Logging (Loggable) +import Share.Utils.Logging qualified as Logging +import Share.Web.Errors (ToServerError) +import UnliftIO qualified + +-- | Log any exceptions which occur within the worker. +-- Catch and return synchronous exceptions, but just log and rethrow async exceptions. 
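The split described in this comment leans on two UnliftIO behaviours: tryAny catches only synchronous exceptions, while withException runs a handler on anything that escapes (here, asynchronous kills) and then rethrows. A sketch of that shape in isolation (hedged; simpler than the WIP implementation that follows):

reportExceptionSketch :: (SomeException -> Background ()) -> Background a -> Background (Either SomeException a)
reportExceptionSketch report action =
  -- synchronous failures come back as Left; anything that escapes tryAny is
  -- asynchronous and is reported before it propagates
  UnliftIO.withException (UnliftIO.tryAny action) report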
+reportException :: SomeException -> Background a -> Background (Either SomeException a) +reportException e bg = flip UnliftIO.withException reportError do + UnliftIO.tryAny m >>= \case + Left (UnliftIO.SomeException err) -> reportError $ UncaughtException err + Right a -> pure a + +-- | Logs the error with a call stack, but doesn't abort the request or render an error to the client. +reportError :: (HasCallStack, Loggable e) => e -> Background () +reportError e = do + asks (workerName . req) + let errLog = Logging.toLog e + Monitoring.reportError env coreTags extraTags errID e + logMsg (withSeverity Error $ errLog) diff --git a/src/Share/BackgroundJobs/Monad.hs b/src/Share/BackgroundJobs/Monad.hs new file mode 100644 index 0000000..8f7b541 --- /dev/null +++ b/src/Share/BackgroundJobs/Monad.hs @@ -0,0 +1,23 @@ +-- | Background worker monad +module Share.BackgroundJobs.Monad + ( Background, + BackgroundCtx (..), + withWorkerName, + ) +where + +import Share.App +import Share.Env +import Share.Prelude + +data BackgroundCtx = BackgroundCtx + { workerName :: Text + } + +type Background = AppM BackgroundCtx + +localBackgroundCtx :: (MonadReader (Env BackgroundCtx) m) => (BackgroundCtx -> BackgroundCtx) -> m a -> m a +localBackgroundCtx f = local \env -> env {ctx = f (ctx env)} + +withWorkerName :: Text -> Background a -> Background a +withWorkerName name = localBackgroundCtx \ctx -> ctx {workerName = name} diff --git a/src/Share/BackgroundJobs/Workers.hs b/src/Share/BackgroundJobs/Workers.hs new file mode 100644 index 0000000..39a7cdb --- /dev/null +++ b/src/Share/BackgroundJobs/Workers.hs @@ -0,0 +1,31 @@ +module Share.BackgroundJobs.Workers (newWorker) where + +import Data.HashMap.Lazy qualified as HM +import Data.Map qualified as Map +import Ki.Unlifted qualified as Ki +import Share.App +import Share.BackgroundJobs.Monad (Background, BackgroundCtx (..), withWorkerName, workerName) +import Share.Env qualified as Env +import Share.Monitoring qualified as Monitoring +import Share.Prelude +import Share.Web.Errors (InternalServerError (..)) +import UnliftIO qualified + +-- | Creates a worker that runs forever. +-- Any exceptions will be caught and logged, then the worker will be restarted. +newWorker :: Ki.Scope -> Text -> Background Void -> Background () +newWorker scope workerName worker = withWorkerName workerName do + Ki.fork_ scope do + -- Log any async exceptions, but don't try to catch them. + flip UnliftIO.withException logException . 
forever $ do + UnliftIO.tryAny worker >>= \case + Left e -> logException e + Right void -> absurd void + where + logException :: UnliftIO.SomeException -> Background () + logException e = do + env <- ask + let coreTags = HM.fromList [("workerName", into @String workerName)] + let extraTags = HM.fromList [] + let errID = "background-job:" <> workerName + Monitoring.reportError env coreTags extraTags errID (InternalServerError e) diff --git a/src/Share/Env.hs b/src/Share/Env.hs index df0155f..e201b21 100644 --- a/src/Share/Env.hs +++ b/src/Share/Env.hs @@ -4,19 +4,19 @@ module Share.Env where import Database.Redis qualified as R +import Hasql.Pool qualified as Hasql +import Network.URI (URI) +import Servant qualified as S import Share.JWT qualified as JWT import Share.Prelude import Share.Utils.Logging.Types qualified as Logging import Share.Utils.Servant.Cookies qualified as Cookies -import Hasql.Pool qualified as Hasql -import Network.URI (URI) -import Servant qualified as S import System.Log.FastLogger (FormattedTime, LogStr) import System.Log.Raven.Types (SentryService) import Unison.Codebase.Runtime (Runtime) import Unison.Symbol (Symbol) -data Env reqCtx = Env +data Env ctx = Env { redisConnection :: R.Connection, pgConnectionPool :: Hasql.Pool, logger :: LogStr -> IO (), @@ -41,7 +41,7 @@ data Env reqCtx = Env sentryService :: SentryService, -- The commit hash of the currently running version of Share commitHash :: Text, - requestCtx :: reqCtx, + ctx :: ctx, shouldCheckForMigration :: Bool, -- The maximum number of workers to use for concurrent work on a single request. -- E.g. sync can parallelize signing/verifying JWTs or run multiple transactions against diff --git a/src/Share/Web/App.hs b/src/Share/Web/App.hs index ee92108..da1411b 100644 --- a/src/Share/Web/App.hs +++ b/src/Share/Web/App.hs @@ -81,7 +81,7 @@ freshRequestCtx = do -- | Get the tags associated with the current request. getTags :: (MonadReader (Env RequestCtx) m, MonadIO m) => m (Map Text Text) getTags = do - RequestCtx {reqTagsVar, localTags} <- asks requestCtx + RequestCtx {reqTagsVar, localTags} <- asks ctx reqTags <- liftIO $ readTVarIO reqTagsVar -- local tags take precedence over request tags pure $ localTags <> reqTags @@ -89,7 +89,7 @@ getTags = do -- | Add a tag to the current request. This tag will be used in logging and error reports addRequestTag :: (MonadReader (Env RequestCtx) m, MonadIO m) => Text -> Text -> m () addRequestTag k v = do - RequestCtx {reqTagsVar} <- asks requestCtx + RequestCtx {reqTagsVar} <- asks ctx atomically $ modifyTVar' reqTagsVar (Map.insert k v) addServerTag :: HasServer api '[] => Proxy api -> Text -> Text -> ServerT api WebApp -> ServerT api WebApp @@ -113,11 +113,11 @@ withLocalTag k v action = do localRequestCtx (\ctx -> ctx {localTags = Map.insert k v $ localTags ctx}) action localRequestCtx :: (MonadReader (Env RequestCtx) m) => (RequestCtx -> RequestCtx) -> m a -> m a -localRequestCtx f = local \env -> env {requestCtx = f (requestCtx env)} +localRequestCtx f = local \env -> env {ctx = f (ctx env)} shouldUseCaching :: (MonadReader (Env RequestCtx) m) => m Bool shouldUseCaching = - asks (useCaching . requestCtx) + asks (useCaching . ctx) -- | Construct a full URI to a path within share, with provided query params. 
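newWorker above effectively gives each background job a small supervisor: synchronous failures are logged and the loop restarts, while asynchronous exceptions are logged by the surrounding withException and then take the thread down. The same behaviour as a standalone sketch (hypothetical helper, not in the patch):

superviseForever :: (UnliftIO.SomeException -> IO ()) -> IO () -> IO ()
superviseForever logIt body =
  forever $
    -- restart on synchronous failure; asynchronous exceptions still propagate
    UnliftIO.tryAny body >>= either logIt pure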
sharePathQ :: [Text] -> Map Text Text -> AppM reqCtx URI diff --git a/src/Share/Web/Errors.hs b/src/Share/Web/Errors.hs index cf276ad..227267a 100644 --- a/src/Share/Web/Errors.hs +++ b/src/Share/Web/Errors.hs @@ -43,6 +43,9 @@ import Data.String (IsString) import Data.Text (pack) import Data.Text qualified as Text import Data.Text.Encoding qualified as Text +import GHC.Stack qualified as GHC +import Servant +import Servant.Client import Share.Env qualified as Env import Share.Monitoring qualified as Monitoring import Share.OAuth.Errors (OAuth2Error (..), OAuth2ErrorCode (..), OAuth2ErrorRedirect (..)) @@ -51,9 +54,6 @@ import Share.Prelude import Share.Utils.Logging import Share.Utils.URI (URIParam (..), addQueryParam) import Share.Web.App -import GHC.Stack qualified as GHC -import Servant -import Servant.Client import Unison.Server.Backend qualified as Backend import Unison.Server.Errors qualified as Backend import Unison.Sync.Types qualified as Sync @@ -128,7 +128,7 @@ reportError :: (HasCallStack, ToServerError e, Loggable e) => e -> WebApp () reportError e = do let (ErrorID errID, serverErr) = toServerError e env <- ask - RequestCtx {pathInfo, rawURI} <- asks Env.requestCtx + RequestCtx {pathInfo, rawURI} <- asks Env.ctx reqTags <- getTags let errLog@LogMsg {msg} = withTag ("error-id", errID) $ toLog e -- We emit a separate log message for each error, but it's also From 8395ee5072e2d8aa3a56bcd5d759bf3e6840c2c7 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Mon, 6 May 2024 11:17:52 -0700 Subject: [PATCH 04/48] Set up Definition Sync worker --- share-api.cabal | 3 ++ src/Share.hs | 3 +- src/Share/BackgroundJobs.hs | 6 +-- src/Share/BackgroundJobs/Errors.hs | 47 +++++++++++++++---- src/Share/BackgroundJobs/Monad.hs | 19 ++++++++ .../BackgroundJobs/Search/DefinitionSync.hs | 44 +++++++++++++++++ src/Share/BackgroundJobs/Workers.hs | 25 ++-------- 7 files changed, 112 insertions(+), 35 deletions(-) create mode 100644 src/Share/BackgroundJobs/Search/DefinitionSync.hs diff --git a/share-api.cabal b/share-api.cabal index a72936a..7841220 100644 --- a/share-api.cabal +++ b/share-api.cabal @@ -30,6 +30,8 @@ library Share.BackgroundJobs Share.BackgroundJobs.Errors Share.BackgroundJobs.Monad + Share.BackgroundJobs.Search.DefinitionSync + Share.BackgroundJobs.Search.DefinitionSync.Types Share.BackgroundJobs.Workers Share.Branch Share.Codebase @@ -68,6 +70,7 @@ library Share.Postgres.Projects.Queries Share.Postgres.Queries Share.Postgres.Refs.Types + Share.Postgres.Search.DefinitionSync Share.Postgres.Serialization Share.Postgres.Sync.Conversions Share.Postgres.Sync.Queries diff --git a/src/Share.hs b/src/Share.hs index 7bb6681..1413cc5 100644 --- a/src/Share.hs +++ b/src/Share.hs @@ -35,6 +35,7 @@ import Network.Wai.Middleware.Routed (routedMiddleware) import Servant import Share.App import Share.BackgroundJobs qualified as BackgroundJobs +import Share.BackgroundJobs.Monad (runBackground) import Share.Env qualified as Env import Share.IDs (RequestId, UserId) import Share.IDs qualified as IDs @@ -64,7 +65,7 @@ startApp :: Env.Env () -> IO () startApp env = do app <- mkShareServer env Ki.scoped \scope -> do - Ki.fork_ scope $ runAppM env $ BackgroundJobs.worker scope + runBackground env "background-jobs" $ BackgroundJobs.startWorkers scope run (Env.serverPort env) app newtype UncaughtException err = UncaughtException err diff --git a/src/Share/BackgroundJobs.hs b/src/Share/BackgroundJobs.hs index 0a0f8f9..06b79d9 100644 --- a/src/Share/BackgroundJobs.hs +++ b/src/Share/BackgroundJobs.hs @@ -1,12 
+1,10 @@ module Share.BackgroundJobs (startWorkers) where -import Data.Void (Void) import Ki.Unlifted qualified as Ki -import Share.App import Share.BackgroundJobs.Monad (Background) -import Share.Prelude +import Share.BackgroundJobs.Search.DefinitionSync qualified as DefnSearch -- | Kicks off all background workers. startWorkers :: Ki.Scope -> Background () startWorkers scope = do - _ + DefnSearch.worker scope diff --git a/src/Share/BackgroundJobs/Errors.hs b/src/Share/BackgroundJobs/Errors.hs index e0bf413..6f446b9 100644 --- a/src/Share/BackgroundJobs/Errors.hs +++ b/src/Share/BackgroundJobs/Errors.hs @@ -1,27 +1,56 @@ -module Share.BackgroundJobs.Errors () where +{-# LANGUAGE GADTs #-} + +module Share.BackgroundJobs.Errors + ( reportException, + reportError, + throwSomeServerError, + ) +where import Control.Exception (SomeException) +import Control.Monad.Except +import Data.HashMap.Lazy qualified as HM import Data.Typeable qualified as Typeable import Share.BackgroundJobs.Monad +import Share.Env qualified as Env import Share.Monitoring qualified as Monitoring import Share.Prelude import Share.Utils.Logging (Loggable) import Share.Utils.Logging qualified as Logging -import Share.Web.Errors (ToServerError) +import Share.Web.Errors (InternalServerError (..), withCallstack) import UnliftIO qualified +data SomeBackgroundError where + SomeBackgroundError :: (Typeable e, Loggable e) => e -> SomeBackgroundError + +instance Loggable SomeBackgroundError where + toLog (SomeBackgroundError e) = Logging.toLog e + +throwSomeServerError :: (Show e, Loggable e, MonadError SomeBackgroundError m, HasCallStack, Typeable e) => e -> m a +throwSomeServerError = throwError . SomeBackgroundError . withCallstack + -- | Log any exceptions which occur within the worker. -- Catch and return synchronous exceptions, but just log and rethrow async exceptions. -reportException :: SomeException -> Background a -> Background (Either SomeException a) -reportException e bg = flip UnliftIO.withException reportError do - UnliftIO.tryAny m >>= \case - Left (UnliftIO.SomeException err) -> reportError $ UncaughtException err - Right a -> pure a +reportException :: Background a -> Background (Either SomeException a) +reportException bg = flip (UnliftIO.withException @_ @SomeException) (reportError . InternalServerError "uncaught-async-exception") do + UnliftIO.tryAny bg >>= \case + Left e@(UnliftIO.SomeException err) + | Just (SomeBackgroundError bgErr) <- Typeable.cast err -> do + reportError bgErr + pure $ Left e + | otherwise -> do + reportError $ InternalServerError "unknown-exception" err + pure $ Left e + Right a -> pure $ Right a -- | Logs the error with a call stack, but doesn't abort the request or render an error to the client. reportError :: (HasCallStack, Loggable e) => e -> Background () reportError e = do - asks (workerName . 
req) + env <- ask + let BackgroundCtx {workerName} = Env.ctx env + let coreTags = HM.fromList [("workerName", into @String workerName)] + let extraTags = HM.fromList [] + let errID = "background-job:" <> workerName let errLog = Logging.toLog e Monitoring.reportError env coreTags extraTags errID e - logMsg (withSeverity Error $ errLog) + Logging.logMsg (Logging.withSeverity Logging.Error $ errLog) diff --git a/src/Share/BackgroundJobs/Monad.hs b/src/Share/BackgroundJobs/Monad.hs index 8f7b541..26f2abb 100644 --- a/src/Share/BackgroundJobs/Monad.hs +++ b/src/Share/BackgroundJobs/Monad.hs @@ -3,12 +3,16 @@ module Share.BackgroundJobs.Monad ( Background, BackgroundCtx (..), withWorkerName, + runBackground, ) where +import Data.Map qualified as Map import Share.App import Share.Env +import Share.Env qualified as Env import Share.Prelude +import Share.Utils.Logging qualified as Logging data BackgroundCtx = BackgroundCtx { workerName :: Text @@ -21,3 +25,18 @@ localBackgroundCtx f = local \env -> env {ctx = f (ctx env)} withWorkerName :: Text -> Background a -> Background a withWorkerName name = localBackgroundCtx \ctx -> ctx {workerName = name} + +instance Logging.MonadLogger Background where + logMsg msg = do + log <- asks Env.logger + BackgroundCtx {workerName} <- asks ctx + let currentTags = Map.singleton "workerName" workerName + msg <- pure $ msg {Logging.tags = Logging.tags msg `Map.union` currentTags} + minSeverity <- asks Env.minLogSeverity + when (Logging.severity msg >= minSeverity) $ do + timestamp <- asks timeCache >>= liftIO + liftIO . log . Logging.logFmtFormatter timestamp $ msg + +runBackground :: Env () -> Text -> Background a -> IO a +runBackground env workerName bg = + runAppM env {ctx = BackgroundCtx {workerName}} bg diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync.hs b/src/Share/BackgroundJobs/Search/DefinitionSync.hs new file mode 100644 index 0000000..5ce9967 --- /dev/null +++ b/src/Share/BackgroundJobs/Search/DefinitionSync.hs @@ -0,0 +1,44 @@ +module Share.BackgroundJobs.Search.DefinitionSync (worker) where + +import Ki.Unlifted qualified as Ki +import Share.BackgroundJobs.Monad (Background) +import Share.BackgroundJobs.Workers (newWorker) +import Share.IDs (ReleaseId) +import Share.Postgres qualified as PG +import Share.Postgres.Cursors qualified as Cursors +import Share.Postgres.Hashes.Queries qualified as HashQ +import Share.Postgres.NameLookups.Ops (ensureNameLookupForBranchId) +import Share.Postgres.NameLookups.Ops qualified as NLOps +import Share.Postgres.Queries qualified as PG +import Share.Postgres.Search.DefinitionSync qualified as DefnSyncQ +import Share.Prelude +import Share.Project (Project (..), ProjectVisibility (..)) +import Share.Release (Release (..)) +import Share.User (User (..), UserVisibility (..)) +import Share.Utils.Logging qualified as Logging +import UnliftIO.Concurrent qualified as UnliftIO + +-- | How often to poll for new releases to sync in seconds. +pollingIntervalSeconds :: Int +pollingIntervalSeconds = 10 + +worker :: Ki.Scope -> Background () +worker scope = newWorker scope "search:defn-sync" $ forever do + liftIO $ UnliftIO.threadDelay $ pollingIntervalSeconds * 1000000 + Logging.logInfoText "Syncing definitions..." + PG.runTransaction $ do + mayReleaseId <- DefnSyncQ.claimUnsyncedRelease + for_ mayReleaseId syncRelease + +syncRelease :: ReleaseId -> PG.Transaction e () +syncRelease releaseId = fmap (fromMaybe ()) . 
runMaybeT $ do
+  Release {projectId, squashedCausal} <- lift $ PG.expectReleaseById releaseId
+  Project {slug, ownerUserId, visibility = projectVis} <- lift $ PG.expectProjectById projectId
+  User {handle, visibility = userVis} <- PG.expectUserByUserId ownerUserId
+  -- Don't sync private projects
+  guard $ projectVis == ProjectPublic
+  -- Don't sync private users
+  guard $ userVis == UserPublic
+  bhId <- HashQ.expectNamespaceIdsByCausalIdsOf id squashedCausal
+  NLOps.ensureNameLookupForBranchId bhId
+  _
diff --git a/src/Share/BackgroundJobs/Workers.hs b/src/Share/BackgroundJobs/Workers.hs
index 39a7cdb..8c24977 100644
--- a/src/Share/BackgroundJobs/Workers.hs
+++ b/src/Share/BackgroundJobs/Workers.hs
@@ -1,31 +1,14 @@
 module Share.BackgroundJobs.Workers (newWorker) where
 
-import Data.HashMap.Lazy qualified as HM
-import Data.Map qualified as Map
 import Ki.Unlifted qualified as Ki
-import Share.App
-import Share.BackgroundJobs.Monad (Background, BackgroundCtx (..), withWorkerName, workerName)
-import Share.Env qualified as Env
-import Share.Monitoring qualified as Monitoring
+import Share.BackgroundJobs.Errors (reportException)
+import Share.BackgroundJobs.Monad (Background, withWorkerName)
 import Share.Prelude
-import Share.Web.Errors (InternalServerError (..))
-import UnliftIO qualified
 
 -- | Creates a worker that runs forever.
 -- Any exceptions will be caught and logged, then the worker will be restarted.
 newWorker :: Ki.Scope -> Text -> Background Void -> Background ()
 newWorker scope workerName worker = withWorkerName workerName do
   Ki.fork_ scope do
-    -- Log any async exceptions, but don't try to catch them.
-    flip UnliftIO.withException logException . forever $ do
-      UnliftIO.tryAny worker >>= \case
-        Left e -> logException e
-        Right void -> absurd void
-  where
-    logException :: UnliftIO.SomeException -> Background ()
-    logException e = do
-      env <- ask
-      let coreTags = HM.fromList [("workerName", into @String workerName)]
-      let extraTags = HM.fromList []
-      let errID = "background-job:" <> workerName
-      Monitoring.reportError env coreTags extraTags errID (InternalServerError e)
+    -- Run the worker forever, catching and logging any synchronous exceptions, but then restarting. 
+ forever $ reportException worker From 2dc21b0f8feea479eecea5fc4b9ff008ed63ece1 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Mon, 6 May 2024 11:17:52 -0700 Subject: [PATCH 05/48] Add types for Definition Documents --- .../Search/DefinitionSync/Types.hs | 142 ++++++++++++++++++ src/Share/IDs.hs | 11 +- src/Share/Prelude/Orphans.hs | 5 + 3 files changed, 155 insertions(+), 3 deletions(-) create mode 100644 src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs b/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs new file mode 100644 index 0000000..841c6bc --- /dev/null +++ b/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs @@ -0,0 +1,142 @@ +{-# LANGUAGE DataKinds #-} + +module Share.BackgroundJobs.Search.DefinitionSync.Types + ( TermOrTypeSummary (..), + DefinitionDocument (..), + ) +where + +import Data.Aeson +import Data.Text qualified as Text +import Share.IDs (PrefixedHash (..), ProjectShortHand, ReleaseVersion) +import Share.IDs qualified as IDs +import Share.Prelude +import U.Codebase.HashTags (ComponentHash) +import Unison.Hash qualified as Hash +import Unison.Name (Name) +import Unison.Server.Share.DefinitionSummary (TermSummary, TypeSummary) +import Unison.ShortHash (ShortHash) +import Unison.ShortHash qualified as SH +import Unison.Syntax.Name qualified as Name + +data TermOrTypeSummary = TermSummary TermSummary | TypeSummary TypeSummary + deriving (Show) + +instance ToJSON TermOrTypeSummary where + toJSON (TermSummary ts) = object ["kind" .= ("term" :: Text), "payload" .= ts] + toJSON (TypeSummary ts) = object ["kind" .= ("type" :: Text), "payload" .= ts] + +instance FromJSON TermOrTypeSummary where + parseJSON = withObject "TermOrTypeSummary" $ \o -> do + kind :: Text <- o .: "kind" + case kind of + "term" -> TermSummary <$> o .: "payload" + "type" -> TypeSummary <$> o .: "payload" + _ -> fail $ "Invalid kind: " <> Text.unpack kind + +-- | The number of occurences of this token in the search query. +-- E.g. for the query: 'Text -> Text -> Text', the Text type mention token would +-- occur 3 times, and the set would be: +-- {NameMention "Text" (Occurrence 1), NameMention "Text" (Occurrence 2), NameMention "Text" (Occurrence 3)} +newtype Occurrence = Occurrence Int + deriving newtype (Show, Read, Eq, Ord, Num, ToJSON) + +-- | An id for identifying unique type variables mentioned in a query. +-- E.g. 'map : (a -> b) -> List a -> List b' would have two type var Ids, one for a, one +-- for b, and would have occurrences 1 and 2 for each respectively. +newtype VarId = VarId Int + deriving newtype (Show, Read, Eq, Ord, Num, ToJSON) + +data DefnToken + = -- Allows searching by literal name + NameToken Name + | -- Also includes ability mentions + NameMentionToken Name Occurrence + | -- Allows searching for type sigs with type variables + TypeVarToken VarId Occurrence + | -- Allows searching by component hash + HashToken ComponentHash + deriving (Show, Eq, Ord) + +-- | Converts a DefnToken to a prefix-searchable text string. 
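tokenToText and its inverse tokenFromText (below) are presumably meant to round-trip; stated as a QuickCheck-style property (a sketch only — it assumes a hypothetical Arbitrary DefnToken instance, and relies on parsed Names never containing ':', since decoding splits on that character):

prop_tokenRoundTrip :: DefnToken -> Bool
prop_tokenRoundTrip tok = tokenFromText (tokenToText tok) == Just tok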
+-- +-- >>> tokenToText (NameToken (Name.unsafeParseText "List.map")) +-- "List.map:name" +-- +-- >>> tokenToText (NameMentionToken (Name.unsafeParseText "List.map") (Occurrence 1)) +-- "List.map:mention:1" +-- +-- >>> tokenToText (TypeVarToken (VarId 1) (Occurrence 1)) +-- "_:var:1:1" +-- +-- >>> import Unison.Hash qualified as Hash +-- >>> import U.Codebase.HashTags (ComponentHash (..)) +-- >>> hash = ComponentHash $ Hash.unsafeFromBase32HexText "abcd" +-- >>> tokenToText (HashToken hash) +-- "#abc0:hash" +tokenToText :: DefnToken -> Text +tokenToText = \case + (NameToken n) -> Text.intercalate ":" [Name.toText n, "name"] + (NameMentionToken n o) -> Text.intercalate ":" [Name.toText n, "mention", tShow o] + (TypeVarToken v o) -> Text.intercalate ":" ["_", "var", tShow v, tShow o] + (HashToken h) -> Text.intercalate ":" [into @Text $ PrefixedHash @"#" h, "hash"] + +tokenFromText :: Text -> Maybe DefnToken +tokenFromText t = case Text.splitOn ":" t of + [name, "name"] -> NameToken <$> Name.parseText name + [name, "mention", occ] -> NameMentionToken <$> (Name.parseText name) <*> readMaybe (Text.unpack occ) + [_, "var", vid, occ] -> TypeVarToken <$> readMaybe (Text.unpack vid) <*> readMaybe (Text.unpack occ) + [prefixedHash, "hash"] -> + case Text.stripPrefix "#" prefixedHash of + Just hash -> HashToken . into @ComponentHash <$> Hash.fromBase32HexText hash + Nothing -> Nothing + _ -> Nothing + +data DefinitionDocument = DefinitionDocument + { projectShortHand :: ProjectShortHand, + releaseVersion :: ReleaseVersion, + fqn :: Name, + hash :: ShortHash, + tokens :: Set DefnToken, + payload :: TermOrTypeSummary + } + deriving (Show) + +instance ToJSON DefnToken where + toJSON = String . tokenToText + +instance FromJSON DefnToken where + parseJSON = withText "DefnToken" $ \t -> + maybe (fail $ "Invalid DefnToken: " <> Text.unpack t) pure $ tokenFromText t + +-- | Formats a DefinitionDocument into a documentName +-- +-- >>> projectShortHand = IDs.ProjectShortHand "unison" "base" +-- >>> releaseVersion = IDs.ReleaseVersion 1 2 3 +-- >>> fqn = Name.unsafeFromText "data.List.map" +-- >>> hash = ShortHash "abcdef" +-- >>> formatDocName DefinitionDocument {projectShortHand, releaseVersion, fqn, hash, tokens = mempty, payload = undefined} +formatDocName :: DefinitionDocument -> Text +formatDocName DefinitionDocument {projectShortHand, fqn, hash} = + Text.unwords [IDs.toText projectShortHand, Name.toText fqn, SH.toText hash] + +instance ToJSON DefinitionDocument where + toJSON dd@DefinitionDocument {releaseVersion, tokens, payload} = + object + [ "documentName" .= formatDocName dd, + "releaseVersion" .= IDs.toText releaseVersion, + "tokens" .= tokens, + "metadata" .= payload + ] + +-- [{ "documentName": "@unison/base data.List.map #abcdef" +-- , "releaseVersion": "1.0.0" +-- , "tokens": ["Remote:1", "Optional:1", "map:name", "List.map:name", "data.List.map:name"] +-- , "metadata": { +-- "project": "base" +-- , "branchRef": "releases/1.2.3" +-- , "definitionKind": "data|ability|term (or constructors)" +-- , "definitionName": "data.Optional.map" +-- , "definitionTypeSignature": "(a -> b) -> Optional a -> Optional b" +-- } +-- }] diff --git a/src/Share/IDs.hs b/src/Share/IDs.hs index fdff53c..68ff0b0 100644 --- a/src/Share/IDs.hs +++ b/src/Share/IDs.hs @@ -54,6 +54,9 @@ import Data.Char qualified as Char import Data.List (intercalate) import Data.Text qualified as Text import Data.UUID (UUID) +import GHC.TypeLits (KnownSymbol, Symbol, symbolVal) +import Hasql.Interpolate qualified as Hasql +import Servant 
(FromHttpApiData (..), ToHttpApiData (..)) import Share.OAuth.Types ( JTI (..), PendingSessionId (..), @@ -62,9 +65,6 @@ import Share.OAuth.Types ) import Share.Prelude import Share.Utils.IDs (CaseInsensitiveID (..), IsID (..), PrefixedID (..), UsingID (..), fromId, fromUUID, idFrom) -import GHC.TypeLits (KnownSymbol, Symbol, symbolVal) -import Hasql.Interpolate qualified as Hasql -import Servant (FromHttpApiData (..), ToHttpApiData (..)) import Text.Megaparsec ( ErrorFancy (..), MonadParsec (eof, takeWhileP), @@ -650,6 +650,11 @@ newtype PrefixedHash (prefix :: Symbol) h = PrefixedHash h deriving newtype (Eq, Ord) deriving (Show) +instance (From h Text, KnownSymbol prefix) => From (PrefixedHash prefix h) Text where + from (PrefixedHash h) = + let prefix = Text.pack $ symbolVal (Proxy @prefix) + in prefix <> into @Text h + instance (KnownSymbol prefix, ToHttpApiData h) => ToHttpApiData (PrefixedHash prefix h) where toUrlPiece (PrefixedHash h) = let prefix = Text.pack $ symbolVal (Proxy @prefix) diff --git a/src/Share/Prelude/Orphans.hs b/src/Share/Prelude/Orphans.hs index 16a7e2a..6a4925e 100644 --- a/src/Share/Prelude/Orphans.hs +++ b/src/Share/Prelude/Orphans.hs @@ -14,6 +14,8 @@ import Data.UUID qualified as UUID import GHC.TypeLits qualified as TypeError import Hasql.Interpolate qualified as Interp import Unison.Server.Orphans () +import Unison.ShortHash (ShortHash) +import Unison.ShortHash qualified as SH import Witch instance {-# OVERLAPPING #-} TypeError.TypeError ('TypeError.Text "A String will be encoded as char[], Did you mean to use Text instead?") => Interp.EncodeValue String where @@ -36,3 +38,6 @@ instance Semialign f => Semialign (Cofree f) where instance From UUID Text where from = UUID.toText + +instance From ShortHash Text where + from = SH.toText From 67b582c9b813d4538cfe67372db54c4ba7a79bb5 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Mon, 6 May 2024 11:17:52 -0700 Subject: [PATCH 06/48] Add cursor queries for streaming name lookup definitions --- src/Share/Postgres/NameLookups/Queries.hs | 32 ++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/Share/Postgres/NameLookups/Queries.hs b/src/Share/Postgres/NameLookups/Queries.hs index 543a6f2..540882a 100644 --- a/src/Share/Postgres/NameLookups/Queries.hs +++ b/src/Share/Postgres/NameLookups/Queries.hs @@ -11,6 +11,10 @@ module Share.Postgres.NameLookups.Queries fuzzySearchTypes, FuzzySearchScore, + -- * Cursors + termsWithinNamespace, + typesWithinNamespace, + -- * Name lookup management listNameLookupMounts, checkBranchHashNameLookupExists, @@ -22,12 +26,16 @@ import Control.Lens hiding (from) import Data.Foldable qualified as Foldable import Data.List.NonEmpty qualified as NEL import Data.Text qualified as Text +import Share.Postgres import Share.Postgres qualified as PG +import Share.Postgres.Cursors (PGCursor) +import Share.Postgres.Cursors qualified as Cursors import Share.Postgres.IDs import Share.Postgres.NameLookups.Types import Share.Postgres.Refs.Types (PGReference, PGReferent, referenceFields, referentFields) import Share.Prelude -import U.Codebase.Referent (ConstructorType) +import U.Codebase.Reference (Reference) +import U.Codebase.Referent (ConstructorType, Referent) import Unison.Util.Monoid qualified as Monoid -- | Get the list of term names and suffixifications for a given Referent within a given namespace. @@ -453,3 +461,25 @@ toNamespacePrefix = \case -- "foo.bar." toReversedNamePrefix :: ReversedName -> Text toReversedNamePrefix suffix = Text.intercalate "." 
(into @[Text] suffix) <> "."
+
+termsWithinNamespace :: NameLookupReceipt -> BranchHashId -> Transaction e (PGCursor (NamedRef Referent))
+termsWithinNamespace !_nlReceipt bhId = do
+  Cursors.newRowCursor @(NamedRef Referent)
+    "termsForSearchSyncCursor"
+    [sql|
+    SELECT reversed_name, referent_builtin, referent_component_hash.base32, referent_component_index, referent_constructor_index
+    FROM scoped_term_name_lookup
+    JOIN component_hashes referent_component_hash ON referent_component_hash.id = referent_component_hash_id
+    WHERE root_branch_hash_id = #{bhId}
+  |]
+
+typesWithinNamespace :: NameLookupReceipt -> BranchHashId -> Transaction e (PGCursor (NamedRef Reference))
+typesWithinNamespace !_nlReceipt bhId = do
+  Cursors.newRowCursor @(NamedRef Reference)
+    "typesForSearchSyncCursor"
+    [sql|
+    SELECT reversed_name, reference_builtin, reference_component_hash.base32, reference_component_index
+    FROM scoped_type_name_lookup
+    JOIN component_hashes reference_component_hash ON reference_component_hash.id = reference_component_hash_id
+    WHERE root_branch_hash_id = #{bhId}
+  |]

From 6ee23c9cfe5f4287cc9b8fe8406f8945635607f6 Mon Sep 17 00:00:00 2001
From: Chris Penner
Date: Mon, 6 May 2024 11:17:52 -0700
Subject: [PATCH 07/48] Start implementing defn sync process

---
 sql/2024-05-07-00-00_defn_search_sync.sql     |  7 ++++
 .../BackgroundJobs/Search/DefinitionSync.hs   | 22 ++++++++++-
 .../Search/DefinitionSync/Types.hs            | 16 +++-----
 src/Share/Postgres/Cursors.hs                 |  4 +-
 src/Share/Postgres/NameLookups/Ops.hs         |  2 +
 src/Share/Postgres/NameLookups/Queries.hs     |  7 +++-
 src/Share/Postgres/Ops.hs                     |  3 --
 src/Share/Postgres/Queries.hs                 | 22 ++++++++++-
 src/Share/Postgres/Search/DefinitionSync.hs   | 37 +++++++++++++++++++
 src/Share/Web/Share/Branches/Impl.hs          |  4 +-
 src/Share/Web/Share/Releases/Impl.hs          |  8 ++--
 src/Share/Web/UCM/Projects/Impl.hs            |  2 +-
 src/Unison/Server/Share/DefinitionSummary.hs  | 20 +++++++++-
 13 files changed, 122 insertions(+), 32 deletions(-)
 create mode 100644 sql/2024-05-07-00-00_defn_search_sync.sql
 create mode 100644 src/Share/Postgres/Search/DefinitionSync.hs

diff --git a/sql/2024-05-07-00-00_defn_search_sync.sql b/sql/2024-05-07-00-00_defn_search_sync.sql
new file mode 100644
index 0000000..f2cb8dd
--- /dev/null
+++ b/sql/2024-05-07-00-00_defn_search_sync.sql
@@ -0,0 +1,7 @@
+-- New table for coordinating background job for syncing global definitions for search.
+
+-- Table of all releases which have been published, but not yet synced to the global definition search index.
+CREATE TABLE global_definition_search_release_queue (
+  release_id UUID PRIMARY KEY REFERENCES releases(id) ON DELETE CASCADE,
+  created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP
+);
diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync.hs b/src/Share/BackgroundJobs/Search/DefinitionSync.hs
index 5ce9967..c9b6775 100644
--- a/src/Share/BackgroundJobs/Search/DefinitionSync.hs
+++ b/src/Share/BackgroundJobs/Search/DefinitionSync.hs
@@ -16,12 +16,18 @@ import Share.Project (Project (..), ProjectVisibility (..))
 import Share.Release (Release (..))
 import Share.User (User (..), UserVisibility (..))
 import Share.Utils.Logging qualified as Logging
+import U.Codebase.Referent (Referent)
+import Unison.Name (Name)
 import UnliftIO.Concurrent qualified as UnliftIO
 
 -- | How often to poll for new releases to sync in seconds. 
pollingIntervalSeconds :: Int pollingIntervalSeconds = 10 +-- | How many definitions to hold in memory at a time while syncing +defnBatchSize :: Int32 +defnBatchSize = 10 + worker :: Ki.Scope -> Background () worker scope = newWorker scope "search:defn-sync" $ forever do liftIO $ UnliftIO.threadDelay $ pollingIntervalSeconds * 1000000 @@ -40,5 +46,17 @@ syncRelease releaseId = fmap (fromMaybe ()) . runMaybeT $ do -- Don't sync private users guard $ userVis == UserPublic bhId <- HashQ.expectNamespaceIdsByCausalIdsOf id squashedCausal - NLOps.ensureNameLookupForBranchId bhId - _ + nlReceipt <- NLOps.ensureNameLookupForBranchId bhId + termsCursor <- lift $ NLOps.termsWithinNamespace nlReceipt bhId + lift $ syncTerms termsCursor + typesCursor <- lift $ NLOps.typesWithinNamespace nlReceipt bhId + lift $ syncTypes typesCursor + +syncTerms :: Cursors.PGCursor (Name, Referent) -> PG.Transaction e () +syncTerms termsCursor = + Cursors.fetchN defnBatchSize termsCursor >>= \case + Nothing -> pure () + Just terms -> do + for terms termSummary + syncDefinitionToCloud + pure () diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs b/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs index 841c6bc..ff75f92 100644 --- a/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs +++ b/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs @@ -129,14 +129,8 @@ instance ToJSON DefinitionDocument where "metadata" .= payload ] --- [{ "documentName": "@unison/base data.List.map #abcdef" --- , "releaseVersion": "1.0.0" --- , "tokens": ["Remote:1", "Optional:1", "map:name", "List.map:name", "data.List.map:name"] --- , "metadata": { --- "project": "base" --- , "branchRef": "releases/1.2.3" --- , "definitionKind": "data|ability|term (or constructors)" --- , "definitionName": "data.Optional.map" --- , "definitionTypeSignature": "(a -> b) -> Optional a -> Optional b" --- } --- }] +data SearchDefinition = SearchDefinition + { fqn :: Name, + hash :: ShortHash + } + deriving (Show) diff --git a/src/Share/Postgres/Cursors.hs b/src/Share/Postgres/Cursors.hs index 0f25716..23aa9fc 100644 --- a/src/Share/Postgres/Cursors.hs +++ b/src/Share/Postgres/Cursors.hs @@ -53,8 +53,8 @@ newRowCursor namePrefix query = do pure $ PGCursor cursorName id -- | Fetch UP TO the next N results from the cursor. If there are no more rows, returns Nothing. 
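syncTerms above is still a stub; the loop it appears to be heading toward keeps pulling batches until the cursor is exhausted. A sketch under that assumption (termSummary and syncDefinitionToCloud are the patch's own WIP placeholders, not yet defined anywhere in the series):

syncTermsLoop :: Cursors.PGCursor (Name, Referent) -> PG.Transaction e ()
syncTermsLoop cursor =
  Cursors.fetchN defnBatchSize cursor >>= \case
    Nothing -> pure ()
    Just batch -> do
      for_ batch termSummary  -- WIP placeholder from the patch
      syncDefinitionToCloud   -- WIP placeholder from the patch
      syncTermsLoop cursor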
-fetchN :: PGCursor r -> Int32 -> Transaction e (Maybe (NonEmpty r)) -fetchN (PGCursor cursorName f) n = do +fetchN :: Int32 -> PGCursor r -> Transaction e (Maybe (NonEmpty r)) +fetchN n (PGCursor cursorName f) = do rows <- queryListRows [sql| FETCH FORWARD #{n} FROM #{cursorName} diff --git a/src/Share/Postgres/NameLookups/Ops.hs b/src/Share/Postgres/NameLookups/Ops.hs index c51ce27..3310eca 100644 --- a/src/Share/Postgres/NameLookups/Ops.hs +++ b/src/Share/Postgres/NameLookups/Ops.hs @@ -9,6 +9,8 @@ module Share.Postgres.NameLookups.Ops checkBranchHashNameLookupExists, deleteNameLookupsExceptFor, ensureNameLookupForBranchId, + Q.termsWithinNamespace, + Q.typesWithinNamespace, ) where diff --git a/src/Share/Postgres/NameLookups/Queries.hs b/src/Share/Postgres/NameLookups/Queries.hs index 540882a..643bcb8 100644 --- a/src/Share/Postgres/NameLookups/Queries.hs +++ b/src/Share/Postgres/NameLookups/Queries.hs @@ -36,6 +36,7 @@ import Share.Postgres.Refs.Types (PGReference, PGReferent, referenceFields, refe import Share.Prelude import U.Codebase.Reference (Reference) import U.Codebase.Referent (ConstructorType, Referent) +import Unison.Name (Name) import Unison.Util.Monoid qualified as Monoid -- | Get the list of term names and suffixifications for a given Referent within a given namespace. @@ -462,7 +463,7 @@ toNamespacePrefix = \case toReversedNamePrefix :: ReversedName -> Text toReversedNamePrefix suffix = Text.intercalate "." (into @[Text] suffix) <> "." -termsWithinNamespace :: NameLookupReceipt -> BranchHashId -> Transaction e (PGCursor (NamedRef Referent)) +termsWithinNamespace :: NameLookupReceipt -> BranchHashId -> Transaction e (PGCursor (Name, Referent)) termsWithinNamespace !_nlReceipt bhId = do Cursors.newRowCursor @(NamedRef Referent) "termsForSearchSyncCursor" @@ -472,8 +473,9 @@ termsWithinNamespace !_nlReceipt bhId = do JOIN component_hashes referent_component_hash ON component_hashes.id = referent_component_hash_id WHERE root_branch_hash_id = #{bhId} |] + <&> fmap (\NamedRef {reversedSegments, ref} -> (reversedNameToName reversedSegments, ref)) -typesWithinNamespace :: NameLookupReceipt -> BranchHashId -> Transaction e (PGCursor (NamedRef Reference)) +typesWithinNamespace :: NameLookupReceipt -> BranchHashId -> Transaction e (PGCursor (Name, Reference)) typesWithinNamespace !_nlReceipt bhId = do Cursors.newRowCursor @(NamedRef Reference) "typesForSearchSyncCursor" @@ -483,3 +485,4 @@ typesWithinNamespace !_nlReceipt bhId = do JOIN component_hashes reference_component_hash ON component_hashes.id = reference_component_hash_id WHERE root_branch_hash_id = #{bhId} |] + <&> fmap (\NamedRef {reversedSegments, ref} -> (reversedNameToName reversedSegments, ref)) diff --git a/src/Share/Postgres/Ops.hs b/src/Share/Postgres/Ops.hs index 4d4209d..5f6b796 100644 --- a/src/Share/Postgres/Ops.hs +++ b/src/Share/Postgres/Ops.hs @@ -45,9 +45,6 @@ projectIdByUserHandleAndSlug :: UserHandle -> ProjectSlug -> WebApp ProjectId projectIdByUserHandleAndSlug userHandle projectSlug = do PG.runTransaction (Q.projectIDFromHandleAndSlug userHandle projectSlug) `or404` (EntityMissing (ErrorID "no-project-for-handle-and-slug") $ "Project not found: " <> IDs.toText userHandle <> "/" <> IDs.toText projectSlug) -expectProjectById :: ProjectId -> WebApp Project -expectProjectById projectId = PG.runTransaction (Q.projectById projectId) `or404` (EntityMissing (ErrorID "no-project-for-project-id") $ "Project not found for id: " <> IDs.toText projectId) - createProject :: UserId -> ProjectSlug -> Maybe Text -> Set 
ProjectTag -> ProjectVisibility -> WebApp ProjectId createProject ownerUserId slug summary tags visibility = do PG.runTransactionOrRespondError do diff --git a/src/Share/Postgres/Queries.hs b/src/Share/Postgres/Queries.hs index 1f822c6..b76885b 100644 --- a/src/Share/Postgres/Queries.hs +++ b/src/Share/Postgres/Queries.hs @@ -21,6 +21,7 @@ import Share.Github import Share.IDs import Share.IDs qualified as IDs import Share.OAuth.Types +import Share.Postgres (QueryM (unrecoverableError)) import Share.Postgres qualified as PG import Share.Postgres.IDs import Share.Postgres.LooseCode.Queries qualified as LCQ @@ -33,12 +34,19 @@ import Share.Ticket qualified as Ticket import Share.User import Share.Utils.API import Share.Web.Authorization qualified as AuthZ +import Share.Web.Errors (EntityMissing (EntityMissing), ErrorID (..)) import Share.Web.Share.Branches.Types (BranchKindFilter (..)) import Share.Web.Share.Projects.Types (ContributionStats (..), DownloadStats (..), FavData, ProjectOwner, TicketStats (..)) import Share.Web.Share.Releases.Types (ReleaseStatusFilter (..), StatusUpdate (..)) import Unison.Util.List qualified as Utils import Unison.Util.Monoid (intercalateMap) +expectUserByUserId :: PG.QueryM m => UserId -> m User +expectUserByUserId uid = do + userByUserId uid >>= \case + Just user -> pure user + Nothing -> unrecoverableError $ EntityMissing (ErrorID "user:missing") ("User with id " <> IDs.toText uid <> " not found") + userByUserId :: PG.QueryM m => UserId -> m (Maybe User) userByUserId uid = do PG.query1Row @@ -87,6 +95,11 @@ projectById projectId = do WHERE p.id = #{projectId} |] +expectProjectById :: ProjectId -> PG.Transaction e Project +expectProjectById projectId = do + mayResult <- projectById projectId + whenNothing mayResult $ unrecoverableError $ EntityMissing (ErrorID "project:missing") ("Project with id " <> IDs.toText projectId <> " not found") + -- | returns (project, favData, projectOwner, default branch, latest release version) projectByIdWithMetadata :: Maybe UserId -> ProjectId -> PG.Transaction e (Maybe (Project, FavData, ProjectOwner, Maybe BranchName, Maybe ReleaseVersion)) projectByIdWithMetadata caller projectId = do @@ -490,7 +503,7 @@ updateProject projectId newSummary tagChanges newVisibility = -- This method is a bit naive, we just get the old project, update the fields accordingly, -- then save the entire project again. 
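expectUserByUserId and expectProjectById above (and expectReleaseById later in this patch) all share one lookup-or-fail shape; a hypothetical generalization (not part of the patch) makes it explicit:

expectOr :: PG.QueryM m => ErrorID -> Text -> m (Maybe a) -> m a
expectOr errId msg lookupIt =
  lookupIt >>= \case
    Just a -> pure a
    Nothing -> unrecoverableError $ EntityMissing errId msg

-- e.g. expectUserByUserId uid = expectOr (ErrorID "user:missing")
--        ("User with id " <> IDs.toText uid <> " not found") (userByUserId uid)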
isJust <$> runMaybeT do
-    Project {..} <- MaybeT $ projectById projectId
+    Project {..} <- lift $ expectProjectById projectId
     let updatedSummary = fromNullableUpdate summary newSummary
     let updatedTags = Set.toList $ applySetUpdate tags tagChanges
     let updatedVisibility = fromMaybe visibility newVisibility
@@ -1085,6 +1098,11 @@ releaseById releaseId = do
       WHERE release.id = #{releaseId}
     |]
 
+expectReleaseById :: ReleaseId -> PG.Transaction e (Release CausalId UserId)
+expectReleaseById releaseId = do
+  mayRow <- releaseById releaseId
+  whenNothing mayRow $ unrecoverableError $ EntityMissing (ErrorID "release:missing") ("Release with id " <> IDs.toText releaseId <> " not found")
+
 releaseByProjectReleaseShortHand :: ProjectReleaseShortHand -> PG.Transaction e (Maybe (Release CausalId UserId))
 releaseByProjectReleaseShortHand ProjectReleaseShortHand {userHandle, projectSlug, releaseVersion = ReleaseVersion {major, minor, patch}} = do
   PG.query1Row
@@ -1252,7 +1270,7 @@ data UpdateReleaseResult
 updateRelease :: UserId -> ReleaseId -> Maybe StatusUpdate -> PG.Transaction e UpdateReleaseResult
 updateRelease caller releaseId newStatus = do
   fromMaybe UpdateRelease'NotFound <$> runMaybeT do
-    Release {..} <- MaybeT $ releaseById releaseId
+    Release {..} <- lift $ expectReleaseById releaseId
     -- Can go from draft -> published -> deprecated
     -- or straight from draft -> deprecated
     -- but can't go from published -> draft or deprecated -> draft.
diff --git a/src/Share/Postgres/Search/DefinitionSync.hs b/src/Share/Postgres/Search/DefinitionSync.hs
new file mode 100644
index 0000000..44f321c
--- /dev/null
+++ b/src/Share/Postgres/Search/DefinitionSync.hs
@@ -0,0 +1,37 @@
+{-# LANGUAGE TypeOperators #-}
+
+module Share.Postgres.Search.DefinitionSync
+  ( submitReleaseToBeSynced,
+    claimUnsyncedRelease,
+  )
+where
+
+import Share.IDs (ReleaseId)
+import Share.Postgres
+
+submitReleaseToBeSynced :: ReleaseId -> Transaction e ()
+submitReleaseToBeSynced releaseId = do
+  execute_
+    [sql|
+      INSERT INTO global_definition_search_release_queue (release_id)
+      VALUES (#{releaseId})
+    |]
+
+-- | Claim the oldest unsynced release to be indexed.
+claimUnsyncedRelease :: Transaction e (Maybe ReleaseId)
+claimUnsyncedRelease = do
+  query1Col
+    [sql|
+      WITH chosen_release AS (
+        SELECT q.release_id
+        FROM global_definition_search_release_queue q
+        ORDER BY q.created_at ASC
+        LIMIT 1
+        -- Skip any that are being synced by other workers. 
+ FOR UPDATE SKIP LOCKED + ) + DELETE FROM global_definition_search_release_queue + USING chosen_release + WHERE global_definition_search_release_queue.id = chosen_release.id + RETURNING chosen_release.id + |] diff --git a/src/Share/Web/Share/Branches/Impl.hs b/src/Share/Web/Share/Branches/Impl.hs index 25b20ae..d4b3209 100644 --- a/src/Share/Web/Share/Branches/Impl.hs +++ b/src/Share/Web/Share/Branches/Impl.hs @@ -11,6 +11,7 @@ import Data.List.NonEmpty qualified as NonEmpty import Data.Set qualified as Set import Data.Text qualified as Text import Data.Time (UTCTime) +import Servant import Share.Branch (Branch (..), branchCausals_) import Share.Codebase qualified as Codebase import Share.IDs (BranchId, BranchShortHand (..), ProjectBranchShortHand (..), ProjectShortHand (..), ProjectSlug (..), UserHandle, UserId) @@ -38,7 +39,6 @@ import Share.Web.Share.Branches.Types qualified as API import Share.Web.Share.CodeBrowsing.API qualified as API import Share.Web.Share.Projects.Types (projectToAPI) import Share.Web.Share.Types -import Servant import U.Codebase.HashTags (CausalHash) import Unison.Codebase.Path qualified as Path import Unison.HashQualified qualified as HQ @@ -64,7 +64,7 @@ getProjectBranch :: getProjectBranch projectBranchShortHand = do onNothingM missingBranch . PG.runTransaction . runMaybeT $ do branch@Branch {projectId} <- MaybeT $ Q.branchByProjectBranchShortHand projectBranchShortHand - project <- MaybeT $ Q.projectById projectId + project <- lift $ Q.expectProjectById projectId pure (project, branch) where missingBranch = respondError (EntityMissing (ErrorID "missing-project-branch") "Branch could not be found") diff --git a/src/Share/Web/Share/Releases/Impl.hs b/src/Share/Web/Share/Releases/Impl.hs index c678267..5ad30e1 100644 --- a/src/Share/Web/Share/Releases/Impl.hs +++ b/src/Share/Web/Share/Releases/Impl.hs @@ -14,6 +14,7 @@ import Control.Monad.Except import Control.Monad.Trans.Maybe import Data.List.NonEmpty qualified as NonEmpty import Data.Set qualified as Set +import Servant import Share.Codebase qualified as Codebase import Share.IDs import Share.IDs qualified as IDs @@ -42,7 +43,6 @@ import Share.Web.Share.Releases.Types import Share.Web.Share.Releases.Types qualified as API import Share.Web.Share.Types import Share.Web.UCM.Sync.Impl qualified as SyncQ -import Servant import Unison.Codebase.Path qualified as Path import Unison.HashQualified qualified as HQ import Unison.Name (Name) @@ -97,7 +97,7 @@ getProjectRelease projectReleaseShortHand = do addRequestTag "release" (IDs.toText projectReleaseShortHand) onNothingM missingRelease . PG.runTransaction . 
runMaybeT $ do release@Release {projectId} <- MaybeT $ Q.releaseByProjectReleaseShortHand projectReleaseShortHand - project <- MaybeT $ Q.projectById projectId + project <- lift $ Q.expectProjectById projectId pure (project, release) where missingRelease = respondError (EntityMissing (ErrorID "missing-project-release") "Release could not be found") @@ -285,9 +285,7 @@ getProjectReleaseEndpoint (AuthN.MaybeAuthedUserID callerUserId) userHandle proj `whenNothingM` throwError (EntityMissing (ErrorID "missing-project-release") "Release could not be found") releaseWithHandle <- forOf releaseUsers_ release \uid -> do User.handle <$> Q.userByUserId uid `whenNothingM` throwError (EntityMissing (ErrorID "missing-user") "User could not be found") - project <- - Q.projectById projectId - `whenNothingM` throwError (EntityMissing (ErrorID "missing-project") "Project could not be found") + project <- Q.expectProjectById projectId releaseWithCausalHashes <- CausalQ.expectCausalHashesByIdsOf releaseCausals_ releaseWithHandle pure (project, releaseWithCausalHashes) _authZReceipt <- AuthZ.permissionGuard $ AuthZ.checkReleaseGet callerUserId project release diff --git a/src/Share/Web/UCM/Projects/Impl.hs b/src/Share/Web/UCM/Projects/Impl.hs index d40ec76..713180a 100644 --- a/src/Share/Web/UCM/Projects/Impl.hs +++ b/src/Share/Web/UCM/Projects/Impl.hs @@ -7,6 +7,7 @@ module Share.Web.UCM.Projects.Impl (server, createProjectRelease, getBestNameLoo import Control.Monad.Except import Control.Monad.Trans.Maybe import Data.List.Extra qualified as List +import Servant import Share.Branch import Share.Branch qualified as Branch import Share.Codebase qualified as Codebase @@ -38,7 +39,6 @@ import Share.Web.Errors qualified as Errors import Share.Web.Share.Contributions.MergeDetection qualified as MergeDetection import Share.Web.UCM.Sync.HashJWT qualified as HashJWT import Share.Web.UCM.Sync.Impl qualified as SyncQ -import Servant import Unison.Share.API.Projects qualified as UCMProjects import UnliftIO qualified import UnliftIO.Concurrent qualified as UnliftIO diff --git a/src/Unison/Server/Share/DefinitionSummary.hs b/src/Unison/Server/Share/DefinitionSummary.hs index 5680afd..0b48673 100644 --- a/src/Unison/Server/Share/DefinitionSummary.hs +++ b/src/Unison/Server/Share/DefinitionSummary.hs @@ -19,6 +19,8 @@ module Unison.Server.Share.DefinitionSummary where import Data.Aeson +import Servant (Capture, QueryParam, (:>)) +import Servant.Server (err500) import Share.Backend qualified as Backend import Share.Codebase qualified as Codebase import Share.Codebase.Types (CodebaseM) @@ -29,8 +31,6 @@ import Share.Postgres.NameLookups.Ops qualified as NLOps import Share.Postgres.NameLookups.Types (PathSegments (..)) import Share.Utils.Logging qualified as Logging import Share.Web.Errors (ToServerError (..)) -import Servant (Capture, QueryParam, (:>)) -import Servant.Server (err500) import Unison.Codebase.Editor.DisplayObject (DisplayObject (..)) import Unison.Codebase.Path qualified as Path import Unison.Codebase.ShortCausalHash (ShortCausalHash) @@ -100,6 +100,14 @@ instance ToJSON TermSummary where "tag" .= tag ] +instance FromJSON TermSummary where + parseJSON = withObject "TermSummary" $ \o -> do + displayName <- o .: "displayName" + hash <- o .: "hash" + summary <- o .: "summary" + tag <- o .: "tag" + pure $ TermSummary {..} + serveTermSummary :: Referent -> Maybe Name -> @@ -166,6 +174,14 @@ instance ToJSON TypeSummary where "tag" .= tag ] +instance FromJSON TypeSummary where + parseJSON = withObject "TypeSummary" $ \o 
-> do + displayName <- o .: "displayName" + hash <- o .: "hash" + summary <- o .: "summary" + tag <- o .: "tag" + pure $ TypeSummary {..} + serveTypeSummary :: Reference -> Maybe Name -> From c3aad71b52f318403f9722b391f2dceb58980006 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 8 May 2024 16:29:33 -0700 Subject: [PATCH 08/48] Refactor summary code for reuse --- src/Unison/Server/Share/DefinitionSummary.hs | 32 +++++++++++++++----- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/src/Unison/Server/Share/DefinitionSummary.hs b/src/Unison/Server/Share/DefinitionSummary.hs index 5680afd..f91a8fd 100644 --- a/src/Unison/Server/Share/DefinitionSummary.hs +++ b/src/Unison/Server/Share/DefinitionSummary.hs @@ -11,26 +11,28 @@ module Unison.Server.Share.DefinitionSummary ( TermSummaryAPI, serveTermSummary, + termSummaryForReferent, TermSummary (..), TypeSummaryAPI, serveTypeSummary, + typeSummaryForReference, TypeSummary (..), ) where import Data.Aeson +import Servant (Capture, QueryParam, (:>)) +import Servant.Server (err500) import Share.Backend qualified as Backend import Share.Codebase qualified as Codebase import Share.Codebase.Types (CodebaseM) import Share.Postgres (QueryM (unrecoverableError)) import Share.Postgres.Hashes.Queries qualified as HashQ -import Share.Postgres.IDs (CausalId) +import Share.Postgres.IDs (BranchHashId, CausalId) import Share.Postgres.NameLookups.Ops qualified as NLOps import Share.Postgres.NameLookups.Types (PathSegments (..)) import Share.Utils.Logging qualified as Logging import Share.Web.Errors (ToServerError (..)) -import Servant (Capture, QueryParam, (:>)) -import Servant.Server (err500) import Unison.Codebase.Editor.DisplayObject (DisplayObject (..)) import Unison.Codebase.Path qualified as Path import Unison.Codebase.ShortCausalHash (ShortCausalHash) @@ -108,12 +110,22 @@ serveTermSummary :: Maybe Width -> CodebaseM e TermSummary serveTermSummary referent mayName rootCausalId relativeTo mayWidth = do + rootBranchHashId <- HashQ.expectNamespaceIdsByCausalIdsOf id rootCausalId + termSummaryForReferent referent mayName rootBranchHashId relativeTo mayWidth + +termSummaryForReferent :: + Referent -> + Maybe Name -> + BranchHashId -> + Maybe Path.Path -> + Maybe Width -> + CodebaseM e TermSummary +termSummaryForReferent referent mayName rootBranchHashId relativeTo mayWidth = do let shortHash = Referent.toShortHash referent let displayName = maybe (HQ.HashOnly shortHash) HQ.NameOnly mayName let relativeToPath = fromMaybe Path.empty relativeTo let termReference = Referent.toReference referent let v2Referent = Cv.referent1to2 referent - rootBranchHashId <- HashQ.expectNamespaceIdsByCausalIdsOf id rootCausalId sig <- Codebase.loadTypeOfReferent v2Referent case sig of Nothing -> @@ -169,11 +181,17 @@ instance ToJSON TypeSummary where serveTypeSummary :: Reference -> Maybe Name -> - CausalId -> - Maybe Path.Path -> Maybe Width -> CodebaseM e TypeSummary -serveTypeSummary reference mayName _root _relativeTo mayWidth = do +serveTypeSummary reference mayName mayWidth = do + typeSummaryForReference reference mayName mayWidth + +typeSummaryForReference :: + Reference -> + Maybe Name -> + Maybe Width -> + CodebaseM e TypeSummary +typeSummaryForReference reference mayName mayWidth = do let shortHash = Reference.toShortHash reference let displayName = maybe (HQ.HashOnly shortHash) HQ.NameOnly mayName tag <- Backend.getTypeTag reference From df1b52c273c6c2f8ec2d5bb1dfd95d624a6d857a Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 8 May 2024 17:28:39 
-0700 Subject: [PATCH 09/48] Move unsafeTransactionIO into QueryM --- src/Share/Postgres.hs | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/src/Share/Postgres.hs b/src/Share/Postgres.hs index 68b1680..64f1b3d 100644 --- a/src/Share/Postgres.hs +++ b/src/Share/Postgres.hs @@ -37,7 +37,6 @@ module Share.Postgres runSessionWithPool, tryRunSessionWithPool, unliftSession, - transactionUnsafeIO, defaultIsolationLevel, -- * query Helpers @@ -71,6 +70,13 @@ import Data.Map qualified as Map import Data.Maybe import Data.Text qualified as Text import Data.Void +import Hasql.Decoders qualified as Decoders +import Hasql.Encoders qualified as Encoders +import Hasql.Interpolate qualified as Interp +import Hasql.Pool qualified as Pool +import Hasql.Session qualified as Hasql +import Hasql.Session qualified as Session +import Hasql.Statement qualified as Hasql import Share.App import Share.Env qualified as Env import Share.Postgres.Orphans () @@ -79,13 +85,6 @@ import Share.Utils.Logging (Loggable (..)) import Share.Utils.Logging qualified as Logging import Share.Web.App import Share.Web.Errors (ErrorID (..), SomeServerError, ToServerError (..), internalServerError, respondError, someServerError) -import Hasql.Decoders qualified as Decoders -import Hasql.Encoders qualified as Encoders -import Hasql.Interpolate qualified as Interp -import Hasql.Pool qualified as Pool -import Hasql.Session qualified as Hasql -import Hasql.Session qualified as Session -import Hasql.Statement qualified as Hasql debug :: Bool debug = False @@ -144,9 +143,6 @@ data IsolationLevel | Serializable deriving stock (Show, Eq) -transactionUnsafeIO :: IO a -> Transaction e a -transactionUnsafeIO io = Transaction (Right <$> liftIO io) - -- | Run a transaction in a session transaction :: forall e a. IsolationLevel -> Mode -> Transaction e a -> Session e a transaction isoLevel mode (Transaction t) = do @@ -296,6 +292,10 @@ runSessionOrRespondError t = tryRunSession t >>= either respondError pure class Monad m => QueryM m where statement :: q -> Hasql.Statement q r -> m r + -- | Allow running IO actions in a transaction. These actions may be run multiple times if + -- the transaction is retried. + transactionUnsafeIO :: IO a -> m a + -- | Fail the transaction and whole request with an unrecoverable server error. 
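   -- Typically used to promote a missing row into a request failure,
   -- e.g. @userByUserId uid >>= maybe (unrecoverableError err) pure@.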
unrecoverableError :: (HasCallStack, ToServerError e, Loggable e, Show e) => e -> m a @@ -304,6 +304,8 @@ instance QueryM (Transaction e) where when debug $ transactionUnsafeIO $ BSC.putStrLn bs transactionStatement q s + transactionUnsafeIO io = Transaction (Right <$> liftIO io) + unrecoverableError e = Transaction (pure (Left (Unrecoverable (someServerError e)))) instance QueryM (Session e) where @@ -311,16 +313,22 @@ instance QueryM (Session e) where when debug $ liftIO $ BSC.putStrLn bs lift $ Session.statement q s + transactionUnsafeIO io = lift $ liftIO io + unrecoverableError e = throwError (Unrecoverable (someServerError e)) instance QueryM m => QueryM (ReaderT e m) where statement q s = lift $ statement q s + transactionUnsafeIO io = lift $ transactionUnsafeIO io + unrecoverableError e = lift $ unrecoverableError e instance QueryM m => QueryM (MaybeT m) where statement q s = lift $ statement q s + transactionUnsafeIO io = lift $ transactionUnsafeIO io + unrecoverableError e = lift $ unrecoverableError e prepareStatements :: Bool From 92203f9be88578c447a5d6dc952df542cba29769 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 8 May 2024 17:58:33 -0700 Subject: [PATCH 10/48] Compute search tokens --- .../BackgroundJobs/Search/DefinitionSync.hs | 77 ++++++++++++++++++- .../Search/DefinitionSync/Types.hs | 18 +++-- src/Share/Web/Share/Branches/Impl.hs | 2 +- src/Share/Web/Share/Releases/Impl.hs | 2 +- 4 files changed, 87 insertions(+), 12 deletions(-) diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync.hs b/src/Share/BackgroundJobs/Search/DefinitionSync.hs index 3c5b351..722acd2 100644 --- a/src/Share/BackgroundJobs/Search/DefinitionSync.hs +++ b/src/Share/BackgroundJobs/Search/DefinitionSync.hs @@ -1,8 +1,14 @@ +{-# LANGUAGE DataKinds #-} + module Share.BackgroundJobs.Search.DefinitionSync (worker) where +import Control.Lens +import Data.Generics.Product (HasField (..)) +import Data.Map qualified as Map +import Data.Set qualified as Set import Ki.Unlifted qualified as Ki import Share.BackgroundJobs.Monad (Background) -import Share.BackgroundJobs.Search.DefinitionSync.Types (DefinitionDocument (..), TermOrTypeSummary (..)) +import Share.BackgroundJobs.Search.DefinitionSync.Types (DefinitionDocument (..), DefnSearchToken (..), Occurrence, TermOrTypeSummary (..), VarId) import Share.BackgroundJobs.Workers (newWorker) import Share.Codebase (CodebaseM) import Share.Codebase qualified as Codebase @@ -11,7 +17,7 @@ import Share.Postgres (QueryM (transactionUnsafeIO)) import Share.Postgres qualified as PG import Share.Postgres.Cursors qualified as Cursors import Share.Postgres.Hashes.Queries qualified as HashQ -import Share.Postgres.IDs (BranchHashId) +import Share.Postgres.IDs (BranchHashId, ComponentHash) import Share.Postgres.NameLookups.Ops qualified as NLOps import Share.Postgres.Queries qualified as PG import Share.Postgres.Search.DefinitionSync qualified as DefnSyncQ @@ -23,12 +29,15 @@ import Share.Utils.Logging qualified as Logging import Share.Web.Authorization qualified as AuthZ import U.Codebase.Referent (Referent) import U.Codebase.Referent qualified as Referent +import Unison.ABT qualified as ABT import Unison.Name (Name) import Unison.Parser.Ann (Ann) +import Unison.Reference (TypeReference) import Unison.Server.Share.DefinitionSummary qualified as Summary import Unison.ShortHash (ShortHash) import Unison.Symbol (Symbol) import Unison.Type qualified as Type +import Unison.Util.Monoid qualified as Monoid import UnliftIO qualified import UnliftIO.Concurrent qualified 
as UnliftIO @@ -99,6 +108,70 @@ syncTerms syncDD bhId projectShortHand releaseVersion termsCursor = syncDD dd pure () +-- | Compute the search tokens for a term given its name, hash, and type signature +tokensForTerm :: Name -> ComponentHash -> (Type.Type Symbol Ann) -> CodebaseM e (Set (DefnSearchToken Name)) +tokensForTerm name sh sig = do + sigTokens <- typeSigTokens + let baseTokens = Set.fromList [NameToken name, HashToken sh] + pure $ baseTokens <> sigTokens + where + typeSigTokens :: CodebaseM e (Set (DefnSearchToken Name)) + typeSigTokens = undefined + +data TokenGenState = TokenGenState + { typeRefOccs :: Map TypeReference Occurrence, + varOccs :: Map VarId Occurrence, + nextVarId :: VarId + } + deriving stock (Show, Generic) + +typeSigRefTokens :: forall v ann. Type.Type v ann -> Set (DefnSearchToken TypeReference) +typeSigRefTokens = flip evalStateT initState $ ABT.cata alg + where + initState = TokenGenState mempty mempty 0 + -- Cata algebra for collecting type reference tokens from a type signature. + alg :: + ann -> + ABT.ABT Type.F v (State TokenGenState (Set (DefnSearchToken TypeReference))) -> + State TokenGenState (Set (DefnSearchToken TypeReference)) + alg _ann = \case + ABT.Var v -> do + -- TODO: Figure out how Abs handles var scoping with foralls and such so I can + -- make sure to handle these right. + _ + ABT.Cycle a -> a + ABT.Abs v r -> + -- TODO: Handle var scoping + _ + ABT.Tm tf -> case tf of + Type.Ref typeRef -> do + -- Bump the occurrence count for this type reference returning the old count. Start + -- with 0 if unset. + occ <- nextTypeRefOcc typeRef + pure $ Set.singleton $ NameMentionToken typeRef occ + Type.Arrow a b -> do + aTokens <- a + bTokens <- b + pure $ aTokens <> bTokens + Type.Ann a _kind -> a + -- At the moment we don't handle higher kinded type applications differently than regular + -- type mentions. + Type.App a b -> do + aTokens <- a + bTokens <- b + pure $ aTokens <> bTokens + Type.Effect a b -> do + aTokens <- a + bTokens <- b + pure $ aTokens <> bTokens + Type.Effects as -> Monoid.foldMapM id as + Type.Forall a -> a + Type.IntroOuter a -> a + nextTypeRefOcc :: TypeReference -> State TokenGenState Occurrence + nextTypeRefOcc typeRef = field @"typeRefOccs" . at typeRef . 
non 0 <<%= succ + nextVarId :: State TokenGenState VarId + nextVarId = field @"nextVarId" <<%= succ + syncTypes = undefined syncDefinitionToCloud :: DefinitionDocument -> Background () diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs b/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs index 091030a..f4b97ea 100644 --- a/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs +++ b/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs @@ -4,6 +4,8 @@ module Share.BackgroundJobs.Search.DefinitionSync.Types ( TermOrTypeSummary (..), DefinitionDocument (..), DefnSearchToken (..), + Occurrence (..), + VarId (..), ) where @@ -48,11 +50,11 @@ newtype Occurrence = Occurrence Int newtype VarId = VarId Int deriving newtype (Show, Read, Eq, Ord, Num, ToJSON) -data DefnSearchToken +data DefnSearchToken r = -- Allows searching by literal name NameToken Name - | -- Also includes ability mentions - NameMentionToken Name Occurrence + | -- A mention of some external type or ability + NameMentionToken r Occurrence | -- Allows searching for type sigs with type variables TypeVarToken VarId Occurrence | -- Allows searching by component hash @@ -75,14 +77,14 @@ data DefnSearchToken -- >>> hash = ComponentHash $ Hash.unsafeFromBase32HexText "abcd" -- >>> tokenToText (HashToken hash) -- "#abc0:hash" -tokenToText :: DefnSearchToken -> Text +tokenToText :: DefnSearchToken Name -> Text tokenToText = \case (NameToken n) -> Text.intercalate ":" [Name.toText n, "name"] (NameMentionToken n o) -> Text.intercalate ":" [Name.toText n, "mention", tShow o] (TypeVarToken v o) -> Text.intercalate ":" ["_", "var", tShow v, tShow o] (HashToken h) -> Text.intercalate ":" [into @Text $ PrefixedHash @"#" h, "hash"] -tokenFromText :: Text -> Maybe DefnSearchToken +tokenFromText :: Text -> Maybe (DefnSearchToken Name) tokenFromText t = case Text.splitOn ":" t of [name, "name"] -> NameToken <$> Name.parseText name [name, "mention", occ] -> NameMentionToken <$> (Name.parseText name) <*> readMaybe (Text.unpack occ) @@ -98,15 +100,15 @@ data DefinitionDocument = DefinitionDocument releaseVersion :: ReleaseVersion, fqn :: Name, hash :: ShortHash, - tokens :: Set DefnSearchToken, + tokens :: Set (DefnSearchToken Name), payload :: TermOrTypeSummary } deriving (Show) -instance ToJSON DefnSearchToken where +instance ToJSON (DefnSearchToken Name) where toJSON = String . 
tokenToText -instance FromJSON DefnSearchToken where +instance FromJSON (DefnSearchToken Name) where parseJSON = withText "DefnSearchToken" $ \t -> maybe (fail $ "Invalid DefnSearchToken: " <> Text.unpack t) pure $ tokenFromText t diff --git a/src/Share/Web/Share/Branches/Impl.hs b/src/Share/Web/Share/Branches/Impl.hs index d4b3209..6a9b344 100644 --- a/src/Share/Web/Share/Branches/Impl.hs +++ b/src/Share/Web/Share/Branches/Impl.hs @@ -221,7 +221,7 @@ projectBranchTypeSummaryEndpoint (AuthN.MaybeAuthedUserID callerUserId) userHand causalId <- resolveRootHash codebase branchHead rootHash Codebase.cachedCodebaseResponse authZReceipt codebaseLoc "project-branch-type-summary" cacheParams causalId $ do Codebase.runCodebaseTransaction codebase $ do - serveTypeSummary ref mayName causalId relativeTo renderWidth + serveTypeSummary ref mayName renderWidth where projectBranchShortHand = ProjectBranchShortHand {userHandle, projectSlug, contributorHandle, branchName} cacheParams = [IDs.toText projectBranchShortHand, toUrlPiece ref, maybe "" Name.toText mayName, tShow $ fromMaybe Path.empty relativeTo, foldMap toUrlPiece renderWidth] diff --git a/src/Share/Web/Share/Releases/Impl.hs b/src/Share/Web/Share/Releases/Impl.hs index 5ad30e1..34f4b76 100644 --- a/src/Share/Web/Share/Releases/Impl.hs +++ b/src/Share/Web/Share/Releases/Impl.hs @@ -218,7 +218,7 @@ projectReleaseTypeSummaryEndpoint (AuthN.MaybeAuthedUserID callerUserId) userHan let codebase = Codebase.codebaseEnv authZReceipt codebaseLoc Codebase.cachedCodebaseResponse authZReceipt codebaseLoc "project-release-type-summary" cacheParams releaseHead $ do Codebase.runCodebaseTransaction codebase $ do - serveTypeSummary ref mayName releaseHead relativeTo renderWidth + serveTypeSummary ref mayName renderWidth where projectReleaseShortHand = ProjectReleaseShortHand {userHandle, projectSlug, releaseVersion} cacheParams = [IDs.toText projectReleaseShortHand, toUrlPiece ref, maybe "" Name.toText mayName, tShow $ fromMaybe Path.empty relativeTo, foldMap toUrlPiece renderWidth] From 63412603185a82b19015035f81d660f2eed35dd3 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 17 Jul 2024 11:20:14 -0700 Subject: [PATCH 11/48] Implement type -> tokens converter --- package.yaml | 1 + share-api.cabal | 2 + .../BackgroundJobs/Search/DefinitionSync.hs | 119 ++++++++++-------- .../Search/DefinitionSync/Types.hs | 45 +++++-- src/Share/Postgres/Cursors.hs | 20 ++- 5 files changed, 121 insertions(+), 66 deletions(-) diff --git a/package.yaml b/package.yaml index ddb6db3..a120944 100644 --- a/package.yaml +++ b/package.yaml @@ -68,6 +68,7 @@ dependencies: - memory - mmorph - monad-validate +- monoidal-containers - mtl - network - network-simple diff --git a/share-api.cabal b/share-api.cabal index d96722c..b3a6719 100644 --- a/share-api.cabal +++ b/share-api.cabal @@ -231,6 +231,7 @@ library , memory , mmorph , monad-validate + , monoidal-containers , mtl , network , network-simple @@ -371,6 +372,7 @@ executable share-api , memory , mmorph , monad-validate + , monoidal-containers , mtl , network , network-simple diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync.hs b/src/Share/BackgroundJobs/Search/DefinitionSync.hs index 722acd2..c648af5 100644 --- a/src/Share/BackgroundJobs/Search/DefinitionSync.hs +++ b/src/Share/BackgroundJobs/Search/DefinitionSync.hs @@ -3,12 +3,16 @@ module Share.BackgroundJobs.Search.DefinitionSync (worker) where import Control.Lens +import Control.Monad.Except +import Data.Either (fromLeft) import Data.Generics.Product (HasField 
(..)) import Data.Map qualified as Map +import Data.Map.Monoidal.Strict (MonoidalMap) +import Data.Map.Monoidal.Strict qualified as MonMap import Data.Set qualified as Set import Ki.Unlifted qualified as Ki import Share.BackgroundJobs.Monad (Background) -import Share.BackgroundJobs.Search.DefinitionSync.Types (DefinitionDocument (..), DefnSearchToken (..), Occurrence, TermOrTypeSummary (..), VarId) +import Share.BackgroundJobs.Search.DefinitionSync.Types (DefinitionDocument (..), DefnSearchToken (..), Occurrence (Occurrence), TermOrTypeSummary (..), VarId) import Share.BackgroundJobs.Workers (newWorker) import Share.Codebase (CodebaseM) import Share.Codebase qualified as Codebase @@ -38,9 +42,13 @@ import Unison.ShortHash (ShortHash) import Unison.Symbol (Symbol) import Unison.Type qualified as Type import Unison.Util.Monoid qualified as Monoid +import Unison.Var qualified as Var import UnliftIO qualified import UnliftIO.Concurrent qualified as UnliftIO +data DefnIndexingFailure + = NoTypeSigForTerm Name Referent + -- | How often to poll for new releases to sync in seconds. pollingIntervalSeconds :: Int pollingIntervalSeconds = 10 @@ -82,73 +90,78 @@ syncRelease authZReceipt syncDD releaseId = fmap (fromMaybe ()) . runMaybeT $ do syncTypes syncDD projectShortHand releaseVersion typesCursor syncTerms :: + _ -> (DefinitionDocument -> CodebaseM e ()) -> BranchHashId -> ProjectShortHand -> ReleaseVersion -> Cursors.PGCursor (Name, Referent) -> - CodebaseM e () -syncTerms syncDD bhId projectShortHand releaseVersion termsCursor = - Cursors.fetchN defnBatchSize termsCursor >>= \case - Nothing -> pure () - Just terms -> do - for terms \(fqn, ref) -> do - let sh = Referent.toShortHash ref - sig <- Codebase.loadTypeOfReferent ref - termSummary <- Summary.termSummaryForReferent ref sig (Just fqn) bhId Nothing Nothing - let dd = - DefinitionDocument - { projectShortHand, - releaseVersion, - fqn, - hash = sh, - tokens, - payload = TermSummary termSummary - } - syncDD dd - pure () + CodebaseM e [DefnIndexingFailure] +syncTerms np syncDD bhId projectShortHand releaseVersion termsCursor = + Cursors.foldBatched termsCursor defnBatchSize \terms -> do + terms & foldMapM \(fqn, ref) -> fmap (either (pure @[]) (const [])) . 
runExceptT $ do + typ <- lift (Codebase.loadTypeOfReferent ref) `whenNothingM` throwError (NoTypeSigForTerm fqn ref) + termSummary <- lift $ Summary.termSummaryForReferent ref typ (Just fqn) bhId Nothing Nothing + let sh = Referent.toShortHash ref + let refTokens = tokensForTerm fqn ref typ termSummary + ppedForReferences + let dd = + DefinitionDocument + { projectShortHand, + releaseVersion, + fqn, + hash = sh, + tokens, + payload = TermSummary termSummary + } + syncDD dd -- | Compute the search tokens for a term given its name, hash, and type signature -tokensForTerm :: Name -> ComponentHash -> (Type.Type Symbol Ann) -> CodebaseM e (Set (DefnSearchToken Name)) -tokensForTerm name sh sig = do - sigTokens <- typeSigTokens - let baseTokens = Set.fromList [NameToken name, HashToken sh] - pure $ baseTokens <> sigTokens - where - typeSigTokens :: CodebaseM e (Set (DefnSearchToken Name)) - typeSigTokens = undefined +tokensForTerm :: Name -> Referent -> Type.Type v a -> Summary.TermSummary -> Set (DefnSearchToken TypeReference) +tokensForTerm name ref typ (Summary.TermSummary {tag}) = do + let sh = Referent.toShortHash ref + baseTokens = Set.fromList [NameToken name, HashToken sh] + tagTokens = Set.singleton $ TermTagToken tag + in baseTokens <> typeSigTokens typ <> tagTokens + +data TokenGenEnv v = TokenGenEnv + { varIds :: Map v VarId + } + deriving stock (Show, Generic) -data TokenGenState = TokenGenState - { typeRefOccs :: Map TypeReference Occurrence, - varOccs :: Map VarId Occurrence, - nextVarId :: VarId +data TokenGenState v = TokenGenState + { nextVarId :: VarId } deriving stock (Show, Generic) -typeSigRefTokens :: forall v ann. Type.Type v ann -> Set (DefnSearchToken TypeReference) -typeSigRefTokens = flip evalStateT initState $ ABT.cata alg +type TokenGenM v = ReaderT (TokenGenEnv v) (State (TokenGenState v)) + +-- | Compute var occurrence and type ref occurrence search tokens from a type signature. +typeSigTokens :: forall v ann. (Var.Var v) => Type.Type v ann -> Set (DefnSearchToken TypeReference) +typeSigTokens typ = + let occMap :: MonoidalMap (Either VarId TypeReference) Occurrence + occMap = flip evalState initState . runReaderT mempty $ ABT.cata alg typ + in MonMap.toList occMap & foldMap \case + (Left vId, occ) -> Set.singleton (TypeVarToken vId occ) + (Right typeRef, occ) -> Set.singleton (TypeMentionToken typeRef occ) where - initState = TokenGenState mempty mempty 0 + initState = TokenGenState 0 -- Cata algebra for collecting type reference tokens from a type signature. alg :: ann -> - ABT.ABT Type.F v (State TokenGenState (Set (DefnSearchToken TypeReference))) -> - State TokenGenState (Set (DefnSearchToken TypeReference)) + ABT.ABT Type.F v (TokenGenM v (MonoidalMap (Either VarId TypeReference) Occurrence)) -> + TokenGenM v (MonoidalMap (Either VarId TypeReference) Occurrence) alg _ann = \case ABT.Var v -> do - -- TODO: Figure out how Abs handles var scoping with foralls and such so I can - -- make sure to handle these right. - _ + vId <- varIdFor v + pure $ MonMap.singleton (Left vId) 1 ABT.Cycle a -> a - ABT.Abs v r -> - -- TODO: Handle var scoping - _ + ABT.Abs v r -> do + vId <- nextVarId + local (field @"varIds" . at v ?~ vId) r ABT.Tm tf -> case tf of Type.Ref typeRef -> do - -- Bump the occurrence count for this type reference returning the old count. Start - -- with 0 if unset. 
- occ <- nextTypeRefOcc typeRef - pure $ Set.singleton $ NameMentionToken typeRef occ + pure $ MonMap.singleton (Right typeRef) 1 Type.Arrow a b -> do aTokens <- a bTokens <- b @@ -167,10 +180,14 @@ typeSigRefTokens = flip evalStateT initState $ ABT.cata alg Type.Effects as -> Monoid.foldMapM id as Type.Forall a -> a Type.IntroOuter a -> a - nextTypeRefOcc :: TypeReference -> State TokenGenState Occurrence - nextTypeRefOcc typeRef = field @"typeRefOccs" . at typeRef . non 0 <<%= succ - nextVarId :: State TokenGenState VarId + nextVarId :: TokenGenM v VarId nextVarId = field @"nextVarId" <<%= succ + varIdFor :: v -> TokenGenM v VarId + varIdFor v = do + asks (Map.lookup v . varIds) >>= \case + Just vid -> pure vid + Nothing -> do + error "typeRefTokens: Found variable without corresponding Abs in type signature" syncTypes = undefined diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs b/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs index f4b97ea..d862971 100644 --- a/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs +++ b/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs @@ -1,4 +1,5 @@ {-# LANGUAGE DataKinds #-} +{-# LANGUAGE RecordWildCards #-} module Share.BackgroundJobs.Search.DefinitionSync.Types ( TermOrTypeSummary (..), @@ -10,6 +11,7 @@ module Share.BackgroundJobs.Search.DefinitionSync.Types where import Data.Aeson +import Data.Monoid (Sum (..)) import Data.Text qualified as Text import Share.IDs (PrefixedHash (..), ProjectShortHand, ReleaseVersion) import Share.IDs qualified as IDs @@ -17,24 +19,39 @@ import Share.Prelude import U.Codebase.HashTags (ComponentHash) import Unison.Hash qualified as Hash import Unison.Name (Name) -import Unison.Server.Share.DefinitionSummary (TermSummary, TypeSummary) +import Unison.Server.Share.DefinitionSummary (TermSummary (..), TypeSummary (..)) +import Unison.Server.Types (TermTag, TypeTag) import Unison.ShortHash (ShortHash) import Unison.ShortHash qualified as SH import Unison.Syntax.Name qualified as Name -data TermOrTypeSummary = TermSummary TermSummary | TypeSummary TypeSummary +data TermOrTypeSummary = ToTTermSummary TermSummary | ToTTypeSummary TypeSummary deriving (Show) instance ToJSON TermOrTypeSummary where - toJSON (TermSummary ts) = object ["kind" .= ("term" :: Text), "payload" .= ts] - toJSON (TypeSummary ts) = object ["kind" .= ("type" :: Text), "payload" .= ts] + toJSON (ToTTermSummary ts) = object ["kind" .= ("term" :: Text), "payload" .= ts] + toJSON (ToTTypeSummary ts) = object ["kind" .= ("type" :: Text), "payload" .= ts] instance FromJSON TermOrTypeSummary where parseJSON = withObject "TermOrTypeSummary" $ \o -> do kind :: Text <- o .: "kind" case kind of - "term" -> TermSummary <$> o .: "payload" - "type" -> TypeSummary <$> o .: "payload" + "term" -> do + ts <- o .: "payload" + ts & withObject "TermSummary" \o -> do + displayName <- o .: "displayName" + hash <- o .: "hash" + summary <- o .: "summary" + tag <- o .: "tag" + pure $ ToTTermSummary $ TermSummary {..} + "type" -> do + ts <- o .: "payload" + ts & withObject "TypeSummary" \o -> do + displayName <- o .: "displayName" + hash <- o .: "hash" + summary <- o .: "summary" + tag <- o .: "tag" + pure $ ToTTypeSummary $ TypeSummary {..} _ -> fail $ "Invalid kind: " <> Text.unpack kind -- | The number of occurences of this token in the search query. 
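The Sum-based deriving that the next hunk adds to Occurrence is what
turns map unions into mention counting: each type-variable or
type-reference mention contributes a singleton MonoidalMap entry with
count 1, and combining maps adds the counts per key. A minimal
standalone sketch of the idea, assuming only the monoidal-containers
package (not part of the patch itself):

    {-# LANGUAGE DerivingVia #-}
    {-# LANGUAGE ImportQualifiedPost #-}

    import Data.Map.Monoidal.Strict qualified as MonMap
    import Data.Monoid (Sum (..))

    newtype Occurrence = Occurrence Int
      deriving (Show, Eq, Ord)
      deriving (Semigroup, Monoid) via Sum Int

    -- (<>) on MonoidalMap unions the keys and combines collided
    -- values with their Semigroup, so two mentions of "List" here
    -- accumulate to Occurrence 2.
    mentions :: MonMap.MonoidalMap String Occurrence
    mentions =
      MonMap.singleton "List" (Occurrence 1)
        <> MonMap.singleton "List" (Occurrence 1)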
@@ -43,6 +60,8 @@ instance FromJSON TermOrTypeSummary where -- {NameMention "Text" (Occurrence 1), NameMention "Text" (Occurrence 2), NameMention "Text" (Occurrence 3)} newtype Occurrence = Occurrence Int deriving newtype (Show, Read, Eq, Ord, Num, ToJSON) + deriving (Semigroup) via Sum Int + deriving (Monoid) via Sum Int -- | An id for identifying unique type variables mentioned in a query. -- E.g. 'map : (a -> b) -> List a -> List b' would have two type var Ids, one for a, one @@ -54,11 +73,15 @@ data DefnSearchToken r = -- Allows searching by literal name NameToken Name | -- A mention of some external type or ability - NameMentionToken r Occurrence + TypeMentionToken r Occurrence | -- Allows searching for type sigs with type variables TypeVarToken VarId Occurrence | -- Allows searching by component hash - HashToken ComponentHash + -- Note: not actually a _short_ hash, it's a full hash with the referent info tagged + -- on. + HashToken ShortHash + | TermTagToken TermTag + | TypeTagToken TypeTag deriving (Show, Eq, Ord) -- | Converts a DefnSearchToken to a prefix-searchable text string. @@ -66,7 +89,7 @@ data DefnSearchToken r -- >>> tokenToText (NameToken (Name.unsafeParseText "List.map")) -- "List.map:name" -- --- >>> tokenToText (NameMentionToken (Name.unsafeParseText "List.map") (Occurrence 1)) +-- >>> tokenToText (TypeMentionToken (Name.unsafeParseText "List.map") (Occurrence 1)) -- "List.map:mention:1" -- -- >>> tokenToText (TypeVarToken (VarId 1) (Occurrence 1)) @@ -80,14 +103,14 @@ data DefnSearchToken r tokenToText :: DefnSearchToken Name -> Text tokenToText = \case (NameToken n) -> Text.intercalate ":" [Name.toText n, "name"] - (NameMentionToken n o) -> Text.intercalate ":" [Name.toText n, "mention", tShow o] + (TypeMentionToken n o) -> Text.intercalate ":" [Name.toText n, "mention", tShow o] (TypeVarToken v o) -> Text.intercalate ":" ["_", "var", tShow v, tShow o] (HashToken h) -> Text.intercalate ":" [into @Text $ PrefixedHash @"#" h, "hash"] tokenFromText :: Text -> Maybe (DefnSearchToken Name) tokenFromText t = case Text.splitOn ":" t of [name, "name"] -> NameToken <$> Name.parseText name - [name, "mention", occ] -> NameMentionToken <$> (Name.parseText name) <*> readMaybe (Text.unpack occ) + [name, "mention", occ] -> TypeMentionToken <$> (Name.parseText name) <*> readMaybe (Text.unpack occ) [_, "var", vid, occ] -> TypeVarToken <$> readMaybe (Text.unpack vid) <*> readMaybe (Text.unpack occ) [prefixedHash, "hash"] -> case Text.stripPrefix "#" prefixedHash of diff --git a/src/Share/Postgres/Cursors.hs b/src/Share/Postgres/Cursors.hs index d9b4139..b6fc2c7 100644 --- a/src/Share/Postgres/Cursors.hs +++ b/src/Share/Postgres/Cursors.hs @@ -5,6 +5,7 @@ module Share.Postgres.Cursors ( newRowCursor, newColCursor, fetchN, + foldBatched, PGCursor, ) where @@ -20,7 +21,7 @@ import System.Random (randomIO) data PGCursor result where PGCursor :: forall row result. - DecodeRow row {- decoder for original row -} => + (DecodeRow row {- decoder for original row -}) => Text {- cursor name -} -> (row -> result {- mapper for Functor instance -}) -> PGCursor result @@ -38,7 +39,7 @@ newColCursor namePrefix query = do -- -- This cursor will be closed when the transaction ends, and must not be used outside of the -- transaction in which it was created. -newRowCursor :: forall r m. QueryM m => DecodeRow r => Text -> Sql -> m (PGCursor r) +newRowCursor :: forall r m. 
(QueryM m) => (DecodeRow r) => Text -> Sql -> m (PGCursor r) newRowCursor namePrefix query = do uuid <- transactionUnsafeIO $ randomIO @UUID let cursorName = namePrefix <> "_" <> into @Text uuid @@ -53,10 +54,21 @@ newRowCursor namePrefix query = do pure $ PGCursor cursorName id -- | Fetch UP TO the next N results from the cursor. If there are no more rows, returns Nothing. -fetchN :: forall r m. QueryM m => Int32 -> PGCursor r -> m (Maybe (NonEmpty r)) -fetchN n (PGCursor cursorName f) = do +fetchN :: forall r m. (QueryM m) => PGCursor r -> Int32 -> m (Maybe (NonEmpty r)) +fetchN (PGCursor cursorName f) n = do rows <- queryListRows [sql| FETCH FORWARD #{n} FROM #{cursorName} |] pure $ NEL.nonEmpty (f <$> rows) + +-- | Fold over the cursor in batches of N rows. +-- N.B. Fold is strict in the accumulator. +foldBatched :: forall r m a. (QueryM m, Monoid a) => PGCursor r -> Int32 -> (NonEmpty r -> m a) -> m a +foldBatched cursor batchSize f = do + batch <- fetchN cursor batchSize + case batch of + Nothing -> pure mempty + Just rows -> do + acc <- f rows + (acc <>) <$!> foldBatched cursor batchSize f From 20914b31774c43b2f8a64ff88dac9dd7d80ac5f6 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 17 Jul 2024 11:30:26 -0700 Subject: [PATCH 12/48] Bump unison dep --- unison | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unison b/unison index 605e062..cdab05d 160000 --- a/unison +++ b/unison @@ -1 +1 @@ -Subproject commit 605e062bcfc3118ee83ca35c7d86a4036adea22f +Subproject commit cdab05d25992537ea1ce699a1dbe2d4413810b41 From 13fc6c8d9875446961554eadf1398e9865fdb2f3 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 17 Jul 2024 11:30:32 -0700 Subject: [PATCH 13/48] Mostly finished creating term definition docs --- .../BackgroundJobs/Search/DefinitionSync.hs | 74 +++++++++++-------- .../Search/DefinitionSync/Types.hs | 46 ++++++------ src/Share/Postgres/Search/DefinitionSync.hs | 6 ++ 3 files changed, 71 insertions(+), 55 deletions(-) diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync.hs b/src/Share/BackgroundJobs/Search/DefinitionSync.hs index c648af5..24e1c80 100644 --- a/src/Share/BackgroundJobs/Search/DefinitionSync.hs +++ b/src/Share/BackgroundJobs/Search/DefinitionSync.hs @@ -10,6 +10,7 @@ import Data.Map qualified as Map import Data.Map.Monoidal.Strict (MonoidalMap) import Data.Map.Monoidal.Strict qualified as MonMap import Data.Set qualified as Set +import Data.Set.Lens (setOf) import Ki.Unlifted qualified as Ki import Share.BackgroundJobs.Monad (Background) import Share.BackgroundJobs.Search.DefinitionSync.Types (DefinitionDocument (..), DefnSearchToken (..), Occurrence (Occurrence), TermOrTypeSummary (..), VarId) @@ -23,6 +24,7 @@ import Share.Postgres.Cursors qualified as Cursors import Share.Postgres.Hashes.Queries qualified as HashQ import Share.Postgres.IDs (BranchHashId, ComponentHash) import Share.Postgres.NameLookups.Ops qualified as NLOps +import Share.Postgres.NameLookups.Types qualified as NL import Share.Postgres.Queries qualified as PG import Share.Postgres.Search.DefinitionSync qualified as DefnSyncQ import Share.Prelude @@ -34,14 +36,21 @@ import Share.Web.Authorization qualified as AuthZ import U.Codebase.Referent (Referent) import U.Codebase.Referent qualified as Referent import Unison.ABT qualified as ABT +import Unison.HashQualifiedPrime qualified as HQ' +import Unison.LabeledDependency qualified as LD import Unison.Name (Name) +import Unison.Name qualified as Name import Unison.Parser.Ann (Ann) +import Unison.PrettyPrintEnv qualified 
as PPE +import Unison.PrettyPrintEnvDecl qualified as PPED +import Unison.PrettyPrintEnvDecl.Postgres qualified as PPEPostgres import Unison.Reference (TypeReference) import Unison.Server.Share.DefinitionSummary qualified as Summary import Unison.ShortHash (ShortHash) import Unison.Symbol (Symbol) import Unison.Type qualified as Type import Unison.Util.Monoid qualified as Monoid +import Unison.Util.Set qualified as Set import Unison.Var qualified as Var import UnliftIO qualified import UnliftIO.Concurrent qualified as UnliftIO @@ -62,14 +71,15 @@ worker scope = newWorker scope "search:defn-sync" $ forever do liftIO $ UnliftIO.threadDelay $ pollingIntervalSeconds * 1000000 Logging.logInfoText "Syncing definitions..." authZReceipt <- AuthZ.backgroundJobAuthZ - toIO <- UnliftIO.askRunInIO - let syncDD = transactionUnsafeIO . toIO . syncDefinitionToCloud PG.runTransaction $ do mayReleaseId <- DefnSyncQ.claimUnsyncedRelease for_ mayReleaseId (syncRelease authZReceipt syncDD) -syncRelease :: AuthZ.AuthZReceipt -> (DefinitionDocument -> CodebaseM e ()) -> ReleaseId -> PG.Transaction e () -syncRelease authZReceipt syncDD releaseId = fmap (fromMaybe ()) . runMaybeT $ do +syncRelease :: + AuthZ.AuthZReceipt -> + ReleaseId -> + PG.Transaction e () +syncRelease authZReceipt releaseId = fmap (fromMaybe ()) . runMaybeT $ do Release {projectId, squashedCausal, version = releaseVersion} <- lift $ PG.expectReleaseById releaseId Project {slug, ownerUserId, visibility = projectVis} <- lift $ PG.expectProjectById projectId User {handle, visibility = userVis} <- PG.expectUserByUserId ownerUserId @@ -79,42 +89,50 @@ syncRelease authZReceipt syncDD releaseId = fmap (fromMaybe ()) . runMaybeT $ do guard $ userVis == UserPublic lift $ do bhId <- HashQ.expectNamespaceIdsByCausalIdsOf id squashedCausal - nlReceipt <- NLOps.ensureNameLookupForBranchId bhId + namesPerspective <- NLOps.namesPerspectiveForRootAndPath bhId (NL.PathSegments []) + let nlReceipt = NL.nameLookupReceipt namesPerspective let codebaseLoc = Codebase.codebaseLocationForProjectRelease ownerUserId let codebase = Codebase.codebaseEnv authZReceipt codebaseLoc Codebase.codebaseMToTransaction codebase $ do termsCursor <- lift $ NLOps.termsWithinNamespace nlReceipt bhId let projectShortHand = ProjectShortHand handle slug - syncTerms syncDD bhId projectShortHand releaseVersion termsCursor + syncTerms namesPerspective bhId projectShortHand releaseVersion termsCursor typesCursor <- lift $ NLOps.typesWithinNamespace nlReceipt bhId - syncTypes syncDD projectShortHand releaseVersion typesCursor + syncTypes projectShortHand releaseVersion typesCursor syncTerms :: - _ -> - (DefinitionDocument -> CodebaseM e ()) -> + NL.NamesPerspective -> BranchHashId -> ProjectShortHand -> ReleaseVersion -> Cursors.PGCursor (Name, Referent) -> CodebaseM e [DefnIndexingFailure] -syncTerms np syncDD bhId projectShortHand releaseVersion termsCursor = +syncTerms namesPerspective bhId projectShortHand releaseVersion termsCursor = Cursors.foldBatched termsCursor defnBatchSize \terms -> do - terms & foldMapM \(fqn, ref) -> fmap (either (pure @[]) (const [])) . 
runExceptT $ do - typ <- lift (Codebase.loadTypeOfReferent ref) `whenNothingM` throwError (NoTypeSigForTerm fqn ref) - termSummary <- lift $ Summary.termSummaryForReferent ref typ (Just fqn) bhId Nothing Nothing - let sh = Referent.toShortHash ref - let refTokens = tokensForTerm fqn ref typ termSummary - ppedForReferences - let dd = - DefinitionDocument - { projectShortHand, - releaseVersion, - fqn, - hash = sh, - tokens, - payload = TermSummary termSummary - } - syncDD dd + docs <- + terms & foldMapM \(fqn, ref) -> fmap (either (pure @[]) (const [])) . runExceptT $ do + typ <- lift (Codebase.loadTypeOfReferent ref) `whenNothingM` throwError (NoTypeSigForTerm fqn ref) + termSummary <- lift $ Summary.termSummaryForReferent ref typ (Just fqn) bhId Nothing Nothing + let sh = Referent.toShortHash ref + let refTokens = tokensForTerm fqn ref typ termSummary + let deps = setOf (folded . folded . to LD.TypeReference) refTokens + pped <- PPEPostgres.ppedForReferences namesPerspective deps + let ppe = PPED.unsuffixifiedPPE pped + let namedTokens = + refTokens & Set.mapMaybe \token -> do + fqn <- traverse (PPE.types ppe) token + pure $ Name.lastSegment . HQ'.toName <$> fqn + let dd = + DefinitionDocument + { projectShortHand, + releaseVersion, + fqn, + hash = sh, + tokens = namedTokens, + payload = ToTTermSummary termSummary + } + pure dd + PG.insertDefinitionDocuments docs -- | Compute the search tokens for a term given its name, hash, and type signature tokensForTerm :: Name -> Referent -> Type.Type v a -> Summary.TermSummary -> Set (DefnSearchToken TypeReference) @@ -190,7 +208,3 @@ typeSigTokens typ = error "typeRefTokens: Found variable without corresponding Abs in type signature" syncTypes = undefined - -syncDefinitionToCloud :: DefinitionDocument -> Background () -syncDefinitionToCloud dd = do - _ diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs b/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs index d862971..e7cddca 100644 --- a/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs +++ b/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs @@ -19,6 +19,7 @@ import Share.Prelude import U.Codebase.HashTags (ComponentHash) import Unison.Hash qualified as Hash import Unison.Name (Name) +import Unison.NameSegment (NameSegment) import Unison.Server.Share.DefinitionSummary (TermSummary (..), TypeSummary (..)) import Unison.Server.Types (TermTag, TypeTag) import Unison.ShortHash (ShortHash) @@ -100,41 +101,36 @@ data DefnSearchToken r -- >>> hash = ComponentHash $ Hash.unsafeFromBase32HexText "abcd" -- >>> tokenToText (HashToken hash) -- "#abc0:hash" -tokenToText :: DefnSearchToken Name -> Text -tokenToText = \case - (NameToken n) -> Text.intercalate ":" [Name.toText n, "name"] - (TypeMentionToken n o) -> Text.intercalate ":" [Name.toText n, "mention", tShow o] - (TypeVarToken v o) -> Text.intercalate ":" ["_", "var", tShow v, tShow o] - (HashToken h) -> Text.intercalate ":" [into @Text $ PrefixedHash @"#" h, "hash"] - -tokenFromText :: Text -> Maybe (DefnSearchToken Name) -tokenFromText t = case Text.splitOn ":" t of - [name, "name"] -> NameToken <$> Name.parseText name - [name, "mention", occ] -> TypeMentionToken <$> (Name.parseText name) <*> readMaybe (Text.unpack occ) - [_, "var", vid, occ] -> TypeVarToken <$> readMaybe (Text.unpack vid) <*> readMaybe (Text.unpack occ) - [prefixedHash, "hash"] -> - case Text.stripPrefix "#" prefixedHash of - Just hash -> HashToken . 
into @ComponentHash <$> Hash.fromBase32HexText hash - Nothing -> Nothing - _ -> Nothing +-- tokenToText :: DefnSearchToken Name -> Text +-- tokenToText = \case +-- (NameToken n) -> Text.intercalate ":" [Name.toText n, "name"] +-- (TypeMentionToken n o) -> Text.intercalate ":" [Name.toText n, "mention", tShow o] +-- (TypeVarToken v o) -> Text.intercalate ":" ["_", "var", tShow v, tShow o] +-- (HashToken h) -> Text.intercalate ":" [into @Text $ PrefixedHash @"#" h, "hash"] + +-- tokenFromText :: Text -> Maybe (DefnSearchToken Name) +-- tokenFromText t = case Text.splitOn ":" t of +-- [name, "name"] -> NameToken <$> Name.parseText name +-- [name, "mention", occ] -> TypeMentionToken <$> (Name.parseText name) <*> readMaybe (Text.unpack occ) +-- [_, "var", vid, occ] -> TypeVarToken <$> readMaybe (Text.unpack vid) <*> readMaybe (Text.unpack occ) +-- [prefixedHash, "hash"] -> +-- case Text.stripPrefix "#" prefixedHash of +-- Just hash -> HashToken . into @ComponentHash <$> Hash.fromBase32HexText hash +-- Nothing -> Nothing +-- _ -> Nothing data DefinitionDocument = DefinitionDocument { projectShortHand :: ProjectShortHand, releaseVersion :: ReleaseVersion, fqn :: Name, hash :: ShortHash, - tokens :: Set (DefnSearchToken Name), + -- For now we only index types by their final name segment, may need to revisit this + -- in the future. + tokens :: Set (DefnSearchToken NameSegment), payload :: TermOrTypeSummary } deriving (Show) -instance ToJSON (DefnSearchToken Name) where - toJSON = String . tokenToText - -instance FromJSON (DefnSearchToken Name) where - parseJSON = withText "DefnSearchToken" $ \t -> - maybe (fail $ "Invalid DefnSearchToken: " <> Text.unpack t) pure $ tokenFromText t - -- | Formats a DefinitionDocument into a documentName -- -- >>> projectShortHand = IDs.ProjectShortHand "unison" "base" diff --git a/src/Share/Postgres/Search/DefinitionSync.hs b/src/Share/Postgres/Search/DefinitionSync.hs index 44f321c..1e0ddbb 100644 --- a/src/Share/Postgres/Search/DefinitionSync.hs +++ b/src/Share/Postgres/Search/DefinitionSync.hs @@ -6,6 +6,7 @@ module Share.Postgres.Search.DefinitionSync ) where +import Share.BackgroundJobs.Search.DefinitionSync.Types import Share.IDs (ReleaseId) import Share.Postgres @@ -35,3 +36,8 @@ claimUnsyncedRelease = do WHERE global_definition_search_release_queue.id = chosen_release.id RETURNING chosen_release.id |] + +-- | Save definition documents to be indexed for search. +insertDefinitionDocuments :: [DefinitionDocument] -> Transaction e () +insertDefinitionDocuments docs = do + _ From b6ae6838dd63a170f47fde7730cbd177140aa188 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 17 Jul 2024 15:28:10 -0700 Subject: [PATCH 14/48] Mostly done syncTerms --- sql/2024-05-07-00-00_defn_search_sync.sql | 23 +++++ .../BackgroundJobs/Search/DefinitionSync.hs | 26 ++--- .../Search/DefinitionSync/Types.hs | 35 +------ src/Share/Postgres/Search/DefinitionSync.hs | 96 ++++++++++++++++++- 4 files changed, 135 insertions(+), 45 deletions(-) diff --git a/sql/2024-05-07-00-00_defn_search_sync.sql b/sql/2024-05-07-00-00_defn_search_sync.sql index f2cb8dd..b7f96a4 100644 --- a/sql/2024-05-07-00-00_defn_search_sync.sql +++ b/sql/2024-05-07-00-00_defn_search_sync.sql @@ -1,3 +1,7 @@ +-- Allows us to create composite indexes over traditionally non-GIN indexable types. +-- In this case it allows us to include the project_id and release_id in the GIN index for search tokens. 
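+-- (Stock GIN only provides operator classes for types like tsvector and
+-- arrays; btree_gin supplies B-tree-equivalent GIN operator classes for
+-- scalar columns such as these UUID ids, which is what makes the
+-- composite index below possible.)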
+CREATE EXTENSION IF NOT EXISTS btree_gin; + -- New table for coordinating background job for syncing global definitions for search. -- Table of all releases which have been published, but not yet synced to the global definition search index. @@ -5,3 +9,22 @@ CREATE TABLE global_definition_search_release_queue ( release_id UUID PRIMARY KEY REFERENCES releases(id) ON DELETE CASCADE, created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP ); + +CREATE TABLE global_definition_search_docs ( + project_id UUID NOT NULL REFERENCES projects(id) ON DELETE CASCADE, + release_id UUID NOT NULL REFERENCES releases(id) ON DELETE CASCADE, + -- Fully qualified name + name TEXT NOT NULL, + search_tokens TSVECTOR NOT NULL, + -- Contains the rendered type signature, type, hash, etc. + -- so we don't need to look up types for hundreds of search results on the fly. + metadata JSONB NOT NULL, + + -- Ostensibly there's the possibility of name conflicts, + -- but those are rare enough we don't care, we just insert with ON CONFLICT DO NOTHING. + PRIMARY KEY (project_id, release_id, name) +); + +-- Index for searching global definitions by 'search token', with an optional project/release filter. +-- P.s. there's a search token type for name, so we don't need to index that separately. +CREATE INDEX global_definition_search_tokens ON global_definition_search_docs USING GIN(search_tokens, project_id, release_id); diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync.hs b/src/Share/BackgroundJobs/Search/DefinitionSync.hs index 24e1c80..bb41b5f 100644 --- a/src/Share/BackgroundJobs/Search/DefinitionSync.hs +++ b/src/Share/BackgroundJobs/Search/DefinitionSync.hs @@ -17,7 +17,7 @@ import Share.BackgroundJobs.Search.DefinitionSync.Types (DefinitionDocument (..) import Share.BackgroundJobs.Workers (newWorker) import Share.Codebase (CodebaseM) import Share.Codebase qualified as Codebase -import Share.IDs (ProjectShortHand (ProjectShortHand), ReleaseId, ReleaseVersion) +import Share.IDs (ProjectId, ProjectShortHand (ProjectShortHand), ReleaseId, ReleaseVersion) import Share.Postgres (QueryM (transactionUnsafeIO)) import Share.Postgres qualified as PG import Share.Postgres.Cursors qualified as Cursors @@ -26,6 +26,7 @@ import Share.Postgres.IDs (BranchHashId, ComponentHash) import Share.Postgres.NameLookups.Ops qualified as NLOps import Share.Postgres.NameLookups.Types qualified as NL import Share.Postgres.Queries qualified as PG +import Share.Postgres.Search.DefinitionSync qualified as DDQ import Share.Postgres.Search.DefinitionSync qualified as DefnSyncQ import Share.Prelude import Share.Project (Project (..), ProjectVisibility (..)) @@ -80,7 +81,7 @@ syncRelease :: ReleaseId -> PG.Transaction e () syncRelease authZReceipt releaseId = fmap (fromMaybe ()) . runMaybeT $ do - Release {projectId, squashedCausal, version = releaseVersion} <- lift $ PG.expectReleaseById releaseId + Release {projectId, releaseId, squashedCausal, version = releaseVersion} <- lift $ PG.expectReleaseById releaseId Project {slug, ownerUserId, visibility = projectVis} <- lift $ PG.expectProjectById projectId User {handle, visibility = userVis} <- PG.expectUserByUserId ownerUserId -- Don't sync private projects @@ -96,21 +97,21 @@ syncRelease authZReceipt releaseId = fmap (fromMaybe ()) . 
runMaybeT $ do Codebase.codebaseMToTransaction codebase $ do termsCursor <- lift $ NLOps.termsWithinNamespace nlReceipt bhId let projectShortHand = ProjectShortHand handle slug - syncTerms namesPerspective bhId projectShortHand releaseVersion termsCursor + syncTerms namesPerspective bhId projectId releaseId termsCursor typesCursor <- lift $ NLOps.typesWithinNamespace nlReceipt bhId syncTypes projectShortHand releaseVersion typesCursor syncTerms :: NL.NamesPerspective -> BranchHashId -> - ProjectShortHand -> - ReleaseVersion -> + ProjectId -> + ReleaseId -> Cursors.PGCursor (Name, Referent) -> CodebaseM e [DefnIndexingFailure] -syncTerms namesPerspective bhId projectShortHand releaseVersion termsCursor = +syncTerms namesPerspective bhId projectId releaseId termsCursor = Cursors.foldBatched termsCursor defnBatchSize \terms -> do - docs <- - terms & foldMapM \(fqn, ref) -> fmap (either (pure @[]) (const [])) . runExceptT $ do + (errs, docs) <- + terms & foldMapM \(fqn, ref) -> fmap (either (\err -> ([err], [])) (\doc -> ([], [doc]))) . runExceptT $ do typ <- lift (Codebase.loadTypeOfReferent ref) `whenNothingM` throwError (NoTypeSigForTerm fqn ref) termSummary <- lift $ Summary.termSummaryForReferent ref typ (Just fqn) bhId Nothing Nothing let sh = Referent.toShortHash ref @@ -124,15 +125,16 @@ syncTerms namesPerspective bhId projectShortHand releaseVersion termsCursor = pure $ Name.lastSegment . HQ'.toName <$> fqn let dd = DefinitionDocument - { projectShortHand, - releaseVersion, + { project = projectId, + release = releaseId, fqn, hash = sh, tokens = namedTokens, - payload = ToTTermSummary termSummary + metadata = ToTTermSummary termSummary } pure dd - PG.insertDefinitionDocuments docs + lift $ DDQ.insertDefinitionDocuments docs + pure errs -- | Compute the search tokens for a term given its name, hash, and type signature tokensForTerm :: Name -> Referent -> Type.Type v a -> Summary.TermSummary -> Set (DefnSearchToken TypeReference) diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs b/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs index e7cddca..56deaf3 100644 --- a/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs +++ b/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs @@ -13,18 +13,12 @@ where import Data.Aeson import Data.Monoid (Sum (..)) import Data.Text qualified as Text -import Share.IDs (PrefixedHash (..), ProjectShortHand, ReleaseVersion) -import Share.IDs qualified as IDs import Share.Prelude -import U.Codebase.HashTags (ComponentHash) -import Unison.Hash qualified as Hash import Unison.Name (Name) import Unison.NameSegment (NameSegment) import Unison.Server.Share.DefinitionSummary (TermSummary (..), TypeSummary (..)) import Unison.Server.Types (TermTag, TypeTag) import Unison.ShortHash (ShortHash) -import Unison.ShortHash qualified as SH -import Unison.Syntax.Name qualified as Name data TermOrTypeSummary = ToTTermSummary TermSummary | ToTTypeSummary TypeSummary deriving (Show) @@ -70,6 +64,7 @@ newtype Occurrence = Occurrence Int newtype VarId = VarId Int deriving newtype (Show, Read, Eq, Ord, Num, ToJSON) +-- | Represents the possible ways we can search the global definitions index. 
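+-- For example, indexing @map : (a -> b) -> List a -> List b@ should
+-- produce, roughly: a NameToken for @map@, a TypeMentionToken for
+-- @List@ with occurrence 2, and a TypeVarToken per distinct type
+-- variable.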
data DefnSearchToken r = -- Allows searching by literal name NameToken Name @@ -119,38 +114,18 @@ data DefnSearchToken r -- Nothing -> Nothing -- _ -> Nothing -data DefinitionDocument = DefinitionDocument - { projectShortHand :: ProjectShortHand, - releaseVersion :: ReleaseVersion, +data DefinitionDocument proj release = DefinitionDocument + { project :: proj, + release :: release, fqn :: Name, hash :: ShortHash, -- For now we only index types by their final name segment, may need to revisit this -- in the future. tokens :: Set (DefnSearchToken NameSegment), - payload :: TermOrTypeSummary + metadata :: TermOrTypeSummary } deriving (Show) --- | Formats a DefinitionDocument into a documentName --- --- >>> projectShortHand = IDs.ProjectShortHand "unison" "base" --- >>> releaseVersion = IDs.ReleaseVersion 1 2 3 --- >>> fqn = Name.unsafeFromText "data.List.map" --- >>> hash = ShortHash "abcdef" --- >>> formatDocName DefinitionDocument {projectShortHand, releaseVersion, fqn, hash, tokens = mempty, payload = undefined} -formatDocName :: DefinitionDocument -> Text -formatDocName DefinitionDocument {projectShortHand, fqn, hash} = - Text.unwords [IDs.toText projectShortHand, Name.toText fqn, SH.toText hash] - -instance ToJSON DefinitionDocument where - toJSON dd@DefinitionDocument {releaseVersion, tokens, payload} = - object - [ "documentName" .= formatDocName dd, - "releaseVersion" .= IDs.toText releaseVersion, - "tokens" .= tokens, - "metadata" .= payload - ] - data SearchDefinition = SearchDefinition { fqn :: Name, hash :: ShortHash diff --git a/src/Share/Postgres/Search/DefinitionSync.hs b/src/Share/Postgres/Search/DefinitionSync.hs index 1e0ddbb..ce7121c 100644 --- a/src/Share/Postgres/Search/DefinitionSync.hs +++ b/src/Share/Postgres/Search/DefinitionSync.hs @@ -3,12 +3,23 @@ module Share.Postgres.Search.DefinitionSync ( submitReleaseToBeSynced, claimUnsyncedRelease, + insertDefinitionDocuments, ) where +import Data.Aeson qualified as Aeson +import Data.Set qualified as Set +import Data.Text qualified as Text +import Hasql.Interpolate qualified as Hasql import Share.BackgroundJobs.Search.DefinitionSync.Types -import Share.IDs (ReleaseId) +import Share.IDs (ProjectId, ReleaseId) import Share.Postgres +import Share.Postgres qualified as PG +import Share.Prelude +import Unison.NameSegment (NameSegment) +import Unison.Server.Types (TermTag (..), TypeTag (..)) +import Unison.Syntax.Name qualified as Name +import Unison.Syntax.NameSegment qualified as NameSegment submitReleaseToBeSynced :: ReleaseId -> Transaction e () submitReleaseToBeSynced releaseId = do @@ -38,6 +49,85 @@ claimUnsyncedRelease = do |] -- | Save definition documents to be indexed for search. -insertDefinitionDocuments :: [DefinitionDocument] -> Transaction e () +insertDefinitionDocuments :: [DefinitionDocument ProjectId ReleaseId] -> Transaction e () insertDefinitionDocuments docs = do - _ + let docsTable = docRow <$> docs + execute_ $ + [sql| + INSERT INTO global_definition_search_docs (project_id, release_id, name, search_tokens, metadata) + SELECT * FROM ^{PG.toTable docsTable} + ON CONFLICT DO NOTHING + |] + where + docRow :: DefinitionDocument ProjectId ReleaseId -> (ProjectId, ReleaseId, Text, [Text], Hasql.Jsonb) + docRow DefinitionDocument {project, release, fqn, tokens, metadata} = + ( project, + release, + Name.toText fqn, + searchTokensToTSVectorArray $ Set.toList tokens, + Hasql.Jsonb $ Aeson.toJSON metadata + ) + +-- | Prepare search tokens in a standard format for indexing. 
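+-- Each token renders as a colon-separated "kind:text[:occurrence]"
+-- string (see searchTokensToTSVector); any ':' inside the text itself
+-- is stripped so the separator stays unambiguous.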
+searchTokensToTSVectorArray :: [DefnSearchToken NameSegment] -> [Text] +searchTokensToTSVectorArray tokens = do + searchTokensToTSVector <$> tokens + +-- | Convert a search token to a TSVector. +-- +-- >>> import Unison.Syntax.Name qualified as Name +-- >>> searchTokensToTSVector (NameToken (Name.unsafeParseText "my.cool.name")) +-- "n:my.cool.name" +-- +-- >>> searchTokensToTSVector (TypeMentionToken (NameSegment.unsafeParseText "Nat") (Occurrence 1)) +-- "m:Nat:1" +-- +-- >>> searchTokensToTSVector (TypeVarToken (VarId 1) (Occurrence 1)) +-- "v:1:1" +-- +-- >>> searchTokensToTSVector (TermTagToken Doc) +-- "t:doc" +-- +-- >>> searchTokensToTSVector (TermTagToken (Constructor Data)) +-- "t:data-con" +-- +-- >>> searchTokensToTSVector (TypeTagToken Data) +-- "t:data" +searchTokensToTSVector :: DefnSearchToken NameSegment -> Text +searchTokensToTSVector = \case + NameToken name -> makeSearchToken nameType (Name.toText name) Nothing + TypeMentionToken t occ -> makeSearchToken typeMentionType (NameSegment.toEscapedText t) (Just occ) + TypeVarToken varId occ -> makeSearchToken typeVarType (varIdText varId) (Just occ) + HashToken sh -> makeSearchToken hashType (into @Text sh) Nothing + TermTagToken termTag -> makeSearchToken tagType (termTagText termTag) Nothing + TypeTagToken typTag -> makeSearchToken tagType (typeTagText typTag) Nothing + where + varIdText :: VarId -> Text + varIdText (VarId n) = tShow n + termTagText :: TermTag -> Text + termTagText = \case + Doc -> "doc" + Test -> "test" + Plain -> "plain" + Constructor typeTag -> typeTagText typeTag <> "-con" + typeTagText :: TypeTag -> Text + typeTagText = \case + Data -> "data" + Ability -> "ability" + nameType :: Text + nameType = "n" + typeMentionType :: Text + typeMentionType = "m" + typeVarType :: Text + typeVarType = "v" + hashType :: Text + hashType = "h" + tagType :: Text + tagType = "t" + makeSearchToken :: Text -> Text -> Maybe Occurrence -> Text + makeSearchToken kind txt occ = do + Text.intercalate ":" $ + [kind, Text.replace ":" "" txt] + <> case occ of + Just (Occurrence n) -> [tShow n] + Nothing -> [] From 669f21c7a91a02620ef2767ca29bfc2548b320fc Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Thu, 18 Jul 2024 10:38:54 -0700 Subject: [PATCH 15/48] WIP --- .../BackgroundJobs/Search/DefinitionSync.hs | 153 ++++++++++++++---- .../Search/DefinitionSync/Types.hs | 19 +-- src/Share/Postgres/Search/DefinitionSync.hs | 74 +++++---- 3 files changed, 171 insertions(+), 75 deletions(-) diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync.hs b/src/Share/BackgroundJobs/Search/DefinitionSync.hs index bb41b5f..e6b5233 100644 --- a/src/Share/BackgroundJobs/Search/DefinitionSync.hs +++ b/src/Share/BackgroundJobs/Search/DefinitionSync.hs @@ -4,8 +4,8 @@ module Share.BackgroundJobs.Search.DefinitionSync (worker) where import Control.Lens import Control.Monad.Except -import Data.Either (fromLeft) import Data.Generics.Product (HasField (..)) +import Data.List qualified as List import Data.Map qualified as Map import Data.Map.Monoidal.Strict (MonoidalMap) import Data.Map.Monoidal.Strict qualified as MonMap @@ -13,16 +13,15 @@ import Data.Set qualified as Set import Data.Set.Lens (setOf) import Ki.Unlifted qualified as Ki import Share.BackgroundJobs.Monad (Background) -import Share.BackgroundJobs.Search.DefinitionSync.Types (DefinitionDocument (..), DefnSearchToken (..), Occurrence (Occurrence), TermOrTypeSummary (..), VarId) +import Share.BackgroundJobs.Search.DefinitionSync.Types (DefinitionDocument (..), DefnSearchToken (..), 
Occurrence, TermOrTypeSummary (..), VarId (VarId)) import Share.BackgroundJobs.Workers (newWorker) import Share.Codebase (CodebaseM) import Share.Codebase qualified as Codebase -import Share.IDs (ProjectId, ProjectShortHand (ProjectShortHand), ReleaseId, ReleaseVersion) -import Share.Postgres (QueryM (transactionUnsafeIO)) +import Share.IDs (ProjectId, ReleaseId) import Share.Postgres qualified as PG import Share.Postgres.Cursors qualified as Cursors import Share.Postgres.Hashes.Queries qualified as HashQ -import Share.Postgres.IDs (BranchHashId, ComponentHash) +import Share.Postgres.IDs (BranchHashId) import Share.Postgres.NameLookups.Ops qualified as NLOps import Share.Postgres.NameLookups.Types qualified as NL import Share.Postgres.Queries qualified as PG @@ -37,27 +36,30 @@ import Share.Web.Authorization qualified as AuthZ import U.Codebase.Referent (Referent) import U.Codebase.Referent qualified as Referent import Unison.ABT qualified as ABT +import Unison.ConstructorType qualified as CT +import Unison.DataDeclaration qualified as DD import Unison.HashQualifiedPrime qualified as HQ' import Unison.LabeledDependency qualified as LD import Unison.Name (Name) import Unison.Name qualified as Name -import Unison.Parser.Ann (Ann) +import Unison.NameSegment (NameSegment) import Unison.PrettyPrintEnv qualified as PPE import Unison.PrettyPrintEnvDecl qualified as PPED import Unison.PrettyPrintEnvDecl.Postgres qualified as PPEPostgres import Unison.Reference (TypeReference) +import Unison.Reference qualified as Reference import Unison.Server.Share.DefinitionSummary qualified as Summary +import Unison.Server.Types qualified as Server.Types import Unison.ShortHash (ShortHash) -import Unison.Symbol (Symbol) import Unison.Type qualified as Type import Unison.Util.Monoid qualified as Monoid import Unison.Util.Set qualified as Set import Unison.Var qualified as Var -import UnliftIO qualified import UnliftIO.Concurrent qualified as UnliftIO data DefnIndexingFailure = NoTypeSigForTerm Name Referent + | NoDeclForType Name TypeReference -- | How often to poll for new releases to sync in seconds. pollingIntervalSeconds :: Int @@ -74,16 +76,16 @@ worker scope = newWorker scope "search:defn-sync" $ forever do authZReceipt <- AuthZ.backgroundJobAuthZ PG.runTransaction $ do mayReleaseId <- DefnSyncQ.claimUnsyncedRelease - for_ mayReleaseId (syncRelease authZReceipt syncDD) + for_ mayReleaseId (syncRelease authZReceipt) syncRelease :: AuthZ.AuthZReceipt -> ReleaseId -> - PG.Transaction e () -syncRelease authZReceipt releaseId = fmap (fromMaybe ()) . runMaybeT $ do - Release {projectId, releaseId, squashedCausal, version = releaseVersion} <- lift $ PG.expectReleaseById releaseId - Project {slug, ownerUserId, visibility = projectVis} <- lift $ PG.expectProjectById projectId - User {handle, visibility = userVis} <- PG.expectUserByUserId ownerUserId + PG.Transaction e [DefnIndexingFailure] +syncRelease authZReceipt releaseId = fmap (fromMaybe []) . runMaybeT $ do + Release {projectId, releaseId, squashedCausal} <- lift $ PG.expectReleaseById releaseId + Project {ownerUserId, visibility = projectVis} <- lift $ PG.expectProjectById projectId + User {visibility = userVis} <- PG.expectUserByUserId ownerUserId -- Don't sync private projects guard $ projectVis == ProjectPublic -- Don't sync private users @@ -96,10 +98,10 @@ syncRelease authZReceipt releaseId = fmap (fromMaybe ()) . 
runMaybeT $ do let codebase = Codebase.codebaseEnv authZReceipt codebaseLoc Codebase.codebaseMToTransaction codebase $ do termsCursor <- lift $ NLOps.termsWithinNamespace nlReceipt bhId - let projectShortHand = ProjectShortHand handle slug - syncTerms namesPerspective bhId projectId releaseId termsCursor + termErrs <- syncTerms namesPerspective bhId projectId releaseId termsCursor typesCursor <- lift $ NLOps.typesWithinNamespace nlReceipt bhId - syncTypes projectShortHand releaseVersion typesCursor + typeErrs <- syncTypes namesPerspective projectId releaseId typesCursor + pure (termErrs <> typeErrs) syncTerms :: NL.NamesPerspective -> @@ -108,36 +110,41 @@ syncTerms :: ReleaseId -> Cursors.PGCursor (Name, Referent) -> CodebaseM e [DefnIndexingFailure] -syncTerms namesPerspective bhId projectId releaseId termsCursor = +syncTerms namesPerspective bhId projectId releaseId termsCursor = do Cursors.foldBatched termsCursor defnBatchSize \terms -> do - (errs, docs) <- + (errs, refDocs) <- terms & foldMapM \(fqn, ref) -> fmap (either (\err -> ([err], [])) (\doc -> ([], [doc]))) . runExceptT $ do typ <- lift (Codebase.loadTypeOfReferent ref) `whenNothingM` throwError (NoTypeSigForTerm fqn ref) termSummary <- lift $ Summary.termSummaryForReferent ref typ (Just fqn) bhId Nothing Nothing let sh = Referent.toShortHash ref let refTokens = tokensForTerm fqn ref typ termSummary - let deps = setOf (folded . folded . to LD.TypeReference) refTokens - pped <- PPEPostgres.ppedForReferences namesPerspective deps - let ppe = PPED.unsuffixifiedPPE pped - let namedTokens = - refTokens & Set.mapMaybe \token -> do - fqn <- traverse (PPE.types ppe) token - pure $ Name.lastSegment . HQ'.toName <$> fqn let dd = DefinitionDocument { project = projectId, release = releaseId, fqn, hash = sh, - tokens = namedTokens, + tokens = refTokens, metadata = ToTTermSummary termSummary } pure dd - lift $ DDQ.insertDefinitionDocuments docs + + -- It's much more efficient to build only one PPE per batch. + let allDeps = setOf (folded . folding tokens . folded . to LD.TypeReference) refDocs + pped <- PPEPostgres.ppedForReferences namesPerspective allDeps + let ppe = PPED.unsuffixifiedPPE pped + let namedDocs :: [DefinitionDocument ProjectId ReleaseId Name (NameSegment, ShortHash)] + namedDocs = + refDocs + & traversed . field @"tokens" %~ Set.mapMaybe \token -> do + for token \ref -> do + name <- PPE.types ppe ref + pure $ (Name.lastSegment . HQ'.toName $ name, Reference.toShortHash ref) + lift $ DDQ.insertDefinitionDocuments namedDocs pure errs -- | Compute the search tokens for a term given its name, hash, and type signature -tokensForTerm :: Name -> Referent -> Type.Type v a -> Summary.TermSummary -> Set (DefnSearchToken TypeReference) +tokensForTerm :: (Var.Var v) => Name -> Referent -> Type.Type v a -> Summary.TermSummary -> Set (DefnSearchToken TypeReference) tokensForTerm name ref typ (Summary.TermSummary {tag}) = do let sh = Referent.toShortHash ref baseTokens = Set.fromList [NameToken name, HashToken sh] @@ -160,10 +167,33 @@ type TokenGenM v = ReaderT (TokenGenEnv v) (State (TokenGenState v)) typeSigTokens :: forall v ann. (Var.Var v) => Type.Type v ann -> Set (DefnSearchToken TypeReference) typeSigTokens typ = let occMap :: MonoidalMap (Either VarId TypeReference) Occurrence - occMap = flip evalState initState . 
runReaderT mempty $ ABT.cata alg typ - in MonMap.toList occMap & foldMap \case - (Left vId, occ) -> Set.singleton (TypeVarToken vId occ) - (Right typeRef, occ) -> Set.singleton (TypeMentionToken typeRef occ) + occMap = flip evalState initState . flip runReaderT (TokenGenEnv mempty) $ ABT.cata alg typ + (varIds, typeRefs) = + MonMap.toList occMap & foldMap \case + (Left vId, occ) -> ([(vId, occ)], []) + (Right typeRef, occ) -> ([], [(typeRef, occ)]) + expandedVarTokens = + varIds + -- Rewrite varIds normalized by number of occurrences, + -- this is necessary to ensure that order of type variables + -- in a type signature don't actually matter. + -- + -- E.g. BOTH of (a -> b -> a) and (b -> a -> a) turn into [(VarId 1, 1), (VarId 2, 2)] + & List.sortOn snd + -- Expand a token for each occurrence of a variable, this way + -- a -> a -> a still matches the type a -> a, since the user may not have typed the + -- whole thing. + & imap (\i (_vId, occ) -> (VarId i, occ)) + -- Expand a token for each occurrence of a variable, this way + -- 'Text' still matches the type 'Text -> Text' + & foldMap (\(vId, occ) -> (TypeVarToken vId) <$> [1 .. occ]) + & Set.fromList + expandedTypeRefTokens = + typeRefs + & foldMap \(typeRef, occ) -> + TypeMentionToken typeRef <$> [1 .. occ] + & Set.fromList + in expandedVarTokens <> expandedTypeRefTokens where initState = TokenGenState 0 -- Cata algebra for collecting type reference tokens from a type signature. @@ -209,4 +239,59 @@ typeSigTokens typ = Nothing -> do error "typeRefTokens: Found variable without corresponding Abs in type signature" -syncTypes = undefined +syncTypes :: + NL.NamesPerspective -> + ProjectId -> + ReleaseId -> + Cursors.PGCursor (Name, TypeReference) -> + CodebaseM e [DefnIndexingFailure] +syncTypes namesPerspective projectId releaseId typesCursor = do + Cursors.foldBatched typesCursor defnBatchSize \types -> do + (errs, refDocs) <- + types & foldMapM \(fqn, ref) -> fmap (either (\err -> ([err], [])) (\doc -> ([], [doc]))) . runExceptT $ do + declTokens <- case ref of + Reference.Builtin _ -> pure mempty + Reference.DerivedId refId -> do + decl <- lift (Codebase.loadTypeDeclaration refId) `whenNothingM` throwError (NoDeclForType fqn ref) + pure $ tokensForDecl refId decl + let basicTokens = Set.fromList [NameToken fqn, HashToken $ Reference.toShortHash ref] + + typeSummary <- lift $ Summary.typeSummaryForReference ref (Just fqn) Nothing + let sh = Reference.toShortHash ref + let dd = + DefinitionDocument + { project = projectId, + release = releaseId, + fqn, + hash = sh, + tokens = declTokens <> basicTokens, + metadata = ToTTypeSummary typeSummary + } + pure dd + -- It's much more efficient to build only one PPE per batch. + let allDeps = setOf (folded . folding tokens . folded . to LD.TypeReference) refDocs + pped <- PPEPostgres.ppedForReferences namesPerspective allDeps + let ppe = PPED.unsuffixifiedPPE pped + let namedDocs :: [DefinitionDocument ProjectId ReleaseId Name (NameSegment, ShortHash)] + namedDocs = + refDocs + & traversed . field @"tokens" %~ Set.mapMaybe \token -> do + for token \ref -> do + name <- PPE.types ppe ref + pure $ (Name.lastSegment . 
HQ'.toName $ name, Reference.toShortHash ref) + lift $ DDQ.insertDefinitionDocuments namedDocs + pure errs + +tokensForDecl :: Reference.TypeReferenceId -> DD.Decl v a -> Set (DefnSearchToken TypeReference) +tokensForDecl _typeRefId decl = do + let ddecl = DD.asDataDecl decl + tagToken = case DD.constructorType decl of + CT.Data -> TypeTagToken Server.Types.Data + CT.Effect -> TypeTagToken Server.Types.Ability + modToken = TypeModToken $ DD.modifier ddecl + in -- Include the constructors as Name Tokens + -- constructorTokens = + -- DD.declConstructorReferents typeRefId decl + -- & (fmap . fmap) Reference.DerivedId + -- <&> \constructorReferent -> ConstructorToken constructorReferent + Set.fromList $ [tagToken, modToken] diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs b/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs index 56deaf3..537c0df 100644 --- a/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs +++ b/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs @@ -14,8 +14,8 @@ import Data.Aeson import Data.Monoid (Sum (..)) import Data.Text qualified as Text import Share.Prelude +import Unison.DataDeclaration qualified as DD import Unison.Name (Name) -import Unison.NameSegment (NameSegment) import Unison.Server.Share.DefinitionSummary (TermSummary (..), TypeSummary (..)) import Unison.Server.Types (TermTag, TypeTag) import Unison.ShortHash (ShortHash) @@ -54,7 +54,7 @@ instance FromJSON TermOrTypeSummary where -- occur 3 times, and the set would be: -- {NameMention "Text" (Occurrence 1), NameMention "Text" (Occurrence 2), NameMention "Text" (Occurrence 3)} newtype Occurrence = Occurrence Int - deriving newtype (Show, Read, Eq, Ord, Num, ToJSON) + deriving newtype (Show, Read, Eq, Ord, Num, ToJSON, Enum) deriving (Semigroup) via Sum Int deriving (Monoid) via Sum Int @@ -62,14 +62,14 @@ newtype Occurrence = Occurrence Int -- E.g. 'map : (a -> b) -> List a -> List b' would have two type var Ids, one for a, one -- for b, and would have occurrences 1 and 2 for each respectively. newtype VarId = VarId Int - deriving newtype (Show, Read, Eq, Ord, Num, ToJSON) + deriving newtype (Show, Read, Eq, Ord, Num, ToJSON, Enum) -- | Represents the possible ways we can search the global definitions index. -data DefnSearchToken r +data DefnSearchToken typeRef = -- Allows searching by literal name NameToken Name | -- A mention of some external type or ability - TypeMentionToken r Occurrence + TypeMentionToken typeRef Occurrence | -- Allows searching for type sigs with type variables TypeVarToken VarId Occurrence | -- Allows searching by component hash @@ -78,7 +78,8 @@ data DefnSearchToken r HashToken ShortHash | TermTagToken TermTag | TypeTagToken TypeTag - deriving (Show, Eq, Ord) + | TypeModToken DD.Modifier + deriving stock (Show, Eq, Ord, Functor, Foldable, Traversable) -- | Converts a DefnSearchToken to a prefix-searchable text string. -- @@ -114,17 +115,17 @@ data DefnSearchToken r -- Nothing -> Nothing -- _ -> Nothing -data DefinitionDocument proj release = DefinitionDocument +data DefinitionDocument proj release name typeRef = DefinitionDocument { project :: proj, release :: release, fqn :: Name, hash :: ShortHash, -- For now we only index types by their final name segment, may need to revisit this -- in the future. 
- tokens :: Set (DefnSearchToken NameSegment), + tokens :: Set (DefnSearchToken typeRef), metadata :: TermOrTypeSummary } - deriving (Show) + deriving (Show, Generic) data SearchDefinition = SearchDefinition { fqn :: Name, diff --git a/src/Share/Postgres/Search/DefinitionSync.hs b/src/Share/Postgres/Search/DefinitionSync.hs index ce7121c..11670e4 100644 --- a/src/Share/Postgres/Search/DefinitionSync.hs +++ b/src/Share/Postgres/Search/DefinitionSync.hs @@ -16,8 +16,11 @@ import Share.IDs (ProjectId, ReleaseId) import Share.Postgres import Share.Postgres qualified as PG import Share.Prelude +import Unison.DataDeclaration qualified as DD +import Unison.Name (Name) import Unison.NameSegment (NameSegment) import Unison.Server.Types (TermTag (..), TypeTag (..)) +import Unison.ShortHash (ShortHash) import Unison.Syntax.Name qualified as Name import Unison.Syntax.NameSegment qualified as NameSegment @@ -49,7 +52,7 @@ claimUnsyncedRelease = do |] -- | Save definition documents to be indexed for search. -insertDefinitionDocuments :: [DefinitionDocument ProjectId ReleaseId] -> Transaction e () +insertDefinitionDocuments :: [DefinitionDocument ProjectId ReleaseId Name (NameSegment, ShortHash)] -> Transaction e () insertDefinitionDocuments docs = do let docsTable = docRow <$> docs execute_ $ @@ -59,49 +62,52 @@ insertDefinitionDocuments docs = do ON CONFLICT DO NOTHING |] where - docRow :: DefinitionDocument ProjectId ReleaseId -> (ProjectId, ReleaseId, Text, [Text], Hasql.Jsonb) + docRow :: DefinitionDocument ProjectId ReleaseId Name (NameSegment, ShortHash) -> (ProjectId, ReleaseId, Text, [Text], Hasql.Jsonb) docRow DefinitionDocument {project, release, fqn, tokens, metadata} = ( project, release, Name.toText fqn, - searchTokensToTSVectorArray $ Set.toList tokens, + foldMap searchTokenToText $ Set.toList tokens, Hasql.Jsonb $ Aeson.toJSON metadata ) --- | Prepare search tokens in a standard format for indexing. -searchTokensToTSVectorArray :: [DefnSearchToken NameSegment] -> [Text] -searchTokensToTSVectorArray tokens = do - searchTokensToTSVector <$> tokens - -- | Convert a search token to a TSVector. 
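-- Each token renders as a colon-separated @kind:text[:occurrence]@ string via the
-- @makeSearchToken@ helper below; the examples that follow show the expected output.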
 --
 -- >>> import Unison.Syntax.Name qualified as Name
--- >>> searchTokensToTSVector (NameToken (Name.unsafeParseText "my.cool.name"))
--- "n:my.cool.name"
+-- >>> searchTokenToText (NameToken (Name.unsafeParseText "my.cool.name"))
+-- ["n:my.cool.name"]
 --
+-- >>> import Unison.ShortHash qualified as SH
+-- >>> import Data.Maybe (fromJust)
+-- >>> searchTokenToText (TypeMentionToken (NameSegment.unsafeParseText "Thing", fromJust $ SH.fromText "#2tWjVAuc7") (Occurrence 1))
+-- ["mn:Thing:1","mh:#2tWjVAuc7:1"]
 --
+-- >>> searchTokenToText (TypeVarToken (VarId 1) (Occurrence 1))
+-- ["v:1:1"]
 --
+-- >>> searchTokenToText (TermTagToken Doc)
+-- ["t:doc"]
+-- >>> searchTokenToText (TermTagToken (Constructor Data))
+-- ["t:data-con"]
+-- >>> searchTokenToText (TypeTagToken Data)
+-- ["t:data"]
-searchTokensToTSVector :: DefnSearchToken NameSegment -> Text
-searchTokensToTSVector = \case
-  NameToken name -> makeSearchToken nameType (Name.toText name) Nothing
-  TypeMentionToken t occ -> makeSearchToken typeMentionType (NameSegment.toEscapedText t) (Just occ)
-  TypeVarToken varId occ -> makeSearchToken typeVarType (varIdText varId) (Just occ)
-  HashToken sh -> makeSearchToken hashType (into @Text sh) Nothing
-  TermTagToken termTag -> makeSearchToken tagType (termTagText termTag) Nothing
-  TypeTagToken typTag -> makeSearchToken tagType (typeTagText typTag) Nothing
+searchTokenToText :: DefnSearchToken (NameSegment, ShortHash) -> [Text]
+searchTokenToText = \case
+  NameToken name -> [makeSearchToken nameType (Name.toText name) Nothing]
+  TypeMentionToken (ns, typeRef) occ ->
+    [ makeSearchToken typeMentionTypeByNameType (NameSegment.toEscapedText ns) (Just occ),
+      makeSearchToken typeMentionTypeByHashType (into @Text @ShortHash typeRef) (Just occ)
+    ]
+  TypeVarToken varId occ -> [makeSearchToken typeVarType (varIdText varId) (Just occ)]
+  HashToken sh -> [makeSearchToken hashType (into @Text sh) Nothing]
+  TermTagToken termTag -> [makeSearchToken tagType (termTagText termTag) Nothing]
+  TypeTagToken typTag -> [makeSearchToken tagType (typeTagText typTag) Nothing]
+  TypeModToken mod -> [makeSearchToken typeModType (typeModText mod) Nothing]
   where
+    typeModText = \case
+      DD.Structural -> "structural"
+      DD.Unique {} -> "unique"
     varIdText :: VarId -> Text
     varIdText (VarId n) = tShow n
     termTagText :: TermTag -> Text
     termTagText = \case
       Doc -> "doc"
       Test -> "test"
       Plain -> "plain"
       Constructor typeTag -> typeTagText typeTag <> "-con"
     typeTagText :: TypeTag -> Text
     typeTagText = \case
       Data -> "data"
       Ability -> "ability"
     nameType :: Text
     nameType = "n"
-    typeMentionType :: Text
-    typeMentionType = "m"
+    typeMentionTypeByNameType :: Text
+    typeMentionTypeByNameType = "mn"
+    typeMentionTypeByHashType :: Text
+    typeMentionTypeByHashType = "mh"
     typeVarType :: Text
     typeVarType = "v"
     hashType :: Text
     hashType = "h"
     tagType :: Text
     tagType = "t"
+    typeModType :: Text
+    typeModType = "mod"
     makeSearchToken :: Text -> Text -> Maybe Occurrence -> Text
     makeSearchToken kind txt occ = do
       Text.intercalate ":" $
         [kind, Text.replace ":" "" txt]
           <> case occ of
             Just (Occurrence n) -> [tShow n]
             Nothing -> []

From b44c224c616c92bb0d2f09cae42abac093664681 Mon Sep 17 00:00:00 2001
From: Chris Penner
Date: Thu, 18 Jul 2024 10:42:54 -0700
Subject: [PATCH 16/48] Fix up errors from new Unison

---
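A quick way to sanity-check the token encoding from the previous patch is a GHCi
session along these lines (illustrative only; assumes the DefinitionSync modules
are in scope):

    ghci> searchTokenToText (TypeVarToken (VarId 2) (Occurrence 3))
    ["v:2:3"]
    ghci> searchTokenToText (TypeModToken DD.Structural)
    ["mod:structural"]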
 src/Share/Web/Share/Impl.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Share/Web/Share/Impl.hs b/src/Share/Web/Share/Impl.hs
index 0648348..8353cef 100644
--- a/src/Share/Web/Share/Impl.hs
+++ b/src/Share/Web/Share/Impl.hs
@@ -6,6 +6,7 @@
 module Share.Web.Share.Impl where
 
+import Servant
 import Share.Codebase qualified as Codebase
 import Share.Codebase.Types qualified as Codebase
 import Share.IDs (TourId, UserHandle)
@@ -35,7 +36,6 @@ import Share.Web.Share.CodeBrowsing.API (CodeBrowseAPI)
 import Share.Web.Share.Contributions.Impl qualified as Contributions
 import Share.Web.Share.Projects.Impl qualified as Projects
 import Share.Web.Share.Types
-import Servant
 import Unison.Codebase.Path qualified as Path
 import Unison.HashQualified qualified as HQ
 import Unison.Name (Name)
@@ -213,7 +213,7 @@ typeSummaryEndpoint (AuthN.MaybeAuthedUserID callerUserId) userHandle ref mayNam
   (rootCausalId, _rootCausalHash) <- Codebase.runCodebaseTransaction codebase Codebase.expectLooseCodeRoot
   Codebase.cachedCodebaseResponse authZReceipt codebaseLoc "type-summary" cacheParams rootCausalId $ do
     Codebase.runCodebaseTransaction codebase $ do
-      serveTypeSummary ref mayName rootCausalId relativeTo renderWidth
+      serveTypeSummary ref mayName renderWidth
   where
     cacheParams = [toUrlPiece ref, maybe "" Name.toText mayName, tShow $ fromMaybe Path.empty relativeTo, foldMap toUrlPiece renderWidth]
     authPath :: Path.Path

From 9ea0424004d61fbe1935a82c9cd4d2538cece526 Mon Sep 17 00:00:00 2001
From: Chris Penner
Date: Thu, 18 Jul 2024 14:23:11 -0700
Subject: [PATCH 17/48] Fix up Cursor tech

---
 sql/2024-07-17-00-00_cursors.sql              | 19 ++++++++++++
 ... => 2024-07-18-00-00_defn_search_sync.sql} |  4 +--
 .../BackgroundJobs/Search/DefinitionSync.hs   |  2 +-
 src/Share/Postgres/Cursors.hs                 | 30 ++++++++++++-------
 src/Share/Postgres/NameLookups/Queries.hs     | 20 ++++++-------
 src/Share/Postgres/Search/DefinitionSync.hs   |  8 ++---
 6 files changed, 55 insertions(+), 28 deletions(-)
 create mode 100644 sql/2024-07-17-00-00_cursors.sql
 rename sql/{2024-05-07-00-00_defn_search_sync.sql => 2024-07-18-00-00_defn_search_sync.sql} (90%)

diff --git a/sql/2024-07-17-00-00_cursors.sql b/sql/2024-07-17-00-00_cursors.sql
new file mode 100644
index 0000000..8880c27
--- /dev/null
+++ b/sql/2024-07-17-00-00_cursors.sql
@@ -0,0 +1,19 @@
+-- Note: despite its name, this helper computes the cumulative dotted prefixes of a fully-qualified name.
+CREATE OR REPLACE FUNCTION create_cursor(fqn text)
+RETURNS text[] AS $$
+DECLARE
+  segments text[];
+  input_segments text[];
+  current_segment text;
+BEGIN
+  input_segments := string_to_array(trim(trailing '.' from fqn), '.');
+  segments := ARRAY[]::text[];
+
+  FOREACH current_segment IN ARRAY input_segments
+  LOOP
+    segments := array_append(segments, array_to_string(input_segments[1:array_position(input_segments, current_segment)], '.') || '.');
+  END LOOP;
+
+  RETURN segments;
+END;
+$$ LANGUAGE plpgsql;
diff --git a/sql/2024-05-07-00-00_defn_search_sync.sql b/sql/2024-07-18-00-00_defn_search_sync.sql
similarity index 90%
rename from sql/2024-05-07-00-00_defn_search_sync.sql
rename to sql/2024-07-18-00-00_defn_search_sync.sql
index b7f96a4..68dbd99 100644
--- a/sql/2024-05-07-00-00_defn_search_sync.sql
+++ b/sql/2024-07-18-00-00_defn_search_sync.sql
@@ -6,13 +6,13 @@ CREATE EXTENSION IF NOT EXISTS btree_gin;
 
 -- Table of all releases which have been published, but not yet synced to the global definition search index.
CREATE TABLE global_definition_search_release_queue ( - release_id UUID PRIMARY KEY REFERENCES releases(id) ON DELETE CASCADE, + release_id UUID PRIMARY KEY REFERENCES project_releases(id) ON DELETE CASCADE, created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE TABLE global_definition_search_docs ( project_id UUID NOT NULL REFERENCES projects(id) ON DELETE CASCADE, - release_id UUID NOT NULL REFERENCES releases(id) ON DELETE CASCADE, + release_id UUID NOT NULL REFERENCES project_releases(id) ON DELETE CASCADE, -- Fully qualified name name TEXT NOT NULL, search_tokens TSVECTOR NOT NULL, diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync.hs b/src/Share/BackgroundJobs/Search/DefinitionSync.hs index e6b5233..9e6fd37 100644 --- a/src/Share/BackgroundJobs/Search/DefinitionSync.hs +++ b/src/Share/BackgroundJobs/Search/DefinitionSync.hs @@ -71,12 +71,12 @@ defnBatchSize = 10 worker :: Ki.Scope -> Background () worker scope = newWorker scope "search:defn-sync" $ forever do - liftIO $ UnliftIO.threadDelay $ pollingIntervalSeconds * 1000000 Logging.logInfoText "Syncing definitions..." authZReceipt <- AuthZ.backgroundJobAuthZ PG.runTransaction $ do mayReleaseId <- DefnSyncQ.claimUnsyncedRelease for_ mayReleaseId (syncRelease authZReceipt) + liftIO $ UnliftIO.threadDelay $ pollingIntervalSeconds * 1000000 syncRelease :: AuthZ.AuthZReceipt -> diff --git a/src/Share/Postgres/Cursors.hs b/src/Share/Postgres/Cursors.hs index b6fc2c7..e96d8ac 100644 --- a/src/Share/Postgres/Cursors.hs +++ b/src/Share/Postgres/Cursors.hs @@ -10,7 +10,10 @@ module Share.Postgres.Cursors ) where +import Data.Char qualified as Char import Data.List.NonEmpty qualified as NEL +import Data.String (IsString (fromString)) +import Data.Text qualified as Text import Data.UUID (UUID) import Share.Postgres import Share.Prelude @@ -40,26 +43,31 @@ newColCursor namePrefix query = do -- This cursor will be closed when the transaction ends, and must not be used outside of the -- transaction in which it was created. newRowCursor :: forall r m. (QueryM m) => (DecodeRow r) => Text -> Sql -> m (PGCursor r) -newRowCursor namePrefix query = do - uuid <- transactionUnsafeIO $ randomIO @UUID - let cursorName = namePrefix <> "_" <> into @Text uuid - execute_ - [sql| - DECLARE #{uuid} +newRowCursor namePrefix query = + do + uuid <- transactionUnsafeIO $ randomIO @UUID + -- We can't use a parameter for cursor names, so we make sure to clean it to prevent any + -- possible sql errors/injections. + let cursorName = Text.filter (\c -> Char.isAlphaNum c || c == '_') (namePrefix <> "_" <> into @Text uuid) + let declaration = fromString $ "DECLARE " <> Text.unpack cursorName <> "\n" + execute_ $ + declaration + <> [sql| NO SCROLL CURSOR WITHOUT HOLD FOR ^{query} |] - pure $ PGCursor cursorName id + pure $ PGCursor cursorName id -- | Fetch UP TO the next N results from the cursor. If there are no more rows, returns Nothing. fetchN :: forall r m. (QueryM m) => PGCursor r -> Int32 -> m (Maybe (NonEmpty r)) fetchN (PGCursor cursorName f) n = do - rows <- - queryListRows - [sql| FETCH FORWARD #{n} FROM #{cursorName} - |] + -- PG doesn't allow bind params for limits or cursor names. + -- We're safe from injection here because `n` is just an int, and we guarantee the + -- cursorName is safe at construction time. + let sql = fromString . Text.unpack $ Text.intercalate " " ["FETCH FORWARD", tShow n, "FROM", cursorName] + rows <- queryListRows sql pure $ NEL.nonEmpty (f <$> rows) -- | Fold over the cursor in batches of N rows. 
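For reference while reviewing the cursor changes above, here is a minimal
caller-side sketch of draining a PGCursor with fetchN. The helper name and the
batch size are hypothetical; it assumes a DecodeRow instance for the row type
and the codebase's usual extensions (OverloadedStrings, LambdaCase, BlockArguments):

    -- Stream every row of a query in batches of 100, accumulating in memory.
    drainCursor :: (DecodeRow r) => Sql -> Transaction e [r]
    drainCursor query = do
      cursor <- newRowCursor "drain" query
      let go acc =
            fetchN cursor 100 >>= \case
              -- The cursor is exhausted once fetchN returns Nothing.
              Nothing -> pure (concat (reverse acc))
              Just batch -> go (toList batch : acc)
      go []

foldBatched (documented above) packages this same loop so callers only supply
the per-batch action.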
diff --git a/src/Share/Postgres/NameLookups/Queries.hs b/src/Share/Postgres/NameLookups/Queries.hs index 643bcb8..685d05b 100644 --- a/src/Share/Postgres/NameLookups/Queries.hs +++ b/src/Share/Postgres/NameLookups/Queries.hs @@ -41,7 +41,7 @@ import Unison.Util.Monoid qualified as Monoid -- | Get the list of term names and suffixifications for a given Referent within a given namespace. -- Considers one level of dependencies, but not transitive dependencies. -termNamesForRefWithinNamespace :: PG.QueryM m => NameLookupReceipt -> BranchHashId -> PathSegments -> PGReferent -> Maybe ReversedName -> m [(ReversedName, ReversedName)] +termNamesForRefWithinNamespace :: (PG.QueryM m) => NameLookupReceipt -> BranchHashId -> PathSegments -> PGReferent -> Maybe ReversedName -> m [(ReversedName, ReversedName)] termNamesForRefWithinNamespace !_nameLookupReceipt bhId namespaceRoot ref maySuffix = do let namespacePrefix = toNamespacePrefix namespaceRoot let reversedNamePrefix = case maySuffix of @@ -123,7 +123,7 @@ termNamesForRefWithinNamespace !_nameLookupReceipt bhId namespaceRoot ref maySuf -- | Get the list of type names for a given Reference within a given namespace. -- Considers one level of dependencies, but not transitive dependencies. -typeNamesForRefWithinNamespace :: PG.QueryM m => NameLookupReceipt -> BranchHashId -> PathSegments -> PGReference -> Maybe ReversedName -> m [(ReversedName, ReversedName)] +typeNamesForRefWithinNamespace :: (PG.QueryM m) => NameLookupReceipt -> BranchHashId -> PathSegments -> PGReference -> Maybe ReversedName -> m [(ReversedName, ReversedName)] typeNamesForRefWithinNamespace !_nameLookupReceipt bhId namespaceRoot ref maySuffix = do let namespacePrefix = toNamespacePrefix namespaceRoot let reversedNamePrefix = case maySuffix of @@ -224,7 +224,7 @@ transitiveDependenciesSql rootBranchHashId = -- id. It's the caller's job to select the correct name lookup for your exact name. -- -- See termRefsForExactName in U.Codebase.Sqlite.Operations -termRefsForExactName :: PG.QueryM m => NameLookupReceipt -> BranchHashId -> ReversedName -> m [NamedRef (PGReferent, Maybe ConstructorType)] +termRefsForExactName :: (PG.QueryM m) => NameLookupReceipt -> BranchHashId -> ReversedName -> m [NamedRef (PGReferent, Maybe ConstructorType)] termRefsForExactName !_nameLookupReceipt bhId reversedName = do results :: [NamedRef (PGReferent PG.:. PG.Only (Maybe ConstructorType))] <- PG.queryListRows @@ -243,7 +243,7 @@ termRefsForExactName !_nameLookupReceipt bhId reversedName = do -- id. It's the caller's job to select the correct name lookup for your exact name. -- -- See termRefsForExactName in U.Codebase.Sqlite.Operations -typeRefsForExactName :: PG.QueryM m => NameLookupReceipt -> BranchHashId -> ReversedName -> m [NamedRef PGReference] +typeRefsForExactName :: (PG.QueryM m) => NameLookupReceipt -> BranchHashId -> ReversedName -> m [NamedRef PGReference] typeRefsForExactName !_nameLookupReceipt bhId reversedName = do PG.queryListRows [PG.sql| @@ -254,7 +254,7 @@ typeRefsForExactName !_nameLookupReceipt bhId reversedName = do |] -- | Check if we've already got an index for the desired root branch hash. -checkBranchHashNameLookupExists :: PG.QueryM m => BranchHashId -> m Bool +checkBranchHashNameLookupExists :: (PG.QueryM m) => BranchHashId -> m Bool checkBranchHashNameLookupExists hashId = do PG.queryExpect1Col [PG.sql| @@ -288,7 +288,7 @@ deleteNameLookupsExceptFor hashIds = do |] -- | Fetch the name lookup mounts for a given name lookup index. 
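-- Each mount pairs the path prefix where a dependency is mounted in the root
-- namespace (e.g. a @lib.base@-style path; illustrative) with the branch hash id
-- of that dependency's own name lookup.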
-listNameLookupMounts :: PG.QueryM m => NameLookupReceipt -> BranchHashId -> m [(PathSegments, BranchHashId)] +listNameLookupMounts :: (PG.QueryM m) => NameLookupReceipt -> BranchHashId -> m [(PathSegments, BranchHashId)] listNameLookupMounts !_nameLookupReceipt rootBranchHashId = do PG.queryListRows @@ -308,7 +308,7 @@ type FuzzySearchScore = (Bool, Bool, Int64, Int64) -- | Searches for all names within the given name lookup which contain the provided list of segments -- in order. -- Search is case insensitive. -fuzzySearchTerms :: PG.QueryM m => NameLookupReceipt -> Bool -> BranchHashId -> Int64 -> PathSegments -> NonEmpty Text -> m [(FuzzySearchScore, NamedRef (PGReferent, Maybe ConstructorType))] +fuzzySearchTerms :: (PG.QueryM m) => NameLookupReceipt -> Bool -> BranchHashId -> Int64 -> PathSegments -> NonEmpty Text -> m [(FuzzySearchScore, NamedRef (PGReferent, Maybe ConstructorType))] fuzzySearchTerms !_nameLookupReceipt includeDependencies bhId limit namespace querySegments = do fmap unRow <$> PG.queryListRows @@ -368,7 +368,7 @@ fuzzySearchTerms !_nameLookupReceipt includeDependencies bhId limit namespace qu -- in order. -- -- Search is case insensitive. -fuzzySearchTypes :: PG.QueryM m => NameLookupReceipt -> Bool -> BranchHashId -> Int64 -> PathSegments -> NonEmpty Text -> m [(FuzzySearchScore, NamedRef PGReference)] +fuzzySearchTypes :: (PG.QueryM m) => NameLookupReceipt -> Bool -> BranchHashId -> Int64 -> PathSegments -> NonEmpty Text -> m [(FuzzySearchScore, NamedRef PGReference)] fuzzySearchTypes !_nameLookupReceipt includeDependencies bhId limit namespace querySegments = do fmap unRow <$> PG.queryListRows @@ -470,7 +470,7 @@ termsWithinNamespace !_nlReceipt bhId = do [sql| SELECT reversed_name, referent_builtin, referent_component_hash.base32, referent_component_index, referent_constructor_index FROM scoped_term_name_lookup - JOIN component_hashes referent_component_hash ON component_hashes.id = referent_component_hash_id + JOIN component_hashes referent_component_hash ON referent_component_hash.id = referent_component_hash_id WHERE root_branch_hash_id = #{bhId} |] <&> fmap (\NamedRef {reversedSegments, ref} -> (reversedNameToName reversedSegments, ref)) @@ -482,7 +482,7 @@ typesWithinNamespace !_nlReceipt bhId = do [sql| SELECT reversed_name, reference_builtin, reference_component_hash.base32, reference_component_index FROM scoped_type_name_lookup - JOIN component_hashes reference_component_hash ON component_hashes.id = reference_component_hash_id + JOIN component_hashes reference_component_hash ON reference_component_hash.id = reference_component_hash_id WHERE root_branch_hash_id = #{bhId} |] <&> fmap (\NamedRef {reversedSegments, ref} -> (reversedNameToName reversedSegments, ref)) diff --git a/src/Share/Postgres/Search/DefinitionSync.hs b/src/Share/Postgres/Search/DefinitionSync.hs index 11670e4..929a3e4 100644 --- a/src/Share/Postgres/Search/DefinitionSync.hs +++ b/src/Share/Postgres/Search/DefinitionSync.hs @@ -37,8 +37,8 @@ claimUnsyncedRelease :: Transaction e (Maybe ReleaseId) claimUnsyncedRelease = do query1Col [sql| - WITH chosen_release AS ( - SELECT q.id + WITH chosen_release(release_id) AS ( + SELECT q.release_id FROM global_definition_search_release_queue q ORDER BY q.created_at ASC LIMIT 1 @@ -47,8 +47,8 @@ claimUnsyncedRelease = do ) DELETE FROM global_definition_search_release_queue USING chosen_release - WHERE global_definition_search_release_queue.id = chosen_release.id - RETURNING chosen_release.id + WHERE global_definition_search_release_queue.release_id 
= chosen_release.release_id + RETURNING chosen_release.release_id |] -- | Save definition documents to be indexed for search. From de78bd0d19d5a62185c2d532ac53794d78da5b9b Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Thu, 18 Jul 2024 15:17:18 -0700 Subject: [PATCH 18/48] Working (but slow) index sync --- .../BackgroundJobs/Search/DefinitionSync.hs | 10 +++++++++- src/Share/Codebase.hs | 15 +++++++++------ src/Share/Postgres/Search/DefinitionSync.hs | 17 ++++++++++------- 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync.hs b/src/Share/BackgroundJobs/Search/DefinitionSync.hs index 9e6fd37..524cf86 100644 --- a/src/Share/BackgroundJobs/Search/DefinitionSync.hs +++ b/src/Share/BackgroundJobs/Search/DefinitionSync.hs @@ -38,6 +38,7 @@ import U.Codebase.Referent qualified as Referent import Unison.ABT qualified as ABT import Unison.ConstructorType qualified as CT import Unison.DataDeclaration qualified as DD +import Unison.Debug qualified as Debug import Unison.HashQualifiedPrime qualified as HQ' import Unison.LabeledDependency qualified as LD import Unison.Name (Name) @@ -67,7 +68,7 @@ pollingIntervalSeconds = 10 -- | How many definitions to hold in memory at a time while syncing defnBatchSize :: Int32 -defnBatchSize = 10 +defnBatchSize = 1000 worker :: Ki.Scope -> Background () worker scope = newWorker scope "search:defn-sync" $ forever do @@ -75,6 +76,7 @@ worker scope = newWorker scope "search:defn-sync" $ forever do authZReceipt <- AuthZ.backgroundJobAuthZ PG.runTransaction $ do mayReleaseId <- DefnSyncQ.claimUnsyncedRelease + Debug.debugM Debug.Temp "Syncing release" mayReleaseId for_ mayReleaseId (syncRelease authZReceipt) liftIO $ UnliftIO.threadDelay $ pollingIntervalSeconds * 1000000 @@ -90,16 +92,21 @@ syncRelease authZReceipt releaseId = fmap (fromMaybe []) . runMaybeT $ do guard $ projectVis == ProjectPublic -- Don't sync private users guard $ userVis == UserPublic + Debug.debugM Debug.Temp "Syncing release" releaseId lift $ do bhId <- HashQ.expectNamespaceIdsByCausalIdsOf id squashedCausal + Debug.debugM Debug.Temp "bhId" bhId namesPerspective <- NLOps.namesPerspectiveForRootAndPath bhId (NL.PathSegments []) let nlReceipt = NL.nameLookupReceipt namesPerspective let codebaseLoc = Codebase.codebaseLocationForProjectRelease ownerUserId let codebase = Codebase.codebaseEnv authZReceipt codebaseLoc Codebase.codebaseMToTransaction codebase $ do + Debug.debugM Debug.Temp "Building cursor" releaseId termsCursor <- lift $ NLOps.termsWithinNamespace nlReceipt bhId + Debug.debugM Debug.Temp "Syncing terms" releaseId termErrs <- syncTerms namesPerspective bhId projectId releaseId termsCursor typesCursor <- lift $ NLOps.typesWithinNamespace nlReceipt bhId + Debug.debugM Debug.Temp "Syncing types" releaseId typeErrs <- syncTypes namesPerspective projectId releaseId typesCursor pure (termErrs <> typeErrs) @@ -112,6 +119,7 @@ syncTerms :: CodebaseM e [DefnIndexingFailure] syncTerms namesPerspective bhId projectId releaseId termsCursor = do Cursors.foldBatched termsCursor defnBatchSize \terms -> do + Debug.debugM Debug.Temp "Fetched N more terms" (length terms) (errs, refDocs) <- terms & foldMapM \(fqn, ref) -> fmap (either (\err -> ([err], [])) (\doc -> ([], [doc]))) . 
runExceptT $ do typ <- lift (Codebase.loadTypeOfReferent ref) `whenNothingM` throwError (NoTypeSigForTerm fqn ref) diff --git a/src/Share/Codebase.hs b/src/Share/Codebase.hs index 0573429..90bb085 100644 --- a/src/Share/Codebase.hs +++ b/src/Share/Codebase.hs @@ -74,6 +74,8 @@ import Data.ByteString.Lazy.Char8 qualified as BL import Data.Map qualified as Map import Data.Set qualified as Set import Data.Text.Encoding qualified as Text +import Servant qualified +import Servant.Server (err500) import Share.Branch (Branch (..)) import Share.Codebase.Types import Share.Codebase.Types qualified as Codebase @@ -99,8 +101,6 @@ import Share.Web.App import Share.Web.Authorization (AuthZReceipt) import Share.Web.Authorization qualified as AuthZ import Share.Web.Errors -import Servant qualified -import Servant.Server (err500) import U.Codebase.Branch qualified as V2 import U.Codebase.Causal qualified as Causal import U.Codebase.Decl qualified as V2 @@ -113,6 +113,7 @@ import Unison.Codebase.SqliteCodebase.Conversions qualified as Cv import Unison.ConstructorType qualified as CT import Unison.DataDeclaration qualified as DD import Unison.DataDeclaration qualified as V1 +import Unison.Debug qualified as Debug import Unison.Parser.Ann import Unison.Parser.Ann qualified as Ann import Unison.Prelude (askUnliftIO) @@ -278,10 +279,10 @@ expectTypeOfReferents :: Traversal s t V2.Referent (V1.Type Symbol Ann) -> s -> expectTypeOfReferents trav s = do s & trav %%~ expectTypeOfReferent -expectDeclKind :: PG.QueryM m => Reference.TypeReference -> m CT.ConstructorType +expectDeclKind :: (PG.QueryM m) => Reference.TypeReference -> m CT.ConstructorType expectDeclKind r = loadDeclKind r `whenNothingM` (unrecoverableError (InternalServerError "missing-decl-kind" $ "Couldn't find the decl kind of " <> tShow r)) -expectDeclKindsOf :: PG.QueryM m => Traversal s t Reference.TypeReference CT.ConstructorType -> s -> m t +expectDeclKindsOf :: (PG.QueryM m) => Traversal s t Reference.TypeReference CT.ConstructorType -> s -> m t expectDeclKindsOf trav s = do s & unsafePartsOf trav %%~ \refs -> do @@ -290,10 +291,10 @@ expectDeclKindsOf trav s = do (r, Nothing) -> unrecoverableError (InternalServerError "missing-decl-kind" $ "Couldn't find the decl kind of " <> tShow r) (_, Just ct) -> pure ct -loadDeclKind :: PG.QueryM m => V2.TypeReference -> m (Maybe CT.ConstructorType) +loadDeclKind :: (PG.QueryM m) => V2.TypeReference -> m (Maybe CT.ConstructorType) loadDeclKind = loadDeclKindsOf id -loadDeclKindsOf :: PG.QueryM m => Traversal s t Reference.TypeReference (Maybe CT.ConstructorType) -> s -> m t +loadDeclKindsOf :: (PG.QueryM m) => Traversal s t Reference.TypeReference (Maybe CT.ConstructorType) -> s -> m t loadDeclKindsOf trav s = s & unsafePartsOf trav %%~ \refs -> do @@ -448,6 +449,7 @@ squashCausalAndAddToCodebase causalId = runMaybeT $ do causalBranch <- MaybeT (CausalQ.loadCausalNamespace causalId) (squashedCausalId, _squashedCausal) <- lift $ squashCausal causalBranch squashedBranchHashId <- CausalQ.expectNamespaceIdsByCausalIdsOf id squashedCausalId + Debug.debugLogM Debug.Temp "Squashed, making name lookup" lift . lift $ NLOps.ensureNameLookupForBranchId squashedBranchHashId pure squashedCausalId @@ -455,6 +457,7 @@ squashCausalAndAddToCodebase causalId = runMaybeT $ do -- Causal node at every level. 
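-- (So a repeated squash of an unchanged namespace resolves through the
-- @CausalQ.tryGetCachedSquashResult@ lookup below instead of re-traversing the tree.)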
squashCausal :: V2.CausalBranch (CodebaseM e) -> CodebaseM e (CausalId, V2.CausalBranch (CodebaseM e)) squashCausal Causal.Causal {valueHash = unsquashedBranchHash, value} = do + Debug.debugM Debug.Temp "Squashing" unsquashedBranchHash mayCachedSquashResult <- runMaybeT $ do causalId <- MaybeT (CausalQ.tryGetCachedSquashResult unsquashedBranchHash) fmap (causalId,) . MaybeT $ CausalQ.loadCausalNamespace causalId diff --git a/src/Share/Postgres/Search/DefinitionSync.hs b/src/Share/Postgres/Search/DefinitionSync.hs index 929a3e4..0356b56 100644 --- a/src/Share/Postgres/Search/DefinitionSync.hs +++ b/src/Share/Postgres/Search/DefinitionSync.hs @@ -14,7 +14,6 @@ import Hasql.Interpolate qualified as Hasql import Share.BackgroundJobs.Search.DefinitionSync.Types import Share.IDs (ProjectId, ReleaseId) import Share.Postgres -import Share.Postgres qualified as PG import Share.Prelude import Unison.DataDeclaration qualified as DD import Unison.Name (Name) @@ -55,12 +54,16 @@ claimUnsyncedRelease = do insertDefinitionDocuments :: [DefinitionDocument ProjectId ReleaseId Name (NameSegment, ShortHash)] -> Transaction e () insertDefinitionDocuments docs = do let docsTable = docRow <$> docs - execute_ $ - [sql| - INSERT INTO global_definition_search_docs (project_id, release_id, name, search_tokens, metadata) - SELECT * FROM ^{PG.toTable docsTable} - ON CONFLICT DO NOTHING - |] + for_ docsTable \(projectId, releaseId, fqn, tokens, metadata) -> do + -- Ideally we'd do a bulk insert, but Hasql doesn't provide any method for passing arrays of + -- arrays as parameters, so instead we insert each record individually so we can use our + -- only level of array-ness for the tokens. + execute_ $ + [sql| + INSERT INTO global_definition_search_docs (project_id, release_id, name, search_tokens, metadata) + VALUES (#{projectId}, #{releaseId}, #{fqn}, array_to_tsvector(#{tokens}), #{metadata}::jsonb) + ON CONFLICT DO NOTHING + |] where docRow :: DefinitionDocument ProjectId ReleaseId Name (NameSegment, ShortHash) -> (ProjectId, ReleaseId, Text, [Text], Hasql.Jsonb) docRow DefinitionDocument {project, release, fqn, tokens, metadata} = From 14e9ddc161174cdd53b7a710bb8ea54c3cb3e63e Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Fri, 19 Jul 2024 09:30:56 -0700 Subject: [PATCH 19/48] Add Debug module and Postgres transaction timing --- share-utils/package.yaml | 1 + share-utils/share-utils.cabal | 4 +- share-utils/src/Share/Debug.hs | 85 ++++++++++++++++++++++++++++++++++ src/Share/Postgres.hs | 60 +++++++++++++++--------- 4 files changed, 128 insertions(+), 22 deletions(-) create mode 100644 share-utils/src/Share/Debug.hs diff --git a/share-utils/package.yaml b/share-utils/package.yaml index debe83c..519184e 100644 --- a/share-utils/package.yaml +++ b/share-utils/package.yaml @@ -71,6 +71,7 @@ dependencies: - jose - memory - network-uri +- pretty-simple - random - servant-auth - servant-server diff --git a/share-utils/share-utils.cabal b/share-utils/share-utils.cabal index 7b89c0a..b064344 100644 --- a/share-utils/share-utils.cabal +++ b/share-utils/share-utils.cabal @@ -1,6 +1,6 @@ cabal-version: 1.12 --- This file has been generated from package.yaml by hpack version 0.35.2. +-- This file has been generated from package.yaml by hpack version 0.36.0. 
-- -- see: https://github.com/sol/hpack @@ -24,6 +24,7 @@ source-repository head library exposed-modules: + Share.Debug Share.Utils.Binary Share.Utils.Deployment Share.Utils.IDs @@ -83,6 +84,7 @@ library , lens , memory , network-uri + , pretty-simple , random , servant-auth , servant-server diff --git a/share-utils/src/Share/Debug.hs b/share-utils/src/Share/Debug.hs new file mode 100644 index 0000000..f237854 --- /dev/null +++ b/share-utils/src/Share/Debug.hs @@ -0,0 +1,85 @@ +{-# LANGUAGE OverloadedStrings #-} +-- pTrace is marked deprecated so you get warnings when you use it. +{-# OPTIONS_GHC -Wno-deprecations #-} + +module Share.Debug + ( debug, + debugM, + whenDebug, + debugLog, + debugLogM, + shouldDebug, + DebugFlag (..), + ) +where + +import Control.Monad +import Data.Set (Set) +import Data.Set qualified as Set +import Data.Text qualified as Text +import Debug.Pretty.Simple (pTrace, pTraceM) +import Debug.Trace +import System.IO.Unsafe (unsafePerformIO) +import Text.Pretty.Simple (pShow) +import UnliftIO.Environment (lookupEnv) +import Witch (into) + +data DebugFlag + = Timing + deriving (Eq, Ord, Show, Bounded, Enum) + +debugFlags :: Set DebugFlag +debugFlags = case (unsafePerformIO (lookupEnv "SHARE_DEBUG")) of + Nothing -> Set.empty + Just "" -> Set.fromList [minBound .. maxBound] + Just s -> Set.fromList $ do + w <- (Text.splitOn "," . Text.pack $ s) + case Text.toUpper . Text.strip $ w of + "TIMING" -> pure Timing + _ -> mempty +{-# NOINLINE debugFlags #-} + +debugTiming :: Bool +debugTiming = Timing `Set.member` debugFlags +{-# NOINLINE debugTiming #-} + +-- | Use for trace-style selective debugging. +-- E.g. 1 + (debug Sync "The second number" 2) +-- +-- Or, use in pattern matching to view arguments. +-- E.g. +-- myFunc (debug Sync "argA" -> argA) = ... +debug :: (Show a) => DebugFlag -> String -> a -> a +debug flag msg a = + if shouldDebug flag + then (trace (msg <> ":\n" <> into @String (pShow a)) a) + else a + +-- | Use for selective debug logging in monadic contexts. +-- E.g. +-- do +-- debugM Sync "source repo" srcRepo +-- ... +debugM :: (Show a, Monad m) => DebugFlag -> String -> a -> m () +debugM flag msg a = + whenDebug flag do + traceM (msg <> ":\n" <> into @String (pShow a)) + +debugLog :: DebugFlag -> String -> a -> a +debugLog flag msg = + if shouldDebug flag + then pTrace msg + else id + +debugLogM :: (Monad m) => DebugFlag -> String -> m () +debugLogM flag msg = + whenDebug flag $ pTraceM msg + +-- | A 'when' block which is triggered if the given flag is being debugged. 
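+--
+-- Illustrative usage (hypothetical call site):
+--
+-- > whenDebug Timing do
+-- >   pTraceM "about to time a transaction"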
+whenDebug :: (Monad m) => DebugFlag -> m () -> m () +whenDebug flag action = do + when (shouldDebug flag) action + +shouldDebug :: DebugFlag -> Bool +shouldDebug = \case + Timing -> debugTiming diff --git a/src/Share/Postgres.hs b/src/Share/Postgres.hs index 64f1b3d..05d06e4 100644 --- a/src/Share/Postgres.hs +++ b/src/Share/Postgres.hs @@ -58,6 +58,9 @@ module Share.Postgres Interp.toTable, Interp.Sql, singleColumnTable, + + -- * Debugging + timeTransaction, ) where @@ -65,10 +68,12 @@ import Control.Lens import Control.Monad.Except import Control.Monad.Reader import Control.Monad.State -import Data.ByteString.Char8 qualified as BSC import Data.Map qualified as Map import Data.Maybe import Data.Text qualified as Text +import Data.Time.Clock (picosecondsToDiffTime) +import Data.Time.Clock.System (getSystemTime, systemToTAITime) +import Data.Time.Clock.TAI (diffAbsoluteTime) import Data.Void import Hasql.Decoders qualified as Decoders import Hasql.Encoders qualified as Encoders @@ -78,6 +83,7 @@ import Hasql.Session qualified as Hasql import Hasql.Session qualified as Session import Hasql.Statement qualified as Hasql import Share.App +import Share.Debug qualified as Debug import Share.Env qualified as Env import Share.Postgres.Orphans () import Share.Prelude @@ -85,9 +91,7 @@ import Share.Utils.Logging (Loggable (..)) import Share.Utils.Logging qualified as Logging import Share.Web.App import Share.Web.Errors (ErrorID (..), SomeServerError, ToServerError (..), internalServerError, respondError, someServerError) - -debug :: Bool -debug = False +import System.CPUTime (getCPUTime) data TransactionError e = Unrecoverable SomeServerError @@ -272,11 +276,11 @@ unliftSession s = do pure $ tryRunSessionWithPool pool s -- | Manually run an unfailing session using a connection pool. -runSessionWithPool :: HasCallStack => Pool.Pool -> Session Void a -> IO a +runSessionWithPool :: (HasCallStack) => Pool.Pool -> Session Void a -> IO a runSessionWithPool pool s = either absurd id <$> tryRunSessionWithPool pool s -- | Manually run a session using a connection pool, returning an Either error. -tryRunSessionWithPool :: HasCallStack => Pool.Pool -> Session e a -> IO (Either e a) +tryRunSessionWithPool :: (HasCallStack) => Pool.Pool -> Session e a -> IO (Either e a) tryRunSessionWithPool pool s = do liftIO (Pool.use pool (runExceptT s)) >>= \case Left err -> throwIO . someServerError $ PostgresError err @@ -289,7 +293,7 @@ runSessionOrRespondError :: (HasCallStack, ToServerError e, Loggable e) => Sessi runSessionOrRespondError t = tryRunSession t >>= either respondError pure -- | Represents any monad in which we can run a statement -class Monad m => QueryM m where +class (Monad m) => QueryM m where statement :: q -> Hasql.Statement q r -> m r -- | Allow running IO actions in a transaction. 
These actions may be run multiple times if @@ -300,8 +304,7 @@ class Monad m => QueryM m where unrecoverableError :: (HasCallStack, ToServerError e, Loggable e, Show e) => e -> m a instance QueryM (Transaction e) where - statement q s@(Hasql.Statement bs _ _ _) = do - when debug $ transactionUnsafeIO $ BSC.putStrLn bs + statement q s = do transactionStatement q s transactionUnsafeIO io = Transaction (Right <$> liftIO io) @@ -309,22 +312,21 @@ instance QueryM (Transaction e) where unrecoverableError e = Transaction (pure (Left (Unrecoverable (someServerError e)))) instance QueryM (Session e) where - statement q s@(Hasql.Statement bs _ _ _) = do - when debug $ liftIO $ BSC.putStrLn bs + statement q s = do lift $ Session.statement q s transactionUnsafeIO io = lift $ liftIO io unrecoverableError e = throwError (Unrecoverable (someServerError e)) -instance QueryM m => QueryM (ReaderT e m) where +instance (QueryM m) => QueryM (ReaderT e m) where statement q s = lift $ statement q s transactionUnsafeIO io = lift $ transactionUnsafeIO io unrecoverableError e = lift $ unrecoverableError e -instance QueryM m => QueryM (MaybeT m) where +instance (QueryM m) => QueryM (MaybeT m) where statement q s = lift $ statement q s transactionUnsafeIO io = lift $ transactionUnsafeIO io @@ -337,32 +339,32 @@ prepareStatements = True queryListRows :: forall r m. (Interp.DecodeRow r, QueryM m) => Interp.Sql -> m [r] queryListRows sql = statement () (Interp.interp prepareStatements sql) -query1Row :: forall r m. QueryM m => (Interp.DecodeRow r) => Interp.Sql -> m (Maybe r) +query1Row :: forall r m. (QueryM m) => (Interp.DecodeRow r) => Interp.Sql -> m (Maybe r) query1Row sql = listToMaybe <$> queryListRows sql query1Col :: forall a m. (QueryM m, Interp.DecodeField a) => Interp.Sql -> m (Maybe a) query1Col sql = listToMaybe <$> queryListCol sql -queryListCol :: forall a m. QueryM m => (Interp.DecodeField a) => Interp.Sql -> m [a] +queryListCol :: forall a m. (QueryM m) => (Interp.DecodeField a) => Interp.Sql -> m [a] queryListCol sql = queryListRows @(Interp.OneColumn a) sql <&> coerce @[Interp.OneColumn a] @[a] -execute_ :: QueryM m => Interp.Sql -> m () +execute_ :: (QueryM m) => Interp.Sql -> m () execute_ sql = statement () (Interp.interp prepareStatements sql) -queryExpect1Row :: forall r m. HasCallStack => (Interp.DecodeRow r, QueryM m) => Interp.Sql -> m r +queryExpect1Row :: forall r m. (HasCallStack) => (Interp.DecodeRow r, QueryM m) => Interp.Sql -> m r queryExpect1Row sql = query1Row sql >>= \case Nothing -> error "queryExpect1Row: expected 1 row, got 0" Just r -> pure r -queryExpect1Col :: forall a m. HasCallStack => (Interp.DecodeField a, QueryM m) => Interp.Sql -> m a +queryExpect1Col :: forall a m. 
(HasCallStack) => (Interp.DecodeField a, QueryM m) => Interp.Sql -> m a queryExpect1Col sql = query1Col sql >>= \case Nothing -> error "queryExpect1Col: expected 1 row, got 0" Just r -> pure r -- | Decode a single field as part of a Row -decodeField :: Interp.DecodeField a => Decoders.Row a +decodeField :: (Interp.DecodeField a) => Decoders.Row a decodeField = Decoders.column Interp.decodeField -- | Helper for decoding a row which contains many types which each have their own DecodeRow @@ -386,7 +388,7 @@ newtype Only a = Only {fromOnly :: a} deriving stock (Generic) deriving anyclass (Interp.EncodeRow) -instance Interp.DecodeField a => Interp.DecodeRow (Only a) where +instance (Interp.DecodeField a) => Interp.DecodeRow (Only a) where decodeRow = Only <$> decodeField likeEscape :: Text -> Text @@ -396,7 +398,7 @@ likeEscape = Text.replace "%" "\\%" . Text.replace "_" "\\_" -- -- The @EncodeRow (Only a)@ instance might seem strange, but without it we get overlapping -- instances on @EncodeValue a@. -singleColumnTable :: forall a. Interp.EncodeRow (Only a) => [a] -> Interp.Sql +singleColumnTable :: forall a. (Interp.EncodeRow (Only a)) => [a] -> Interp.Sql singleColumnTable cols = Interp.toTable (coerce @[a] @[Only a] cols) -- | Helper for looking things up, using a cache for keys we've already seen (within the same @@ -458,3 +460,19 @@ instance Interp.DecodeValue RawBytes where -- @@ whenNonEmpty :: forall m f a x. (Monad m, Foldable f, Monoid a) => f x -> m a -> m a whenNonEmpty f m = if null f then pure mempty else m + +timeTransaction :: (QueryM m) => String -> m a -> m a +timeTransaction label ma = + if Debug.shouldDebug Debug.Timing + then do + transactionUnsafeIO $ putStrLn $ "Timing Transaction: " ++ label ++ "..." + systemStart <- transactionUnsafeIO getSystemTime + cpuPicoStart <- transactionUnsafeIO getCPUTime + !a <- ma + cpuPicoEnd <- transactionUnsafeIO getCPUTime + systemEnd <- transactionUnsafeIO getSystemTime + let systemDiff = diffAbsoluteTime (systemToTAITime systemEnd) (systemToTAITime systemStart) + let cpuDiff = picosecondsToDiffTime (cpuPicoEnd - cpuPicoStart) + transactionUnsafeIO $ putStrLn $ "Finished " ++ label ++ " in " ++ show cpuDiff ++ " (cpu), " ++ show systemDiff ++ " (system)" + pure a + else ma From 26e4b121ca06eff7024da99be7103f8a52abc326 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Fri, 19 Jul 2024 10:04:46 -0700 Subject: [PATCH 20/48] Debugging module --- share-utils/src/Share/Debug.hs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/share-utils/src/Share/Debug.hs b/share-utils/src/Share/Debug.hs index f237854..0c6837e 100644 --- a/share-utils/src/Share/Debug.hs +++ b/share-utils/src/Share/Debug.hs @@ -26,6 +26,7 @@ import Witch (into) data DebugFlag = Timing + | Queries deriving (Eq, Ord, Show, Bounded, Enum) debugFlags :: Set DebugFlag @@ -36,6 +37,7 @@ debugFlags = case (unsafePerformIO (lookupEnv "SHARE_DEBUG")) of w <- (Text.splitOn "," . Text.pack $ s) case Text.toUpper . Text.strip $ w of "TIMING" -> pure Timing + "QUERIES" -> pure Queries _ -> mempty {-# NOINLINE debugFlags #-} @@ -43,6 +45,10 @@ debugTiming :: Bool debugTiming = Timing `Set.member` debugFlags {-# NOINLINE debugTiming #-} +debugQueries :: Bool +debugQueries = Queries `Set.member` debugFlags +{-# NOINLINE debugQueries #-} + -- | Use for trace-style selective debugging. -- E.g. 
1 + (debug Sync "The second number" 2) -- @@ -83,3 +89,4 @@ whenDebug flag action = do shouldDebug :: DebugFlag -> Bool shouldDebug = \case Timing -> debugTiming + Queries -> debugQueries From 84f30f563d2529c12f8857ca9daa3ee965dd0c6e Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Fri, 19 Jul 2024 10:04:46 -0700 Subject: [PATCH 21/48] Add metrics for definition sync --- src/Share/Metrics.hs | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/src/Share/Metrics.hs b/src/Share/Metrics.hs index 2c094f8..c43b16f 100644 --- a/src/Share/Metrics.hs +++ b/src/Share/Metrics.hs @@ -9,6 +9,7 @@ module Share.Metrics requestMetricsMiddleware, tickUserSignup, recordBackgroundImportDuration, + recordDefinitionSearchIndexDuration, ) where @@ -17,17 +18,17 @@ import Data.Set qualified as Set import Data.Text qualified as Text import Data.Text.Encoding import Data.Time qualified as Time +import Network.HTTP.Types qualified as HTTP +import Network.Wai qualified as Wai +import Network.Wai.Middleware.Prometheus qualified as Prom +import Prometheus qualified as Prom +import Prometheus.Metric.GHC qualified as Prom import Share.Env qualified as Env import Share.Postgres qualified as PG import Share.Postgres.Metrics.Queries qualified as Q import Share.Prelude import Share.Utils.Deployment qualified as Deployment import Share.Utils.Servant.PathInfo (HasPathInfo, normalizePath) -import Network.HTTP.Types qualified as HTTP -import Network.Wai qualified as Wai -import Network.Wai.Middleware.Prometheus qualified as Prom -import Prometheus qualified as Prom -import Prometheus.Metric.GHC qualified as Prom import System.Clock (Clock (..), diffTimeSpec, toNanoSecs) import System.Clock qualified as Clock import UnliftIO qualified @@ -83,7 +84,7 @@ serveMetricsMiddleware env = do } -- | Record an event to the middleware metric. -requestMetricsMiddleware :: HasPathInfo api => Proxy api -> Wai.Middleware +requestMetricsMiddleware :: (HasPathInfo api) => Proxy api -> Wai.Middleware requestMetricsMiddleware api app req handleResponse = do if recordRequest req then do @@ -269,7 +270,7 @@ numUsersWithTickets = info = Prom.Info "num_users_with_tickets" "The number of users who have interacted with tickets." -- | Adds one to the user-signup counter -tickUserSignup :: MonadIO m => m () +tickUserSignup :: (MonadIO m) => m () tickUserSignup = liftIO do Prom.withLabel userSignupsCounter (tShow Deployment.deployment, "share-api") Prom.incCounter @@ -385,6 +386,18 @@ backgroundImportDurationSeconds = "background_codebase_import_duration_seconds" "The time it took to import a pulled branch into the user's codebase." +{-# NOINLINE definitionSearchIndexDurationSeconds #-} +definitionSearchIndexDurationSeconds :: Prom.Vector Prom.Label2 Prom.Histogram +definitionSearchIndexDurationSeconds = + Prom.unsafeRegister $ + Prom.vector ("deployment", "service") $ + Prom.histogram info Prom.defaultBuckets + where + info = + Prom.Info + "definition_search_indexing_duration_seconds" + "The time it took to index a release for definition search" + timeActionIntoHistogram :: (Prom.Label l, MonadUnliftIO m) => (Prom.Vector l Prom.Histogram) -> l -> m c -> m c timeActionIntoHistogram histogram l m = do UnliftIO.bracket start end \_ -> m @@ -397,5 +410,9 @@ timeActionIntoHistogram histogram l m = do Prom.withLabel histogram l (flip Prom.observe latency) -- | Record the duration of a background import. 
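--
-- The definition-search variant added just below is used the same way; a
-- minimal usage sketch (runIndexingPass is a hypothetical caller action):
--
-- @
-- recordDefinitionSearchIndexDuration runIndexingPass
-- @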
-recordBackgroundImportDuration :: MonadUnliftIO m => m r -> m r
+recordBackgroundImportDuration :: (MonadUnliftIO m) => m r -> m r
 recordBackgroundImportDuration = timeActionIntoHistogram backgroundImportDurationSeconds (deployment, service)
+
+-- | Record the duration of a definition search indexing pass.
+recordDefinitionSearchIndexDuration :: (MonadUnliftIO m) => m r -> m r
+recordDefinitionSearchIndexDuration = timeActionIntoHistogram definitionSearchIndexDurationSeconds (deployment, service)

From a8feb84c05f1fed683f6b8e688afceaf1bf1389e Mon Sep 17 00:00:00 2001
From: Chris Penner
Date: Fri, 19 Jul 2024 10:04:46 -0700
Subject: [PATCH 22/48] Definition Sync updates

---
 .../BackgroundJobs/Search/DefinitionSync.hs | 54 ++++++++++---------
 src/Share/Postgres.hs                       |  4 ++
 src/Share/Postgres/Search/DefinitionSync.hs | 10 ++++
 3 files changed, 44 insertions(+), 24 deletions(-)

diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync.hs b/src/Share/BackgroundJobs/Search/DefinitionSync.hs
index 524cf86..ad25b5c 100644
--- a/src/Share/BackgroundJobs/Search/DefinitionSync.hs
+++ b/src/Share/BackgroundJobs/Search/DefinitionSync.hs
@@ -18,6 +18,7 @@ import Share.BackgroundJobs.Workers (newWorker)
 import Share.Codebase (CodebaseM)
 import Share.Codebase qualified as Codebase
 import Share.IDs (ProjectId, ReleaseId)
+import Share.Metrics qualified as Metrics
 import Share.Postgres qualified as PG
 import Share.Postgres.Cursors qualified as Cursors
 import Share.Postgres.Hashes.Queries qualified as HashQ
@@ -71,14 +72,15 @@ defnBatchSize :: Int32
 defnBatchSize = 1000
 
 worker :: Ki.Scope -> Background ()
-worker scope = newWorker scope "search:defn-sync" $ forever do
-  Logging.logInfoText "Syncing definitions..."
+worker scope = do
   authZReceipt <- AuthZ.backgroundJobAuthZ
-  PG.runTransaction $ do
-    mayReleaseId <- DefnSyncQ.claimUnsyncedRelease
-    Debug.debugM Debug.Temp "Syncing release" mayReleaseId
-    for_ mayReleaseId (syncRelease authZReceipt)
-  liftIO $ UnliftIO.threadDelay $ pollingIntervalSeconds * 1000000
+  newWorker scope "search:defn-sync" $ forever do
+    Logging.logInfoText "Syncing definitions..."
+    Metrics.recordDefinitionSearchIndexDuration $ PG.runTransactionMode PG.ReadCommitted PG.ReadWrite $ do
+      mayReleaseId <- DefnSyncQ.claimUnsyncedRelease
+      Debug.debugM Debug.Temp "Syncing release" mayReleaseId
+      for_ mayReleaseId (syncRelease authZReceipt)
+    liftIO $ UnliftIO.threadDelay $ pollingIntervalSeconds * 1000000
 
 syncRelease ::
   AuthZ.AuthZReceipt ->
@@ -86,6 +88,9 @@ syncRelease ::
   PG.Transaction e [DefnIndexingFailure]
 syncRelease authZReceipt releaseId = fmap (fromMaybe []) . runMaybeT $ do
   Release {projectId, releaseId, squashedCausal} <- lift $ PG.expectReleaseById releaseId
+  -- Wipe out any existing rows for this release. Normally there should be none, but this
+  -- makes it easy to re-index later if we change how we tokenize things.
+  lift $ DDQ.cleanIndexForRelease releaseId
   Project {ownerUserId, visibility = projectVis} <- lift $ PG.expectProjectById projectId
   User {visibility = userVis} <- PG.expectUserByUserId ownerUserId
   -- Don't sync private projects
@@ -121,25 +126,26 @@ syncTerms namesPerspective bhId projectId releaseId termsCursor = do
   Cursors.foldBatched termsCursor defnBatchSize \terms -> do
     Debug.debugM Debug.Temp "Fetched N more terms" (length terms)
     (errs, refDocs) <-
-      terms & foldMapM \(fqn, ref) -> fmap (either (\err -> ([err], [])) (\doc -> ([], [doc]))) .
runExceptT $ do - typ <- lift (Codebase.loadTypeOfReferent ref) `whenNothingM` throwError (NoTypeSigForTerm fqn ref) - termSummary <- lift $ Summary.termSummaryForReferent ref typ (Just fqn) bhId Nothing Nothing - let sh = Referent.toShortHash ref - let refTokens = tokensForTerm fqn ref typ termSummary - let dd = - DefinitionDocument - { project = projectId, - release = releaseId, - fqn, - hash = sh, - tokens = refTokens, - metadata = ToTTermSummary termSummary - } - pure dd + PG.timeTransaction "Building terms" $ + terms & foldMapM \(fqn, ref) -> fmap (either (\err -> ([err], [])) (\doc -> ([], [doc]))) . runExceptT $ do + typ <- lift (Codebase.loadTypeOfReferent ref) `whenNothingM` throwError (NoTypeSigForTerm fqn ref) + termSummary <- lift $ Summary.termSummaryForReferent ref typ (Just fqn) bhId Nothing Nothing + let sh = Referent.toShortHash ref + let refTokens = tokensForTerm fqn ref typ termSummary + let dd = + DefinitionDocument + { project = projectId, + release = releaseId, + fqn, + hash = sh, + tokens = refTokens, + metadata = ToTTermSummary termSummary + } + pure dd -- It's much more efficient to build only one PPE per batch. let allDeps = setOf (folded . folding tokens . folded . to LD.TypeReference) refDocs - pped <- PPEPostgres.ppedForReferences namesPerspective allDeps + pped <- PG.timeTransaction "Build PPED" $ PPEPostgres.ppedForReferences namesPerspective allDeps let ppe = PPED.unsuffixifiedPPE pped let namedDocs :: [DefinitionDocument ProjectId ReleaseId Name (NameSegment, ShortHash)] namedDocs = @@ -148,7 +154,7 @@ syncTerms namesPerspective bhId projectId releaseId termsCursor = do for token \ref -> do name <- PPE.types ppe ref pure $ (Name.lastSegment . HQ'.toName $ name, Reference.toShortHash ref) - lift $ DDQ.insertDefinitionDocuments namedDocs + lift $ PG.timeTransaction "Inserting Docs" $ DDQ.insertDefinitionDocuments namedDocs pure errs -- | Compute the search tokens for a term given its name, hash, and type signature diff --git a/src/Share/Postgres.hs b/src/Share/Postgres.hs index 05d06e4..0647d36 100644 --- a/src/Share/Postgres.hs +++ b/src/Share/Postgres.hs @@ -27,6 +27,7 @@ module Share.Postgres readTransaction, writeTransaction, runTransaction, + runTransactionMode, tryRunTransaction, tryRunTransactionMode, unliftTransaction, @@ -235,6 +236,9 @@ writeTransaction t = transaction defaultIsolationLevel ReadWrite t runTransaction :: (MonadReader (Env.Env x) m, MonadIO m, HasCallStack) => Transaction Void a -> m a runTransaction t = runSession (writeTransaction t) +runTransactionMode :: (MonadReader (Env.Env x) m, MonadIO m, HasCallStack) => IsolationLevel -> Mode -> Transaction Void a -> m a +runTransactionMode isoLevel mode t = runSession (transaction isoLevel mode t) + -- | Run a transaction in the App monad, returning an Either error. -- -- Uses a Write transaction for simplicity since there's not much diff --git a/src/Share/Postgres/Search/DefinitionSync.hs b/src/Share/Postgres/Search/DefinitionSync.hs index 0356b56..84b85d4 100644 --- a/src/Share/Postgres/Search/DefinitionSync.hs +++ b/src/Share/Postgres/Search/DefinitionSync.hs @@ -4,6 +4,7 @@ module Share.Postgres.Search.DefinitionSync ( submitReleaseToBeSynced, claimUnsyncedRelease, insertDefinitionDocuments, + cleanIndexForRelease, ) where @@ -74,6 +75,15 @@ insertDefinitionDocuments docs = do Hasql.Jsonb $ Aeson.toJSON metadata ) +-- | Wipe out any rows for the given release, useful when re-indexing. 
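+--
+-- Together with insertDefinitionDocuments this makes re-indexing a release
+-- idempotent; a sketch of the intended call pattern (reindexRelease is a
+-- hypothetical wrapper, the two helpers are the real ones from this module):
+--
+-- @
+-- reindexRelease releaseId docs = do
+--   cleanIndexForRelease releaseId
+--   insertDefinitionDocuments docs
+-- @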
+cleanIndexForRelease :: ReleaseId -> Transaction e () +cleanIndexForRelease releaseId = do + execute_ + [sql| + DELETE FROM global_definition_search_docs + WHERE release_id = #{releaseId} + |] + -- | Convert a search token to a TSVector. -- -- >>> import Unison.Syntax.Name qualified as Name From 405fc8a8d071d3a2b1a2dde72665e1110980a58b Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Fri, 19 Jul 2024 10:30:05 -0700 Subject: [PATCH 23/48] Upgrade Hasql and add Pipelining! PG pipelining --- app/Env.hs | 5 +-- src/Share/Postgres.hs | 66 ++++++++++++++++++++++++++--------- src/Share/Postgres/Orphans.hs | 31 +++++++++------- stack.yaml | 8 ++++- stack.yaml.lock | 42 ++++++++++++++++++---- 5 files changed, 113 insertions(+), 39 deletions(-) diff --git a/app/Env.hs b/app/Env.hs index d16a017..240e1c2 100644 --- a/app/Env.hs +++ b/app/Env.hs @@ -23,6 +23,7 @@ import Share.Utils.Logging qualified as Logging import Share.Utils.Servant.Cookies qualified as Cookies import Share.Web.Authentication (cookieSessionTTL) import Hasql.Pool qualified as Pool +import Hasql.Pool.Config qualified as Pool import Network.URI (parseURI) import Servant.API qualified as Servant import System.Environment (lookupEnv) @@ -97,8 +98,8 @@ withEnv action = do let pgConnectionMaxIdleTime = Time.secondsToDiffTime (60 * 5) -- 5 minutes -- Limiting max lifetime helps cycle connections which may have accumulated memory cruft. let pgConnectionMaxLifetime = Time.secondsToDiffTime (60 * 60) -- 1 hour - pgConnectionPool <- - Pool.acquire postgresConnMax pgConnectionAcquisitionTimeout pgConnectionMaxLifetime pgConnectionMaxIdleTime (Text.encodeUtf8 postgresConfig) + let pgSettings = Pool.settings [Pool.staticConnectionSettings (Text.encodeUtf8 postgresConfig), Pool.size postgresConnMax, Pool.acquisitionTimeout pgConnectionAcquisitionTimeout, Pool.idlenessTimeout pgConnectionMaxIdleTime, Pool.agingTimeout pgConnectionMaxLifetime] + pgConnectionPool <- Pool.acquire pgSettings timeCache <- FL.newTimeCache FL.simpleTimeFormat -- E.g. 05/Sep/2023:13:23:56 -0700 sandboxedRuntime <- RT.startRuntime True RT.Persistent "share" let ctx = () diff --git a/src/Share/Postgres.hs b/src/Share/Postgres.hs index 0647d36..9be2ed9 100644 --- a/src/Share/Postgres.hs +++ b/src/Share/Postgres.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE DeriveAnyClass #-} {-# LANGUAGE ImpredicativeTypes #-} {-# LANGUAGE LiberalTypeSynonyms #-} @@ -8,6 +9,7 @@ module Share.Postgres ( -- * Types Transaction, + Pipeline, T, Session, Mode (..), @@ -19,6 +21,7 @@ module Share.Postgres Interp.DecodeField, RawBytes (..), Only (..), + QueryA (..), QueryM (..), decodeField, (:.) 
(..), @@ -39,6 +42,9 @@ module Share.Postgres tryRunSessionWithPool, unliftSession, defaultIsolationLevel, + pipelined, + pFor, + pFor_, -- * query Helpers rollback, @@ -69,6 +75,7 @@ import Control.Lens import Control.Monad.Except import Control.Monad.Reader import Control.Monad.State +import Data.Functor.Compose (Compose (..)) import Data.Map qualified as Map import Data.Maybe import Data.Text qualified as Text @@ -79,6 +86,7 @@ import Data.Void import Hasql.Decoders qualified as Decoders import Hasql.Encoders qualified as Encoders import Hasql.Interpolate qualified as Interp +import Hasql.Pipeline qualified as Hasql.Pipeline import Hasql.Pool qualified as Pool import Hasql.Session qualified as Hasql import Hasql.Session qualified as Session @@ -100,7 +108,7 @@ data TransactionError e -- | A transaction that may fail with an error 'e' (or throw an unrecoverable error) newtype Transaction e a = Transaction {unTransaction :: Hasql.Session (Either (TransactionError e) a)} - deriving (Functor, Applicative, Monad, MonadIO) via (ExceptT (TransactionError e) Hasql.Session) + deriving (Functor, Applicative, Monad) via (ExceptT (TransactionError e) Hasql.Session) instance MonadError e (Transaction e) where throwError = Transaction . pure . Left . Err @@ -110,6 +118,20 @@ instance MonadError e (Transaction e) where Left (Unrecoverable err) -> pure (Left (Unrecoverable err)) Right a -> pure (Right a) +-- | Applicative pipelining transaction +newtype Pipeline e a = Pipeline {unPipeline :: Hasql.Pipeline.Pipeline (Either (TransactionError e) a)} + deriving (Functor, Applicative) via (Compose Hasql.Pipeline.Pipeline (Either (TransactionError e))) + +-- | Run a pipeline in a transaction +pipelined :: Pipeline e a -> Transaction e a +pipelined p = Transaction (Hasql.pipeline (unPipeline p)) + +pFor :: (Traversable f) => f a -> (a -> Pipeline e b) -> Transaction e (f b) +pFor f p = pipelined $ for f p + +pFor_ :: (Foldable f) => f a -> (a -> Pipeline e b) -> Transaction e () +pFor_ f p = pipelined $ for_ f p + -- | A transaction that has no errors. Prefer using a fully polymorphic 'e' when possible, -- but this is very helpful when dealing with data type which include fields which are -- loaded on-demand from the DB. @@ -296,47 +318,57 @@ tryRunSessionWithPool pool s = do runSessionOrRespondError :: (HasCallStack, ToServerError e, Loggable e) => Session e a -> WebApp a runSessionOrRespondError t = tryRunSession t >>= either respondError pure --- | Represents any monad in which we can run a statement -class (Monad m) => QueryM m where +-- | Represents anywhere we can run a statement +class (Applicative m) => QueryA m where statement :: q -> Hasql.Statement q r -> m r + -- | Fail the transaction and whole request with an unrecoverable server error. + unrecoverableError :: (HasCallStack, ToServerError e, Loggable e, Show e) => e -> m a + +class (Monad m, QueryA m) => QueryM m where -- | Allow running IO actions in a transaction. These actions may be run multiple times if -- the transaction is retried. transactionUnsafeIO :: IO a -> m a - -- | Fail the transaction and whole request with an unrecoverable server error. 
- unrecoverableError :: (HasCallStack, ToServerError e, Loggable e, Show e) => e -> m a - -instance QueryM (Transaction e) where +instance QueryA (Transaction e) where statement q s = do transactionStatement q s - transactionUnsafeIO io = Transaction (Right <$> liftIO io) - unrecoverableError e = Transaction (pure (Left (Unrecoverable (someServerError e)))) -instance QueryM (Session e) where +instance QueryM (Transaction e) where + transactionUnsafeIO io = Transaction (Right <$> liftIO io) + +instance QueryA (Session e) where statement q s = do lift $ Session.statement q s + unrecoverableError e = throwError (Unrecoverable (someServerError e)) + +instance QueryM (Session e) where transactionUnsafeIO io = lift $ liftIO io - unrecoverableError e = throwError (Unrecoverable (someServerError e)) +instance QueryA (Pipeline e) where + statement q s = Pipeline (Right <$> Hasql.Pipeline.statement q s) -instance (QueryM m) => QueryM (ReaderT e m) where + unrecoverableError e = Pipeline $ pure (Left (Unrecoverable (someServerError e))) + +instance (QueryM m) => QueryA (ReaderT e m) where statement q s = lift $ statement q s + unrecoverableError e = lift $ unrecoverableError e + +instance (QueryM m) => QueryM (ReaderT e m) where transactionUnsafeIO io = lift $ transactionUnsafeIO io +instance (QueryM m) => QueryA (MaybeT m) where + statement q s = lift $ statement q s + unrecoverableError e = lift $ unrecoverableError e instance (QueryM m) => QueryM (MaybeT m) where - statement q s = lift $ statement q s - transactionUnsafeIO io = lift $ transactionUnsafeIO io - unrecoverableError e = lift $ unrecoverableError e - prepareStatements :: Bool prepareStatements = True @@ -352,7 +384,7 @@ query1Col sql = listToMaybe <$> queryListCol sql queryListCol :: forall a m. (QueryM m) => (Interp.DecodeField a) => Interp.Sql -> m [a] queryListCol sql = queryListRows @(Interp.OneColumn a) sql <&> coerce @[Interp.OneColumn a] @[a] -execute_ :: (QueryM m) => Interp.Sql -> m () +execute_ :: (QueryA m) => Interp.Sql -> m () execute_ sql = statement () (Interp.interp prepareStatements sql) queryExpect1Row :: forall r m. (HasCallStack) => (Interp.DecodeRow r, QueryM m) => Interp.Sql -> m r diff --git a/src/Share/Postgres/Orphans.hs b/src/Share/Postgres/Orphans.hs index 7b1d226..fa7fa81 100644 --- a/src/Share/Postgres/Orphans.hs +++ b/src/Share/Postgres/Orphans.hs @@ -203,20 +203,27 @@ instance Hasql.DecodeValue SqliteTermEdit.Typing where _ -> Nothing ) -instance ToServerError Hasql.QueryError where +instance ToServerError Hasql.SessionError where toServerError _ = (ErrorID "query-error", err500) -instance Logging.Loggable Hasql.QueryError where - toLog (Hasql.QueryError template params err) = - Logging.withSeverity Logging.Error . Logging.textLog $ - Text.unlines - [ "QueryError:", - indent (tShow err), - "TEMPLATE:", - indent (Text.decodeUtf8 template), - "PARAMS:", - indent (tShow params) - ] +instance Logging.Loggable Hasql.SessionError where + toLog = \case + (Hasql.QueryError template params err) -> + Logging.withSeverity Logging.Error . Logging.textLog $ + Text.unlines + [ "QueryError:", + indent (tShow err), + "TEMPLATE:", + indent (Text.decodeUtf8 template), + "PARAMS:", + indent (tShow params) + ] + (Hasql.PipelineError cmdErr) -> + Logging.withSeverity Logging.Error . Logging.textLog $ + Text.unlines + [ "PipelineError:", + indent (tShow cmdErr) + ] where indent :: Text -> Text indent = Text.unlines . fmap (" " <>) . 
Text.lines diff --git a/stack.yaml b/stack.yaml index 4a51a48..8bf136d 100644 --- a/stack.yaml +++ b/stack.yaml @@ -45,10 +45,16 @@ extra-deps: - monad-validate-1.3.0.0@sha256:eb6ddd5c9cf72ff0563cba604fa00291376e96138fdb4932d00ff3a99d66706e,2605 - raven-haskell-0.1.4.1@sha256:0d27e37968327faba577558a2ee4465ebfd3b6929b09cf4881dfa62a6873c85a,1393 - strings-1.1@sha256:0285dec4c8ab262359342b3e5ef1eb567074669461b9b38404f1cb870c881c5c,1617 -- hasql-interpolate-0.2.2.0@sha256:e6dcd161bd7147915f5f837b2dfc6f1710d6f0ce47341944ea1925194b8ed1fd,3206 - network-udp-0.0.0@sha256:408d2d4fa1a25e49e95752ee124cca641993404bb133ae10fb81daef22d876ae,1075 - recover-rtti-0.5.0@sha256:7d598b0c89dac9e170b488a7a50b322fcae06342fbd2da18cb8a7f93a0b44e68,4913 +# Bumping hasql up to get Pipelining mode +- hasql-1.8 +- hasql-pool-1.2.0.2 +- hasql-interpolate-1.0.1.0 +- postgresql-binary-0.14@sha256:3f3518f841cf80b107862800715bc64f43c2c71696c4129f63404c1ff61cc919,4025 +- postgresql-libpq-0.10.1.0@sha256:6a45edff0a9e30b32cda6e443107950492322622c4fbefc8fb4dcf6452dcf0b4,3203 + ghc-options: # All packages "$locals": -Wall -Wno-name-shadowing -Wno-missing-pattern-synonym-signatures -fprint-expanded-synonyms -fwrite-ide-info diff --git a/stack.yaml.lock b/stack.yaml.lock index 85ab95b..fbea855 100644 --- a/stack.yaml.lock +++ b/stack.yaml.lock @@ -53,13 +53,6 @@ packages: size: 212 original: hackage: strings-1.1@sha256:0285dec4c8ab262359342b3e5ef1eb567074669461b9b38404f1cb870c881c5c,1617 -- completed: - hackage: hasql-interpolate-0.2.2.0@sha256:e6dcd161bd7147915f5f837b2dfc6f1710d6f0ce47341944ea1925194b8ed1fd,3206 - pantry-tree: - sha256: e826a06d038ef9e2f1fdbaec5c0e3fb1baca63dbb463498fbf1e2d7540545c67 - size: 1276 - original: - hackage: hasql-interpolate-0.2.2.0@sha256:e6dcd161bd7147915f5f837b2dfc6f1710d6f0ce47341944ea1925194b8ed1fd,3206 - completed: hackage: network-udp-0.0.0@sha256:408d2d4fa1a25e49e95752ee124cca641993404bb133ae10fb81daef22d876ae,1075 pantry-tree: @@ -74,6 +67,41 @@ packages: size: 2489 original: hackage: recover-rtti-0.5.0@sha256:7d598b0c89dac9e170b488a7a50b322fcae06342fbd2da18cb8a7f93a0b44e68,4913 +- completed: + hackage: hasql-1.8@sha256:52b61231259a79733428a4b51f5661c0eb7934001f6a3b3b637203a4a745c951,5642 + pantry-tree: + sha256: 3aea9d8e5f3eccd419ba31336d0f2d737e0cc23b82fc0306cdc8975c281a272f + size: 3438 + original: + hackage: hasql-1.8 +- completed: + hackage: hasql-pool-1.2.0.2@sha256:621e3997d701b424a777fc6cd0218e385c89dff9417a933305d19b03820deb3f,2389 + pantry-tree: + sha256: 8dd1ee8e3a41894266708e28076a902c8d51b275af22c3f182d6bfe219ea7fa7 + size: 982 + original: + hackage: hasql-pool-1.2.0.2 +- completed: + hackage: hasql-interpolate-1.0.1.0@sha256:07980986467ed196e812a54c2762a42ceca56ca899cb4ef3cdb4f4191b07d338,3298 + pantry-tree: + sha256: f7d9f29c576c4d36cb2f44d2f9a672e08e84d79060928aa9ed005e9e30c35202 + size: 1276 + original: + hackage: hasql-interpolate-1.0.1.0 +- completed: + hackage: postgresql-binary-0.14@sha256:3f3518f841cf80b107862800715bc64f43c2c71696c4129f63404c1ff61cc919,4025 + pantry-tree: + sha256: 29cfa313f2642c8b1d3ca15afbd2911ab842ae130e65d99fb5f18e8a24df8c88 + size: 1661 + original: + hackage: postgresql-binary-0.14@sha256:3f3518f841cf80b107862800715bc64f43c2c71696c4129f63404c1ff61cc919,4025 +- completed: + hackage: postgresql-libpq-0.10.1.0@sha256:6a45edff0a9e30b32cda6e443107950492322622c4fbefc8fb4dcf6452dcf0b4,3203 + pantry-tree: + sha256: 1c0120f2cd6d15bfdf64060ebde4b037f01381a8d04d188d753b43e1c978d164 + size: 1096 + original: + hackage: 
postgresql-libpq-0.10.1.0@sha256:6a45edff0a9e30b32cda6e443107950492322622c4fbefc8fb4dcf6452dcf0b4,3203 snapshots: - completed: sha256: 8e7996960d864443a66eb4105338bbdd6830377b9f6f99cd5527ef73c10c01e7 From 6d2d7dcb2b742c24269fda5cf95b224c15de3305 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Fri, 19 Jul 2024 13:33:19 -0700 Subject: [PATCH 24/48] Fixed --- src/Share/Postgres/Queries.hs | 8 ++++---- src/Share/Postgres/Search/DefinitionSync.hs | 2 +- src/Unison/Server/Share/DefinitionSummary.hs | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Share/Postgres/Queries.hs b/src/Share/Postgres/Queries.hs index b76885b..93a9400 100644 --- a/src/Share/Postgres/Queries.hs +++ b/src/Share/Postgres/Queries.hs @@ -21,7 +21,7 @@ import Share.Github import Share.IDs import Share.IDs qualified as IDs import Share.OAuth.Types -import Share.Postgres (QueryM (unrecoverableError)) +import Share.Postgres (unrecoverableError) import Share.Postgres qualified as PG import Share.Postgres.IDs import Share.Postgres.LooseCode.Queries qualified as LCQ @@ -41,13 +41,13 @@ import Share.Web.Share.Releases.Types (ReleaseStatusFilter (..), StatusUpdate (. import Unison.Util.List qualified as Utils import Unison.Util.Monoid (intercalateMap) -expectUserByUserId :: PG.QueryM m => UserId -> m User +expectUserByUserId :: (PG.QueryM m) => UserId -> m User expectUserByUserId uid = do userByUserId uid >>= \case Just user -> pure user Nothing -> unrecoverableError $ EntityMissing (ErrorID "user:missing") ("User with id " <> IDs.toText uid <> " not found") -userByUserId :: PG.QueryM m => UserId -> m (Maybe User) +userByUserId :: (PG.QueryM m) => UserId -> m (Maybe User) userByUserId uid = do PG.query1Row [PG.sql| @@ -711,7 +711,7 @@ createBranch !_nlReceipt projectId branchName contributorId causalId mergeTarget |] createRelease :: - PG.QueryM m => + (PG.QueryM m) => NameLookupReceipt -> ProjectId -> ReleaseVersion -> diff --git a/src/Share/Postgres/Search/DefinitionSync.hs b/src/Share/Postgres/Search/DefinitionSync.hs index 84b85d4..f38f95e 100644 --- a/src/Share/Postgres/Search/DefinitionSync.hs +++ b/src/Share/Postgres/Search/DefinitionSync.hs @@ -53,7 +53,7 @@ claimUnsyncedRelease = do -- | Save definition documents to be indexed for search. 
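--
-- Each row's INSERT is independent of the others, so the whole batch is
-- issued through a single pipelined round trip rather than one per row.
-- Equivalently, a sketch using the pFor_ helper (insertOneSql stands in
-- for the statement built in the loop body below):
--
-- @
-- pFor_ docsTable (execute_ . insertOneSql)
-- @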
insertDefinitionDocuments :: [DefinitionDocument ProjectId ReleaseId Name (NameSegment, ShortHash)] -> Transaction e () -insertDefinitionDocuments docs = do +insertDefinitionDocuments docs = pipelined $ do let docsTable = docRow <$> docs for_ docsTable \(projectId, releaseId, fqn, tokens, metadata) -> do -- Ideally we'd do a bulk insert, but Hasql doesn't provide any method for passing arrays of diff --git a/src/Unison/Server/Share/DefinitionSummary.hs b/src/Unison/Server/Share/DefinitionSummary.hs index f73e3c9..112c097 100644 --- a/src/Unison/Server/Share/DefinitionSummary.hs +++ b/src/Unison/Server/Share/DefinitionSummary.hs @@ -26,7 +26,7 @@ import Servant.Server (err500) import Share.Backend qualified as Backend import Share.Codebase qualified as Codebase import Share.Codebase.Types (CodebaseM) -import Share.Postgres (QueryM (unrecoverableError)) +import Share.Postgres (unrecoverableError) import Share.Postgres.Hashes.Queries qualified as HashQ import Share.Postgres.IDs (BranchHashId, CausalId) import Share.Postgres.NameLookups.Ops qualified as NLOps From 1cd2af87797812369a960fa1bc91bc7d6cd7d226 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Fri, 19 Jul 2024 23:24:20 -0700 Subject: [PATCH 25/48] Add arities --- sql/2024-07-18-00-00_defn_search_sync.sql | 3 + .../BackgroundJobs/Search/DefinitionSync.hs | 65 +++++++++++-------- .../Search/DefinitionSync/Types.hs | 5 +- src/Share/Postgres/Search/DefinitionSync.hs | 16 +++-- 4 files changed, 54 insertions(+), 35 deletions(-) diff --git a/sql/2024-07-18-00-00_defn_search_sync.sql b/sql/2024-07-18-00-00_defn_search_sync.sql index 68dbd99..08fa1cd 100644 --- a/sql/2024-07-18-00-00_defn_search_sync.sql +++ b/sql/2024-07-18-00-00_defn_search_sync.sql @@ -16,6 +16,9 @@ CREATE TABLE global_definition_search_docs ( -- Fully qualified name name TEXT NOT NULL, search_tokens TSVECTOR NOT NULL, + -- Number of arguments. 0 for values. + arity INT NOT NULL, + -- Contains the rendered type signature, type, hash, etc. -- so we don't need to look up types for hundreds of search results on the fly. 
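    -- (For a term this is its rendered TermSummary; for a type, its TypeSummary.)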
metadata JSONB NOT NULL, diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync.hs b/src/Share/BackgroundJobs/Search/DefinitionSync.hs index ad25b5c..eeb0815 100644 --- a/src/Share/BackgroundJobs/Search/DefinitionSync.hs +++ b/src/Share/BackgroundJobs/Search/DefinitionSync.hs @@ -4,11 +4,13 @@ module Share.BackgroundJobs.Search.DefinitionSync (worker) where import Control.Lens import Control.Monad.Except +import Data.Either (isRight) import Data.Generics.Product (HasField (..)) import Data.List qualified as List import Data.Map qualified as Map import Data.Map.Monoidal.Strict (MonoidalMap) import Data.Map.Monoidal.Strict qualified as MonMap +import Data.Monoid (Any (..), Sum (..)) import Data.Set qualified as Set import Data.Set.Lens (setOf) import Ki.Unlifted qualified as Ki @@ -131,7 +133,7 @@ syncTerms namesPerspective bhId projectId releaseId termsCursor = do typ <- lift (Codebase.loadTypeOfReferent ref) `whenNothingM` throwError (NoTypeSigForTerm fqn ref) termSummary <- lift $ Summary.termSummaryForReferent ref typ (Just fqn) bhId Nothing Nothing let sh = Referent.toShortHash ref - let refTokens = tokensForTerm fqn ref typ termSummary + let (refTokens, arity) = tokensForTerm fqn ref typ termSummary let dd = DefinitionDocument { project = projectId, @@ -139,6 +141,7 @@ syncTerms namesPerspective bhId projectId releaseId termsCursor = do fqn, hash = sh, tokens = refTokens, + arity = arity, metadata = ToTTermSummary termSummary } pure dd @@ -158,12 +161,13 @@ syncTerms namesPerspective bhId projectId releaseId termsCursor = do pure errs -- | Compute the search tokens for a term given its name, hash, and type signature -tokensForTerm :: (Var.Var v) => Name -> Referent -> Type.Type v a -> Summary.TermSummary -> Set (DefnSearchToken TypeReference) +tokensForTerm :: (Var.Var v) => Name -> Referent -> Type.Type v a -> Summary.TermSummary -> (Set (DefnSearchToken TypeReference), Int) tokensForTerm name ref typ (Summary.TermSummary {tag}) = do let sh = Referent.toShortHash ref baseTokens = Set.fromList [NameToken name, HashToken sh] tagTokens = Set.singleton $ TermTagToken tag - in baseTokens <> typeSigTokens typ <> tagTokens + (tsTokens, arity) = typeSigTokens typ + in (baseTokens <> tsTokens <> tagTokens, arity) data TokenGenEnv v = TokenGenEnv { varIds :: Map v VarId @@ -178,14 +182,15 @@ data TokenGenState v = TokenGenState type TokenGenM v = ReaderT (TokenGenEnv v) (State (TokenGenState v)) -- | Compute var occurrence and type ref occurrence search tokens from a type signature. -typeSigTokens :: forall v ann. (Var.Var v) => Type.Type v ann -> Set (DefnSearchToken TypeReference) +typeSigTokens :: forall v ann. (Var.Var v) => Type.Type v ann -> (Set (DefnSearchToken TypeReference), Int) typeSigTokens typ = - let occMap :: MonoidalMap (Either VarId TypeReference) Occurrence - occMap = flip evalState initState . flip runReaderT (TokenGenEnv mempty) $ ABT.cata alg typ + let occMap :: MonoidalMap (Either VarId TypeReference) (Occurrence, Any) + arity :: Int + (occMap, Sum arity) = flip evalState initState . 
flip runReaderT (TokenGenEnv mempty) $ ABT.cata alg typ (varIds, typeRefs) = MonMap.toList occMap & foldMap \case - (Left vId, occ) -> ([(vId, occ)], []) - (Right typeRef, occ) -> ([], [(typeRef, occ)]) + (Left vId, (occ, ret)) -> ([(vId, (occ, ret))], []) + (Right typeRef, (occ, ret)) -> ([], [(typeRef, (occ, ret))]) expandedVarTokens = varIds -- Rewrite varIds normalized by number of occurrences, @@ -200,36 +205,40 @@ typeSigTokens typ = & imap (\i (_vId, occ) -> (VarId i, occ)) -- Expand a token for each occurrence of a variable, this way -- 'Text' still matches the type 'Text -> Text' - & foldMap (\(vId, occ) -> (TypeVarToken vId) <$> [1 .. occ]) + & foldMap + ( \(vId, (occ, Any isReturn)) -> + Monoid.whenM isReturn [TypeVarToken vId Nothing] <> ((TypeVarToken vId . Just) <$> [1 .. occ]) + ) & Set.fromList expandedTypeRefTokens = typeRefs - & foldMap \(typeRef, occ) -> - TypeMentionToken typeRef <$> [1 .. occ] + & foldMap \(typeRef, (occ, Any isReturn)) -> + Monoid.whenM isReturn [TypeMentionToken typeRef Nothing] <> (TypeMentionToken typeRef . Just <$> [1 .. occ]) & Set.fromList - in expandedVarTokens <> expandedTypeRefTokens + in (expandedVarTokens <> expandedTypeRefTokens, arity) where initState = TokenGenState 0 -- Cata algebra for collecting type reference tokens from a type signature. alg :: ann -> - ABT.ABT Type.F v (TokenGenM v (MonoidalMap (Either VarId TypeReference) Occurrence)) -> - TokenGenM v (MonoidalMap (Either VarId TypeReference) Occurrence) + ABT.ABT Type.F v (TokenGenM v (MonoidalMap (Either VarId TypeReference) (Occurrence, Any {- Is return type -}), Sum Int)) -> + TokenGenM v (MonoidalMap (Either VarId TypeReference) (Occurrence, Any {- Is return type -}), Sum Int) alg _ann = \case ABT.Var v -> do vId <- varIdFor v - pure $ MonMap.singleton (Left vId) 1 + pure $ (MonMap.singleton (Left vId) (1, Any True), mempty) ABT.Cycle a -> a ABT.Abs v r -> do vId <- nextVarId local (field @"varIds" . at v ?~ vId) r ABT.Tm tf -> case tf of Type.Ref typeRef -> do - pure $ MonMap.singleton (Right typeRef) 1 + pure $ (MonMap.singleton (Right typeRef) (1, Any True), mempty) Type.Arrow a b -> do - aTokens <- a - bTokens <- b - pure $ aTokens <> bTokens + -- Anything on the left of an arrow is not a return type. + aTokens <- a <&> \(toks, _arity) -> MonMap.map (\(occ, _) -> (occ, Any False)) toks + (bTokens, arity) <- b + pure $ (aTokens <> bTokens, arity + 1) Type.Ann a _kind -> a -- At the moment we don't handle higher kinded type applications differently than regular -- type mentions. @@ -238,12 +247,15 @@ typeSigTokens typ = bTokens <- b pure $ aTokens <> bTokens Type.Effect a b -> do - aTokens <- a - bTokens <- b - pure $ aTokens <> bTokens - Type.Effects as -> Monoid.foldMapM id as + -- Don't include vars from effects. + aTokens <- removeVars . fst <$> a + (bTokens, bArity) <- b + pure $ (aTokens <> bTokens, bArity) + Type.Effects as -> first removeVars <$> Monoid.foldMapM id as Type.Forall a -> a Type.IntroOuter a -> a + removeVars :: MonoidalMap (Either VarId TypeReference) (Occurrence, Any) -> MonoidalMap (Either VarId TypeReference) (Occurrence, Any) + removeVars = MonMap.filterWithKey (\k _ -> isRight k) nextVarId :: TokenGenM v VarId nextVarId = field @"nextVarId" <<%= succ varIdFor :: v -> TokenGenM v VarId @@ -263,11 +275,11 @@ syncTypes namesPerspective projectId releaseId typesCursor = do Cursors.foldBatched typesCursor defnBatchSize \types -> do (errs, refDocs) <- types & foldMapM \(fqn, ref) -> fmap (either (\err -> ([err], [])) (\doc -> ([], [doc]))) . 
runExceptT $ do - declTokens <- case ref of - Reference.Builtin _ -> pure mempty + (declTokens, declArity) <- case ref of + Reference.Builtin _ -> pure (mempty, 0) Reference.DerivedId refId -> do decl <- lift (Codebase.loadTypeDeclaration refId) `whenNothingM` throwError (NoDeclForType fqn ref) - pure $ tokensForDecl refId decl + pure $ (tokensForDecl refId decl, length . DD.bound $ DD.asDataDecl decl) let basicTokens = Set.fromList [NameToken fqn, HashToken $ Reference.toShortHash ref] typeSummary <- lift $ Summary.typeSummaryForReference ref (Just fqn) Nothing @@ -279,6 +291,7 @@ syncTypes namesPerspective projectId releaseId typesCursor = do fqn, hash = sh, tokens = declTokens <> basicTokens, + arity = declArity, metadata = ToTTypeSummary typeSummary } pure dd diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs b/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs index 537c0df..0857f07 100644 --- a/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs +++ b/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs @@ -69,9 +69,9 @@ data DefnSearchToken typeRef = -- Allows searching by literal name NameToken Name | -- A mention of some external type or ability - TypeMentionToken typeRef Occurrence + TypeMentionToken typeRef (Maybe Occurrence {- Nothing means it's a return value -}) | -- Allows searching for type sigs with type variables - TypeVarToken VarId Occurrence + TypeVarToken VarId (Maybe Occurrence {- Nothing means it's a return value -}) | -- Allows searching by component hash -- Note: not actually a _short_ hash, it's a full hash with the referent info tagged -- on. @@ -123,6 +123,7 @@ data DefinitionDocument proj release name typeRef = DefinitionDocument -- For now we only index types by their final name segment, may need to revisit this -- in the future. tokens :: Set (DefnSearchToken typeRef), + arity :: Int, metadata :: TermOrTypeSummary } deriving (Show, Generic) diff --git a/src/Share/Postgres/Search/DefinitionSync.hs b/src/Share/Postgres/Search/DefinitionSync.hs index f38f95e..8114e71 100644 --- a/src/Share/Postgres/Search/DefinitionSync.hs +++ b/src/Share/Postgres/Search/DefinitionSync.hs @@ -55,23 +55,24 @@ claimUnsyncedRelease = do insertDefinitionDocuments :: [DefinitionDocument ProjectId ReleaseId Name (NameSegment, ShortHash)] -> Transaction e () insertDefinitionDocuments docs = pipelined $ do let docsTable = docRow <$> docs - for_ docsTable \(projectId, releaseId, fqn, tokens, metadata) -> do + for_ docsTable \(projectId, releaseId, fqn, tokens, arity, metadata) -> do -- Ideally we'd do a bulk insert, but Hasql doesn't provide any method for passing arrays of -- arrays as parameters, so instead we insert each record individually so we can use our -- only level of array-ness for the tokens. 
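    -- Each token is rendered to text by searchTokenToText below; e.g. a type
    -- variable used once as an argument and also in return position yields
    -- two entries, sketched as (assuming Occurrence's numeric literal):
    --
    -- @
    -- searchTokenToText (TypeVarToken (VarId 0) (Just 1))  -- first occurrence
    -- searchTokenToText (TypeVarToken (VarId 0) Nothing)   -- return position
    -- @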
execute_ $ [sql| - INSERT INTO global_definition_search_docs (project_id, release_id, name, search_tokens, metadata) - VALUES (#{projectId}, #{releaseId}, #{fqn}, array_to_tsvector(#{tokens}), #{metadata}::jsonb) + INSERT INTO global_definition_search_docs (project_id, release_id, name, search_tokens, arity, metadata) + VALUES (#{projectId}, #{releaseId}, #{fqn}, array_to_tsvector(#{tokens}), #{arity}, #{metadata}::jsonb) ON CONFLICT DO NOTHING |] where - docRow :: DefinitionDocument ProjectId ReleaseId Name (NameSegment, ShortHash) -> (ProjectId, ReleaseId, Text, [Text], Hasql.Jsonb) - docRow DefinitionDocument {project, release, fqn, tokens, metadata} = + docRow :: DefinitionDocument ProjectId ReleaseId Name (NameSegment, ShortHash) -> (ProjectId, ReleaseId, Text, [Text], Int32, Hasql.Jsonb) + docRow DefinitionDocument {project, release, fqn, tokens, arity, metadata} = ( project, release, Name.toText fqn, foldMap searchTokenToText $ Set.toList tokens, + fromIntegral arity, Hasql.Jsonb $ Aeson.toJSON metadata ) @@ -147,10 +148,11 @@ searchTokenToText = \case tagType = "t" typeModType :: Text typeModType = "mod" - makeSearchToken :: Text -> Text -> Maybe Occurrence -> Text + makeSearchToken :: Text -> Text -> Maybe (Maybe Occurrence) -> Text makeSearchToken kind txt occ = do Text.intercalate ":" $ [kind, Text.replace ":" "" txt] <> case occ of - Just (Occurrence n) -> [tShow n] + Just (Just (Occurrence n)) -> [tShow n] + Just Nothing -> ["r"] Nothing -> [] From dd0ce24cf34de469807f2c00fbe9d667b6823119 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Mon, 22 Jul 2024 10:42:08 -0700 Subject: [PATCH 26/48] Add Gist trigram index on name --- ...efn_search_sync.sql => 2024-07-18-00-00_defn_search.sql} | 6 ++++++ 1 file changed, 6 insertions(+) rename sql/{2024-07-18-00-00_defn_search_sync.sql => 2024-07-18-00-00_defn_search.sql} (85%) diff --git a/sql/2024-07-18-00-00_defn_search_sync.sql b/sql/2024-07-18-00-00_defn_search.sql similarity index 85% rename from sql/2024-07-18-00-00_defn_search_sync.sql rename to sql/2024-07-18-00-00_defn_search.sql index 08fa1cd..b44d1ac 100644 --- a/sql/2024-07-18-00-00_defn_search_sync.sql +++ b/sql/2024-07-18-00-00_defn_search.sql @@ -2,6 +2,9 @@ -- In this case it allows us to include the project_id and release_id in the GIN index for search tokens. CREATE EXTENSION IF NOT EXISTS btree_gin; +-- Allows us to create trigram indexes for fuzzy searching. +CREATE EXTENSION IF NOT EXISTS pg_trgm; + -- New table for coordinating background job for syncing global definitions for search. -- Table of all releases which have been published, but not yet synced to the global definition search index. @@ -31,3 +34,6 @@ CREATE TABLE global_definition_search_docs ( -- Index for searching global definitions by 'search token', with an optional project/release filter. -- P.s. there's a search token type for name, so we don't need to index that separately. CREATE INDEX global_definition_search_tokens ON global_definition_search_docs USING GIN(search_tokens, project_id, release_id); + +-- Index for fuzzy-searching on the fully qualified name. 
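+-- e.g. it accelerates word-similarity lookups of the form:
+--   SELECT name FROM global_definition_search_docs WHERE 'map' <% name;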
+CREATE INDEX global_definition_search_name_trigram ON global_definition_search_docs USING GIST (name gist_trgm_ops); From bb90c564e4c76d40d567a48d541100b7b03503c9 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Mon, 22 Jul 2024 10:42:08 -0700 Subject: [PATCH 27/48] Define API for definition search --- src/Share/Web/Share/API.hs | 21 ++++++++++++++++++++- src/Share/Web/Share/Types.hs | 13 ++++++++++++- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/src/Share/Web/Share/API.hs b/src/Share/Web/Share/API.hs index d0f25d1..429dc89 100644 --- a/src/Share/Web/Share/API.hs +++ b/src/Share/Web/Share/API.hs @@ -3,18 +3,19 @@ module Share.Web.Share.API where +import Servant import Share.IDs import Share.OAuth.Session (AuthenticatedSession, AuthenticatedUserId, MaybeAuthenticatedSession) import Share.Prelude (NonEmpty) import Share.Utils.API import Share.Utils.Caching import Share.Utils.Servant +import Share.Utils.Unison (ProjectShortHandParam) import Share.Web.Share.Branches.API (UserBranchesAPI) import Share.Web.Share.CodeBrowsing.API (CodeBrowseAPI) import Share.Web.Share.Contributions.API (ContributionsByUserAPI) import Share.Web.Share.Projects.API (ProjectsAPI) import Share.Web.Share.Types -import Servant type UserAPI = MaybeAuthenticatedSession @@ -51,6 +52,24 @@ type SearchEndpoint = :> QueryParam "limit" Limit :> Get '[JSON] [SearchResult] +-- | Search for names to use in a definition search. +type SearchDefinitionNamesEndpoint = + MaybeAuthenticatedSession + :> RequiredQueryParam "query" Query + :> QueryParam "limit" Limit + :> QueryParam "user-filter" UserHandle + :> QueryParam "project-filter" ProjectShortHand + :> Get '[JSON] [DefinitionNameSearchResult] + +-- | Submit a definition search +type SearchDefinitionsEndpoint = + MaybeAuthenticatedSession + :> RequiredQueryParam "query" Query + :> QueryParam "limit" Limit + :> QueryParam "user-filter" UserHandle + :> QueryParam "project-filter" ProjectShortHand + :> Get '[JSON] [DefinitionSearchResult] + type AccountAPI = AuthenticatedSession :> ( AccountEndpoint diff --git a/src/Share/Web/Share/Types.hs b/src/Share/Web/Share/Types.hs index 48f1c07..88e4490 100644 --- a/src/Share/Web/Share/Types.hs +++ b/src/Share/Web/Share/Types.hs @@ -6,12 +6,13 @@ module Share.Web.Share.Types where import Data.Aeson (KeyValue ((.=)), ToJSON (..)) import Data.Aeson qualified as Aeson +import Network.URI (URI) +import Share.BackgroundJobs.Search.DefinitionSync.Types qualified as DefSync import Share.IDs import Share.Prelude import Share.Project (ProjectVisibility) import Share.Utils.API (NullableUpdate, parseNullableUpdate) import Share.Utils.URI -import Network.URI (URI) import Unison.Server.Doc (Doc) data UpdateUserRequest = UpdateUserRequest @@ -151,3 +152,13 @@ instance ToJSON UserDisplayInfo where "avatarUrl" Aeson..= avatarUrl, "userId" Aeson..= userId ] + +data DefinitionNameSearchResult = DefinitionNameSearchResult + +data DefinitionSearchResult + = DefinitionSearchResult + { fqn :: Name, + summary :: DefSync.TermOrTypeSummary, + project :: ProjectShortHand, + release :: ReleaseShortHand + } From 647e30a5fba880033538db06ba550a36e716f254 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Mon, 22 Jul 2024 10:42:08 -0700 Subject: [PATCH 28/48] Start implementing name search endpoint --- share-api.cabal | 2 +- .../BackgroundJobs/Search/DefinitionSync.hs | 5 ++--- .../Queries.hs} | 21 +++++++++++++++++- src/Share/Web/Share/Impl.hs | 22 +++++++++++++++++++ 4 files changed, 45 insertions(+), 5 deletions(-) rename 
src/Share/Postgres/Search/{DefinitionSync.hs => DefinitionSearch/Queries.hs} (85%) diff --git a/share-api.cabal b/share-api.cabal index b3a6719..34e2b58 100644 --- a/share-api.cabal +++ b/share-api.cabal @@ -70,7 +70,7 @@ library Share.Postgres.Projects.Queries Share.Postgres.Queries Share.Postgres.Refs.Types - Share.Postgres.Search.DefinitionSync + Share.Postgres.Search.DefinitionSearch.Queries Share.Postgres.Serialization Share.Postgres.Sync.Conversions Share.Postgres.Sync.Queries diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync.hs b/src/Share/BackgroundJobs/Search/DefinitionSync.hs index eeb0815..c61e86b 100644 --- a/src/Share/BackgroundJobs/Search/DefinitionSync.hs +++ b/src/Share/BackgroundJobs/Search/DefinitionSync.hs @@ -28,8 +28,7 @@ import Share.Postgres.IDs (BranchHashId) import Share.Postgres.NameLookups.Ops qualified as NLOps import Share.Postgres.NameLookups.Types qualified as NL import Share.Postgres.Queries qualified as PG -import Share.Postgres.Search.DefinitionSync qualified as DDQ -import Share.Postgres.Search.DefinitionSync qualified as DefnSyncQ +import Share.Postgres.Search.DefinitionSearch.Queries qualified as DDQ import Share.Prelude import Share.Project (Project (..), ProjectVisibility (..)) import Share.Release (Release (..)) @@ -79,7 +78,7 @@ worker scope = do newWorker scope "search:defn-sync" $ forever do Logging.logInfoText "Syncing definitions..." Metrics.recordDefinitionSearchIndexDuration $ PG.runTransactionMode PG.ReadCommitted PG.ReadWrite $ do - mayReleaseId <- DefnSyncQ.claimUnsyncedRelease + mayReleaseId <- DDQ.claimUnsyncedRelease Debug.debugM Debug.Temp "Syncing release" mayReleaseId for_ mayReleaseId (syncRelease authZReceipt) liftIO $ UnliftIO.threadDelay $ pollingIntervalSeconds * 1000000 diff --git a/src/Share/Postgres/Search/DefinitionSync.hs b/src/Share/Postgres/Search/DefinitionSearch/Queries.hs similarity index 85% rename from src/Share/Postgres/Search/DefinitionSync.hs rename to src/Share/Postgres/Search/DefinitionSearch/Queries.hs index 8114e71..e3908ab 100644 --- a/src/Share/Postgres/Search/DefinitionSync.hs +++ b/src/Share/Postgres/Search/DefinitionSearch/Queries.hs @@ -1,6 +1,6 @@ {-# LANGUAGE TypeOperators #-} -module Share.Postgres.Search.DefinitionSync +module Share.Postgres.Search.DefinitionSearch.Queries ( submitReleaseToBeSynced, claimUnsyncedRelease, insertDefinitionDocuments, @@ -156,3 +156,22 @@ searchTokenToText = \case Just (Just (Occurrence n)) -> [tShow n] Just Nothing -> ["r"] Nothing -> [] + +defNameSearch :: Maybe UserId -> Maybe (Either ProjectId ReleaseId) -> Query -> Limit -> Transaction e [(ProjectId, ReleaseId, Name)] +defNameSearch mayCaller mayFilter (Query query) = do + let filters = case mayFilter of + Left projId -> [sql| AND project_id = #{projId} |] + Right relId -> [sql| AND release_id = #{relId} |] + query1Col + [sql| + SELECT project_id, release_id, name FROM global_definition_search_docs doc + JOIN projects p ON p.id = doc.project_id + WHERE + -- Search name by a trigram 'word similarity' + -- which will match if the query is similar to any 'word' (e.g. name segment) + -- in the name. 
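+          -- e.g. the query 'foldMap' matches 'Data.Foldable.foldMap' because
+          -- it is close to the final segment, even though the full name is
+          -- much longer.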
+ #{query} <% name + AND (NOT p.private OR (#{mayCaller} IS NOT NULL AND EXISTS (SELECT FROM accessible_private_projects WHERE user_id = #{mayCaller} AND project_id = p.id))) + ^{filters} + ORDER BY (similarity(#{query}, name)) DESC + |] diff --git a/src/Share/Web/Share/Impl.hs b/src/Share/Web/Share/Impl.hs index 8353cef..ce69c2f 100644 --- a/src/Share/Web/Share/Impl.hs +++ b/src/Share/Web/Share/Impl.hs @@ -34,6 +34,7 @@ import Share.Web.Share.API qualified as Share import Share.Web.Share.Branches.Impl qualified as Branches import Share.Web.Share.CodeBrowsing.API (CodeBrowseAPI) import Share.Web.Share.Contributions.Impl qualified as Contributions +import Share.Web.Share.Contributions.Types (ShareContribution (projectShortHand)) import Share.Web.Share.Projects.Impl qualified as Projects import Share.Web.Share.Types import Unison.Codebase.Path qualified as Path @@ -351,6 +352,27 @@ searchEndpoint (MaybeAuthedUserID callerUserId) query (fromMaybe (Limit 20) -> l in SearchResultProject projectShortHand summary visibility pure $ userResults <> projectResults +searchDefinitionNamesEndpoint :: + Maybe UserId -> + Query -> + Maybe Limit -> + Maybe UserHandle -> + Maybe ProjectShortHandParam -> + WebApp [DefinitionNameSearchResult] +searchDefinitionNamesEndpoint callerUserId query mayLimit userFilter projectFilter = do + filter <- runMaybeT $ resolveProjectFilter <|> resolveUserFilter + DDQ.defNameSearch callerUserId filter query limit + where + limit = fromMaybe (Limit 20) mayLimit + resolveProjectFilter = do + projectShortHand <- hoistMaybe projectFilter + Project {projectId} <- lift $ PG.runTransaction $ Q.projectByShortHand projectShortHand + pure $ Left projectId + resolveUserFilter = do + userHandle <- hoistMaybe userFilter + User {user_id} <- lift $ PG.runTransaction $ Q.userByHandle userHandle + pure $ Right user_id + accountInfoEndpoint :: Session -> WebApp UserAccountInfo accountInfoEndpoint Session {sessionUserId} = do User {user_name, avatar_url, user_email, handle, user_id} <- PGO.expectUserById sessionUserId From bc27ea2bfe8464cbaa619f6f2974bca15ebcfd26 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Mon, 22 Jul 2024 15:32:17 -0700 Subject: [PATCH 29/48] Implement stub of searching definition names --- share-api.cabal | 1 + share-utils/src/Share/Utils/IDs.hs | 3 + .../BackgroundJobs/Search/DefinitionSync.hs | 1 + .../Search/DefinitionSync/Types.hs | 2 +- src/Share/IDs.hs | 7 +- src/Share/Postgres/Orphans.hs | 12 ++ .../Search/DefinitionSearch/Queries.hs | 57 +++++--- src/Share/Web/Share/API.hs | 2 +- src/Share/Web/Share/Branches/Impl.hs | 3 +- src/Share/Web/Share/CodeBrowsing/API.hs | 4 +- src/Share/Web/Share/Impl.hs | 34 +++-- src/Share/Web/Share/Releases/Impl.hs | 3 +- src/Share/Web/Share/Types.hs | 7 +- src/Unison/Server/Share/DefinitionSummary.hs | 114 +-------------- .../Server/Share/DefinitionSummary/Types.hs | 132 ++++++++++++++++++ 15 files changed, 236 insertions(+), 146 deletions(-) create mode 100644 src/Unison/Server/Share/DefinitionSummary/Types.hs diff --git a/share-api.cabal b/share-api.cabal index 34e2b58..0c04841 100644 --- a/share-api.cabal +++ b/share-api.cabal @@ -152,6 +152,7 @@ library Unison.Server.NameSearch.Postgres Unison.Server.Share.Definitions Unison.Server.Share.DefinitionSummary + Unison.Server.Share.DefinitionSummary.Types Unison.Server.Share.Docs Unison.Server.Share.FuzzyFind Unison.Server.Share.NamespaceDetails diff --git a/share-utils/src/Share/Utils/IDs.hs b/share-utils/src/Share/Utils/IDs.hs index 42d029d..39581fb 100644 --- 
a/share-utils/src/Share/Utils/IDs.hs +++ b/share-utils/src/Share/Utils/IDs.hs @@ -122,6 +122,9 @@ instance (IsID a, Typeable a) => Hasql.DecodeValue (UsingID a) where Hasql.decodeValue & Decoder.refine \txt -> fromText txt <&> UsingID +instance (IsID a) => From (UsingID a) Text where + from = toText + -- | CI doesnt' expose its internal constructor so we can't derive via without adding our own -- instances. newtype CaseInsensitiveID = CaseInsensitiveID (CI Text) diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync.hs b/src/Share/BackgroundJobs/Search/DefinitionSync.hs index c61e86b..2534ce8 100644 --- a/src/Share/BackgroundJobs/Search/DefinitionSync.hs +++ b/src/Share/BackgroundJobs/Search/DefinitionSync.hs @@ -52,6 +52,7 @@ import Unison.PrettyPrintEnvDecl.Postgres qualified as PPEPostgres import Unison.Reference (TypeReference) import Unison.Reference qualified as Reference import Unison.Server.Share.DefinitionSummary qualified as Summary +import Unison.Server.Share.DefinitionSummary.Types qualified as Summary import Unison.Server.Types qualified as Server.Types import Unison.ShortHash (ShortHash) import Unison.Type qualified as Type diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs b/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs index 0857f07..dd87d41 100644 --- a/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs +++ b/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs @@ -16,7 +16,7 @@ import Data.Text qualified as Text import Share.Prelude import Unison.DataDeclaration qualified as DD import Unison.Name (Name) -import Unison.Server.Share.DefinitionSummary (TermSummary (..), TypeSummary (..)) +import Unison.Server.Share.DefinitionSummary.Types (TermSummary (..), TypeSummary (..)) import Unison.Server.Types (TermTag, TypeTag) import Unison.ShortHash (ShortHash) diff --git a/src/Share/IDs.hs b/src/Share/IDs.hs index 68ff0b0..fd90b93 100644 --- a/src/Share/IDs.hs +++ b/src/Share/IDs.hs @@ -5,6 +5,7 @@ {-# LANGUAGE FunctionalDependencies #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE KindSignatures #-} +{-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE UndecidableInstances #-} module Share.IDs @@ -357,8 +358,10 @@ branchNameParser = do Megaparsec.observing eof >>= \case Left _ -> pure () Right () -> customFailure BranchNameParseFailure'Empty - (BranchNameTag'ReleaseDraft,) . releaseDraftFixup <$> releaseDraftParser - <|> (BranchNameTag'Plain,) <$> plainBranchParser + (BranchNameTag'ReleaseDraft,) . 
releaseDraftFixup + <$> releaseDraftParser + <|> (BranchNameTag'Plain,) + <$> plainBranchParser where releaseDraftParser :: Parsec BranchNameParseFailure Text ReleaseVersion releaseDraftParser = do diff --git a/src/Share/Postgres/Orphans.hs b/src/Share/Postgres/Orphans.hs index fa7fa81..461d415 100644 --- a/src/Share/Postgres/Orphans.hs +++ b/src/Share/Postgres/Orphans.hs @@ -32,7 +32,9 @@ import Unison.Hash (Hash) import Unison.Hash qualified as Hash import Unison.Hash32 (Hash32) import Unison.Hash32 qualified as Hash32 +import Unison.Name (Name) import Unison.NameSegment.Internal (NameSegment (..)) +import Unison.Syntax.Name qualified as Name -- Orphans for 'Hash' instance Hasql.EncodeValue Hash where @@ -100,6 +102,16 @@ deriving via Text instance Hasql.DecodeValue NameSegment deriving via Text instance Hasql.EncodeValue NameSegment +instance Hasql.DecodeValue Name where + decodeValue = + Hasql.decodeValue @Text + & Decoders.refine Name.parseTextEither + +instance Hasql.EncodeValue Name where + encodeValue = + Hasql.encodeValue @Text + & contramap Name.toText + instance (Hasql.DecodeValue t, Hasql.DecodeValue h, Show t, Show h) => Hasql.DecodeRow (Reference' t h) where decodeRow = do t <- decodeField @(Maybe t) diff --git a/src/Share/Postgres/Search/DefinitionSearch/Queries.hs b/src/Share/Postgres/Search/DefinitionSearch/Queries.hs index e3908ab..7e9b952 100644 --- a/src/Share/Postgres/Search/DefinitionSearch/Queries.hs +++ b/src/Share/Postgres/Search/DefinitionSearch/Queries.hs @@ -5,6 +5,8 @@ module Share.Postgres.Search.DefinitionSearch.Queries claimUnsyncedRelease, insertDefinitionDocuments, cleanIndexForRelease, + defNameSearch, + DefnNameSearchFilter (..), ) where @@ -13,9 +15,10 @@ import Data.Set qualified as Set import Data.Text qualified as Text import Hasql.Interpolate qualified as Hasql import Share.BackgroundJobs.Search.DefinitionSync.Types -import Share.IDs (ProjectId, ReleaseId) +import Share.IDs (ProjectId, ReleaseId, UserId) import Share.Postgres import Share.Prelude +import Share.Utils.API (Limit, Query (Query)) import Unison.DataDeclaration qualified as DD import Unison.Name (Name) import Unison.NameSegment (NameSegment) @@ -157,21 +160,43 @@ searchTokenToText = \case Just Nothing -> ["r"] Nothing -> [] -defNameSearch :: Maybe UserId -> Maybe (Either ProjectId ReleaseId) -> Query -> Limit -> Transaction e [(ProjectId, ReleaseId, Name)] -defNameSearch mayCaller mayFilter (Query query) = do +data DefnNameSearchFilter + = ProjectFilter ProjectId + | ReleaseFilter ReleaseId + | UserFilter UserId + +defNameSearch :: Maybe UserId -> Maybe DefnNameSearchFilter -> Query -> Limit -> Transaction e [(ProjectId, ReleaseId, Name)] +defNameSearch mayCaller mayFilter (Query query) limit = do let filters = case mayFilter of - Left projId -> [sql| AND project_id = #{projId} |] - Right relId -> [sql| AND release_id = #{relId} |] - query1Col + Just (ProjectFilter projId) -> [sql| AND project_id = #{projId} |] + Just (ReleaseFilter relId) -> [sql| AND release_id = #{relId} |] + Just (UserFilter userId) -> [sql| AND p.owner_id = #{userId} |] + Nothing -> mempty + queryListRows @(ProjectId, ReleaseId, Name) [sql| - SELECT project_id, release_id, name FROM global_definition_search_docs doc - JOIN projects p ON p.id = doc.project_id - WHERE - -- Search name by a trigram 'word similarity' - -- which will match if the query is similar to any 'word' (e.g. name segment) - -- in the name. 
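+          -- The cutoff is pg_trgm's word_similarity_threshold (0.6 by
+          -- default), which can be lowered per session if matching proves
+          -- too strict.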
+          #{query} <% name
+          AND (NOT p.private OR (#{mayCaller} IS NOT NULL AND EXISTS (SELECT FROM accessible_private_projects WHERE user_id = #{mayCaller} AND project_id = p.id)))
+          ^{filters}
+          ORDER BY project_id, name, r.major_version DESC, r.minor_version DESC, r.patch_version DESC
+      ),
+      -- Find the best matches
+      best_results(project_id, release_id, name) AS (
+        SELECT project_id, release_id, name
+        FROM matches_deduped_by_project
+        ORDER BY similarity(#{query}, name) DESC
+        LIMIT #{limit}
+      )
+      -- THEN sort docs to the bottom.
+      SELECT project_id, release_id, name
+      FROM best_results
+      -- docs and tests to the bottom, but otherwise sort by quality of the match.
+      ORDER BY (tag = 'doc') ASC, (tag = 'test') ASC, similarity(#{query}, name) DESC
+    |]
diff --git a/src/Share/Web/Share/API.hs b/src/Share/Web/Share/API.hs
index 429dc89..bb770a9 100644
--- a/src/Share/Web/Share/API.hs
+++ b/src/Share/Web/Share/API.hs
@@ -10,7 +10,6 @@ import Share.Prelude (NonEmpty)
 import Share.Utils.API
 import Share.Utils.Caching
 import Share.Utils.Servant
-import Share.Utils.Unison (ProjectShortHandParam)
 import Share.Web.Share.Branches.API (UserBranchesAPI)
 import Share.Web.Share.CodeBrowsing.API (CodeBrowseAPI)
 import Share.Web.Share.Contributions.API (ContributionsByUserAPI)
@@ -59,6 +58,7 @@ type SearchDefinitionNamesEndpoint =
     :> QueryParam "limit" Limit
     :> QueryParam "user-filter" UserHandle
     :> QueryParam "project-filter" ProjectShortHand
+    :> QueryParam "release-filter" ReleaseVersion
     :> Get '[JSON] [DefinitionNameSearchResult]
diff --git a/src/Share/Web/Share/Branches/Impl.hs b/src/Share/Web/Share/Branches/Impl.hs
index 077111e..7fad76c 100644
--- a/src/Share/Web/Share/Branches/Impl.hs
+++ b/src/Share/Web/Share/Branches/Impl.hs
@@ -48,7 +48,8 @@ import Unison.NameSegment.Internal (NameSegment (..))
 import Unison.Reference (Reference)
 import Unison.Referent (Referent)
 import Unison.Referent qualified as Referent
-import Unison.Server.Share.DefinitionSummary (TermSummary, TypeSummary, serveTermSummary, serveTypeSummary)
+import Unison.Server.Share.DefinitionSummary (serveTermSummary, serveTypeSummary)
+import Unison.Server.Share.DefinitionSummary.Types (TermSummary, TypeSummary)
 import Unison.Server.Share.Definitions qualified as ShareBackend
 import Unison.Server.Share.FuzzyFind qualified as Fuzzy
 import Unison.Server.Share.NamespaceDetails qualified as ND
diff --git a/src/Share/Web/Share/CodeBrowsing/API.hs b/src/Share/Web/Share/CodeBrowsing/API.hs
index cd9d064..097bd94 100644
--- a/src/Share/Web/Share/CodeBrowsing/API.hs
+++ b/src/Share/Web/Share/CodeBrowsing/API.hs
@@ -4,16 +4,16 @@ module Share.Web.Share.CodeBrowsing.API where
 
 import Data.Text (Text)
+import Servant
 import Share.Utils.Caching
 import Share.Utils.Servant (OptionalCapture, RequiredQueryParam)
-import Servant
 import U.Codebase.HashTags (CausalHash)
 import Unison.Codebase.Path
qualified as Path import Unison.HashQualified qualified as HQ import Unison.Name (Name) import Unison.Reference (Reference) import Unison.Referent (Referent) -import Unison.Server.Share.DefinitionSummary (TermSummary, TypeSummary) +import Unison.Server.Share.DefinitionSummary.Types (TermSummary, TypeSummary) import Unison.Server.Share.FuzzyFind qualified as Fuzzy import Unison.Server.Share.NamespaceListing (NamespaceListing) import Unison.Server.Types (DefinitionDisplayResults, NamespaceDetails) diff --git a/src/Share/Web/Share/Impl.hs b/src/Share/Web/Share/Impl.hs index ce69c2f..5d5b9cf 100644 --- a/src/Share/Web/Share/Impl.hs +++ b/src/Share/Web/Share/Impl.hs @@ -18,9 +18,11 @@ import Share.Postgres qualified as PG import Share.Postgres.IDs (CausalHash) import Share.Postgres.Ops qualified as PGO import Share.Postgres.Queries qualified as Q +import Share.Postgres.Search.DefinitionSearch.Queries qualified as DDQ import Share.Postgres.Users.Queries qualified as UsersQ import Share.Prelude import Share.Project (Project (..)) +import Share.Release (Release (..)) import Share.User (User (..)) import Share.UserProfile (UserProfile (..)) import Share.Utils.API @@ -34,7 +36,6 @@ import Share.Web.Share.API qualified as Share import Share.Web.Share.Branches.Impl qualified as Branches import Share.Web.Share.CodeBrowsing.API (CodeBrowseAPI) import Share.Web.Share.Contributions.Impl qualified as Contributions -import Share.Web.Share.Contributions.Types (ShareContribution (projectShortHand)) import Share.Web.Share.Projects.Impl qualified as Projects import Share.Web.Share.Types import Unison.Codebase.Path qualified as Path @@ -43,7 +44,8 @@ import Unison.Name (Name) import Unison.Reference (Reference) import Unison.Referent (Referent) import Unison.Referent qualified as Referent -import Unison.Server.Share.DefinitionSummary (TermSummary, TypeSummary, serveTermSummary, serveTypeSummary) +import Unison.Server.Share.DefinitionSummary (serveTermSummary, serveTypeSummary) +import Unison.Server.Share.DefinitionSummary.Types (TermSummary, TypeSummary) import Unison.Server.Share.Definitions qualified as ShareBackend import Unison.Server.Share.FuzzyFind qualified as Fuzzy import Unison.Server.Share.NamespaceDetails qualified as ND @@ -357,21 +359,31 @@ searchDefinitionNamesEndpoint :: Query -> Maybe Limit -> Maybe UserHandle -> - Maybe ProjectShortHandParam -> + Maybe IDs.ProjectShortHand -> + Maybe IDs.ReleaseVersion -> WebApp [DefinitionNameSearchResult] -searchDefinitionNamesEndpoint callerUserId query mayLimit userFilter projectFilter = do - filter <- runMaybeT $ resolveProjectFilter <|> resolveUserFilter - DDQ.defNameSearch callerUserId filter query limit +searchDefinitionNamesEndpoint callerUserId query mayLimit userFilter projectFilter releaseFilter = do + filter <- runMaybeT $ resolveProjectAndReleaseFilter <|> resolveUserFilter + matches <- PG.runTransaction $ DDQ.defNameSearch callerUserId filter query limit + -- TODO: Fix this: + let response = matches <&> \(_projId, _releaseId, name) -> DefinitionNameSearchResult (Name.toText name) "name" + pure response where limit = fromMaybe (Limit 20) mayLimit - resolveProjectFilter = do + resolveProjectAndReleaseFilter :: MaybeT WebApp DDQ.DefnNameSearchFilter + resolveProjectAndReleaseFilter = do projectShortHand <- hoistMaybe projectFilter - Project {projectId} <- lift $ PG.runTransaction $ Q.projectByShortHand projectShortHand - pure $ Left projectId + Project {projectId} <- lift . 
PG.runTransactionOrRespondError $ Q.projectByShortHand projectShortHand `whenNothingM` throwError (EntityMissing (ErrorID "no-project-found") $ "No project found for short hand: " <> IDs.toText projectShortHand) + case releaseFilter of + Nothing -> pure $ DDQ.ProjectFilter projectId + Just releaseVersion -> do + Release {releaseId} <- lift . PG.runTransactionOrRespondError $ Q.releaseByProjectIdAndReleaseShortHand projectId (IDs.ReleaseShortHand releaseVersion) `whenNothingM` throwError (EntityMissing (ErrorID "no-release-found") $ "No release found for project: " <> IDs.toText projectShortHand <> " and version: " <> IDs.toText releaseVersion) + pure $ DDQ.ReleaseFilter releaseId + resolveUserFilter :: MaybeT WebApp DDQ.DefnNameSearchFilter resolveUserFilter = do userHandle <- hoistMaybe userFilter - User {user_id} <- lift $ PG.runTransaction $ Q.userByHandle userHandle - pure $ Right user_id + User {user_id} <- lift $ PG.runTransactionOrRespondError $ Q.userByHandle userHandle `whenNothingM` throwError (EntityMissing (ErrorID "no-user-for-handle") $ "User not found for handle: " <> IDs.toText userHandle) + pure $ DDQ.UserFilter user_id accountInfoEndpoint :: Session -> WebApp UserAccountInfo accountInfoEndpoint Session {sessionUserId} = do diff --git a/src/Share/Web/Share/Releases/Impl.hs b/src/Share/Web/Share/Releases/Impl.hs index 40398d6..680d7ba 100644 --- a/src/Share/Web/Share/Releases/Impl.hs +++ b/src/Share/Web/Share/Releases/Impl.hs @@ -50,7 +50,8 @@ import Unison.NameSegment.Internal (NameSegment (..)) import Unison.Reference (Reference) import Unison.Referent (Referent) import Unison.Referent qualified as Referent -import Unison.Server.Share.DefinitionSummary (TermSummary, TypeSummary, serveTermSummary, serveTypeSummary) +import Unison.Server.Share.DefinitionSummary (serveTermSummary, serveTypeSummary) +import Unison.Server.Share.DefinitionSummary.Types (TermSummary, TypeSummary) import Unison.Server.Share.Definitions qualified as ShareBackend import Unison.Server.Share.FuzzyFind qualified as Fuzzy import Unison.Server.Share.NamespaceDetails qualified as ND diff --git a/src/Share/Web/Share/Types.hs b/src/Share/Web/Share/Types.hs index 88e4490..e673b7e 100644 --- a/src/Share/Web/Share/Types.hs +++ b/src/Share/Web/Share/Types.hs @@ -13,6 +13,7 @@ import Share.Prelude import Share.Project (ProjectVisibility) import Share.Utils.API (NullableUpdate, parseNullableUpdate) import Share.Utils.URI +import Unison.Name (Name) import Unison.Server.Doc (Doc) data UpdateUserRequest = UpdateUserRequest @@ -153,7 +154,11 @@ instance ToJSON UserDisplayInfo where "userId" Aeson..= userId ] -data DefinitionNameSearchResult = DefinitionNameSearchResult +data DefinitionNameSearchResult + = DefinitionNameSearchResult + { token :: Text, + kind :: Text + } data DefinitionSearchResult = DefinitionSearchResult diff --git a/src/Unison/Server/Share/DefinitionSummary.hs b/src/Unison/Server/Share/DefinitionSummary.hs index 112c097..c7f2360 100644 --- a/src/Unison/Server/Share/DefinitionSummary.hs +++ b/src/Unison/Server/Share/DefinitionSummary.hs @@ -9,20 +9,13 @@ {-# LANGUAGE TypeOperators #-} module Unison.Server.Share.DefinitionSummary - ( TermSummaryAPI, - serveTermSummary, + ( serveTermSummary, termSummaryForReferent, - TermSummary (..), - TypeSummaryAPI, serveTypeSummary, typeSummaryForReference, - TypeSummary (..), ) where -import Data.Aeson -import Servant (Capture, QueryParam, (:>)) -import Servant.Server (err500) import Share.Backend qualified as Backend import Share.Codebase qualified as Codebase 
import Share.Codebase.Types (CodebaseM) @@ -31,12 +24,9 @@ import Share.Postgres.Hashes.Queries qualified as HashQ import Share.Postgres.IDs (BranchHashId, CausalId) import Share.Postgres.NameLookups.Ops qualified as NLOps import Share.Postgres.NameLookups.Types qualified as NameLookups -import Share.Utils.Logging qualified as Logging -import Share.Web.Errors (ToServerError (..)) import U.Codebase.Referent qualified as V2Referent import Unison.Codebase.Editor.DisplayObject (DisplayObject (..)) import Unison.Codebase.Path qualified as Path -import Unison.Codebase.ShortCausalHash (ShortCausalHash) import Unison.Codebase.SqliteCodebase.Conversions qualified as CV import Unison.HashQualified qualified as HQ import Unison.Name (Name) @@ -47,71 +37,15 @@ import Unison.PrettyPrintEnvDecl.Postgres qualified as PPEPostgres import Unison.Reference (Reference) import Unison.Reference qualified as Reference import Unison.Referent (Referent) -import Unison.Server.Syntax (SyntaxText) +import Unison.Server.Backend (BackendError (..)) +import Unison.Server.Share.DefinitionSummary.Types (TermSummary (..), TypeSummary (..)) import Unison.Server.Types - ( APIGet, - TermTag (..), - TypeTag, - mayDefaultWidth, + ( mayDefaultWidth, ) -import Unison.ShortHash qualified as SH import Unison.Symbol (Symbol) import Unison.Type qualified as Type import Unison.Util.Pretty (Width) -data SummaryError = MissingSignatureForTerm Reference - deriving (Show) - -instance ToServerError SummaryError where - toServerError = \case - MissingSignatureForTerm _reference -> - ("missing-term-signature", err500) - -instance Logging.Loggable SummaryError where - toLog = \case - e@(MissingSignatureForTerm {}) -> - Logging.withSeverity Logging.Error . Logging.showLog $ e - -type TermSummaryAPI = - "definitions" - :> "terms" - :> "by-hash" - :> Capture "hash" Referent - :> "summary" - -- Optional name to include in summary. - -- It's propagated through to the response as-is. - -- If missing, the short hash will be used instead. - :> QueryParam "name" Name - :> QueryParam "rootBranch" ShortCausalHash - :> QueryParam "relativeTo" Path.Path - :> QueryParam "renderWidth" Width - :> APIGet TermSummary - -data TermSummary = TermSummary - { displayName :: HQ.HashQualified Name, - hash :: SH.ShortHash, - summary :: DisplayObject SyntaxText SyntaxText, - tag :: TermTag - } - deriving (Generic, Show) - -instance ToJSON TermSummary where - toJSON (TermSummary {..}) = - object - [ "displayName" .= displayName, - "hash" .= hash, - "summary" .= summary, - "tag" .= tag - ] - -instance FromJSON TermSummary where - parseJSON = withObject "TermSummary" $ \o -> do - displayName <- o .: "displayName" - hash <- o .: "hash" - summary <- o .: "summary" - tag <- o .: "tag" - pure $ TermSummary {..} - serveTermSummary :: Referent -> Maybe Name -> @@ -155,46 +89,6 @@ termSummaryForReferent referent typeSig mayName rootBranchHashId relativeTo mayW then BuiltinObject sig else UserObject sig -type TypeSummaryAPI = - "definitions" - :> "types" - :> "by-hash" - :> Capture "hash" Reference - :> "summary" - -- Optional name to include in summary. - -- It's propagated through to the response as-is. - -- If missing, the short hash will be used instead. 
- :> QueryParam "name" Name - :> QueryParam "rootBranch" ShortCausalHash - :> QueryParam "relativeTo" Path.Path - :> QueryParam "renderWidth" Width - :> APIGet TypeSummary - -data TypeSummary = TypeSummary - { displayName :: HQ.HashQualified Name, - hash :: SH.ShortHash, - summary :: DisplayObject SyntaxText SyntaxText, - tag :: TypeTag - } - deriving (Generic, Show) - -instance ToJSON TypeSummary where - toJSON (TypeSummary {..}) = - object - [ "displayName" .= displayName, - "hash" .= hash, - "summary" .= summary, - "tag" .= tag - ] - -instance FromJSON TypeSummary where - parseJSON = withObject "TypeSummary" $ \o -> do - displayName <- o .: "displayName" - hash <- o .: "hash" - summary <- o .: "summary" - tag <- o .: "tag" - pure $ TypeSummary {..} - serveTypeSummary :: Reference -> Maybe Name -> diff --git a/src/Unison/Server/Share/DefinitionSummary/Types.hs b/src/Unison/Server/Share/DefinitionSummary/Types.hs new file mode 100644 index 0000000..dcb4c37 --- /dev/null +++ b/src/Unison/Server/Share/DefinitionSummary/Types.hs @@ -0,0 +1,132 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE StandaloneDeriving #-} +{-# LANGUAGE TypeOperators #-} + +module Unison.Server.Share.DefinitionSummary.Types + ( TermSummaryAPI, + TermSummary (..), + TypeSummaryAPI, + TypeSummary (..), + ) +where + +import Data.Aeson +import Servant (Capture, QueryParam, (:>)) +import Servant.Server (err500) +import Share.Utils.Logging qualified as Logging +import Share.Web.Errors (ToServerError (..)) +import Unison.Codebase.Editor.DisplayObject (DisplayObject (..)) +import Unison.Codebase.Path qualified as Path +import Unison.Codebase.ShortCausalHash (ShortCausalHash) +import Unison.HashQualified qualified as HQ +import Unison.Name (Name) +import Unison.Prelude +import Unison.Reference (Reference) +import Unison.Referent (Referent) +import Unison.Server.Syntax (SyntaxText) +import Unison.Server.Types + ( APIGet, + TermTag (..), + TypeTag, + ) +import Unison.ShortHash qualified as SH +import Unison.Util.Pretty (Width) + +data SummaryError = MissingSignatureForTerm Reference + deriving (Show) + +instance ToServerError SummaryError where + toServerError = \case + MissingSignatureForTerm _reference -> + ("missing-term-signature", err500) + +instance Logging.Loggable SummaryError where + toLog = \case + e@(MissingSignatureForTerm {}) -> + Logging.withSeverity Logging.Error . Logging.showLog $ e + +type TermSummaryAPI = + "definitions" + :> "terms" + :> "by-hash" + :> Capture "hash" Referent + :> "summary" + -- Optional name to include in summary. + -- It's propagated through to the response as-is. + -- If missing, the short hash will be used instead. 
+ :> QueryParam "name" Name + :> QueryParam "rootBranch" ShortCausalHash + :> QueryParam "relativeTo" Path.Path + :> QueryParam "renderWidth" Width + :> APIGet TermSummary + +data TermSummary = TermSummary + { displayName :: HQ.HashQualified Name, + hash :: SH.ShortHash, + summary :: DisplayObject SyntaxText SyntaxText, + tag :: TermTag + } + deriving (Generic, Show) + +instance ToJSON TermSummary where + toJSON (TermSummary {..}) = + object + [ "displayName" .= displayName, + "hash" .= hash, + "summary" .= summary, + "tag" .= tag + ] + +instance FromJSON TermSummary where + parseJSON = withObject "TermSummary" $ \o -> do + displayName <- o .: "displayName" + hash <- o .: "hash" + summary <- o .: "summary" + tag <- o .: "tag" + pure $ TermSummary {..} + +type TypeSummaryAPI = + "definitions" + :> "types" + :> "by-hash" + :> Capture "hash" Reference + :> "summary" + -- Optional name to include in summary. + -- It's propagated through to the response as-is. + -- If missing, the short hash will be used instead. + :> QueryParam "name" Name + :> QueryParam "rootBranch" ShortCausalHash + :> QueryParam "relativeTo" Path.Path + :> QueryParam "renderWidth" Width + :> APIGet TypeSummary + +data TypeSummary = TypeSummary + { displayName :: HQ.HashQualified Name, + hash :: SH.ShortHash, + summary :: DisplayObject SyntaxText SyntaxText, + tag :: TypeTag + } + deriving (Generic, Show) + +instance ToJSON TypeSummary where + toJSON (TypeSummary {..}) = + object + [ "displayName" .= displayName, + "hash" .= hash, + "summary" .= summary, + "tag" .= tag + ] + +instance FromJSON TypeSummary where + parseJSON = withObject "TypeSummary" $ \o -> do + displayName <- o .: "displayName" + hash <- o .: "hash" + summary <- o .: "summary" + tag <- o .: "tag" + pure $ TypeSummary {..} From 67edc0a32379703ca0d2af70e569c39b6221a8d7 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Mon, 22 Jul 2024 15:57:57 -0700 Subject: [PATCH 30/48] Add column for term and type tags --- package.yaml | 1 + share-api.cabal | 2 ++ sql/2024-07-18-00-00_defn_search.sql | 12 ++++--- .../BackgroundJobs/Search/DefinitionSync.hs | 4 ++- .../Search/DefinitionSync/Types.hs | 27 +++++++++++++++- .../Search/DefinitionSearch/Queries.hs | 31 ++++++++++--------- 6 files changed, 56 insertions(+), 21 deletions(-) diff --git a/package.yaml b/package.yaml index a120944..793f720 100644 --- a/package.yaml +++ b/package.yaml @@ -161,6 +161,7 @@ default-extensions: - BlockArguments - QuasiQuotes - ImportQualifiedPost + - OverloadedRecordDot library: source-dirs: src diff --git a/share-api.cabal b/share-api.cabal index 0c04841..c824a69 100644 --- a/share-api.cabal +++ b/share-api.cabal @@ -189,6 +189,7 @@ library BlockArguments QuasiQuotes ImportQualifiedPost + OverloadedRecordDot ghc-options: -Wall -Werror -Wno-name-shadowing -Wno-type-defaults -Wno-missing-pattern-synonym-signatures -fprint-expanded-synonyms -fwrite-ide-info -O2 -funbox-strict-fields build-depends: Diff @@ -330,6 +331,7 @@ executable share-api BlockArguments QuasiQuotes ImportQualifiedPost + OverloadedRecordDot ghc-options: -Wall -Werror -Wno-name-shadowing -Wno-type-defaults -Wno-missing-pattern-synonym-signatures -fprint-expanded-synonyms -fwrite-ide-info -O2 -funbox-strict-fields -threaded -rtsopts "-with-rtsopts=-N -A32m -qn2 -T" build-depends: Diff diff --git a/sql/2024-07-18-00-00_defn_search.sql b/sql/2024-07-18-00-00_defn_search.sql index b44d1ac..91990d1 100644 --- a/sql/2024-07-18-00-00_defn_search.sql +++ b/sql/2024-07-18-00-00_defn_search.sql @@ -3,7 +3,7 @@ CREATE EXTENSION IF 
NOT EXISTS btree_gin; -- Allows us to create trigram indexes for fuzzy searching. -CREATE EXTENSION IF NOT EXISTS pg_trgm; +CREATE EXTENSION IF NOT EXISTS pg_trgm; -- New table for coordinating background job for syncing global definitions for search. @@ -13,27 +13,31 @@ CREATE TABLE global_definition_search_release_queue ( created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP ); +-- Every defn fits into one of these categories. +CREATE TYPE definition_tag AS ENUM ('doc', 'test', 'plain', 'data', 'ability', 'data-constructor', 'ability-constructor'); + CREATE TABLE global_definition_search_docs ( project_id UUID NOT NULL REFERENCES projects(id) ON DELETE CASCADE, release_id UUID NOT NULL REFERENCES project_releases(id) ON DELETE CASCADE, -- Fully qualified name name TEXT NOT NULL, search_tokens TSVECTOR NOT NULL, - -- Number of arguments. 0 for values. + -- Number of arguments. 0 for values. arity INT NOT NULL, + tag definition_tag NOT NULL, -- Contains the rendered type signature, type, hash, etc. -- so we don't need to look up types for hundreds of search results on the fly. metadata JSONB NOT NULL, - -- Ostensibly there's the possibility of name conflicts, + -- Ostensibly there's the possibility of name conflicts, -- but those are rare enough we don't care, we just insert with ON CONFLICT DO NOTHING. PRIMARY KEY (project_id, release_id, name) ); -- Index for searching global definitions by 'search token', with an optional project/release filter. -- P.s. there's a search token type for name, so we don't need to index that separately. -CREATE INDEX global_definition_search_tokens ON global_definition_search_docs USING GIN(search_tokens, project_id, release_id); +CREATE INDEX global_definition_search_tokens ON global_definition_search_docs USING GIN(search_tokens, tag, project_id, release_id); -- Index for fuzzy-searching on the fully qualified name. 
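
As a rough illustration of how the tsvector column and its GIN index are meant to be queried (a sketch, not part of this migration; the token spellings are invented purely for the example):

    -- Hypothetical query shape: AND together the rendered search tokens and
    -- let the GIN index above do the matching.
    SELECT name
    FROM global_definition_search_docs
    WHERE search_tokens @@ $$'mn:Map' & 'v:1:3'$$::tsquery;
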
CREATE INDEX global_definition_search_name_trigram ON global_definition_search_docs USING GIST (name gist_trgm_ops); diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync.hs b/src/Share/BackgroundJobs/Search/DefinitionSync.hs index 2534ce8..a6005cd 100644 --- a/src/Share/BackgroundJobs/Search/DefinitionSync.hs +++ b/src/Share/BackgroundJobs/Search/DefinitionSync.hs @@ -15,7 +15,7 @@ import Data.Set qualified as Set import Data.Set.Lens (setOf) import Ki.Unlifted qualified as Ki import Share.BackgroundJobs.Monad (Background) -import Share.BackgroundJobs.Search.DefinitionSync.Types (DefinitionDocument (..), DefnSearchToken (..), Occurrence, TermOrTypeSummary (..), VarId (VarId)) +import Share.BackgroundJobs.Search.DefinitionSync.Types (DefinitionDocument (..), DefnSearchToken (..), Occurrence, TermOrTypeSummary (..), TermOrTypeTag (..), VarId (..)) import Share.BackgroundJobs.Workers (newWorker) import Share.Codebase (CodebaseM) import Share.Codebase qualified as Codebase @@ -142,6 +142,7 @@ syncTerms namesPerspective bhId projectId releaseId termsCursor = do hash = sh, tokens = refTokens, arity = arity, + tag = ToTTermTag (termSummary.tag), metadata = ToTTermSummary termSummary } pure dd @@ -292,6 +293,7 @@ syncTypes namesPerspective projectId releaseId typesCursor = do hash = sh, tokens = declTokens <> basicTokens, arity = declArity, + tag = ToTTypeTag (typeSummary.tag), metadata = ToTTypeSummary typeSummary } pure dd diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs b/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs index dd87d41..1f784db 100644 --- a/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs +++ b/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs @@ -3,6 +3,7 @@ module Share.BackgroundJobs.Search.DefinitionSync.Types ( TermOrTypeSummary (..), + TermOrTypeTag (..), DefinitionDocument (..), DefnSearchToken (..), Occurrence (..), @@ -13,11 +14,13 @@ where import Data.Aeson import Data.Monoid (Sum (..)) import Data.Text qualified as Text +import Hasql.Encoders qualified as Encoders +import Hasql.Interpolate qualified as Hasql import Share.Prelude import Unison.DataDeclaration qualified as DD import Unison.Name (Name) import Unison.Server.Share.DefinitionSummary.Types (TermSummary (..), TypeSummary (..)) -import Unison.Server.Types (TermTag, TypeTag) +import Unison.Server.Types (TermTag (..), TypeTag (..)) import Unison.ShortHash (ShortHash) data TermOrTypeSummary = ToTTermSummary TermSummary | ToTTypeSummary TypeSummary @@ -49,6 +52,27 @@ instance FromJSON TermOrTypeSummary where pure $ ToTTypeSummary $ TypeSummary {..} _ -> fail $ "Invalid kind: " <> Text.unpack kind +data TermOrTypeTag = ToTTermTag TermTag | ToTTypeTag TypeTag + deriving stock (Show, Eq, Ord) + +instance Hasql.EncodeValue TermOrTypeTag where + encodeValue = + Encoders.enum + ( \case + ToTTermTag tt -> encodeTermTag tt + ToTTypeTag tt -> encodeTypeTag tt + ) + where + encodeTermTag = \case + Doc -> "doc" + Test -> "test" + Plain -> "plain" + Constructor Data -> "data-constructor" + Constructor Ability -> "ability-constructor" + encodeTypeTag = \case + Data -> "data" + Ability -> "ability" + -- | The number of occurences of this token in the search query. -- E.g. for the query: 'Text -> Text -> Text', the Text type mention token would -- occur 3 times, and the set would be: @@ -124,6 +148,7 @@ data DefinitionDocument proj release name typeRef = DefinitionDocument -- in the future. 
tokens :: Set (DefnSearchToken typeRef), arity :: Int, + tag :: TermOrTypeTag, metadata :: TermOrTypeSummary } deriving (Show, Generic) diff --git a/src/Share/Postgres/Search/DefinitionSearch/Queries.hs b/src/Share/Postgres/Search/DefinitionSearch/Queries.hs index 7e9b952..2696ff1 100644 --- a/src/Share/Postgres/Search/DefinitionSearch/Queries.hs +++ b/src/Share/Postgres/Search/DefinitionSearch/Queries.hs @@ -58,24 +58,25 @@ claimUnsyncedRelease = do insertDefinitionDocuments :: [DefinitionDocument ProjectId ReleaseId Name (NameSegment, ShortHash)] -> Transaction e () insertDefinitionDocuments docs = pipelined $ do let docsTable = docRow <$> docs - for_ docsTable \(projectId, releaseId, fqn, tokens, arity, metadata) -> do + for_ docsTable \(projectId, releaseId, fqn, tokens, arity, tag, metadata) -> do -- Ideally we'd do a bulk insert, but Hasql doesn't provide any method for passing arrays of -- arrays as parameters, so instead we insert each record individually so we can use our -- only level of array-ness for the tokens. execute_ $ [sql| - INSERT INTO global_definition_search_docs (project_id, release_id, name, search_tokens, arity, metadata) - VALUES (#{projectId}, #{releaseId}, #{fqn}, array_to_tsvector(#{tokens}), #{arity}, #{metadata}::jsonb) + INSERT INTO global_definition_search_docs (project_id, release_id, name, search_tokens, arity, tag, metadata) + VALUES (#{projectId}, #{releaseId}, #{fqn}, array_to_tsvector(#{tokens}), #{arity}, #{tag}::definition_tag, #{metadata}::jsonb) ON CONFLICT DO NOTHING |] where - docRow :: DefinitionDocument ProjectId ReleaseId Name (NameSegment, ShortHash) -> (ProjectId, ReleaseId, Text, [Text], Int32, Hasql.Jsonb) - docRow DefinitionDocument {project, release, fqn, tokens, arity, metadata} = + docRow :: DefinitionDocument ProjectId ReleaseId Name (NameSegment, ShortHash) -> (ProjectId, ReleaseId, Text, [Text], Int32, TermOrTypeTag, Hasql.Jsonb) + docRow DefinitionDocument {project, release, fqn, tokens, arity, tag, metadata} = ( project, release, Name.toText fqn, foldMap searchTokenToText $ Set.toList tokens, fromIntegral arity, + tag, Hasql.Jsonb $ Aeson.toJSON metadata ) @@ -175,28 +176,28 @@ defNameSearch mayCaller mayFilter (Query query) limit = do queryListRows @(ProjectId, ReleaseId, Name) [sql| WITH matches_deduped_by_project(project_id, release_id, name) AS ( - SELECT DISTINCT ON (project_id, name) project_id, release_id, name FROM global_definition_search_docs doc + SELECT DISTINCT ON (doc.project_id, doc.name) doc.project_id, doc.release_id, doc.name FROM global_definition_search_docs doc JOIN projects p ON p.id = doc.project_id JOIN releases r ON r.id = doc.release_id WHERE -- Search name by a trigram 'word similarity' -- which will match if the query is similar to any 'word' (e.g. name segment) -- in the name. 
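
For intuition about the operator used just below: pg_trgm's <% operator compares the query against the closest "word" inside the target string, so a short query can still match a single segment of a long fully qualified name. An illustrative psql session (assuming the default word-similarity threshold):

    -- The score the operator thresholds on:
    SELECT word_similarity('foldMap', 'base.List.foldMap');
    -- True whenever that score clears pg_trgm.word_similarity_threshold:
    SELECT 'foldMap' <% 'base.List.foldMap';
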
- #{query} <% name - AND (NOT p.private OR (#{mayCaller} IS NOT NULL AND EXISTS (SELECT FROM accessible_private_projects WHERE user_id = #{mayCaller} AND project_id = p.id))) + #{query} <% doc.name + AND (NOT p.private OR (#{mayCaller} IS NOT NULL AND EXISTS (SELECT FROM accessible_private_projects pp WHERE pp.user_id = #{mayCaller} AND pp.project_id = p.id))) ^{filters} - ORDER BY project_id, name, release.major_version, release.minor_version, release.patch_version + ORDER BY doc.project_id, doc.name, release.major_version, release.minor_version, release.patch_version ), -- Find the best matches best_results(project_id, release_id, name) AS ( - SELECT project_id, release_id, name - FROM matches_deduped_by_project - ORDER BY similarity(#{query}, name) DESC + SELECT m.project_id, m.release_id, m.name + FROM matches_deduped_by_project m + ORDER BY similarity(#{query}, m.name) DESC LIMIT #{limit} ) -- THEN sort docs to the bottom. - SELECT project_id, release_id, name - FROM best_results + SELECT br.project_id, br.release_id, br.name + FROM best_results br -- docs and tests to the bottom, but otherwise sort by quality of the match. - ORDER BY (tag = 'doc', tag = 'test', similarity(#{query}, name)) DESC + ORDER BY (br.tag = 'doc'::definition_tag, br.tag = 'test'::definition_tag, similarity(#{query}, br.name)) DESC |] From 610d4ea83f1cffb4c179a4f66b175daea4c5efba Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Mon, 22 Jul 2024 16:03:45 -0700 Subject: [PATCH 31/48] Wire up search-names endpoint --- .../Postgres/Search/DefinitionSearch/Queries.hs | 14 +++++++------- src/Share/Web/API.hs | 3 ++- src/Share/Web/Impl.hs | 1 + src/Share/Web/Share/API.hs | 4 ++-- src/Share/Web/Share/Types.hs | 7 +++++++ 5 files changed, 19 insertions(+), 10 deletions(-) diff --git a/src/Share/Postgres/Search/DefinitionSearch/Queries.hs b/src/Share/Postgres/Search/DefinitionSearch/Queries.hs index 2696ff1..b8f4df8 100644 --- a/src/Share/Postgres/Search/DefinitionSearch/Queries.hs +++ b/src/Share/Postgres/Search/DefinitionSearch/Queries.hs @@ -175,10 +175,10 @@ defNameSearch mayCaller mayFilter (Query query) limit = do Nothing -> mempty queryListRows @(ProjectId, ReleaseId, Name) [sql| - WITH matches_deduped_by_project(project_id, release_id, name) AS ( - SELECT DISTINCT ON (doc.project_id, doc.name) doc.project_id, doc.release_id, doc.name FROM global_definition_search_docs doc + WITH matches_deduped_by_project(project_id, release_id, name, tag) AS ( + SELECT DISTINCT ON (doc.project_id, doc.name) doc.project_id, doc.release_id, doc.name, doc.tag FROM global_definition_search_docs doc JOIN projects p ON p.id = doc.project_id - JOIN releases r ON r.id = doc.release_id + JOIN project_releases r ON r.id = doc.release_id WHERE -- Search name by a trigram 'word similarity' -- which will match if the query is similar to any 'word' (e.g. 
name segment) @@ -186,11 +186,11 @@ defNameSearch mayCaller mayFilter (Query query) limit = do #{query} <% doc.name AND (NOT p.private OR (#{mayCaller} IS NOT NULL AND EXISTS (SELECT FROM accessible_private_projects pp WHERE pp.user_id = #{mayCaller} AND pp.project_id = p.id))) ^{filters} - ORDER BY doc.project_id, doc.name, release.major_version, release.minor_version, release.patch_version + ORDER BY doc.project_id, doc.name, r.major_version, r.minor_version, r.patch_version ), -- Find the best matches - best_results(project_id, release_id, name) AS ( - SELECT m.project_id, m.release_id, m.name + best_results(project_id, release_id, name, tag) AS ( + SELECT m.project_id, m.release_id, m.name, m.tag FROM matches_deduped_by_project m ORDER BY similarity(#{query}, m.name) DESC LIMIT #{limit} @@ -199,5 +199,5 @@ defNameSearch mayCaller mayFilter (Query query) limit = do SELECT br.project_id, br.release_id, br.name FROM best_results br -- docs and tests to the bottom, but otherwise sort by quality of the match. - ORDER BY (br.tag = 'doc'::definition_tag, br.tag = 'test'::definition_tag, similarity(#{query}, br.name)) DESC + ORDER BY (br.tag <> 'doc'::definition_tag, br.tag <> 'test'::definition_tag, br.name LIKE ('%' || like_escape(#{query})), similarity(#{query}, br.name)) DESC |] diff --git a/src/Share/Web/API.hs b/src/Share/Web/API.hs index a53c7c5..5d841f5 100644 --- a/src/Share/Web/API.hs +++ b/src/Share/Web/API.hs @@ -3,6 +3,7 @@ module Share.Web.API where +import Servant import Share.OAuth.API qualified as OAuth import Share.OAuth.Session (MaybeAuthenticatedSession) import Share.Prelude @@ -12,7 +13,6 @@ import Share.Web.Share.API qualified as Share import Share.Web.Share.Projects.API qualified as Projects import Share.Web.Support.API qualified as Support import Share.Web.Types -import Servant import Unison.Share.API.Projects qualified as UCMProjects import Unison.Sync.API qualified as Unison.Sync @@ -22,6 +22,7 @@ type API = :<|> ("codebases" :> Share.UserPublicCodebaseAPI) :<|> ("users" :> Share.UserAPI) :<|> ("search" :> Share.SearchEndpoint) + :<|> ("search-names" :> Share.SearchDefinitionNamesEndpoint) :<|> ("account" :> Share.AccountAPI) :<|> ("catalog" :> Projects.CatalogAPI) -- This path is part of the standard: https://datatracker.ietf.org/doc/html/rfc5785 diff --git a/src/Share/Web/Impl.hs b/src/Share/Web/Impl.hs index a6cc4ab..2a8c98f 100644 --- a/src/Share/Web/Impl.hs +++ b/src/Share/Web/Impl.hs @@ -60,6 +60,7 @@ server = :<|> Share.userCodebaseServer :<|> Share.userServer :<|> Share.searchEndpoint + :<|> Share.searchDefinitionNamesEndpoint :<|> Share.accountServer :<|> Projects.catalogServer :<|> discoveryEndpoint diff --git a/src/Share/Web/Share/API.hs b/src/Share/Web/Share/API.hs index bb770a9..8f9a00d 100644 --- a/src/Share/Web/Share/API.hs +++ b/src/Share/Web/Share/API.hs @@ -5,7 +5,7 @@ module Share.Web.Share.API where import Servant import Share.IDs -import Share.OAuth.Session (AuthenticatedSession, AuthenticatedUserId, MaybeAuthenticatedSession) +import Share.OAuth.Session (AuthenticatedSession, AuthenticatedUserId, MaybeAuthenticatedSession, MaybeAuthenticatedUserId) import Share.Prelude (NonEmpty) import Share.Utils.API import Share.Utils.Caching @@ -53,7 +53,7 @@ type SearchEndpoint = -- | Search for names to use in a definition search. 
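
Concretely, the wired-up route accepts requests along these lines (illustrative values; the exact textual forms of ProjectShortHand and ReleaseVersion are whatever their FromHttpApiData instances parse):

    GET /search-names?query=foldMap&limit=10
    GET /search-names?query=foldMap&project-filter=@unison/base&release-filter=1.2.3
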
type SearchDefinitionNamesEndpoint = - MaybeAuthenticatedSession + MaybeAuthenticatedUserId :> RequiredQueryParam "query" Query :> QueryParam "limit" Limit :> QueryParam "user-filter" UserHandle diff --git a/src/Share/Web/Share/Types.hs b/src/Share/Web/Share/Types.hs index e673b7e..e16771f 100644 --- a/src/Share/Web/Share/Types.hs +++ b/src/Share/Web/Share/Types.hs @@ -160,6 +160,13 @@ data DefinitionNameSearchResult kind :: Text } +instance ToJSON DefinitionNameSearchResult where + toJSON DefinitionNameSearchResult {..} = + Aeson.object + [ "token" .= token, + "kind" .= kind + ] + data DefinitionSearchResult = DefinitionSearchResult { fqn :: Name, From 605655cadd289ebefbed0c2fa64045bd5dcf4be5 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Mon, 22 Jul 2024 16:38:20 -0700 Subject: [PATCH 32/48] Fix scoping of filters --- sql/2024-07-17-00-00_cursors.sql | 19 ------------------- .../Search/DefinitionSearch/Queries.hs | 4 ++-- 2 files changed, 2 insertions(+), 21 deletions(-) delete mode 100644 sql/2024-07-17-00-00_cursors.sql diff --git a/sql/2024-07-17-00-00_cursors.sql b/sql/2024-07-17-00-00_cursors.sql deleted file mode 100644 index 8880c27..0000000 --- a/sql/2024-07-17-00-00_cursors.sql +++ /dev/null @@ -1,19 +0,0 @@ - -CREATE OR REPLACE FUNCTION create_cursor(cursor_name text) -RETURNS refcursor AS $$ -DECLARE - segments text[]; - input_segments text[]; - current_segment text; -BEGIN - input_segments := string_to_array(trim(trailing '.' from fqn), '.'); - segments := ARRAY[]::text[]; - - FOREACH current_segment IN ARRAY input_segments - LOOP - segments := array_append(segments, array_to_string(input_segments[1:array_position(input_segments, current_segment)], '.') || '.'); - END LOOP; - - RETURN segments; -END; -$$ LANGUAGE plpgsql; diff --git a/src/Share/Postgres/Search/DefinitionSearch/Queries.hs b/src/Share/Postgres/Search/DefinitionSearch/Queries.hs index b8f4df8..4ad2cce 100644 --- a/src/Share/Postgres/Search/DefinitionSearch/Queries.hs +++ b/src/Share/Postgres/Search/DefinitionSearch/Queries.hs @@ -169,8 +169,8 @@ data DefnNameSearchFilter defNameSearch :: Maybe UserId -> Maybe DefnNameSearchFilter -> Query -> Limit -> Transaction e [(ProjectId, ReleaseId, Name)] defNameSearch mayCaller mayFilter (Query query) limit = do let filters = case mayFilter of - Just (ProjectFilter projId) -> [sql| AND project_id = #{projId} |] - Just (ReleaseFilter relId) -> [sql| AND release_id = #{relId} |] + Just (ProjectFilter projId) -> [sql| AND doc.project_id = #{projId} |] + Just (ReleaseFilter relId) -> [sql| AND doc.release_id = #{relId} |] Just (UserFilter userId) -> [sql| AND p.owner_id = #{userId} |] Nothing -> mempty queryListRows @(ProjectId, ReleaseId, Name) From 01cccc67463f53f70796118e4cb722b0b3e9f3d4 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Tue, 23 Jul 2024 16:54:16 -0700 Subject: [PATCH 33/48] Attempt at a robust lax type parser --- package.yaml | 1 + share-api.cabal | 3 + .../Search/DefinitionSync/Types.hs | 38 +++ .../Search/DefinitionSearch/Queries.hs | 10 +- src/Share/Prelude/Orphans.hs | 9 +- src/Share/Web/API.hs | 1 + src/Share/Web/Impl.hs | 1 + src/Share/Web/Share/API.hs | 5 +- src/Share/Web/Share/DefinitionSearch.hs | 253 ++++++++++++++++++ src/Share/Web/Share/Impl.hs | 52 ++-- src/Share/Web/Share/Types.hs | 26 +- 11 files changed, 370 insertions(+), 29 deletions(-) create mode 100644 src/Share/Web/Share/DefinitionSearch.hs diff --git a/package.yaml b/package.yaml index 793f720..7369e1b 100644 --- a/package.yaml +++ b/package.yaml @@ -75,6 +75,7 @@ dependencies: - 
network-uri - nonempty-containers - parallel +- parser-combinators - pem - hasql - hasql-pool diff --git a/share-api.cabal b/share-api.cabal index c824a69..ae7ea97 100644 --- a/share-api.cabal +++ b/share-api.cabal @@ -126,6 +126,7 @@ library Share.Web.Share.Contributions.Impl Share.Web.Share.Contributions.MergeDetection Share.Web.Share.Contributions.Types + Share.Web.Share.DefinitionSearch Share.Web.Share.Diffs.Impl Share.Web.Share.Diffs.Types Share.Web.Share.Impl @@ -240,6 +241,7 @@ library , network-uri , nonempty-containers , parallel + , parser-combinators , pem , prometheus-client , prometheus-metrics-ghc @@ -382,6 +384,7 @@ executable share-api , network-uri , nonempty-containers , parallel + , parser-combinators , pem , prometheus-client , prometheus-metrics-ghc diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs b/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs index 1f784db..fab530c 100644 --- a/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs +++ b/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs @@ -14,8 +14,11 @@ where import Data.Aeson import Data.Monoid (Sum (..)) import Data.Text qualified as Text +import Hasql.Decoders qualified as Decoders import Hasql.Encoders qualified as Encoders import Hasql.Interpolate qualified as Hasql +import Servant (FromHttpApiData) +import Servant.API (FromHttpApiData (..), ToHttpApiData (..)) import Share.Prelude import Unison.DataDeclaration qualified as DD import Unison.Name (Name) @@ -55,6 +58,30 @@ instance FromJSON TermOrTypeSummary where data TermOrTypeTag = ToTTermTag TermTag | ToTTypeTag TypeTag deriving stock (Show, Eq, Ord) +instance FromHttpApiData TermOrTypeTag where + parseQueryParam = \case + "doc" -> Right $ ToTTermTag Doc + "test" -> Right $ ToTTermTag Test + "plain" -> Right $ ToTTermTag Plain + "data-constructor" -> Right $ ToTTermTag $ Constructor Data + "ability-constructor" -> Right $ ToTTermTag $ Constructor Ability + "data" -> Right $ ToTTypeTag Data + "ability" -> Right $ ToTTypeTag Ability + _ -> Left "Invalid TermOrTypeTag" + +instance ToHttpApiData TermOrTypeTag where + toQueryParam = \case + ToTTermTag Doc -> "doc" + ToTTermTag Test -> "test" + ToTTermTag Plain -> "plain" + ToTTermTag (Constructor Data) -> "data-constructor" + ToTTermTag (Constructor Ability) -> "ability-constructor" + ToTTypeTag Data -> "data" + ToTTypeTag Ability -> "ability" + +instance ToJSON TermOrTypeTag where + toJSON = String . toQueryParam + instance Hasql.EncodeValue TermOrTypeTag where encodeValue = Encoders.enum @@ -73,6 +100,17 @@ instance Hasql.EncodeValue TermOrTypeTag where Data -> "data" Ability -> "ability" +instance Hasql.DecodeValue TermOrTypeTag where + decodeValue = Decoders.enum $ \case + "doc" -> pure $ ToTTermTag Doc + "test" -> pure $ ToTTermTag Test + "plain" -> pure $ ToTTermTag Plain + "data-constructor" -> pure $ ToTTermTag $ Constructor Data + "ability-constructor" -> pure $ ToTTermTag $ Constructor Ability + "data" -> pure $ ToTTypeTag Data + "ability" -> pure $ ToTTypeTag Ability + _ -> fail "Invalid TermOrTypeTag" + -- | The number of occurences of this token in the search query. -- E.g. 
for the query: 'Text -> Text -> Text', the Text type mention token would -- occur 3 times, and the set would be: diff --git a/src/Share/Postgres/Search/DefinitionSearch/Queries.hs b/src/Share/Postgres/Search/DefinitionSearch/Queries.hs index 4ad2cce..05e2e0a 100644 --- a/src/Share/Postgres/Search/DefinitionSearch/Queries.hs +++ b/src/Share/Postgres/Search/DefinitionSearch/Queries.hs @@ -6,6 +6,7 @@ module Share.Postgres.Search.DefinitionSearch.Queries insertDefinitionDocuments, cleanIndexForRelease, defNameSearch, + definitionSearch, DefnNameSearchFilter (..), ) where @@ -166,14 +167,14 @@ data DefnNameSearchFilter | ReleaseFilter ReleaseId | UserFilter UserId -defNameSearch :: Maybe UserId -> Maybe DefnNameSearchFilter -> Query -> Limit -> Transaction e [(ProjectId, ReleaseId, Name)] +defNameSearch :: Maybe UserId -> Maybe DefnNameSearchFilter -> Query -> Limit -> Transaction e [(ProjectId, ReleaseId, Name, TermOrTypeTag)] defNameSearch mayCaller mayFilter (Query query) limit = do let filters = case mayFilter of Just (ProjectFilter projId) -> [sql| AND doc.project_id = #{projId} |] Just (ReleaseFilter relId) -> [sql| AND doc.release_id = #{relId} |] Just (UserFilter userId) -> [sql| AND p.owner_id = #{userId} |] Nothing -> mempty - queryListRows @(ProjectId, ReleaseId, Name) + queryListRows @(ProjectId, ReleaseId, Name, TermOrTypeTag) [sql| WITH matches_deduped_by_project(project_id, release_id, name, tag) AS ( SELECT DISTINCT ON (doc.project_id, doc.name) doc.project_id, doc.release_id, doc.name, doc.tag FROM global_definition_search_docs doc @@ -196,8 +197,11 @@ defNameSearch mayCaller mayFilter (Query query) limit = do LIMIT #{limit} ) -- THEN sort docs to the bottom. - SELECT br.project_id, br.release_id, br.name + SELECT br.project_id, br.release_id, br.name, br.tag FROM best_results br -- docs and tests to the bottom, but otherwise sort by quality of the match. 
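
The ORDER BY just below leans on Postgres comparing row values field by field, with false sorting before true, so under DESC the rows satisfying the earlier predicates rank first. A standalone illustration (not part of this patch):

    -- 'plain' outranks 'doc' here despite the lower similarity score:
    SELECT *
    FROM (VALUES ('doc', 0.9), ('plain', 0.7)) AS t (tag, sim)
    ORDER BY (t.tag <> 'doc', t.sim) DESC;
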
ORDER BY (br.tag <> 'doc'::definition_tag, br.tag <> 'test'::definition_tag, br.name LIKE ('%' || like_escape(#{query})), similarity(#{query}, br.name)) DESC |] + +definitionSearch :: Maybe UserId -> Maybe DefnNameSearchFilter -> Limit -> Set (DefnSearchToken (Either Name ShortHash)) -> Transaction e [DefinitionDocument proj release name typeRef] +definitionSearch searchTokens = _ diff --git a/src/Share/Prelude/Orphans.hs b/src/Share/Prelude/Orphans.hs index 6a4925e..ceeaf64 100644 --- a/src/Share/Prelude/Orphans.hs +++ b/src/Share/Prelude/Orphans.hs @@ -13,20 +13,21 @@ import Data.UUID (UUID) import Data.UUID qualified as UUID import GHC.TypeLits qualified as TypeError import Hasql.Interpolate qualified as Interp +import Text.Megaparsec qualified as MP import Unison.Server.Orphans () import Unison.ShortHash (ShortHash) import Unison.ShortHash qualified as SH import Witch -instance {-# OVERLAPPING #-} TypeError.TypeError ('TypeError.Text "A String will be encoded as char[], Did you mean to use Text instead?") => Interp.EncodeValue String where +instance {-# OVERLAPPING #-} (TypeError.TypeError ('TypeError.Text "A String will be encoded as char[], Did you mean to use Text instead?")) => Interp.EncodeValue String where encodeValue = error "unpossible" -instance {-# OVERLAPPING #-} TypeError.TypeError ('TypeError.Text "Strings are decoded as a char[], Did you mean to use Text instead?") => Interp.DecodeValue String where +instance {-# OVERLAPPING #-} (TypeError.TypeError ('TypeError.Text "Strings are decoded as a char[], Did you mean to use Text instead?")) => Interp.DecodeValue String where decodeValue = error "unpossible" -- Useful instance, but doesn't exist in either lib, likely because they just don't want to depend on one another. -instance Semialign f => Semialign (Cofree f) where - align :: Semialign f => Cofree f a -> Cofree f b -> Cofree f (These a b) +instance (Semialign f) => Semialign (Cofree f) where + align :: (Semialign f) => Cofree f a -> Cofree f b -> Cofree f (These a b) align (a :< l) (b :< r) = These a b :< alignWith go l r where diff --git a/src/Share/Web/API.hs b/src/Share/Web/API.hs index 5d841f5..d188e3e 100644 --- a/src/Share/Web/API.hs +++ b/src/Share/Web/API.hs @@ -23,6 +23,7 @@ type API = :<|> ("users" :> Share.UserAPI) :<|> ("search" :> Share.SearchEndpoint) :<|> ("search-names" :> Share.SearchDefinitionNamesEndpoint) + :<|> ("search-definitions" :> Share.SearchDefinitionsEndpoint) :<|> ("account" :> Share.AccountAPI) :<|> ("catalog" :> Projects.CatalogAPI) -- This path is part of the standard: https://datatracker.ietf.org/doc/html/rfc5785 diff --git a/src/Share/Web/Impl.hs b/src/Share/Web/Impl.hs index 2a8c98f..7228424 100644 --- a/src/Share/Web/Impl.hs +++ b/src/Share/Web/Impl.hs @@ -61,6 +61,7 @@ server = :<|> Share.userServer :<|> Share.searchEndpoint :<|> Share.searchDefinitionNamesEndpoint + :<|> Share.searchDefinitionsEndpoint :<|> Share.accountServer :<|> Projects.catalogServer :<|> discoveryEndpoint diff --git a/src/Share/Web/Share/API.hs b/src/Share/Web/Share/API.hs index 8f9a00d..145afe5 100644 --- a/src/Share/Web/Share/API.hs +++ b/src/Share/Web/Share/API.hs @@ -63,12 +63,13 @@ type SearchDefinitionNamesEndpoint = -- | Submit a definition search type SearchDefinitionsEndpoint = - MaybeAuthenticatedSession + MaybeAuthenticatedUserId :> RequiredQueryParam "query" Query :> QueryParam "limit" Limit :> QueryParam "user-filter" UserHandle :> QueryParam "project-filter" ProjectShortHand - :> Get '[JSON] [DefinitionSearchResult] + :> QueryParam 
"release-filter" ReleaseVersion + :> Get '[JSON] DefinitionSearchResults type AccountAPI = AuthenticatedSession diff --git a/src/Share/Web/Share/DefinitionSearch.hs b/src/Share/Web/Share/DefinitionSearch.hs new file mode 100644 index 0000000..c8e2aa7 --- /dev/null +++ b/src/Share/Web/Share/DefinitionSearch.hs @@ -0,0 +1,253 @@ +{-# LANGUAGE DataKinds #-} + +module Share.Web.DefinitionSearch (queryToTokens) where + +import Control.Lens +import Data.Char qualified as Char +import Data.List qualified as List +import Data.Map.Monoidal (MonoidalMap) +import Data.Map.Monoidal qualified as MonMap +import Data.Monoid (Sum (..)) +import Data.Set qualified as Set +import Data.Text qualified as Text +import Share.BackgroundJobs.Search.DefinitionSync.Types (DefnSearchToken (..), Occurrence, VarId (..)) +import Share.Prelude +import Text.Megaparsec qualified as MP +import Text.Megaparsec.Char qualified as MP +import Text.Megaparsec.Char.Lexer qualified as MP hiding (space) +import Unison.Name (Name) +import Unison.Name qualified as Name +import Unison.NameSegment (NameSegment) +import Unison.ShortHash (ShortHash) +import Unison.ShortHash qualified as SH +import Unison.Syntax.Name qualified as Name +import Unison.Syntax.NameSegment qualified as NameSegment + +data QueryError + = InvalidHash Text + | InvalidName Text + deriving stock (Show, Eq, Ord) + +instance MP.ShowErrorComponent QueryError where + showErrorComponent = \case + InvalidHash hash -> "Encountered invalid hash: " <> Text.unpack hash + InvalidName name -> "Encountered Invalid name: " <> Text.unpack name + +data MentionRef + = HashMention ShortHash + | NameMention Name + | TypeNameMention Name + | TypeVarMention Text + deriving stock (Show, Eq, Ord) + +type P = MP.Parsec QueryError Text + +-- | A very lax parser which converts a query into structured tokens for searching definitions. +-- +-- A query may look like: +-- +-- Simple definition name query: +-- E.g. 
foldMap
+--
+-- Type signature query
+-- E.g.:
+-- k -> v -> Map k v -> Map k v
+--
+-- Ad-hoc query:
+--
+-- Nat Text Abort
+--
+-- Hash-query
+--
+-- #abc1234
+--
+-- >>> queryToTokens "foldMap"
+-- Right (fromList [NameToken (Name Relative (NameSegment {toUnescapedText = "foldMap"} :| []))],Nothing)
+--
+-- >>> queryToTokens "#abc1234"
+-- Right (fromList [HashToken (ShortHash {prefix = "abc1234", cycle = Nothing, cid = Nothing})],Nothing)
+--
+-- >>> queryToTokens "##Nat"
+-- Right (fromList [HashToken (Builtin "Nat")],Nothing)
+--
+-- >>> queryToTokens "Nat Text #deadbeef Abort"
+-- Right (fromList [TypeMentionToken (Left (NameSegment {toUnescapedText = "Abort"})) (Just 1),TypeMentionToken (Left (NameSegment {toUnescapedText = "Nat"})) (Just 1),TypeMentionToken (Left (NameSegment {toUnescapedText = "Text"})) (Just 1),HashToken (ShortHash {prefix = "deadbeef", cycle = Nothing, cid = Nothing})],Nothing)
+--
+-- >>> queryToTokens "k -> v -> Map k v -> Map k v"
+-- Right (fromList [TypeMentionToken (Left (NameSegment {toUnescapedText = "Map"})) (Just 2),TypeVarToken 0 (Just 3),TypeVarToken 1 (Just 3)],Just 3)
+--
+-- >>> queryToTokens ": b -> a -> b"
+-- Right (fromList [TypeVarToken 0 (Just 1),TypeVarToken 1 (Just 2)],Just 2)
+--
+-- >>> queryToTokens "(a ->{𝕖} b) -> [a] ->{𝕖} [b]"
+-- Right (fromList [TypeMentionToken (Left (NameSegment {toUnescapedText = "List"})) (Just 2),TypeVarToken 0 (Just 2),TypeVarToken 1 (Just 2)],Just 2)
+--
+-- >>> queryToTokens "'{Abort} ()"
+-- Right (fromList [TypeMentionToken (Left (NameSegment {toUnescapedText = "Abort"})) (Just 1)],Nothing)
+--
+-- Unfinished query:
+-- >>> queryToTokens "(Text -> Text"
+-- Right (fromList [TypeMentionToken (Left (NameSegment {toUnescapedText = "Text"})) (Just 2)],Nothing)
+--
+-- Horribly misshapen query:
+-- >>> queryToTokens "[{ &Text !{𝕖} (Optional)"
+-- Right (fromList [TypeMentionToken (Left (NameSegment {toUnescapedText = "Optional"})) (Just 1),TypeMentionToken (Left (NameSegment {toUnescapedText = "Text"})) (Just 1)],Nothing)
+queryToTokens :: Text -> Either Text (Set (DefnSearchToken (Either NameSegment ShortHash)), Maybe Int)
+queryToTokens query =
+  let cleanQuery =
+        query
+          & Text.filter Char.isAscii
+   in MP.runParser queryParser "query" cleanQuery
+        & \case
+          (Left _err) ->
+            let simpleQuery =
+                  query
+                    & Text.map (\c -> if Char.isAlphaNum c || c `elem` ("#." :: String) then c else ' ')
+                    & Text.words
+                    & Text.unwords
+             in -- If even the lax parser fails, try simplifying the query even further to see if
+                -- we can parse anything at all.
+                queryToTokens simpleQuery
+          (Right (mayArity, occurrences)) ->
+            let (hashAndNameTokens, typeVarMentions) =
+                  MonMap.toList
+                    occurrences
+                    & foldMap \case
+                      (HashMention hash, _occ) -> ([HashToken hash], [])
+                      (NameMention name, _occ) -> ([NameToken name], [])
+                      (TypeNameMention name, occ) -> ([TypeMentionToken (Left $ Name.lastSegment name) $ Just occ], [])
+                      (TypeVarMention var, occ) -> ([], [(var, occ)])
+
+                -- Normalize type vars so varIds are sorted according to number of occurrences.
+                normalizedTypeVarTokens =
+                  List.sortOn snd typeVarMentions
+                    & imap (\i (_vId, occ) -> TypeVarToken (VarId i) $ Just occ)
+                -- If there's no indication the user is trying to do a 'real' type query then
+                -- ignore arity.
+                arity = do
+                  Sum n <- mayArity
+                  if n <= 0
+                    then Nothing
+                    else Just n
+             in Right (Set.fromList $ hashAndNameTokens <> normalizedTypeVarTokens, arity)
+
+queryParser :: P (Maybe (Sum Int), MonoidalMap MentionRef Occurrence)
+queryParser = do
+  MP.choice
+    [ (Nothing,) <$> MP.try simpleHashQueryP,
+      (Nothing,) <$> MP.try simpleNameQueryP,
+      first Just <$> typeQueryP
+    ]
+    <* MP.eof
+
+lexeme :: P a -> P a
+lexeme = MP.lexeme MP.space
+
+simpleHashQueryP :: P (MonoidalMap MentionRef Occurrence)
+simpleHashQueryP = do
+  possibleHash <- lexeme hashP
+  -- Simple queries have ONLY the hash
+  MP.eof
+  pure $ MonMap.singleton (HashMention possibleHash) 1
+
+simpleNameQueryP :: P (MonoidalMap MentionRef Occurrence)
+simpleNameQueryP = do
+  name <- nameP False
+  -- Simple queries have ONLY the name
+  MP.eof
+  pure $ MonMap.singleton (NameMention name) 1
+
+-- | Parse a type query, returning the arity of the top-level type.
+typeQueryP :: P (Sum Int, MonoidalMap MentionRef Occurrence)
+typeQueryP = do
+  _ <- optional $ lexeme (MP.char ':')
+  fmap fold . many $ do
+    tokens <-
+      lexeme $
+        MP.choice
+          [ typeQueryTokenP,
+            listP,
+            MP.try unitP,
+            MP.try tupleP,
+            -- We don't do anything smart with bracketed types yet.
+            MP.between (lexeme (MP.char '(')) (optional $ lexeme (MP.char ')')) (snd <$> typeQueryP),
+            -- Remove type var mentions from ability lists; we don't consider them when building
+            -- the index, so they just wreck search results.
+            removeTypeVarMentions <$> MP.between (lexeme (MP.char '{')) (optional $ lexeme (MP.char '}')) (foldMap snd <$> MP.sepBy typeQueryP (lexeme $ MP.char ','))
+          ]
+    arityBump <-
+      optional (lexeme (MP.string "->"))
+        <&> \case
+          Nothing -> Sum 0
+          Just _ -> Sum 1
+    pure (arityBump, tokens)
+  where
+    removeTypeVarMentions :: MonoidalMap MentionRef Occurrence -> MonoidalMap MentionRef Occurrence
+    removeTypeVarMentions = MonMap.filterWithKey \k _v -> case k of
+      TypeVarMention _ -> False
+      _ -> True
+
+-- We just ignore units for now; they don't contribute much to the search.
+unitP :: P (MonoidalMap MentionRef Occurrence)
+unitP = MP.choice [MP.string "()", MP.string "Unit", MP.string "'"] $> mempty
+
+tupleP :: P (MonoidalMap MentionRef Occurrence)
+tupleP = MP.between (MP.char '(') (MP.char ')') do
+  -- Note: the component type mentions are currently discarded; a tuple is
+  -- indexed only as a Tuple mention.
+  _ <- typeQueryP
+  _ <- MP.char ','
+  _ <- typeQueryP
+  pure $ MonMap.singleton (TypeNameMention (Name.unsafeParseText "Tuple")) 1
+
+listP :: P (MonoidalMap MentionRef Occurrence)
+listP = MP.between (lexeme (MP.char '[')) (optional $ lexeme (MP.char ']')) do
+  (_, tokens) <- typeQueryP
+  pure $ tokens <> MonMap.singleton (TypeNameMention (Name.unsafeParseText "List")) 1
+
+typeQueryTokenP :: P (MonoidalMap MentionRef Occurrence)
+typeQueryTokenP = do
+  MP.choice
+    [ hashMentionTokenP,
+      typeVarMentionTokenP,
+      typeNameMentionTokenP
+    ]
+  where
+    hashMentionTokenP :: P (MonoidalMap MentionRef Occurrence)
+    hashMentionTokenP = do
+      hash <- hashP
+      pure $ MonMap.singleton (HashMention hash) 1
+
+    typeNameMentionTokenP :: P (MonoidalMap MentionRef Occurrence)
+    typeNameMentionTokenP = do
+      name <- nameP True
+      pure $ MonMap.singleton (TypeNameMention name) 1
+
+    typeVarMentionTokenP :: P (MonoidalMap MentionRef Occurrence)
+    typeVarMentionTokenP = do
+      varText <- typeVarP
+      pure $ MonMap.singleton (TypeVarMention varText) 1
+
+typeVarP :: P Text
+typeVarP = Text.pack <$> liftA2 (:) (MP.oneOf $ ['a' ..
'z'] <> "_") (many $ MP.alphaNumChar <|> MP.char '_') + +hashP :: P ShortHash +hashP = do + -- Start with at least one hash; + _ <- MP.char '#' + possibleHash <- ('#' :) <$> (some $ MP.alphaNumChar <|> MP.char '#') + case SH.fromText (Text.pack possibleHash) of + Nothing -> MP.customFailure . InvalidHash $ Text.pack possibleHash + Just hash -> pure hash + +nameP :: Bool -> P Name +nameP mustBeType = do + firstChar <- + if mustBeType + then MP.satisfy Char.isUpper + else MP.satisfy (\c -> NameSegment.symbolyIdChar c || NameSegment.wordyIdChar c) + nameRemainder <- + if mustBeType + then many (MP.satisfy $ \c -> NameSegment.wordyIdChar c || c == '.') + else (many (MP.satisfy $ \c -> NameSegment.symbolyIdChar c || NameSegment.wordyIdChar c || c == '.')) + case Name.parseTextEither (Text.pack $ firstChar : nameRemainder) of + Left _ -> MP.customFailure . InvalidName $ Text.pack (firstChar : nameRemainder) + Right name -> pure name diff --git a/src/Share/Web/Share/Impl.hs b/src/Share/Web/Share/Impl.hs index 5d5b9cf..6444d9d 100644 --- a/src/Share/Web/Share/Impl.hs +++ b/src/Share/Web/Share/Impl.hs @@ -363,27 +363,45 @@ searchDefinitionNamesEndpoint :: Maybe IDs.ReleaseVersion -> WebApp [DefinitionNameSearchResult] searchDefinitionNamesEndpoint callerUserId query mayLimit userFilter projectFilter releaseFilter = do - filter <- runMaybeT $ resolveProjectAndReleaseFilter <|> resolveUserFilter + filter <- runMaybeT $ resolveProjectAndReleaseFilter projectFilter releaseFilter <|> resolveUserFilter userFilter matches <- PG.runTransaction $ DDQ.defNameSearch callerUserId filter query limit - -- TODO: Fix this: - let response = matches <&> \(_projId, _releaseId, name) -> DefinitionNameSearchResult (Name.toText name) "name" + let response = matches <&> \(_projId, _releaseId, name, tag) -> DefinitionNameSearchResult name tag pure response where limit = fromMaybe (Limit 20) mayLimit - resolveProjectAndReleaseFilter :: MaybeT WebApp DDQ.DefnNameSearchFilter - resolveProjectAndReleaseFilter = do - projectShortHand <- hoistMaybe projectFilter - Project {projectId} <- lift . PG.runTransactionOrRespondError $ Q.projectByShortHand projectShortHand `whenNothingM` throwError (EntityMissing (ErrorID "no-project-found") $ "No project found for short hand: " <> IDs.toText projectShortHand) - case releaseFilter of - Nothing -> pure $ DDQ.ProjectFilter projectId - Just releaseVersion -> do - Release {releaseId} <- lift . PG.runTransactionOrRespondError $ Q.releaseByProjectIdAndReleaseShortHand projectId (IDs.ReleaseShortHand releaseVersion) `whenNothingM` throwError (EntityMissing (ErrorID "no-release-found") $ "No release found for project: " <> IDs.toText projectShortHand <> " and version: " <> IDs.toText releaseVersion) - pure $ DDQ.ReleaseFilter releaseId - resolveUserFilter :: MaybeT WebApp DDQ.DefnNameSearchFilter - resolveUserFilter = do - userHandle <- hoistMaybe userFilter - User {user_id} <- lift $ PG.runTransactionOrRespondError $ Q.userByHandle userHandle `whenNothingM` throwError (EntityMissing (ErrorID "no-user-for-handle") $ "User not found for handle: " <> IDs.toText userHandle) - pure $ DDQ.UserFilter user_id + +resolveProjectAndReleaseFilter :: Maybe IDs.ProjectShortHand -> Maybe IDs.ReleaseVersion -> MaybeT WebApp DDQ.DefnNameSearchFilter +resolveProjectAndReleaseFilter projectFilter releaseFilter = do + projectShortHand <- hoistMaybe projectFilter + Project {projectId} <- lift . 
PG.runTransactionOrRespondError $ Q.projectByShortHand projectShortHand `whenNothingM` throwError (EntityMissing (ErrorID "no-project-found") $ "No project found for short hand: " <> IDs.toText projectShortHand) + case releaseFilter of + Nothing -> pure $ DDQ.ProjectFilter projectId + Just releaseVersion -> do + Release {releaseId} <- lift . PG.runTransactionOrRespondError $ Q.releaseByProjectIdAndReleaseShortHand projectId (IDs.ReleaseShortHand releaseVersion) `whenNothingM` throwError (EntityMissing (ErrorID "no-release-found") $ "No release found for project: " <> IDs.toText projectShortHand <> " and version: " <> IDs.toText releaseVersion) + pure $ DDQ.ReleaseFilter releaseId + +resolveUserFilter :: Maybe UserHandle -> MaybeT WebApp DDQ.DefnNameSearchFilter +resolveUserFilter userFilter = do + userHandle <- hoistMaybe userFilter + User {user_id} <- lift $ PG.runTransactionOrRespondError $ Q.userByHandle userHandle `whenNothingM` throwError (EntityMissing (ErrorID "no-user-for-handle") $ "User not found for handle: " <> IDs.toText userHandle) + pure $ DDQ.UserFilter user_id + +searchDefinitionsEndpoint :: + Maybe UserId -> + Query -> + Maybe Limit -> + Maybe UserHandle -> + Maybe IDs.ProjectShortHand -> + Maybe IDs.ReleaseVersion -> + WebApp DefinitionSearchResults +searchDefinitionsEndpoint callerUserId (Query query) mayLimit userFilter projectFilter releaseFilter = do + filter <- runMaybeT $ resolveProjectAndReleaseFilter projectFilter releaseFilter <|> resolveUserFilter userFilter + let searchTokens = DefinitionSearch.queryToTokens query + matches <- PG.runTransaction $ DDQ.definitionSearch callerUserId filter limit _ + let results = matches <&> _ + pure $ DefinitionSearchResults results + where + limit = fromMaybe (Limit 20) mayLimit accountInfoEndpoint :: Session -> WebApp UserAccountInfo accountInfoEndpoint Session {sessionUserId} = do diff --git a/src/Share/Web/Share/Types.hs b/src/Share/Web/Share/Types.hs index e16771f..5ac6c82 100644 --- a/src/Share/Web/Share/Types.hs +++ b/src/Share/Web/Share/Types.hs @@ -7,6 +7,7 @@ module Share.Web.Share.Types where import Data.Aeson (KeyValue ((.=)), ToJSON (..)) import Data.Aeson qualified as Aeson import Network.URI (URI) +import Share.BackgroundJobs.Search.DefinitionSync.Types (TermOrTypeTag) import Share.BackgroundJobs.Search.DefinitionSync.Types qualified as DefSync import Share.IDs import Share.Prelude @@ -156,15 +157,25 @@ instance ToJSON UserDisplayInfo where data DefinitionNameSearchResult = DefinitionNameSearchResult - { token :: Text, - kind :: Text + { token :: Name, + tag :: TermOrTypeTag } instance ToJSON DefinitionNameSearchResult where toJSON DefinitionNameSearchResult {..} = Aeson.object [ "token" .= token, - "kind" .= kind + "tag" .= tag + ] + +newtype DefinitionSearchResults = DefinitionSearchResults + { results :: [DefinitionSearchResult] + } + +instance ToJSON DefinitionSearchResults where + toJSON DefinitionSearchResults {..} = + Aeson.object + [ "results" .= results ] data DefinitionSearchResult @@ -174,3 +185,12 @@ data DefinitionSearchResult project :: ProjectShortHand, release :: ReleaseShortHand } + +instance ToJSON DefinitionSearchResult where + toJSON DefinitionSearchResult {..} = + Aeson.object + [ "fqn" .= fqn, + "summary" .= summary, + "project" .= project, + "release" .= release + ] From cc1d28de969537f09997f4dd98ba15e1b90c4185 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Tue, 23 Jul 2024 17:08:36 -0700 Subject: [PATCH 34/48] Implement search-definitions endpoint --- src/Share/Postgres/Hashes/Queries.hs 
| 35 +++++++++++++++++-- .../Search/DefinitionSearch/Queries.hs | 4 +-- src/Share/Web/Share/DefinitionSearch.hs | 2 +- src/Share/Web/Share/Impl.hs | 22 +++++++++--- 4 files changed, 54 insertions(+), 9 deletions(-) diff --git a/src/Share/Postgres/Hashes/Queries.hs b/src/Share/Postgres/Hashes/Queries.hs index 52c852c..105cc16 100644 --- a/src/Share/Postgres/Hashes/Queries.hs +++ b/src/Share/Postgres/Hashes/Queries.hs @@ -287,7 +287,7 @@ expectCausalIdsOf trav = do then unrecoverableError $ EntityMissing "missing-expected-causal" $ "Missing one of these causals: " <> Text.intercalate ", " (into @Text <$> hashes) else pure results -expectNamespaceIdsByCausalIdsOf :: QueryM m => Traversal s t CausalId BranchHashId -> s -> m t +expectNamespaceIdsByCausalIdsOf :: (QueryM m) => Traversal s t CausalId BranchHashId -> s -> m t expectNamespaceIdsByCausalIdsOf trav s = do s & unsafePartsOf trav %%~ \causalIds -> do @@ -334,7 +334,38 @@ loadCausalIdByHash causalHash = do AND EXISTS (SELECT FROM causal_ownership o WHERE o.causal_id = causals.id AND o.user_id = #{codebaseOwner}) |] -expectCausalIdByHash :: HasCallStack => CausalHash -> Codebase.CodebaseM e CausalId +expectCausalIdByHash :: (HasCallStack) => CausalHash -> Codebase.CodebaseM e CausalId expectCausalIdByHash causalHash = do loadCausalIdByHash causalHash `whenNothingM` unrecoverableError (MissingExpectedEntity $ "Expected causal id for hash: " <> tShow causalHash) + +-- | TODO: + +---- | Expands shorthashes to their full form (still as a ShortHash) +---- +---- Note: Be wary, this could cause hashes which are unknown to a user to leak if they're not +-- expandShortHashesOf :: (HasCallStack) => Traversal s t ShortHash ShortHash -> s -> CodebaseM e t +-- expandShortHashesOf trav s = do +-- codebaseOwner <- asks Codebase.codebaseOwner +-- s +-- & unsafePartsOf (trav . prefix_) %%~ \shortHashes -> do +-- let numberedShortHashes = zip [0 :: Int32 ..] 
shortHashes +-- results :: [Text] <- +-- queryListCol +-- [sql| +-- WITH hash_prefixes(ord, prefix) AS ( +-- SELECT * FROM ^{toTable numberedShortHashes} +-- ) +-- SELECT sh.prefix +-- FROM hash_prefixes sh +-- JOIN short_hashes sh2 ON sh.prefix = sh2.prefix +-- ORDER BY sh.ord ASC +-- |] +-- if length results /= length shortHashes +-- then error "expandShortHashesOf: Missing expected short hash" +-- else pure results +-- where +-- prefix_ :: Traversal' ShortHash Text +-- prefix_ f = \case +-- SH.Builtin b -> pure $ SH.Builtin b +-- SH.ShortHash p c i -> SH.ShortHash <$> f p <*> pure c <*> pure i diff --git a/src/Share/Postgres/Search/DefinitionSearch/Queries.hs b/src/Share/Postgres/Search/DefinitionSearch/Queries.hs index 05e2e0a..1094957 100644 --- a/src/Share/Postgres/Search/DefinitionSearch/Queries.hs +++ b/src/Share/Postgres/Search/DefinitionSearch/Queries.hs @@ -203,5 +203,5 @@ defNameSearch mayCaller mayFilter (Query query) limit = do ORDER BY (br.tag <> 'doc'::definition_tag, br.tag <> 'test'::definition_tag, br.name LIKE ('%' || like_escape(#{query})), similarity(#{query}, br.name)) DESC |] -definitionSearch :: Maybe UserId -> Maybe DefnNameSearchFilter -> Limit -> Set (DefnSearchToken (Either Name ShortHash)) -> Transaction e [DefinitionDocument proj release name typeRef] -definitionSearch searchTokens = _ +definitionSearch :: Maybe UserId -> Maybe DefnNameSearchFilter -> Limit -> Set (DefnSearchToken (Either NameSegment ShortHash)) -> Maybe Int -> Transaction e [DefinitionDocument proj release name typeRef] +definitionSearch searchTokens preferredArity = _ diff --git a/src/Share/Web/Share/DefinitionSearch.hs b/src/Share/Web/Share/DefinitionSearch.hs index c8e2aa7..d85006b 100644 --- a/src/Share/Web/Share/DefinitionSearch.hs +++ b/src/Share/Web/Share/DefinitionSearch.hs @@ -1,6 +1,6 @@ {-# LANGUAGE DataKinds #-} -module Share.Web.DefinitionSearch (queryToTokens) where +module Share.Web.Share.DefinitionSearch (queryToTokens) where import Control.Lens import Data.Char qualified as Char diff --git a/src/Share/Web/Share/Impl.hs b/src/Share/Web/Share/Impl.hs index 6444d9d..886fefa 100644 --- a/src/Share/Web/Share/Impl.hs +++ b/src/Share/Web/Share/Impl.hs @@ -7,6 +7,7 @@ module Share.Web.Share.Impl where import Servant +import Share.BackgroundJobs.Search.DefinitionSync.Types (DefinitionDocument (..)) import Share.Codebase qualified as Codebase import Share.Codebase.Types qualified as Codebase import Share.IDs (TourId, UserHandle) @@ -27,6 +28,7 @@ import Share.User (User (..)) import Share.UserProfile (UserProfile (..)) import Share.Utils.API import Share.Utils.Caching +import Share.Utils.Logging qualified as Logging import Share.Utils.Servant.Cookies qualified as Cookies import Share.Web.App import Share.Web.Authentication qualified as AuthN @@ -36,6 +38,7 @@ import Share.Web.Share.API qualified as Share import Share.Web.Share.Branches.Impl qualified as Branches import Share.Web.Share.CodeBrowsing.API (CodeBrowseAPI) import Share.Web.Share.Contributions.Impl qualified as Contributions +import Share.Web.Share.DefinitionSearch qualified as DefinitionSearch import Share.Web.Share.Projects.Impl qualified as Projects import Share.Web.Share.Types import Unison.Codebase.Path qualified as Path @@ -396,10 +399,21 @@ searchDefinitionsEndpoint :: WebApp DefinitionSearchResults searchDefinitionsEndpoint callerUserId (Query query) mayLimit userFilter projectFilter releaseFilter = do filter <- runMaybeT $ resolveProjectAndReleaseFilter projectFilter releaseFilter <|> resolveUserFilter userFilter - 
let searchTokens = DefinitionSearch.queryToTokens query - matches <- PG.runTransaction $ DDQ.definitionSearch callerUserId filter limit _ - let results = matches <&> _ - pure $ DefinitionSearchResults results + case DefinitionSearch.queryToTokens query of + Left _err -> do + Logging.logErrorText $ "Failed to parse query: " <> query + pure $ DefinitionSearchResults [] + Right (searchTokens, mayArity) -> do + matches <- PG.runTransaction $ DDQ.definitionSearch callerUserId filter limit searchTokens mayArity + let results = + matches <&> \DefinitionDocument {fqn, metadata = summary, project, release} -> + DefinitionSearchResult + { fqn, + summary, + project, + release + } + pure $ DefinitionSearchResults results where limit = fromMaybe (Limit 20) mayLimit From 35422d37ca3766adc304b989d33be317107acab4 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Tue, 23 Jul 2024 17:13:33 -0700 Subject: [PATCH 35/48] Implement definition search query, add escaping and prefix wildcarding to tokens --- Queries.hs | 277 ++++++++++++++++++ .../BackgroundJobs/Search/DefinitionSync.hs | 26 +- .../Search/DefinitionSync/Types.hs | 10 +- .../Search/DefinitionSearch/Queries.hs | 201 ++++++++++--- src/Share/Prelude/Orphans.hs | 1 - 5 files changed, 457 insertions(+), 58 deletions(-) create mode 100644 Queries.hs diff --git a/Queries.hs b/Queries.hs new file mode 100644 index 0000000..2ef9049 --- /dev/null +++ b/Queries.hs @@ -0,0 +1,277 @@ +{-# LANGUAGE TypeOperators #-} + +module Share.Postgres.Search.DefinitionSearch.Queries + ( submitReleaseToBeSynced, + claimUnsyncedRelease, + insertDefinitionDocuments, + cleanIndexForRelease, + defNameSearch, + definitionSearch, + DefnNameSearchFilter (..), + ) +where + +import Control.Lens +import Data.Aeson (fromJSON) +import Data.Aeson qualified as Aeson +import Data.Foldable qualified as Foldable +import Data.Set qualified as Set +import Data.Text qualified as Text +import Hasql.Interpolate qualified as Hasql +import Servant (ServerError (..)) +import Servant.Server (err500) +import Share.BackgroundJobs.Search.DefinitionSync.Types +import Share.IDs (ProjectId, ReleaseId, UserId) +import Share.Postgres +import Share.Prelude +import Share.Utils.API (Limit, Query (Query)) +import Share.Utils.Logging qualified as Logging +import Share.Web.Errors qualified as Errors +import Unison.DataDeclaration qualified as DD +import Unison.Name (Name) +import Unison.Name qualified as Name +import Unison.NameSegment (NameSegment) +import Unison.Server.Types (TermTag (..), TypeTag (..)) +import Unison.ShortHash (ShortHash) +import Unison.Syntax.Name qualified as Name +import Unison.Syntax.NameSegment qualified as NameSegment + +data DefinitionSearchError = FailedToDecodeMetadata Aeson.Value Text + deriving stock (Show, Eq, Ord) + +instance Errors.ToServerError DefinitionSearchError where + toServerError = \case + FailedToDecodeMetadata _v _err -> (Errors.ErrorID "invalid-definition-search-metadata", err500 {errBody = "Internal Server Error"}) + +instance Logging.Loggable DefinitionSearchError where + toLog = \case + FailedToDecodeMetadata v err -> + Logging.textLog ("Failed to decode metadata: " <> tShow v <> " " <> err) + & Logging.withSeverity Logging.Error + +submitReleaseToBeSynced :: ReleaseId -> Transaction e () +submitReleaseToBeSynced releaseId = do + execute_ + [sql| + INSERT INTO global_definition_search_release_queue (id) + VALUES (#{releaseId}) + |] + +-- | Claim the oldest unsynced release to be indexed. 
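+--
+-- This is the standard Postgres work-queue pattern: the DELETE runs over a
+-- @FOR UPDATE SKIP LOCKED@ CTE, so each worker atomically claims a distinct
+-- row, and concurrent workers skip already-locked rows instead of blocking.
+--
+-- For illustration only, a polling worker built on this might look roughly
+-- like the sketch below; @pollDelayMicros@ and @syncRelease@ are assumed
+-- helpers, not definitions from this module:
+--
+-- @
+-- workerLoop = forever $ do
+--   mayReleaseId <- runTransaction claimUnsyncedRelease
+--   case mayReleaseId of
+--     Nothing -> liftIO (threadDelay pollDelayMicros)
+--     Just releaseId -> syncRelease releaseId
+-- @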
+claimUnsyncedRelease :: Transaction e (Maybe ReleaseId) +claimUnsyncedRelease = do + query1Col + [sql| + WITH chosen_release(release_id) AS ( + SELECT q.release_id + FROM global_definition_search_release_queue q + ORDER BY q.created_at ASC + LIMIT 1 + -- Skip any that are being synced by other workers. + FOR UPDATE SKIP LOCKED + ) + DELETE FROM global_definition_search_release_queue + USING chosen_release + WHERE global_definition_search_release_queue.release_id = chosen_release.release_id + RETURNING chosen_release.release_id + |] + +-- | Save definition documents to be indexed for search. +insertDefinitionDocuments :: [DefinitionDocument ProjectId ReleaseId Name (NameSegment, ShortHash)] -> Transaction e () +insertDefinitionDocuments docs = pipelined $ do + let docsTable = docRow <$> docs + for_ docsTable \(projectId, releaseId, fqn, tokens, arity, tag, metadata) -> do + -- Ideally we'd do a bulk insert, but Hasql doesn't provide any method for passing arrays of + -- arrays as parameters, so instead we insert each record individually so we can use our + -- only level of array-ness for the tokens. + execute_ $ + [sql| + INSERT INTO global_definition_search_docs (project_id, release_id, name, search_tokens, arity, tag, metadata) + VALUES (#{projectId}, #{releaseId}, #{fqn}, array_to_tsvector(#{tokens}), #{arity}, #{tag}::definition_tag, #{metadata}::jsonb) + ON CONFLICT DO NOTHING + |] + where + docRow :: DefinitionDocument ProjectId ReleaseId Name (NameSegment, ShortHash) -> (ProjectId, ReleaseId, Text, [Text], Int32, TermOrTypeTag, Hasql.Jsonb) + docRow DefinitionDocument {project, release, fqn, tokens, arity, tag, metadata} = + ( project, + release, + Name.toText fqn, + foldMap searchTokenToText $ Set.toList tokens, + fromIntegral arity, + tag, + Hasql.Jsonb $ Aeson.toJSON metadata + ) + +-- | Wipe out any rows for the given release, useful when re-indexing. +cleanIndexForRelease :: ReleaseId -> Transaction e () +cleanIndexForRelease releaseId = do + execute_ + [sql| + DELETE FROM global_definition_search_docs + WHERE release_id = #{releaseId} + |] + +-- | Convert a search token to a TSVector. +-- +-- +-- Note: Names in tokens have their segments reversed, this is because PG Gin indexes only support +-- prefix-matching on lexemes, and this lets us match on any valid name suffix. +-- This is also why Hashes and Names come LAST in the token, so we can do, e.g. 
`mn,1,map.List:*` +-- +-- >>> import Unison.Syntax.Name qualified as Name +-- >>> searchTokenToText (NameToken (Name.unsafeParseText "my.cool.name")) +-- ["n,my.cool.name"] +-- +-- >>> import Unison.ShortHash qualified as SH +-- >>> import Data.Maybe (fromJust) +-- >>> searchTokenToText (TypeMentionToken (NameSegment.unsafeParseText "Thing", fromJust $ SH.fromText "#2tWjVAuc7") (Just $ Occurrence 1)) +-- ["mn,1,Thing","mh,1,#2tWjVAuc7"] +-- +-- >>> searchTokenToText (TypeMentionToken (NameSegment.unsafeParseText "Thing", fromJust $ SH.fromText "#2tWjVAuc7") Nothing) +-- ["mn,r,Thing","mh,r,#2tWjVAuc7"] +-- +-- >>> searchTokenToText (TypeVarToken (VarId 1) (Just $ Occurrence 1)) +-- ["v,1,1"] +-- +-- >>> searchTokenToText (TermTagToken Doc) +-- ["t,doc"] +-- >>> searchTokenToText (TermTagToken (Constructor Data)) +-- ["t,data-con"] +-- >>> searchTokenToText (TypeTagToken Data) +-- ["t,data"] +searchTokenToText :: DefnSearchToken (NameSegment, ShortHash) -> [Text] +searchTokenToText = \case + NameToken name -> [makeSearchToken nameType (reversedNameText name) Nothing] + TypeMentionToken (ns, typeRef) occ -> + [ makeSearchToken typeMentionTypeByNameType (NameSegment.toEscapedText ns) (Just occ), + makeSearchToken typeMentionTypeByHashType (into @Text @ShortHash typeRef) (Just occ) + ] + TypeVarToken varId occ -> [makeSearchToken typeVarType (varIdText varId) (Just occ)] + HashToken sh -> [makeSearchToken hashType (into @Text sh) Nothing] + TermTagToken termTag -> [makeSearchToken tagType (termTagText termTag) Nothing] + TypeTagToken typTag -> [makeSearchToken tagType (typeTagText typTag) Nothing] + TypeModToken mod -> [makeSearchToken typeModType (typeModText mod) Nothing] + where + reversedNameText :: Name -> Text + reversedNameText n = Text.intercalate "." 
$ Foldable.toList $ fmap NameSegment.toEscapedText $ Name.reverseSegments n + typeModText = \case + DD.Structural -> "structural" + DD.Unique {} -> "unique" + varIdText :: VarId -> Text + varIdText (VarId n) = tShow n + termTagText :: TermTag -> Text + termTagText = \case + Doc -> "doc" + Test -> "test" + Plain -> "plain" + Constructor typeTag -> typeTagText typeTag <> "-con" + typeTagText :: TypeTag -> Text + typeTagText = \case + Data -> "data" + Ability -> "ability" + nameType :: Text + nameType = "n" + typeMentionTypeByNameType :: Text + typeMentionTypeByNameType = "mn" + typeMentionTypeByHashType :: Text + typeMentionTypeByHashType = "mh" + typeVarType :: Text + typeVarType = "v" + hashType :: Text + hashType = "h" + tagType :: Text + tagType = "t" + typeModType :: Text + typeModType = "mod" + makeSearchToken :: Text -> Text -> Maybe (Maybe Occurrence) -> Text + makeSearchToken kind txt occTxt = do + let occ = case occTxt of + Just (Just (Occurrence n)) -> [tShow n] + Just Nothing -> ["r"] + Nothing -> [] + in Text.intercalate "," $ + [kind] <> occ <> [Text.replace "," "" txt] + +searchTokensToTsQuery :: Set (DefnSearchToken (Either NameSegment ShortHash)) -> Text +searchTokensToTsQuery tokens = + Text.intercalate " & " $ searchTokenToText <$> Set.toList tokens + +data DefnNameSearchFilter + = ProjectFilter ProjectId + | ReleaseFilter ReleaseId + | UserFilter UserId + +defNameSearch :: Maybe UserId -> Maybe DefnNameSearchFilter -> Query -> Limit -> Transaction e [(ProjectId, ReleaseId, Name, TermOrTypeTag)] +defNameSearch mayCaller mayFilter (Query query) limit = do + let filters = case mayFilter of + Just (ProjectFilter projId) -> [sql| AND doc.project_id = #{projId} |] + Just (ReleaseFilter relId) -> [sql| AND doc.release_id = #{relId} |] + Just (UserFilter userId) -> [sql| AND p.owner_id = #{userId} |] + Nothing -> mempty + queryListRows @(ProjectId, ReleaseId, Name, TermOrTypeTag) + [sql| + WITH matches_deduped_by_project(project_id, release_id, name, tag) AS ( + SELECT DISTINCT ON (doc.project_id, doc.name) doc.project_id, doc.release_id, doc.name, doc.tag FROM global_definition_search_docs doc + JOIN projects p ON p.id = doc.project_id + JOIN project_releases r ON r.id = doc.release_id + WHERE + -- Search name by a trigram 'word similarity' + -- which will match if the query is similar to any 'word' (e.g. name segment) + -- in the name. + #{query} <% doc.name + AND (NOT p.private OR (#{mayCaller} IS NOT NULL AND EXISTS (SELECT FROM accessible_private_projects pp WHERE pp.user_id = #{mayCaller} AND pp.project_id = p.id))) + ^{filters} + ORDER BY doc.project_id, doc.name, r.major_version, r.minor_version, r.patch_version + ), + -- Find the best matches + best_results(project_id, release_id, name, tag) AS ( + SELECT m.project_id, m.release_id, m.name, m.tag + FROM matches_deduped_by_project m + ORDER BY similarity(#{query}, m.name) DESC + LIMIT #{limit} + ) + -- THEN sort docs to the bottom. + SELECT br.project_id, br.release_id, br.name, br.tag + FROM best_results br + -- docs and tests to the bottom, but otherwise sort by quality of the match. 
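+    -- (Postgres compares this parenthesized row field by field, and false sorts
+    -- before true, so DESC floats rows whose predicates hold: docs and tests
+    -- sink, then suffix LIKE matches and higher trigram similarity win ties.)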
+ ORDER BY (br.tag <> 'doc'::definition_tag, br.tag <> 'test'::definition_tag, br.name LIKE ('%' || like_escape(#{query})), similarity(#{query}, br.name)) DESC + |] + +definitionSearch :: Maybe UserId -> Maybe DefnNameSearchFilter -> Limit -> Set (DefnSearchToken (Either NameSegment ShortHash)) -> Maybe Int -> Transaction e [(ProjectId, ReleaseId, Name, TermOrTypeSummary)] +definitionSearch mayCaller mayFilter limit searchTokens preferredArity = do + let filters = case mayFilter of + Just (ProjectFilter projId) -> [sql| AND doc.project_id = #{projId} |] + Just (ReleaseFilter relId) -> [sql| AND doc.release_id = #{relId} |] + Just (UserFilter userId) -> [sql| AND p.owner_id = #{userId} |] + Nothing -> mempty + rows <- + queryListRows @(ProjectId, ReleaseId, Name, Hasql.Jsonb) + [sql| + WITH matches_deduped_by_project(project_id, release_id, name, metadata) AS ( + SELECT DISTINCT ON (doc.project_id, doc.name) doc.project_id, doc.release_id, doc.name, doc.metadata FROM global_definition_search_docs doc + JOIN projects p ON p.id = doc.project_id + JOIN project_releases r ON r.id = doc.release_id + WHERE + -- match on search tokens using GIN index. + plainto_tsquery(#{searchTokenText}) @@ doc.name + AND (NOT p.private OR (#{mayCaller} IS NOT NULL AND EXISTS (SELECT FROM accessible_private_projects pp WHERE pp.user_id = #{mayCaller} AND pp.project_id = p.id))) + ^{filters} + ORDER BY doc.project_id, doc.name, r.major_version, r.minor_version, r.patch_version + ), + -- Find the best matches + best_results(project_id, release_id, name, tag) AS ( + SELECT m.project_id, m.release_id, m.name, m.tag + FROM matches_deduped_by_project m + ORDER BY similarity(#{query}, m.name) DESC + LIMIT #{limit} + ) + -- THEN sort docs to the bottom. + SELECT br.project_id, br.release_id, br.name, br.tag + FROM best_results br + -- docs and tests to the bottom, but otherwise sort by quality of the match. + ORDER BY (br.tag <> 'doc'::definition_tag, br.tag <> 'test'::definition_tag, br.name LIKE ('%' || like_escape(#{query})), similarity(#{query}, br.name)) DESC + |] + rows & traverseOf (traversed . 
_4) \(Hasql.Jsonb v) -> do + case fromJSON v of + Aeson.Error err -> unrecoverableError $ FailedToDecodeMetadata v (Text.pack err) + Aeson.Success summary -> pure summary diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync.hs b/src/Share/BackgroundJobs/Search/DefinitionSync.hs index a6005cd..5526b2c 100644 --- a/src/Share/BackgroundJobs/Search/DefinitionSync.hs +++ b/src/Share/BackgroundJobs/Search/DefinitionSync.hs @@ -15,7 +15,7 @@ import Data.Set qualified as Set import Data.Set.Lens (setOf) import Ki.Unlifted qualified as Ki import Share.BackgroundJobs.Monad (Background) -import Share.BackgroundJobs.Search.DefinitionSync.Types (DefinitionDocument (..), DefnSearchToken (..), Occurrence, TermOrTypeSummary (..), TermOrTypeTag (..), VarId (..)) +import Share.BackgroundJobs.Search.DefinitionSync.Types (Arity (..), DefinitionDocument (..), DefnSearchToken (..), Occurrence, TermOrTypeSummary (..), TermOrTypeTag (..), VarId (..)) import Share.BackgroundJobs.Workers (newWorker) import Share.Codebase (CodebaseM) import Share.Codebase qualified as Codebase @@ -44,8 +44,6 @@ import Unison.Debug qualified as Debug import Unison.HashQualifiedPrime qualified as HQ' import Unison.LabeledDependency qualified as LD import Unison.Name (Name) -import Unison.Name qualified as Name -import Unison.NameSegment (NameSegment) import Unison.PrettyPrintEnv qualified as PPE import Unison.PrettyPrintEnvDecl qualified as PPED import Unison.PrettyPrintEnvDecl.Postgres qualified as PPEPostgres @@ -151,18 +149,18 @@ syncTerms namesPerspective bhId projectId releaseId termsCursor = do let allDeps = setOf (folded . folding tokens . folded . to LD.TypeReference) refDocs pped <- PG.timeTransaction "Build PPED" $ PPEPostgres.ppedForReferences namesPerspective allDeps let ppe = PPED.unsuffixifiedPPE pped - let namedDocs :: [DefinitionDocument ProjectId ReleaseId Name (NameSegment, ShortHash)] + let namedDocs :: [DefinitionDocument ProjectId ReleaseId Name (Name, ShortHash)] namedDocs = refDocs & traversed . field @"tokens" %~ Set.mapMaybe \token -> do for token \ref -> do name <- PPE.types ppe ref - pure $ (Name.lastSegment . HQ'.toName $ name, Reference.toShortHash ref) + pure $ (HQ'.toName $ name, Reference.toShortHash ref) lift $ PG.timeTransaction "Inserting Docs" $ DDQ.insertDefinitionDocuments namedDocs pure errs -- | Compute the search tokens for a term given its name, hash, and type signature -tokensForTerm :: (Var.Var v) => Name -> Referent -> Type.Type v a -> Summary.TermSummary -> (Set (DefnSearchToken TypeReference), Int) +tokensForTerm :: (Var.Var v) => Name -> Referent -> Type.Type v a -> Summary.TermSummary -> (Set (DefnSearchToken TypeReference), Arity) tokensForTerm name ref typ (Summary.TermSummary {tag}) = do let sh = Referent.toShortHash ref baseTokens = Set.fromList [NameToken name, HashToken sh] @@ -183,10 +181,10 @@ data TokenGenState v = TokenGenState type TokenGenM v = ReaderT (TokenGenEnv v) (State (TokenGenState v)) -- | Compute var occurrence and type ref occurrence search tokens from a type signature. -typeSigTokens :: forall v ann. (Var.Var v) => Type.Type v ann -> (Set (DefnSearchToken TypeReference), Int) +typeSigTokens :: forall v ann. (Var.Var v) => Type.Type v ann -> (Set (DefnSearchToken TypeReference), Arity) typeSigTokens typ = let occMap :: MonoidalMap (Either VarId TypeReference) (Occurrence, Any) - arity :: Int + arity :: Arity (occMap, Sum arity) = flip evalState initState . 
flip runReaderT (TokenGenEnv mempty) $ ABT.cata alg typ (varIds, typeRefs) = MonMap.toList occMap & foldMap \case @@ -222,8 +220,8 @@ typeSigTokens typ = -- Cata algebra for collecting type reference tokens from a type signature. alg :: ann -> - ABT.ABT Type.F v (TokenGenM v (MonoidalMap (Either VarId TypeReference) (Occurrence, Any {- Is return type -}), Sum Int)) -> - TokenGenM v (MonoidalMap (Either VarId TypeReference) (Occurrence, Any {- Is return type -}), Sum Int) + ABT.ABT Type.F v (TokenGenM v (MonoidalMap (Either VarId TypeReference) (Occurrence, Any {- Is return type -}), Sum Arity)) -> + TokenGenM v (MonoidalMap (Either VarId TypeReference) (Occurrence, Any {- Is return type -}), Sum Arity) alg _ann = \case ABT.Var v -> do vId <- varIdFor v @@ -277,10 +275,10 @@ syncTypes namesPerspective projectId releaseId typesCursor = do (errs, refDocs) <- types & foldMapM \(fqn, ref) -> fmap (either (\err -> ([err], [])) (\doc -> ([], [doc]))) . runExceptT $ do (declTokens, declArity) <- case ref of - Reference.Builtin _ -> pure (mempty, 0) + Reference.Builtin _ -> pure (mempty, Arity 0) Reference.DerivedId refId -> do decl <- lift (Codebase.loadTypeDeclaration refId) `whenNothingM` throwError (NoDeclForType fqn ref) - pure $ (tokensForDecl refId decl, length . DD.bound $ DD.asDataDecl decl) + pure $ (tokensForDecl refId decl, Arity . fromIntegral . length . DD.bound $ DD.asDataDecl decl) let basicTokens = Set.fromList [NameToken fqn, HashToken $ Reference.toShortHash ref] typeSummary <- lift $ Summary.typeSummaryForReference ref (Just fqn) Nothing @@ -301,13 +299,13 @@ syncTypes namesPerspective projectId releaseId typesCursor = do let allDeps = setOf (folded . folding tokens . folded . to LD.TypeReference) refDocs pped <- PPEPostgres.ppedForReferences namesPerspective allDeps let ppe = PPED.unsuffixifiedPPE pped - let namedDocs :: [DefinitionDocument ProjectId ReleaseId Name (NameSegment, ShortHash)] + let namedDocs :: [DefinitionDocument ProjectId ReleaseId Name (Name, ShortHash)] namedDocs = refDocs & traversed . field @"tokens" %~ Set.mapMaybe \token -> do for token \ref -> do name <- PPE.types ppe ref - pure $ (Name.lastSegment . HQ'.toName $ name, Reference.toShortHash ref) + pure $ (HQ'.toName name, Reference.toShortHash ref) lift $ DDQ.insertDefinitionDocuments namedDocs pure errs diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs b/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs index fab530c..f35fd52 100644 --- a/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs +++ b/src/Share/BackgroundJobs/Search/DefinitionSync/Types.hs @@ -1,5 +1,6 @@ {-# LANGUAGE DataKinds #-} {-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE TemplateHaskell #-} module Share.BackgroundJobs.Search.DefinitionSync.Types ( TermOrTypeSummary (..), @@ -8,9 +9,11 @@ module Share.BackgroundJobs.Search.DefinitionSync.Types DefnSearchToken (..), Occurrence (..), VarId (..), + Arity (..), ) where +import Control.Lens hiding ((.=)) import Data.Aeson import Data.Monoid (Sum (..)) import Data.Text qualified as Text @@ -126,6 +129,9 @@ newtype Occurrence = Occurrence Int newtype VarId = VarId Int deriving newtype (Show, Read, Eq, Ord, Num, ToJSON, Enum) +newtype Arity = Arity Int32 + deriving newtype (Show, Read, Eq, Ord, Num, ToJSON, Enum, Hasql.EncodeValue, Hasql.DecodeValue) + -- | Represents the possible ways we can search the global definitions index. 
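 --
 -- For intuition: a term like @isEmpty : Text -> Boolean@ yields (roughly) a
 -- 'NameToken' and 'HashToken' for the definition itself, one 'TypeMentionToken'
 -- each for @Text@ and @Boolean@ (the latter marked as the return-type mention),
 -- and a 'TermTagToken' for its tag; a sketch of the idea, not literal indexer
 -- output.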
data DefnSearchToken typeRef = -- Allows searching by literal name @@ -143,6 +149,8 @@ data DefnSearchToken typeRef | TypeModToken DD.Modifier deriving stock (Show, Eq, Ord, Functor, Foldable, Traversable) +makePrisms ''DefnSearchToken + -- | Converts a DefnSearchToken to a prefix-searchable text string. -- -- >>> tokenToText (NameToken (Name.unsafeParseText "List.map")) @@ -185,7 +193,7 @@ data DefinitionDocument proj release name typeRef = DefinitionDocument -- For now we only index types by their final name segment, may need to revisit this -- in the future. tokens :: Set (DefnSearchToken typeRef), - arity :: Int, + arity :: Arity, tag :: TermOrTypeTag, metadata :: TermOrTypeSummary } diff --git a/src/Share/Postgres/Search/DefinitionSearch/Queries.hs b/src/Share/Postgres/Search/DefinitionSearch/Queries.hs index 1094957..a9bc249 100644 --- a/src/Share/Postgres/Search/DefinitionSearch/Queries.hs +++ b/src/Share/Postgres/Search/DefinitionSearch/Queries.hs @@ -11,23 +11,43 @@ module Share.Postgres.Search.DefinitionSearch.Queries ) where +import Control.Lens +import Data.Aeson (fromJSON) import Data.Aeson qualified as Aeson +import Data.Foldable qualified as Foldable import Data.Set qualified as Set import Data.Text qualified as Text import Hasql.Interpolate qualified as Hasql +import Servant (ServerError (..)) +import Servant.Server (err500) import Share.BackgroundJobs.Search.DefinitionSync.Types import Share.IDs (ProjectId, ReleaseId, UserId) import Share.Postgres import Share.Prelude import Share.Utils.API (Limit, Query (Query)) +import Share.Utils.Logging qualified as Logging +import Share.Web.Errors qualified as Errors import Unison.DataDeclaration qualified as DD import Unison.Name (Name) -import Unison.NameSegment (NameSegment) +import Unison.Name qualified as Name import Unison.Server.Types (TermTag (..), TypeTag (..)) import Unison.ShortHash (ShortHash) import Unison.Syntax.Name qualified as Name import Unison.Syntax.NameSegment qualified as NameSegment +data DefinitionSearchError = FailedToDecodeMetadata Aeson.Value Text + deriving stock (Show, Eq, Ord) + +instance Errors.ToServerError DefinitionSearchError where + toServerError = \case + FailedToDecodeMetadata _v _err -> (Errors.ErrorID "invalid-definition-search-metadata", err500 {errBody = "Internal Server Error"}) + +instance Logging.Loggable DefinitionSearchError where + toLog = \case + FailedToDecodeMetadata v err -> + Logging.textLog ("Failed to decode metadata: " <> tShow v <> " " <> err) + & Logging.withSeverity Logging.Error + submitReleaseToBeSynced :: ReleaseId -> Transaction e () submitReleaseToBeSynced releaseId = do execute_ @@ -56,7 +76,7 @@ claimUnsyncedRelease = do |] -- | Save definition documents to be indexed for search. 
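 --
 -- Tokens are rendered to lexemes by 'searchTokenToText' and stored in the
 -- @search_tokens@ tsvector column: e.g. a definition named @List.map@ with a
 -- single @Text@ mention is indexed under lexemes like @n,map.List@ and
 -- @mn,1,Text@ (name segments reversed, per the note on 'searchTokenToText').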
-insertDefinitionDocuments :: [DefinitionDocument ProjectId ReleaseId Name (NameSegment, ShortHash)] -> Transaction e () +insertDefinitionDocuments :: [DefinitionDocument ProjectId ReleaseId Name (Name, ShortHash)] -> Transaction e () insertDefinitionDocuments docs = pipelined $ do let docsTable = docRow <$> docs for_ docsTable \(projectId, releaseId, fqn, tokens, arity, tag, metadata) -> do @@ -70,16 +90,26 @@ insertDefinitionDocuments docs = pipelined $ do ON CONFLICT DO NOTHING |] where - docRow :: DefinitionDocument ProjectId ReleaseId Name (NameSegment, ShortHash) -> (ProjectId, ReleaseId, Text, [Text], Int32, TermOrTypeTag, Hasql.Jsonb) + docRow :: DefinitionDocument ProjectId ReleaseId Name (Name, ShortHash) -> (ProjectId, ReleaseId, Text, [Text], Arity, TermOrTypeTag, Hasql.Jsonb) docRow DefinitionDocument {project, release, fqn, tokens, arity, tag, metadata} = - ( project, - release, - Name.toText fqn, - foldMap searchTokenToText $ Set.toList tokens, - fromIntegral arity, - tag, - Hasql.Jsonb $ Aeson.toJSON metadata - ) + let expandedTokens :: [DefnSearchToken (Either Name ShortHash)] + expandedTokens = + tokens & foldMap \case + NameToken name -> [NameToken name] + TypeMentionToken (name, ref) occ -> [TypeMentionToken (Left name) occ, TypeMentionToken (Right ref) occ] + TypeVarToken v occ -> [TypeVarToken v occ] + HashToken sh -> [HashToken sh] + TermTagToken tag -> [TermTagToken tag] + TypeTagToken tag -> [TypeTagToken tag] + TypeModToken mod -> [TypeModToken mod] + in ( project, + release, + Name.toText fqn, + searchTokenToText False <$> expandedTokens, + arity, + tag, + Hasql.Jsonb $ Aeson.toJSON metadata + ) -- | Wipe out any rows for the given release, useful when re-indexing. cleanIndexForRelease :: ReleaseId -> Transaction e () @@ -92,38 +122,78 @@ cleanIndexForRelease releaseId = do -- | Convert a search token to a TSVector. -- +-- +-- Note: Names in tokens have their segments reversed, this is because PG Gin indexes only support +-- prefix-matching on lexemes, and this lets us match on any valid name suffix. +-- This is also why Hashes and Names come LAST in the token, so we can do, e.g. `mn,1,map.List:*` +-- -- >>> import Unison.Syntax.Name qualified as Name --- >>> searchTokenToText (NameToken (Name.unsafeParseText "my.cool.name")) --- ["n:my.cool.name"] +-- >>> searchTokenToText False (NameToken (Name.unsafeParseText "my.cool.name")) +-- "n,name.cool.my" +-- +-- >>> searchTokenToText False (TypeMentionToken (Left $ Name.unsafeParseText "Thing") (Just $ Occurrence 1)) +-- "mn,1,Thing" -- -- >>> import Unison.ShortHash qualified as SH -- >>> import Data.Maybe (fromJust) --- >>> searchTokenToText (TypeMentionToken (NameSegment.unsafeParseText "Thing", fromJust $ SH.fromText "#2tWjVAuc7") (Occurrence 1)) --- ["mn:Thing:1","mh:#2tWjVAuc7:1"] +-- >>> searchTokenToText False (TypeMentionToken (Right . 
fromJust $ SH.fromText "#2tWjVAuc7") (Just $ Occurrence 1)) +-- "mh,1,#2tWjVAuc7" +-- +-- >>> searchTokenToText False (TypeMentionToken (Left $ Name.unsafeParseText "Thing") Nothing) +-- "mn,r,Thing" +-- +-- >>> searchTokenToText False (TypeMentionToken (Right $ fromJust $ SH.fromText "#2tWjVAuc7") Nothing) +-- "mh,r,#2tWjVAuc7" -- --- >>> searchTokenToText (TypeVarToken (VarId 1) (Occurrence 1)) --- ["v:1:1"] +-- >>> searchTokenToText False (TypeVarToken (VarId 1) (Just $ Occurrence 1)) +-- "v,1,1" -- --- >>> searchTokenToText (TermTagToken Doc) --- ["t:doc"] --- >>> searchTokenToText (TermTagToken (Constructor Data)) --- WAS WAS WAS "t:data-con" --- ["t:data-con"] --- >>> searchTokenToText (TypeTagToken Data) --- ["t:data"] -searchTokenToText :: DefnSearchToken (NameSegment, ShortHash) -> [Text] -searchTokenToText = \case - NameToken name -> [makeSearchToken nameType (Name.toText name) Nothing] - TypeMentionToken (ns, typeRef) occ -> - [ makeSearchToken typeMentionTypeByNameType (NameSegment.toEscapedText ns) (Just occ), - makeSearchToken typeMentionTypeByHashType (into @Text @ShortHash typeRef) (Just occ) - ] - TypeVarToken varId occ -> [makeSearchToken typeVarType (varIdText varId) (Just occ)] - HashToken sh -> [makeSearchToken hashType (into @Text sh) Nothing] - TermTagToken termTag -> [makeSearchToken tagType (termTagText termTag) Nothing] - TypeTagToken typTag -> [makeSearchToken tagType (typeTagText typTag) Nothing] - TypeModToken mod -> [makeSearchToken typeModType (typeModText mod) Nothing] +-- >>> searchTokenToText False (TermTagToken Doc) +-- "t,doc" +-- >>> searchTokenToText False (TermTagToken (Constructor Data)) +-- "t,data-con" +-- >>> searchTokenToText False (TypeTagToken Data) +-- "t,data" +-- +-- Should backslash escape special symbols. +-- >>> searchTokenToText False (NameToken (Name.unsafeParseText "my.name!")) +-- "n,name\\!.my" +-- +-- >>> searchTokenToText False (NameToken (Name.unsafeParseText "operators.\\&:|!|")) +-- "n,\\\\\\&\\:\\|\\!\\|.operators" +-- +-- Should add wildcards to the end of name, hash, and type mention tokens, but not others. +-- >>> searchTokenToText True (NameToken (Name.unsafeParseText "my.name")) +-- "n,name.my:*" +-- +-- >>> searchTokenToText True (TypeMentionToken (Left $ Name.unsafeParseText "Thing") (Just $ Occurrence 1)) +-- "mn,1,Thing:*" +-- +-- >>> searchTokenToText True (TypeMentionToken (Right . 
fromJust $ SH.fromText "#2tWjVAuc7") (Just $ Occurrence 1)) +-- "mh,1,#2tWjVAuc7:*" +-- +-- >>> searchTokenToText True (TypeVarToken (VarId 1) (Just $ Occurrence 1)) +-- "v,1,1" +searchTokenToText :: Bool -> DefnSearchToken (Either Name ShortHash) -> Text +searchTokenToText shouldAddWildcards = \case + NameToken name -> + makeSearchToken nameType (reversedNameText name) Nothing + & addWildCard + TypeMentionToken (Left name) occ -> + makeSearchToken typeMentionTypeByNameType (reversedNameText name) (Just occ) + & addWildCard + TypeMentionToken (Right sh) occ -> + makeSearchToken typeMentionTypeByHashType (into @Text @ShortHash sh) (Just occ) + & addWildCard + TypeVarToken varId occ -> makeSearchToken typeVarType (varIdText varId) (Just occ) + HashToken sh -> + makeSearchToken hashType (into @Text sh) Nothing + & addWildCard + TermTagToken termTag -> makeSearchToken tagType (termTagText termTag) Nothing + TypeTagToken typTag -> makeSearchToken tagType (typeTagText typTag) Nothing + TypeModToken mod -> makeSearchToken typeModType (typeModText mod) Nothing where + addWildCard token = if shouldAddWildcards then (token <> ":*") else token typeModText = \case DD.Structural -> "structural" DD.Unique {} -> "unique" @@ -153,14 +223,31 @@ searchTokenToText = \case tagType = "t" typeModType :: Text typeModType = "mod" + escapeToken :: Text -> Text + escapeToken txt = + txt + -- FIRST we escape all existing backslashes + & Text.replace "\\" "\\\\" + -- Then fold over the provided characters, escaping them with a preceding backslash + & \t -> foldr (\c acc -> Text.replace (Text.singleton c) (Text.pack ['\\', c]) acc) t ("()|& :*!," :: String) makeSearchToken :: Text -> Text -> Maybe (Maybe Occurrence) -> Text - makeSearchToken kind txt occ = do - Text.intercalate ":" $ - [kind, Text.replace ":" "" txt] - <> case occ of + makeSearchToken kind txt occTxt = do + let occ = case occTxt of Just (Just (Occurrence n)) -> [tShow n] Just Nothing -> ["r"] Nothing -> [] + in Text.intercalate "," $ + [kind] <> occ <> [escapeToken txt] + +reversedNameText :: Name -> Text +reversedNameText n = Text.intercalate "." 
$ Foldable.toList $ fmap NameSegment.toEscapedText $ Name.reverseSegments n + +searchTokensToTsQuery :: Set (DefnSearchToken (Either Name ShortHash)) -> Text +searchTokensToTsQuery tokens = + tokens + & Set.toList + & fmap (searchTokenToText True) + & Text.intercalate " & " data DefnNameSearchFilter = ProjectFilter ProjectId @@ -203,5 +290,35 @@ defNameSearch mayCaller mayFilter (Query query) limit = do ORDER BY (br.tag <> 'doc'::definition_tag, br.tag <> 'test'::definition_tag, br.name LIKE ('%' || like_escape(#{query})), similarity(#{query}, br.name)) DESC |] -definitionSearch :: Maybe UserId -> Maybe DefnNameSearchFilter -> Limit -> Set (DefnSearchToken (Either NameSegment ShortHash)) -> Maybe Int -> Transaction e [DefinitionDocument proj release name typeRef] -definitionSearch searchTokens preferredArity = _ +definitionSearch :: Maybe UserId -> Maybe DefnNameSearchFilter -> Limit -> Set (DefnSearchToken (Either Name ShortHash)) -> Maybe Int32 -> Transaction e [(ProjectId, ReleaseId, Name, TermOrTypeSummary)] +definitionSearch mayCaller mayFilter limit searchTokens preferredArity = do + let filters = case mayFilter of + Just (ProjectFilter projId) -> [sql| AND doc.project_id = #{projId} |] + Just (ReleaseFilter relId) -> [sql| AND doc.release_id = #{relId} |] + Just (UserFilter userId) -> [sql| AND p.owner_id = #{userId} |] + Nothing -> mempty + let tsQueryText = searchTokensToTsQuery searchTokens + rows <- + queryListRows @(ProjectId, ReleaseId, Name, Hasql.Jsonb) + [sql| + WITH matches_deduped_by_project(project_id, release_id, name, arity, metadata) AS ( + SELECT DISTINCT ON (doc.project_id, doc.name) doc.project_id, doc.release_id, doc.name, doc.arity, doc.metadata FROM global_definition_search_docs doc + JOIN projects p ON p.id = doc.project_id + JOIN project_releases r ON r.id = doc.release_id + WHERE + -- match on search tokens using GIN index. + tsquery(#{tsQueryText}) @@ doc.search_tokens + AND (NOT p.private OR (#{mayCaller} IS NOT NULL AND EXISTS (SELECT FROM accessible_private_projects pp WHERE pp.user_id = #{mayCaller} AND pp.project_id = p.id))) + ^{filters} + ORDER BY doc.project_id, doc.name, r.major_version, r.minor_version, r.patch_version + ) SELECT m.project_id, m.release_id, m.name, m.metadata + FROM matches_deduped_by_project m + -- prefer results which have at LEAST the requested arity, then prefer shorter + -- arities. + ORDER BY (m.arity >= #{preferredArity}) DESC, m.arity ASC + LIMIT #{limit} + |] + rows & traverseOf (traversed . 
_4) \(Hasql.Jsonb v) -> do + case fromJSON v of + Aeson.Error err -> unrecoverableError $ FailedToDecodeMetadata v (Text.pack err) + Aeson.Success summary -> pure summary diff --git a/src/Share/Prelude/Orphans.hs b/src/Share/Prelude/Orphans.hs index ceeaf64..4ea5e9f 100644 --- a/src/Share/Prelude/Orphans.hs +++ b/src/Share/Prelude/Orphans.hs @@ -13,7 +13,6 @@ import Data.UUID (UUID) import Data.UUID qualified as UUID import GHC.TypeLits qualified as TypeError import Hasql.Interpolate qualified as Interp -import Text.Megaparsec qualified as MP import Unison.Server.Orphans () import Unison.ShortHash (ShortHash) import Unison.ShortHash qualified as SH From b0fb139c8b38e8000c84178c6648a0a864828a87 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 24 Jul 2024 12:48:31 -0700 Subject: [PATCH 36/48] Fix up some sql --- Queries.hs | 277 ------------------ docker/docker-compose.yml | 10 +- share-api.cabal | 1 + src/Share/Postgres/Hashes/Queries.hs | 31 -- src/Share/Postgres/Projects/Queries.hs | 69 +++-- src/Share/Postgres/Releases/Queries.hs | 30 ++ .../Search/DefinitionSearch/Queries.hs | 31 +- src/Share/Web/Share/DefinitionSearch.hs | 12 +- src/Share/Web/Share/Impl.hs | 13 +- 9 files changed, 112 insertions(+), 362 deletions(-) delete mode 100644 Queries.hs create mode 100644 src/Share/Postgres/Releases/Queries.hs diff --git a/Queries.hs b/Queries.hs deleted file mode 100644 index 2ef9049..0000000 --- a/Queries.hs +++ /dev/null @@ -1,277 +0,0 @@ -{-# LANGUAGE TypeOperators #-} - -module Share.Postgres.Search.DefinitionSearch.Queries - ( submitReleaseToBeSynced, - claimUnsyncedRelease, - insertDefinitionDocuments, - cleanIndexForRelease, - defNameSearch, - definitionSearch, - DefnNameSearchFilter (..), - ) -where - -import Control.Lens -import Data.Aeson (fromJSON) -import Data.Aeson qualified as Aeson -import Data.Foldable qualified as Foldable -import Data.Set qualified as Set -import Data.Text qualified as Text -import Hasql.Interpolate qualified as Hasql -import Servant (ServerError (..)) -import Servant.Server (err500) -import Share.BackgroundJobs.Search.DefinitionSync.Types -import Share.IDs (ProjectId, ReleaseId, UserId) -import Share.Postgres -import Share.Prelude -import Share.Utils.API (Limit, Query (Query)) -import Share.Utils.Logging qualified as Logging -import Share.Web.Errors qualified as Errors -import Unison.DataDeclaration qualified as DD -import Unison.Name (Name) -import Unison.Name qualified as Name -import Unison.NameSegment (NameSegment) -import Unison.Server.Types (TermTag (..), TypeTag (..)) -import Unison.ShortHash (ShortHash) -import Unison.Syntax.Name qualified as Name -import Unison.Syntax.NameSegment qualified as NameSegment - -data DefinitionSearchError = FailedToDecodeMetadata Aeson.Value Text - deriving stock (Show, Eq, Ord) - -instance Errors.ToServerError DefinitionSearchError where - toServerError = \case - FailedToDecodeMetadata _v _err -> (Errors.ErrorID "invalid-definition-search-metadata", err500 {errBody = "Internal Server Error"}) - -instance Logging.Loggable DefinitionSearchError where - toLog = \case - FailedToDecodeMetadata v err -> - Logging.textLog ("Failed to decode metadata: " <> tShow v <> " " <> err) - & Logging.withSeverity Logging.Error - -submitReleaseToBeSynced :: ReleaseId -> Transaction e () -submitReleaseToBeSynced releaseId = do - execute_ - [sql| - INSERT INTO global_definition_search_release_queue (id) - VALUES (#{releaseId}) - |] - --- | Claim the oldest unsynced release to be indexed. 
-claimUnsyncedRelease :: Transaction e (Maybe ReleaseId) -claimUnsyncedRelease = do - query1Col - [sql| - WITH chosen_release(release_id) AS ( - SELECT q.release_id - FROM global_definition_search_release_queue q - ORDER BY q.created_at ASC - LIMIT 1 - -- Skip any that are being synced by other workers. - FOR UPDATE SKIP LOCKED - ) - DELETE FROM global_definition_search_release_queue - USING chosen_release - WHERE global_definition_search_release_queue.release_id = chosen_release.release_id - RETURNING chosen_release.release_id - |] - --- | Save definition documents to be indexed for search. -insertDefinitionDocuments :: [DefinitionDocument ProjectId ReleaseId Name (NameSegment, ShortHash)] -> Transaction e () -insertDefinitionDocuments docs = pipelined $ do - let docsTable = docRow <$> docs - for_ docsTable \(projectId, releaseId, fqn, tokens, arity, tag, metadata) -> do - -- Ideally we'd do a bulk insert, but Hasql doesn't provide any method for passing arrays of - -- arrays as parameters, so instead we insert each record individually so we can use our - -- only level of array-ness for the tokens. - execute_ $ - [sql| - INSERT INTO global_definition_search_docs (project_id, release_id, name, search_tokens, arity, tag, metadata) - VALUES (#{projectId}, #{releaseId}, #{fqn}, array_to_tsvector(#{tokens}), #{arity}, #{tag}::definition_tag, #{metadata}::jsonb) - ON CONFLICT DO NOTHING - |] - where - docRow :: DefinitionDocument ProjectId ReleaseId Name (NameSegment, ShortHash) -> (ProjectId, ReleaseId, Text, [Text], Int32, TermOrTypeTag, Hasql.Jsonb) - docRow DefinitionDocument {project, release, fqn, tokens, arity, tag, metadata} = - ( project, - release, - Name.toText fqn, - foldMap searchTokenToText $ Set.toList tokens, - fromIntegral arity, - tag, - Hasql.Jsonb $ Aeson.toJSON metadata - ) - --- | Wipe out any rows for the given release, useful when re-indexing. -cleanIndexForRelease :: ReleaseId -> Transaction e () -cleanIndexForRelease releaseId = do - execute_ - [sql| - DELETE FROM global_definition_search_docs - WHERE release_id = #{releaseId} - |] - --- | Convert a search token to a TSVector. --- --- --- Note: Names in tokens have their segments reversed, this is because PG Gin indexes only support --- prefix-matching on lexemes, and this lets us match on any valid name suffix. --- This is also why Hashes and Names come LAST in the token, so we can do, e.g. 
`mn,1,map.List:*`
---
--- >>> import Unison.Syntax.Name qualified as Name
--- >>> searchTokenToText (NameToken (Name.unsafeParseText "my.cool.name"))
--- ["n,my.cool.name"]
---
--- >>> import Unison.ShortHash qualified as SH
--- >>> import Data.Maybe (fromJust)
--- >>> searchTokenToText (TypeMentionToken (NameSegment.unsafeParseText "Thing", fromJust $ SH.fromText "#2tWjVAuc7") (Just $ Occurrence 1))
--- ["mn,1,Thing","mh,1,#2tWjVAuc7"]
---
--- >>> searchTokenToText (TypeMentionToken (NameSegment.unsafeParseText "Thing", fromJust $ SH.fromText "#2tWjVAuc7") Nothing)
--- ["mn,r,Thing","mh,r,#2tWjVAuc7"]
---
--- >>> searchTokenToText (TypeVarToken (VarId 1) (Just $ Occurrence 1))
--- ["v,1,1"]
---
--- >>> searchTokenToText (TermTagToken Doc)
--- ["t,doc"]
--- >>> searchTokenToText (TermTagToken (Constructor Data))
--- ["t,data-con"]
--- >>> searchTokenToText (TypeTagToken Data)
--- ["t,data"]
-searchTokenToText :: DefnSearchToken (NameSegment, ShortHash) -> [Text]
-searchTokenToText = \case
-  NameToken name -> [makeSearchToken nameType (reversedNameText name) Nothing]
-  TypeMentionToken (ns, typeRef) occ ->
-    [ makeSearchToken typeMentionTypeByNameType (NameSegment.toEscapedText ns) (Just occ),
-      makeSearchToken typeMentionTypeByHashType (into @Text @ShortHash typeRef) (Just occ)
-    ]
-  TypeVarToken varId occ -> [makeSearchToken typeVarType (varIdText varId) (Just occ)]
-  HashToken sh -> [makeSearchToken hashType (into @Text sh) Nothing]
-  TermTagToken termTag -> [makeSearchToken tagType (termTagText termTag) Nothing]
-  TypeTagToken typTag -> [makeSearchToken tagType (typeTagText typTag) Nothing]
-  TypeModToken mod -> [makeSearchToken typeModType (typeModText mod) Nothing]
-  where
-    reversedNameText :: Name -> Text
-    reversedNameText n = Text.intercalate "."
$ Foldable.toList $ fmap NameSegment.toEscapedText $ Name.reverseSegments n - typeModText = \case - DD.Structural -> "structural" - DD.Unique {} -> "unique" - varIdText :: VarId -> Text - varIdText (VarId n) = tShow n - termTagText :: TermTag -> Text - termTagText = \case - Doc -> "doc" - Test -> "test" - Plain -> "plain" - Constructor typeTag -> typeTagText typeTag <> "-con" - typeTagText :: TypeTag -> Text - typeTagText = \case - Data -> "data" - Ability -> "ability" - nameType :: Text - nameType = "n" - typeMentionTypeByNameType :: Text - typeMentionTypeByNameType = "mn" - typeMentionTypeByHashType :: Text - typeMentionTypeByHashType = "mh" - typeVarType :: Text - typeVarType = "v" - hashType :: Text - hashType = "h" - tagType :: Text - tagType = "t" - typeModType :: Text - typeModType = "mod" - makeSearchToken :: Text -> Text -> Maybe (Maybe Occurrence) -> Text - makeSearchToken kind txt occTxt = do - let occ = case occTxt of - Just (Just (Occurrence n)) -> [tShow n] - Just Nothing -> ["r"] - Nothing -> [] - in Text.intercalate "," $ - [kind] <> occ <> [Text.replace "," "" txt] - -searchTokensToTsQuery :: Set (DefnSearchToken (Either NameSegment ShortHash)) -> Text -searchTokensToTsQuery tokens = - Text.intercalate " & " $ searchTokenToText <$> Set.toList tokens - -data DefnNameSearchFilter - = ProjectFilter ProjectId - | ReleaseFilter ReleaseId - | UserFilter UserId - -defNameSearch :: Maybe UserId -> Maybe DefnNameSearchFilter -> Query -> Limit -> Transaction e [(ProjectId, ReleaseId, Name, TermOrTypeTag)] -defNameSearch mayCaller mayFilter (Query query) limit = do - let filters = case mayFilter of - Just (ProjectFilter projId) -> [sql| AND doc.project_id = #{projId} |] - Just (ReleaseFilter relId) -> [sql| AND doc.release_id = #{relId} |] - Just (UserFilter userId) -> [sql| AND p.owner_id = #{userId} |] - Nothing -> mempty - queryListRows @(ProjectId, ReleaseId, Name, TermOrTypeTag) - [sql| - WITH matches_deduped_by_project(project_id, release_id, name, tag) AS ( - SELECT DISTINCT ON (doc.project_id, doc.name) doc.project_id, doc.release_id, doc.name, doc.tag FROM global_definition_search_docs doc - JOIN projects p ON p.id = doc.project_id - JOIN project_releases r ON r.id = doc.release_id - WHERE - -- Search name by a trigram 'word similarity' - -- which will match if the query is similar to any 'word' (e.g. name segment) - -- in the name. - #{query} <% doc.name - AND (NOT p.private OR (#{mayCaller} IS NOT NULL AND EXISTS (SELECT FROM accessible_private_projects pp WHERE pp.user_id = #{mayCaller} AND pp.project_id = p.id))) - ^{filters} - ORDER BY doc.project_id, doc.name, r.major_version, r.minor_version, r.patch_version - ), - -- Find the best matches - best_results(project_id, release_id, name, tag) AS ( - SELECT m.project_id, m.release_id, m.name, m.tag - FROM matches_deduped_by_project m - ORDER BY similarity(#{query}, m.name) DESC - LIMIT #{limit} - ) - -- THEN sort docs to the bottom. - SELECT br.project_id, br.release_id, br.name, br.tag - FROM best_results br - -- docs and tests to the bottom, but otherwise sort by quality of the match. 
- ORDER BY (br.tag <> 'doc'::definition_tag, br.tag <> 'test'::definition_tag, br.name LIKE ('%' || like_escape(#{query})), similarity(#{query}, br.name)) DESC - |] - -definitionSearch :: Maybe UserId -> Maybe DefnNameSearchFilter -> Limit -> Set (DefnSearchToken (Either NameSegment ShortHash)) -> Maybe Int -> Transaction e [(ProjectId, ReleaseId, Name, TermOrTypeSummary)] -definitionSearch mayCaller mayFilter limit searchTokens preferredArity = do - let filters = case mayFilter of - Just (ProjectFilter projId) -> [sql| AND doc.project_id = #{projId} |] - Just (ReleaseFilter relId) -> [sql| AND doc.release_id = #{relId} |] - Just (UserFilter userId) -> [sql| AND p.owner_id = #{userId} |] - Nothing -> mempty - rows <- - queryListRows @(ProjectId, ReleaseId, Name, Hasql.Jsonb) - [sql| - WITH matches_deduped_by_project(project_id, release_id, name, metadata) AS ( - SELECT DISTINCT ON (doc.project_id, doc.name) doc.project_id, doc.release_id, doc.name, doc.metadata FROM global_definition_search_docs doc - JOIN projects p ON p.id = doc.project_id - JOIN project_releases r ON r.id = doc.release_id - WHERE - -- match on search tokens using GIN index. - plainto_tsquery(#{searchTokenText}) @@ doc.name - AND (NOT p.private OR (#{mayCaller} IS NOT NULL AND EXISTS (SELECT FROM accessible_private_projects pp WHERE pp.user_id = #{mayCaller} AND pp.project_id = p.id))) - ^{filters} - ORDER BY doc.project_id, doc.name, r.major_version, r.minor_version, r.patch_version - ), - -- Find the best matches - best_results(project_id, release_id, name, tag) AS ( - SELECT m.project_id, m.release_id, m.name, m.tag - FROM matches_deduped_by_project m - ORDER BY similarity(#{query}, m.name) DESC - LIMIT #{limit} - ) - -- THEN sort docs to the bottom. - SELECT br.project_id, br.release_id, br.name, br.tag - FROM best_results br - -- docs and tests to the bottom, but otherwise sort by quality of the match. - ORDER BY (br.tag <> 'doc'::definition_tag, br.tag <> 'test'::definition_tag, br.name LIKE ('%' || like_escape(#{query})), similarity(#{query}, br.name)) DESC - |] - rows & traverseOf (traversed . 
_4) \(Hasql.Jsonb v) -> do - case fromJSON v of - Aeson.Error err -> unrecoverableError $ FailedToDecodeMetadata v (Text.pack err) - Aeson.Success summary -> pure summary diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index f92ce9b..06a14a9 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -17,10 +17,10 @@ services: POSTGRES_PASSWORD: sekrit volumes: - ../sql:/docker-entrypoint-initdb.d - # # Optionally persist the data between container invocations - # - postgresVolume:/var/lib/postgresql/data + # Optionally persist the data between container invocations + - postgresVolume:/var/lib/postgresql/data - ./postgresql.conf:/etc/postgresql/postgresql.conf - command: postgres -c config_file=/etc/postgresql/postgresql.conf # -c log_statement=all + command: postgres -c config_file=/etc/postgresql/postgresql.conf -c log_statement=all redis: @@ -85,5 +85,5 @@ services: - redis - postgres -# volumes: -# postgresVolume: +volumes: + postgresVolume: diff --git a/share-api.cabal b/share-api.cabal index ae7ea97..90bcff0 100644 --- a/share-api.cabal +++ b/share-api.cabal @@ -70,6 +70,7 @@ library Share.Postgres.Projects.Queries Share.Postgres.Queries Share.Postgres.Refs.Types + Share.Postgres.Releases.Queries Share.Postgres.Search.DefinitionSearch.Queries Share.Postgres.Serialization Share.Postgres.Sync.Conversions diff --git a/src/Share/Postgres/Hashes/Queries.hs b/src/Share/Postgres/Hashes/Queries.hs index 105cc16..3f5b5ea 100644 --- a/src/Share/Postgres/Hashes/Queries.hs +++ b/src/Share/Postgres/Hashes/Queries.hs @@ -338,34 +338,3 @@ expectCausalIdByHash :: (HasCallStack) => CausalHash -> Codebase.CodebaseM e Cau expectCausalIdByHash causalHash = do loadCausalIdByHash causalHash `whenNothingM` unrecoverableError (MissingExpectedEntity $ "Expected causal id for hash: " <> tShow causalHash) - --- | TODO: - ----- | Expands shorthashes to their full form (still as a ShortHash) ----- ----- Note: Be wary, this could cause hashes which are unknown to a user to leak if they're not --- expandShortHashesOf :: (HasCallStack) => Traversal s t ShortHash ShortHash -> s -> CodebaseM e t --- expandShortHashesOf trav s = do --- codebaseOwner <- asks Codebase.codebaseOwner --- s --- & unsafePartsOf (trav . prefix_) %%~ \shortHashes -> do --- let numberedShortHashes = zip [0 :: Int32 ..] 
shortHashes --- results :: [Text] <- --- queryListCol --- [sql| --- WITH hash_prefixes(ord, prefix) AS ( --- SELECT * FROM ^{toTable numberedShortHashes} --- ) --- SELECT sh.prefix --- FROM hash_prefixes sh --- JOIN short_hashes sh2 ON sh.prefix = sh2.prefix --- ORDER BY sh.ord ASC --- |] --- if length results /= length shortHashes --- then error "expandShortHashesOf: Missing expected short hash" --- else pure results --- where --- prefix_ :: Traversal' ShortHash Text --- prefix_ f = \case --- SH.Builtin b -> pure $ SH.Builtin b --- SH.ShortHash p c i -> SH.ShortHash <$> f p <*> pure c <*> pure i diff --git a/src/Share/Postgres/Projects/Queries.hs b/src/Share/Postgres/Projects/Queries.hs index 546a4c1..2b56178 100644 --- a/src/Share/Postgres/Projects/Queries.hs +++ b/src/Share/Postgres/Projects/Queries.hs @@ -5,30 +5,31 @@ module Share.Postgres.Projects.Queries listProjectMaintainers, addMaintainers, updateMaintainers, + expectProjectShortHandsOf, ) where import Control.Lens import Control.Monad.Except (MonadError (..), runExceptT) import Share.IDs -import Share.Postgres qualified as PG +import Share.Postgres import Share.Prelude import Share.Web.Authorization.Types (ProjectMaintainerPermissions (..)) import Share.Web.Share.Projects.Types (Maintainer (..)) -isPremiumProject :: ProjectId -> PG.Transaction e Bool +isPremiumProject :: ProjectId -> Transaction e Bool isPremiumProject projId = fromMaybe False <$> do - PG.query1Col - [PG.sql| + query1Col + [sql| SELECT EXISTS (SELECT FROM premium_projects WHERE project_id = #{projId}) |] -listProjectMaintainers :: ProjectId -> PG.Transaction e [Maintainer UserId] +listProjectMaintainers :: ProjectId -> Transaction e [Maintainer UserId] listProjectMaintainers projId = do results <- - PG.queryListRows @(UserId, Bool, Bool, Bool) - [PG.sql| + queryListRows @(UserId, Bool, Bool, Bool) + [sql| SELECT pm.user_id, pm.can_view, pm.can_maintain, pm.can_admin FROM project_maintainers pm WHERE pm.project_id = #{projId} @@ -40,16 +41,16 @@ listProjectMaintainers projId = do in Maintainer {user = userId, permissions} & pure -addMaintainers :: ProjectId -> [Maintainer UserId] -> PG.Transaction e (Either [UserId] [Maintainer UserId]) +addMaintainers :: ProjectId -> [Maintainer UserId] -> Transaction e (Either [UserId] [Maintainer UserId]) addMaintainers projId maintainers = runExceptT $ do let userIds = fmap user maintainers -- Check if any of the maintainers already exist alreadyExistingUserIds <- lift $ - PG.queryListCol @UserId - [PG.sql| + queryListCol @UserId + [sql| WITH values(user_id) AS ( - SELECT * FROM ^{PG.singleColumnTable userIds} + SELECT * FROM ^{singleColumnTable userIds} ) SELECT values.user_id FROM values WHERE EXISTS (SELECT FROM project_maintainers pm @@ -64,26 +65,26 @@ addMaintainers projId maintainers = runExceptT $ do let newMaintainersTable = maintainers <&> \Maintainer {user, permissions = ProjectMaintainerPermissions {canView, canMaintain, canAdmin}} -> (projId, user, canView, canMaintain, canAdmin) -- Insert the maintainers lift $ - PG.execute_ - [PG.sql| + execute_ + [sql| WITH values(project_id, user_id, can_view, can_maintain, can_admin) AS ( - SELECT * FROM ^{PG.toTable newMaintainersTable} + SELECT * FROM ^{toTable newMaintainersTable} ) INSERT INTO project_maintainers (project_id, user_id, can_view, can_maintain, can_admin) SELECT v.project_id, v.user_id, v.can_view, v.can_maintain, v.can_admin FROM values v |] lift $ listProjectMaintainers projId -updateMaintainers :: ProjectId -> [Maintainer UserId] -> PG.Transaction e 
(Either [UserId] [Maintainer UserId]) +updateMaintainers :: ProjectId -> [Maintainer UserId] -> Transaction e (Either [UserId] [Maintainer UserId]) updateMaintainers projId maintainers = runExceptT $ do let userIds = fmap user maintainers -- Check if any of the maintainers don't already exist missingUserIds <- lift $ - PG.queryListCol @UserId - [PG.sql| + queryListCol @UserId + [sql| WITH values(user_id) AS ( - SELECT * FROM ^{PG.singleColumnTable userIds} + SELECT * FROM ^{singleColumnTable userIds} ) SELECT values.user_id FROM values WHERE NOT EXISTS (SELECT FROM project_maintainers pm @@ -97,10 +98,10 @@ updateMaintainers projId maintainers = runExceptT $ do [] -> do let updatedMaintainersTable = maintainers <&> \Maintainer {user, permissions = ProjectMaintainerPermissions {canView, canMaintain, canAdmin}} -> (projId, user, canView, canMaintain, canAdmin) lift $ - PG.execute_ - [PG.sql| + execute_ + [sql| WITH values(project_id, user_id, can_view, can_maintain, can_admin) AS ( - SELECT * FROM ^{PG.toTable updatedMaintainersTable} + SELECT * FROM ^{toTable updatedMaintainersTable} ) UPDATE project_maintainers SET can_view = v.can_view, can_maintain = v.can_maintain, can_admin = v.can_admin FROM values v @@ -109,8 +110,8 @@ updateMaintainers projId maintainers = runExceptT $ do |] -- Delete any maintainers that have no permissions lift $ - PG.execute_ - [PG.sql| + execute_ + [sql| DELETE FROM project_maintainers pm WHERE pm.project_id = #{projId} AND pm.can_view = false @@ -118,3 +119,25 @@ updateMaintainers projId maintainers = runExceptT $ do AND pm.can_admin = false |] lift $ listProjectMaintainers projId + +expectProjectShortHandsOf :: Traversal s t ProjectId ProjectShortHand -> s -> Transaction e t +expectProjectShortHandsOf trav s = do + s + & unsafePartsOf trav %%~ \projIds -> do + let numberedProjIds = zip [1 :: Int32 ..] projIds + results :: [ProjectShortHand] <- + queryListRows @(UserHandle, ProjectSlug) + [sql| + WITH proj_ids(ord, id) AS ( + SELECT * FROM ^{toTable numberedProjIds} + ) + SELECT u.handle, p.slug + FROM proj_ids JOIN projects p ON proj_ids.id = p.id + JOIN users u ON p.owner_user_id = u.id + ORDER BY proj_ids.ord ASC + |] + <&> fmap \(userHandle, projectSlug) -> ProjectShortHand {userHandle, projectSlug} + + if length results /= length projIds + then error "expectProjectShortHandsOf: Missing expected project short hand" + else pure results diff --git a/src/Share/Postgres/Releases/Queries.hs b/src/Share/Postgres/Releases/Queries.hs new file mode 100644 index 0000000..11df966 --- /dev/null +++ b/src/Share/Postgres/Releases/Queries.hs @@ -0,0 +1,30 @@ +{-# LANGUAGE TypeOperators #-} + +module Share.Postgres.Releases.Queries + ( expectReleaseVersionsOf, + ) +where + +import Control.Lens +import Share.IDs +import Share.Postgres +import Share.Prelude + +expectReleaseVersionsOf :: Traversal s t ReleaseId ReleaseVersion -> s -> Transaction e t +expectReleaseVersionsOf trav s = do + s + & unsafePartsOf trav %%~ \releaseIds -> do + let numberedReleaseIds = zip [1 :: Int32 ..] 
releaseIds + results :: [ReleaseVersion] <- + queryListRows @ReleaseVersion + [sql| + WITH release_ids(ord, id) AS ( + SELECT * FROM ^{toTable numberedReleaseIds} + ) + SELECT r.major_version, r.minor_version, r.patch_version + FROM release_ids JOIN project_releases r ON release_ids.id = r.id + ORDER BY release_ids.ord ASC + |] + if length results /= length releaseIds + then error "expectReleaseVersionsOf: Missing expected release version" + else pure results diff --git a/src/Share/Postgres/Search/DefinitionSearch/Queries.hs b/src/Share/Postgres/Search/DefinitionSearch/Queries.hs index a9bc249..2a9bb09 100644 --- a/src/Share/Postgres/Search/DefinitionSearch/Queries.hs +++ b/src/Share/Postgres/Search/DefinitionSearch/Queries.hs @@ -79,18 +79,17 @@ claimUnsyncedRelease = do insertDefinitionDocuments :: [DefinitionDocument ProjectId ReleaseId Name (Name, ShortHash)] -> Transaction e () insertDefinitionDocuments docs = pipelined $ do let docsTable = docRow <$> docs - for_ docsTable \(projectId, releaseId, fqn, tokens, arity, tag, metadata) -> do - -- Ideally we'd do a bulk insert, but Hasql doesn't provide any method for passing arrays of - -- arrays as parameters, so instead we insert each record individually so we can use our - -- only level of array-ness for the tokens. - execute_ $ - [sql| - INSERT INTO global_definition_search_docs (project_id, release_id, name, search_tokens, arity, tag, metadata) - VALUES (#{projectId}, #{releaseId}, #{fqn}, array_to_tsvector(#{tokens}), #{arity}, #{tag}::definition_tag, #{metadata}::jsonb) + execute_ $ + [sql| + WITH docs(project_id, release_id, name, token_text, arity, tag, metadata) AS ( + SELECT * FROM ^{toTable docsTable} + ) INSERT INTO global_definition_search_docs (project_id, release_id, name, search_tokens, arity, tag, metadata) + SELECT d.project_id, d.release_id, d.name, tsvector(d.token_text::text), d.arity, d.tag::definition_tag, d.metadata + FROM docs d ON CONFLICT DO NOTHING |] where - docRow :: DefinitionDocument ProjectId ReleaseId Name (Name, ShortHash) -> (ProjectId, ReleaseId, Text, [Text], Arity, TermOrTypeTag, Hasql.Jsonb) + docRow :: DefinitionDocument ProjectId ReleaseId Name (Name, ShortHash) -> (ProjectId, ReleaseId, Text, Text, Arity, TermOrTypeTag, Hasql.Jsonb) docRow DefinitionDocument {project, release, fqn, tokens, arity, tag, metadata} = let expandedTokens :: [DefnSearchToken (Either Name ShortHash)] expandedTokens = @@ -105,7 +104,7 @@ insertDefinitionDocuments docs = pipelined $ do in ( project, release, Name.toText fqn, - searchTokenToText False <$> expandedTokens, + Text.unwords (searchTokenToText False <$> expandedTokens), arity, tag, Hasql.Jsonb $ Aeson.toJSON metadata @@ -145,8 +144,8 @@ cleanIndexForRelease releaseId = do -- >>> searchTokenToText False (TypeMentionToken (Right $ fromJust $ SH.fromText "#2tWjVAuc7") Nothing) -- "mh,r,#2tWjVAuc7" -- --- >>> searchTokenToText False (TypeVarToken (VarId 1) (Just $ Occurrence 1)) --- "v,1,1" +-- >>> searchTokenToText False (TypeVarToken (VarId 1) (Just $ Occurrence 2)) +-- "v,2,1" -- -- >>> searchTokenToText False (TermTagToken Doc) -- "t,doc" @@ -290,7 +289,7 @@ defNameSearch mayCaller mayFilter (Query query) limit = do ORDER BY (br.tag <> 'doc'::definition_tag, br.tag <> 'test'::definition_tag, br.name LIKE ('%' || like_escape(#{query})), similarity(#{query}, br.name)) DESC |] -definitionSearch :: Maybe UserId -> Maybe DefnNameSearchFilter -> Limit -> Set (DefnSearchToken (Either Name ShortHash)) -> Maybe Int32 -> Transaction e [(ProjectId, ReleaseId, Name, 
TermOrTypeSummary)] +definitionSearch :: Maybe UserId -> Maybe DefnNameSearchFilter -> Limit -> Set (DefnSearchToken (Either Name ShortHash)) -> Maybe Arity -> Transaction e [(ProjectId, ReleaseId, Name, TermOrTypeSummary)] definitionSearch mayCaller mayFilter limit searchTokens preferredArity = do let filters = case mayFilter of Just (ProjectFilter projId) -> [sql| AND doc.project_id = #{projId} |] @@ -301,8 +300,8 @@ definitionSearch mayCaller mayFilter limit searchTokens preferredArity = do rows <- queryListRows @(ProjectId, ReleaseId, Name, Hasql.Jsonb) [sql| - WITH matches_deduped_by_project(project_id, release_id, name, arity, metadata) AS ( - SELECT DISTINCT ON (doc.project_id, doc.name) doc.project_id, doc.release_id, doc.name, doc.arity, doc.metadata FROM global_definition_search_docs doc + WITH matches_deduped_by_project(project_id, release_id, name, arity, metadata, num_search_tokens) AS ( + SELECT DISTINCT ON (doc.project_id, doc.name) doc.project_id, doc.release_id, doc.name, doc.arity, doc.metadata, length(doc.search_tokens) FROM global_definition_search_docs doc JOIN projects p ON p.id = doc.project_id JOIN project_releases r ON r.id = doc.release_id WHERE @@ -315,7 +314,7 @@ definitionSearch mayCaller mayFilter limit searchTokens preferredArity = do FROM matches_deduped_by_project m -- prefer results which have at LEAST the requested arity, then prefer shorter -- arities. - ORDER BY (m.arity >= #{preferredArity}) DESC, m.arity ASC + ORDER BY (m.arity >= #{preferredArity}) DESC, m.arity ASC, m.num_search_tokens ASC LIMIT #{limit} |] rows & traverseOf (traversed . _4) \(Hasql.Jsonb v) -> do diff --git a/src/Share/Web/Share/DefinitionSearch.hs b/src/Share/Web/Share/DefinitionSearch.hs index d85006b..2d98946 100644 --- a/src/Share/Web/Share/DefinitionSearch.hs +++ b/src/Share/Web/Share/DefinitionSearch.hs @@ -10,14 +10,12 @@ import Data.Map.Monoidal qualified as MonMap import Data.Monoid (Sum (..)) import Data.Set qualified as Set import Data.Text qualified as Text -import Share.BackgroundJobs.Search.DefinitionSync.Types (DefnSearchToken (..), Occurrence, VarId (..)) +import Share.BackgroundJobs.Search.DefinitionSync.Types (Arity, DefnSearchToken (..), Occurrence, VarId (..)) import Share.Prelude import Text.Megaparsec qualified as MP import Text.Megaparsec.Char qualified as MP import Text.Megaparsec.Char.Lexer qualified as MP hiding (space) import Unison.Name (Name) -import Unison.Name qualified as Name -import Unison.NameSegment (NameSegment) import Unison.ShortHash (ShortHash) import Unison.ShortHash qualified as SH import Unison.Syntax.Name qualified as Name @@ -92,7 +90,7 @@ type P = MP.Parsec QueryError Text -- Horribly mishapen query: -- >>> queryToTokens "[{ &Text !{𝕖} (Optional)" -- Right (fromList [TypeMentionToken (Left (NameSegment {toUnescapedText = "Optional"})) (Just 1),TypeMentionToken (Left (NameSegment {toUnescapedText = "Text"})) (Just 1)],Nothing) -queryToTokens :: Text -> Either Text (Set (DefnSearchToken (Either NameSegment ShortHash)), Maybe Int) +queryToTokens :: Text -> Either Text (Set (DefnSearchToken (Either Name ShortHash)), Maybe Arity) queryToTokens query = let cleanQuery = query @@ -115,7 +113,7 @@ queryToTokens query = & foldMap \case (HashMention hash, _occ) -> ([HashToken hash], []) (NameMention name, _occ) -> ([NameToken name], []) - (TypeNameMention name, occ) -> ([TypeMentionToken (Left $ Name.lastSegment name) $ Just occ], []) + (TypeNameMention name, occ) -> ([TypeMentionToken (Left name) $ Just occ], []) (TypeVarMention var, occ) -> ([], 
[(var, occ)]) -- Normalize type vars so varIds are sorted according to number of occurences. @@ -131,7 +129,7 @@ queryToTokens query = else Just n in Right (Set.fromList $ hashAndNameTokens <> normalizedTypeVarTokens, arity) -queryParser :: P (Maybe (Sum Int), MonoidalMap MentionRef Occurrence) +queryParser :: P (Maybe (Sum Arity), MonoidalMap MentionRef Occurrence) queryParser = do MP.choice [ (Nothing,) <$> MP.try simpleHashQueryP, @@ -158,7 +156,7 @@ simpleNameQueryP = do pure $ MonMap.singleton (NameMention name) 1 -- | Parse a type query, returning the arity of the top-level type -typeQueryP :: P (Sum Int, MonoidalMap MentionRef Occurrence) +typeQueryP :: P (Sum Arity, MonoidalMap MentionRef Occurrence) typeQueryP = do _ <- optional $ lexeme (MP.char ':') fmap fold . many $ do diff --git a/src/Share/Web/Share/Impl.hs b/src/Share/Web/Share/Impl.hs index 886fefa..b2dc1f2 100644 --- a/src/Share/Web/Share/Impl.hs +++ b/src/Share/Web/Share/Impl.hs @@ -6,8 +6,8 @@ module Share.Web.Share.Impl where +import Control.Lens import Servant -import Share.BackgroundJobs.Search.DefinitionSync.Types (DefinitionDocument (..)) import Share.Codebase qualified as Codebase import Share.Codebase.Types qualified as Codebase import Share.IDs (TourId, UserHandle) @@ -18,7 +18,9 @@ import Share.OAuth.Types (UserId) import Share.Postgres qualified as PG import Share.Postgres.IDs (CausalHash) import Share.Postgres.Ops qualified as PGO +import Share.Postgres.Projects.Queries qualified as PQ import Share.Postgres.Queries qualified as Q +import Share.Postgres.Releases.Queries qualified as RQ import Share.Postgres.Search.DefinitionSearch.Queries qualified as DDQ import Share.Postgres.Users.Queries qualified as UsersQ import Share.Prelude @@ -404,9 +406,14 @@ searchDefinitionsEndpoint callerUserId (Query query) mayLimit userFilter project Logging.logErrorText $ "Failed to parse query: " <> query pure $ DefinitionSearchResults [] Right (searchTokens, mayArity) -> do - matches <- PG.runTransaction $ DDQ.definitionSearch callerUserId filter limit searchTokens mayArity + matches <- + PG.runTransaction $ + DDQ.definitionSearch callerUserId filter limit searchTokens mayArity + >>= PQ.expectProjectShortHandsOf (traversed . _1) + >>= RQ.expectReleaseVersionsOf (traversed . _2) + <&> over (traversed . 
_2) IDs.ReleaseShortHand let results = - matches <&> \DefinitionDocument {fqn, metadata = summary, project, release} -> + matches <&> \(project, release, fqn, summary) -> DefinitionSearchResult { fqn, summary, From 93575b2e2793d1bfa564e27618637166c1539fd3 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 24 Jul 2024 13:18:49 -0700 Subject: [PATCH 37/48] Submit release to be synced when created --- src/Share/Postgres/Queries.hs | 8 ++++++-- src/Share/Postgres/Search/DefinitionSearch/Queries.hs | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/Share/Postgres/Queries.hs b/src/Share/Postgres/Queries.hs index 93a9400..97ea928 100644 --- a/src/Share/Postgres/Queries.hs +++ b/src/Share/Postgres/Queries.hs @@ -26,6 +26,7 @@ import Share.Postgres qualified as PG import Share.Postgres.IDs import Share.Postgres.LooseCode.Queries qualified as LCQ import Share.Postgres.NameLookups.Types (NameLookupReceipt) +import Share.Postgres.Search.DefinitionSearch.Queries qualified as DDQ import Share.Prelude import Share.Project import Share.Release @@ -720,8 +721,9 @@ createRelease :: UserId -> m (Release CausalId UserId) createRelease !_nlReceipt projectId ReleaseVersion {major, minor, patch} squashedCausalId unsquashedCausalId creatorId = do - PG.queryExpect1Row - [PG.sql| + release@Release {releaseId} <- + PG.queryExpect1Row + [PG.sql| INSERT INTO project_releases( project_id, created_by, @@ -748,6 +750,8 @@ createRelease !_nlReceipt projectId ReleaseVersion {major, minor, patch} squashe minor_version, patch_version |] + DDQ.submitReleaseToBeSynced releaseId + pure release setBranchCausalHash :: NameLookupReceipt -> diff --git a/src/Share/Postgres/Search/DefinitionSearch/Queries.hs b/src/Share/Postgres/Search/DefinitionSearch/Queries.hs index 2a9bb09..7efc220 100644 --- a/src/Share/Postgres/Search/DefinitionSearch/Queries.hs +++ b/src/Share/Postgres/Search/DefinitionSearch/Queries.hs @@ -48,7 +48,7 @@ instance Logging.Loggable DefinitionSearchError where Logging.textLog ("Failed to decode metadata: " <> tShow v <> " " <> err) & Logging.withSeverity Logging.Error -submitReleaseToBeSynced :: ReleaseId -> Transaction e () +submitReleaseToBeSynced :: (QueryM m) => ReleaseId -> m () submitReleaseToBeSynced releaseId = do execute_ [sql| From c7d0d7395f6992af3701495493684bda0b849c86 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 24 Jul 2024 13:18:49 -0700 Subject: [PATCH 38/48] ReadCommitted for definition searches, no need to wait for locking --- src/Share/BackgroundJobs/Search/DefinitionSync.hs | 10 ++-------- src/Share/Web/Share/Impl.hs | 2 +- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync.hs b/src/Share/BackgroundJobs/Search/DefinitionSync.hs index 5526b2c..3fa30ec 100644 --- a/src/Share/BackgroundJobs/Search/DefinitionSync.hs +++ b/src/Share/BackgroundJobs/Search/DefinitionSync.hs @@ -30,9 +30,8 @@ import Share.Postgres.NameLookups.Types qualified as NL import Share.Postgres.Queries qualified as PG import Share.Postgres.Search.DefinitionSearch.Queries qualified as DDQ import Share.Prelude -import Share.Project (Project (..), ProjectVisibility (..)) +import Share.Project (Project (..)) import Share.Release (Release (..)) -import Share.User (User (..), UserVisibility (..)) import Share.Utils.Logging qualified as Logging import Share.Web.Authorization qualified as AuthZ import U.Codebase.Referent (Referent) @@ -91,12 +90,7 @@ syncRelease authZReceipt releaseId = fmap (fromMaybe []) . 
runMaybeT $ do -- Wipe out any existing rows for this release. Normally there should be none, but this -- makes it easy to re-index later if we change how we tokenize things. lift $ DDQ.cleanIndexForRelease releaseId - Project {ownerUserId, visibility = projectVis} <- lift $ PG.expectProjectById projectId - User {visibility = userVis} <- PG.expectUserByUserId ownerUserId - -- Don't sync private projects - guard $ projectVis == ProjectPublic - -- Don't sync private users - guard $ userVis == UserPublic + Project {ownerUserId} <- lift $ PG.expectProjectById projectId Debug.debugM Debug.Temp "Syncing release" releaseId lift $ do bhId <- HashQ.expectNamespaceIdsByCausalIdsOf id squashedCausal diff --git a/src/Share/Web/Share/Impl.hs b/src/Share/Web/Share/Impl.hs index b2dc1f2..ad7e69a 100644 --- a/src/Share/Web/Share/Impl.hs +++ b/src/Share/Web/Share/Impl.hs @@ -407,7 +407,7 @@ searchDefinitionsEndpoint callerUserId (Query query) mayLimit userFilter project pure $ DefinitionSearchResults [] Right (searchTokens, mayArity) -> do matches <- - PG.runTransaction $ + PG.runTransactionMode PG.ReadCommitted PG.Read $ DDQ.definitionSearch callerUserId filter limit searchTokens mayArity >>= PQ.expectProjectShortHandsOf (traversed . _1) >>= RQ.expectReleaseVersionsOf (traversed . _2) From 6fe4e4b07395665f3c520efb75899edcd1d4330c Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 24 Jul 2024 13:18:49 -0700 Subject: [PATCH 39/48] Use separate docker-compose file for transcripts --- .github/workflows/ci.yaml | 2 +- docker/transcripts.docker-compose.yml | 84 +++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 docker/transcripts.docker-compose.yml diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 07acb0a..9fbef81 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -247,7 +247,7 @@ jobs: docker volume rm docker_postgresVolume 2>/dev/null || true # Start share and it's dependencies in the background - docker compose -f docker/docker-compose.yml up --wait + docker compose -f docker/transcripts.docker-compose.yml up --wait # Run the transcript tests zsh ./transcripts/run-transcripts.zsh diff --git a/docker/transcripts.docker-compose.yml b/docker/transcripts.docker-compose.yml new file mode 100644 index 0000000..3d7bdc2 --- /dev/null +++ b/docker/transcripts.docker-compose.yml @@ -0,0 +1,84 @@ +version: "3.9" + +services: + postgres: + image: postgres:15.4 + container_name: postgres + restart: always + healthcheck: + # Ensure the database is up, and the tables are initialized + test: ["CMD", "psql", "-U", "postgres", "-c", "SELECT from users;"] + interval: 3s + timeout: 10s + retries: 5 + ports: + - "5432:5432" + environment: + POSTGRES_PASSWORD: sekrit + volumes: + - ../sql:/docker-entrypoint-initdb.d + - ./postgresql.conf:/etc/postgresql/postgresql.conf + command: postgres -c config_file=/etc/postgresql/postgresql.conf -c log_statement=all + + + redis: + image: redis:6.2.6 + container_name: redis + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 3s + timeout: 10s + retries: 3 + ports: + - "6379:6379" + + share: + image: share-api + container_name: share-api + depends_on: + - redis + - postgres + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:5424/health"] + interval: 3s + timeout: 10s + retries: 3 + ports: + - "5424:5424" + - "5425:5425" + - "5426:5426" + + environment: + # Placeholder values for development + - SHARE_API_ORIGIN=http://share-api + - SHARE_SERVER_PORT=5424 + - 
SHARE_ADMIN_PORT=5425 + - SHARE_CLIENT_HOSTNAME=0.0.0.0 + - SHARE_CLIENT_PORT=5426 + - SHARE_REDIS=redis://redis:6379 + - SHARE_POSTGRES=postgresql://postgres:sekrit@postgres:5432 + - SHARE_HMAC_KEY=test-key-test-key-test-key-test-key- + - SHARE_DEPLOYMENT=local + - SHARE_IP=share-api + - SHARE_AWS_CREDENTIAL_URL=invalid + - AWS_REGION=invalid + - SHARE_POSTGRES_CONN_TTL=30 + - SHARE_POSTGRES_CONN_MAX=10 + - SHARE_SHARE_UI_ORIGIN=http://localhost:1234 + - SHARE_CLOUD_UI_ORIGIN=http://localhost:5678 + - SHARE_HOMEPAGE_ORIGIN=http://localhost:1111 + - SHARE_CLOUD_HOMEPAGE_ORIGIN=http://localhost:2222 + - SHARE_LOG_LEVEL=DEBUG + - SHARE_COMMIT=dev + - SHARE_MAX_PARALLELISM_PER_DOWNLOAD_REQUEST=1 + - SHARE_MAX_PARALLELISM_PER_UPLOAD_REQUEST=5 + - SHARE_ZENDESK_API_USER=invaliduser@example.com + - SHARE_ZENDESK_API_TOKEN=bad-password + - SHARE_GITHUB_CLIENTID=invalid + - SHARE_GITHUB_CLIENT_SECRET=invalid + - AWS_ACCESS_KEY_ID=invalid + - AWS_SECRET_ACCESS_KEY=invalid + + links: + - redis + - postgres From ab95c7dae572426ff79cdb74260daf50074ffbc9 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 24 Jul 2024 13:18:49 -0700 Subject: [PATCH 40/48] Fix submit release query --- src/Share/Postgres/Search/DefinitionSearch/Queries.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Share/Postgres/Search/DefinitionSearch/Queries.hs b/src/Share/Postgres/Search/DefinitionSearch/Queries.hs index 7efc220..b642ad7 100644 --- a/src/Share/Postgres/Search/DefinitionSearch/Queries.hs +++ b/src/Share/Postgres/Search/DefinitionSearch/Queries.hs @@ -52,7 +52,7 @@ submitReleaseToBeSynced :: (QueryM m) => ReleaseId -> m () submitReleaseToBeSynced releaseId = do execute_ [sql| - INSERT INTO global_definition_search_release_queue (id) + INSERT INTO global_definition_search_release_queue (release_id) VALUES (#{releaseId}) |] From 3c70179174577b3fce7efc29680944ddcab45f90 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 24 Jul 2024 13:18:49 -0700 Subject: [PATCH 41/48] Remove transcripts docker-compose --- docker/docker-compose.yml | 2 +- docker/transcripts.docker-compose.yml | 84 --------------------------- 2 files changed, 1 insertion(+), 85 deletions(-) delete mode 100644 docker/transcripts.docker-compose.yml diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 06a14a9..f1cfab2 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -20,7 +20,7 @@ services: # Optionally persist the data between container invocations - postgresVolume:/var/lib/postgresql/data - ./postgresql.conf:/etc/postgresql/postgresql.conf - command: postgres -c config_file=/etc/postgresql/postgresql.conf -c log_statement=all + command: postgres -c config_file=/etc/postgresql/postgresql.conf redis: diff --git a/docker/transcripts.docker-compose.yml b/docker/transcripts.docker-compose.yml deleted file mode 100644 index 3d7bdc2..0000000 --- a/docker/transcripts.docker-compose.yml +++ /dev/null @@ -1,84 +0,0 @@ -version: "3.9" - -services: - postgres: - image: postgres:15.4 - container_name: postgres - restart: always - healthcheck: - # Ensure the database is up, and the tables are initialized - test: ["CMD", "psql", "-U", "postgres", "-c", "SELECT from users;"] - interval: 3s - timeout: 10s - retries: 5 - ports: - - "5432:5432" - environment: - POSTGRES_PASSWORD: sekrit - volumes: - - ../sql:/docker-entrypoint-initdb.d - - ./postgresql.conf:/etc/postgresql/postgresql.conf - command: postgres -c config_file=/etc/postgresql/postgresql.conf -c log_statement=all - - - redis: - image: 
redis:6.2.6 - container_name: redis - healthcheck: - test: ["CMD", "redis-cli", "ping"] - interval: 3s - timeout: 10s - retries: 3 - ports: - - "6379:6379" - - share: - image: share-api - container_name: share-api - depends_on: - - redis - - postgres - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:5424/health"] - interval: 3s - timeout: 10s - retries: 3 - ports: - - "5424:5424" - - "5425:5425" - - "5426:5426" - - environment: - # Placeholder values for development - - SHARE_API_ORIGIN=http://share-api - - SHARE_SERVER_PORT=5424 - - SHARE_ADMIN_PORT=5425 - - SHARE_CLIENT_HOSTNAME=0.0.0.0 - - SHARE_CLIENT_PORT=5426 - - SHARE_REDIS=redis://redis:6379 - - SHARE_POSTGRES=postgresql://postgres:sekrit@postgres:5432 - - SHARE_HMAC_KEY=test-key-test-key-test-key-test-key- - - SHARE_DEPLOYMENT=local - - SHARE_IP=share-api - - SHARE_AWS_CREDENTIAL_URL=invalid - - AWS_REGION=invalid - - SHARE_POSTGRES_CONN_TTL=30 - - SHARE_POSTGRES_CONN_MAX=10 - - SHARE_SHARE_UI_ORIGIN=http://localhost:1234 - - SHARE_CLOUD_UI_ORIGIN=http://localhost:5678 - - SHARE_HOMEPAGE_ORIGIN=http://localhost:1111 - - SHARE_CLOUD_HOMEPAGE_ORIGIN=http://localhost:2222 - - SHARE_LOG_LEVEL=DEBUG - - SHARE_COMMIT=dev - - SHARE_MAX_PARALLELISM_PER_DOWNLOAD_REQUEST=1 - - SHARE_MAX_PARALLELISM_PER_UPLOAD_REQUEST=5 - - SHARE_ZENDESK_API_USER=invaliduser@example.com - - SHARE_ZENDESK_API_TOKEN=bad-password - - SHARE_GITHUB_CLIENTID=invalid - - SHARE_GITHUB_CLIENT_SECRET=invalid - - AWS_ACCESS_KEY_ID=invalid - - AWS_SECRET_ACCESS_KEY=invalid - - links: - - redis - - postgres From 9b40199af7aa44ab6358cd12942785859ecea939 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 24 Jul 2024 14:53:54 -0700 Subject: [PATCH 42/48] Fix type query parser to not infinitely loop --- src/Share/Web/Share/DefinitionSearch.hs | 145 +++++++++++++----------- 1 file changed, 76 insertions(+), 69 deletions(-) diff --git a/src/Share/Web/Share/DefinitionSearch.hs b/src/Share/Web/Share/DefinitionSearch.hs index 2d98946..47d18b3 100644 --- a/src/Share/Web/Share/DefinitionSearch.hs +++ b/src/Share/Web/Share/DefinitionSearch.hs @@ -16,6 +16,7 @@ import Text.Megaparsec qualified as MP import Text.Megaparsec.Char qualified as MP import Text.Megaparsec.Char.Lexer qualified as MP hiding (space) import Unison.Name (Name) +import Unison.Name qualified as Name import Unison.ShortHash (ShortHash) import Unison.ShortHash qualified as SH import Unison.Syntax.Name qualified as Name @@ -36,6 +37,7 @@ data MentionRef | NameMention Name | TypeNameMention Name | TypeVarMention Text + | TypeHashMention ShortHash deriving stock (Show, Eq, Ord) type P = MP.Parsec QueryError Text @@ -69,65 +71,73 @@ type P = MP.Parsec QueryError Text -- Right (fromList [HashToken (Builtin "Nat")],Nothing) -- -- >>> queryToTokens "Nat Text #deadbeef Abort" --- Right (fromList [TypeMentionToken (Left (NameSegment {toUnescapedText = "Abort"})) (Just 1),TypeMentionToken (Left (NameSegment {toUnescapedText = "Nat"})) (Just 1),TypeMentionToken (Left (NameSegment {toUnescapedText = "Text"})) (Just 1),HashToken (ShortHash {prefix = "deadbeef", cycle = Nothing, cid = Nothing})],Nothing) +-- Right (fromList [TypeMentionToken (Left (Name Relative (NameSegment {toUnescapedText = "Abort"} :| []))) (Just 1),TypeMentionToken (Left (Name Relative (NameSegment {toUnescapedText = "Nat"} :| []))) (Just 1),TypeMentionToken (Left (Name Relative (NameSegment {toUnescapedText = "Text"} :| []))) (Just 1),TypeMentionToken (Right (ShortHash {prefix = "deadbeef", cycle = Nothing, cid = Nothing})) (Just 
1)],Nothing) -- -- >>> queryToTokens "k -> v -> Map k v -> Map k v" --- Right (fromList [TypeMentionToken (Left (NameSegment {toUnescapedText = "Map"})) (Just 2),TypeVarToken 0 (Just 3),TypeVarToken 1 (Just 3)],Just 3) +-- Right (fromList [TypeMentionToken (Left (Name Relative (NameSegment {toUnescapedText = "Map"} :| []))) (Just 2),TypeVarToken 0 (Just 3),TypeVarToken 1 (Just 3)],Just 3) -- -- >>> queryToTokens ": b -> a -> b" -- Right (fromList [TypeVarToken 0 (Just 1),TypeVarToken 1 (Just 2)],Just 2) -- -- >>> queryToTokens "(a ->{𝕖} b) -> [a] ->{𝕖} [b]" --- Right (fromList [TypeMentionToken (Left (NameSegment {toUnescapedText = "List"})) (Just 2),TypeVarToken 0 (Just 2),TypeVarToken 1 (Just 2)],Just 2) +-- Right (fromList [TypeMentionToken (Left (Name Relative (NameSegment {toUnescapedText = "List"} :| []))) (Just 2),TypeVarToken 0 (Just 2),TypeVarToken 1 (Just 2)],Just 2) -- -- >>> queryToTokens "'{Abort} ()" --- Right (fromList [TypeMentionToken (Left (NameSegment {toUnescapedText = "Abort"})) (Just 1)],Nothing) +-- Right (fromList [TypeMentionToken (Left (Name Relative (NameSegment {toUnescapedText = "Abort"} :| []))) (Just 1)],Nothing) -- -- Unfinished query: -- >>> queryToTokens "(Text -> Text" --- Right (fromList [TypeMentionToken (Left (NameSegment {toUnescapedText = "Text"})) (Just 2)],Nothing) +-- Right (fromList [TypeMentionToken (Left (Name Relative (NameSegment {toUnescapedText = "Text"} :| []))) (Just 2)],Nothing) -- -- Horribly mishapen query: -- >>> queryToTokens "[{ &Text !{𝕖} (Optional)" --- Right (fromList [TypeMentionToken (Left (NameSegment {toUnescapedText = "Optional"})) (Just 1),TypeMentionToken (Left (NameSegment {toUnescapedText = "Text"})) (Just 1)],Nothing) +-- Right (fromList [TypeMentionToken (Left (Name Relative (NameSegment {toUnescapedText = "Optional"} :| []))) (Just 1),TypeMentionToken (Left (Name Relative (NameSegment {toUnescapedText = "Text"} :| []))) (Just 1)],Nothing) +-- +-- >>> queryToTokens "e -> abilities.Exception" +-- Right (fromList [TypeMentionToken (Left (Name Relative (NameSegment {toUnescapedText = "Exception"} :| [NameSegment {toUnescapedText = "abilities"}]))) (Just 1),TypeVarToken 0 (Just 1)],Just 1) queryToTokens :: Text -> Either Text (Set (DefnSearchToken (Either Name ShortHash)), Maybe Arity) queryToTokens query = let cleanQuery = query & Text.filter Char.isAscii - in MP.runParser queryParser "query" cleanQuery - & \case - (Left _err) -> - let simpleQuery = - query - & Text.map (\c -> if Char.isAlphaNum c || c `elem` ("#." :: String) then c else ' ') - & Text.words - & Text.unwords - in -- If even the lax parser fails, try simplifying the query even further to see if - -- we can parse anything at all. - queryToTokens simpleQuery - (Right (mayArity, occurrences)) -> - let (hashAndNameTokens, typeVarMentions) = - MonMap.toList - occurrences - & foldMap \case - (HashMention hash, _occ) -> ([HashToken hash], []) - (NameMention name, _occ) -> ([NameToken name], []) - (TypeNameMention name, occ) -> ([TypeMentionToken (Left name) $ Just occ], []) - (TypeVarMention var, occ) -> ([], [(var, occ)]) - - -- Normalize type vars so varIds are sorted according to number of occurences. - normalizedTypeVarTokens = - List.sortOn snd typeVarMentions - & imap (\i (_vId, occ) -> TypeVarToken (VarId i) $ Just occ) - -- if there's no indication the user is trying to do a 'real' type query then - -- ignore arity. 
- arity = do - Sum n <- mayArity - if n <= 0 - then Nothing - else Just n - in Right (Set.fromList $ hashAndNameTokens <> normalizedTypeVarTokens, arity) + parseResult = + MP.runParser queryParser "query" cleanQuery + & \case + (Left _err) -> + let simpleQuery = + cleanQuery + & Text.map (\c -> if Char.isAlphaNum c || c `elem` ("#." :: String) then c else ' ') + & Text.words + & Text.unwords + in -- If even the lax parser fails, try simplifying the query even further to see if + -- we can parse anything at all. + mapLeft (Text.pack . MP.errorBundlePretty) $ MP.runParser queryParser "query" simpleQuery + (Right r) -> Right r + in case parseResult of + Left err -> Left err + Right (mayArity, occurrences) -> + let (hashAndNameTokens, typeVarMentions) = + MonMap.toList + occurrences + & foldMap \case + (HashMention hash, _occ) -> ([HashToken hash], []) + (NameMention name, _occ) -> ([NameToken name], []) + (TypeNameMention name, occ) -> ([TypeMentionToken (Left name) $ Just occ], []) + (TypeVarMention var, occ) -> ([], [(var, occ)]) + (TypeHashMention hash, occ) -> ([TypeMentionToken (Right hash) $ Just occ], []) + + -- Normalize type vars so varIds are sorted according to number of occurences. + normalizedTypeVarTokens = + List.sortOn snd typeVarMentions + & imap (\i (_vId, occ) -> TypeVarToken (VarId i) $ Just occ) + -- if there's no indication the user is trying to do a 'real' type query then + -- ignore arity. + arity = do + Sum n <- mayArity + if n <= 0 + then Nothing + else Just n + in Right (Set.fromList $ hashAndNameTokens <> normalizedTypeVarTokens, arity) queryParser :: P (Maybe (Sum Arity), MonoidalMap MentionRef Occurrence) queryParser = do @@ -150,7 +160,7 @@ simpleHashQueryP = do simpleNameQueryP :: P (MonoidalMap MentionRef Occurrence) simpleNameQueryP = do - name <- nameP False + name <- initialNameP -- Simple queries have ONLY the name MP.eof pure $ MonMap.singleton (NameMention name) 1 @@ -159,7 +169,7 @@ simpleNameQueryP = do typeQueryP :: P (Sum Arity, MonoidalMap MentionRef Occurrence) typeQueryP = do _ <- optional $ lexeme (MP.char ':') - fmap fold . many $ do + fmap fold . some $ do tokens <- lexeme $ MP.choice @@ -197,7 +207,7 @@ tupleP = MP.between (MP.char '(') (MP.char ')') do pure $ MonMap.singleton (TypeNameMention (Name.unsafeParseText "Tuple")) 1 listP :: P (MonoidalMap MentionRef Occurrence) -listP = MP.between (lexeme (MP.char '[')) (optional $ lexeme (MP.char ']')) do +listP = MP.between (lexeme (MP.char '[')) (lexeme (MP.char ']')) do (_, tokens) <- typeQueryP pure $ tokens <> MonMap.singleton (TypeNameMention (Name.unsafeParseText "List")) 1 @@ -205,27 +215,24 @@ typeQueryTokenP :: P (MonoidalMap MentionRef Occurrence) typeQueryTokenP = do MP.choice [ hashMentionTokenP, - typeVarMentionTokenP, - typeNameMentionTokenP + typeMentionP ] where hashMentionTokenP :: P (MonoidalMap MentionRef Occurrence) hashMentionTokenP = do hash <- hashP - pure $ MonMap.singleton (HashMention hash) 1 - - typeNameMentionTokenP :: P (MonoidalMap MentionRef Occurrence) - typeNameMentionTokenP = do - name <- nameP True - pure $ MonMap.singleton (TypeNameMention name) 1 + pure $ MonMap.singleton (TypeHashMention hash) 1 - typeVarMentionTokenP :: P (MonoidalMap MentionRef Occurrence) - typeVarMentionTokenP = do - varText <- typeVarP - pure $ MonMap.singleton (TypeVarMention varText) 1 - -typeVarP :: P Text -typeVarP = Text.pack <$> liftA2 (:) (MP.oneOf $ ['a' .. 
'z'] <> "_") (many $ MP.alphaNumChar <|> MP.char '_') +typeMentionP :: P (MonoidalMap MentionRef Occurrence) +typeMentionP = do + name <- nameP + case name of + n + | Just (c, _) <- Text.uncons . NameSegment.toEscapedText . Name.lastSegment $ n, + Char.isLower c, + Name.countSegments n == 1 -> + pure $ MonMap.singleton (TypeVarMention (Name.toText n)) 1 + | otherwise -> pure $ MonMap.singleton (TypeNameMention name) 1 hashP :: P ShortHash hashP = do @@ -236,16 +243,16 @@ hashP = do Nothing -> MP.customFailure . InvalidHash $ Text.pack possibleHash Just hash -> pure hash -nameP :: Bool -> P Name -nameP mustBeType = do - firstChar <- - if mustBeType - then MP.satisfy Char.isUpper - else MP.satisfy (\c -> NameSegment.symbolyIdChar c || NameSegment.wordyIdChar c) - nameRemainder <- - if mustBeType - then many (MP.satisfy $ \c -> NameSegment.wordyIdChar c || c == '.') - else (many (MP.satisfy $ \c -> NameSegment.symbolyIdChar c || NameSegment.wordyIdChar c || c == '.')) - case Name.parseTextEither (Text.pack $ firstChar : nameRemainder) of - Left _ -> MP.customFailure . InvalidName $ Text.pack (firstChar : nameRemainder) +nameP :: P Name +nameP = do + name <- List.intercalate "." <$> MP.sepBy (liftA2 (:) (MP.satisfy NameSegment.wordyIdStartChar) (many (MP.satisfy NameSegment.wordyIdChar))) (MP.char '.') + case Name.parseTextEither (Text.pack name) of + Left _ -> MP.customFailure . InvalidName $ Text.pack name + Right name -> pure name + +initialNameP :: P Name +initialNameP = do + name <- some (MP.satisfy NameSegment.symbolyIdChar) <|> (liftA2 (:) (MP.satisfy NameSegment.wordyIdStartChar) (some (MP.satisfy NameSegment.wordyIdChar))) + case Name.parseTextEither (Text.pack name) of + Left _ -> MP.customFailure . InvalidName $ Text.pack name Right name -> pure name From 2f474666129a7cd7ec54bcfeb391e47ac50ea22a Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 24 Jul 2024 14:53:54 -0700 Subject: [PATCH 43/48] Fix transcripts in ci --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 9fbef81..07acb0a 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -247,7 +247,7 @@ jobs: docker volume rm docker_postgresVolume 2>/dev/null || true # Start share and it's dependencies in the background - docker compose -f docker/transcripts.docker-compose.yml up --wait + docker compose -f docker/docker-compose.yml up --wait # Run the transcript tests zsh ./transcripts/run-transcripts.zsh From 9b24f52119739947455e1ffa0e1c5adffdff6896 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 24 Jul 2024 14:53:54 -0700 Subject: [PATCH 44/48] Fix search by name for namespaced names --- src/Share/Web/Share/DefinitionSearch.hs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Share/Web/Share/DefinitionSearch.hs b/src/Share/Web/Share/DefinitionSearch.hs index 47d18b3..4282f20 100644 --- a/src/Share/Web/Share/DefinitionSearch.hs +++ b/src/Share/Web/Share/DefinitionSearch.hs @@ -95,6 +95,9 @@ type P = MP.Parsec QueryError Text -- -- >>> queryToTokens "e -> abilities.Exception" -- Right (fromList [TypeMentionToken (Left (Name Relative (NameSegment {toUnescapedText = "Exception"} :| [NameSegment {toUnescapedText = "abilities"}]))) (Just 1),TypeVarToken 0 (Just 1)],Just 1) +-- +-- >>> queryToTokens "Json.Text" +-- Right (fromList [NameToken (Name Relative (NameSegment {toUnescapedText = "Text"} :| [NameSegment {toUnescapedText = "Json"}]))],Nothing) queryToTokens :: Text -> Either 
Text (Set (DefnSearchToken (Either Name ShortHash)), Maybe Arity) queryToTokens query = let cleanQuery = @@ -252,7 +255,7 @@ nameP = do initialNameP :: P Name initialNameP = do - name <- some (MP.satisfy NameSegment.symbolyIdChar) <|> (liftA2 (:) (MP.satisfy NameSegment.wordyIdStartChar) (some (MP.satisfy NameSegment.wordyIdChar))) + name <- List.intercalate "." <$> MP.sepBy (some (MP.satisfy NameSegment.symbolyIdChar) <|> (liftA2 (:) (MP.satisfy NameSegment.wordyIdStartChar) (many (MP.satisfy NameSegment.wordyIdChar)))) (MP.char '.') case Name.parseTextEither (Text.pack name) of Left _ -> MP.customFailure . InvalidName $ Text.pack name Right name -> pure name From f5364c5a3f2de6455e9a889fe873b7a43eeb5879 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 24 Jul 2024 14:53:54 -0700 Subject: [PATCH 45/48] Fix formatting of definitions response --- docker/docker-compose.yml | 2 +- .../BackgroundJobs/Search/DefinitionSync.hs | 10 ++++++- src/Share/Web/Share/Types.hs | 30 ++++++++++++++++--- 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index f1cfab2..2e73248 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -20,7 +20,7 @@ services: # Optionally persist the data between container invocations - postgresVolume:/var/lib/postgresql/data - ./postgresql.conf:/etc/postgresql/postgresql.conf - command: postgres -c config_file=/etc/postgresql/postgresql.conf + command: postgres -c config_file=/etc/postgresql/postgresql.conf # -c log_statement=all redis: diff --git a/src/Share/BackgroundJobs/Search/DefinitionSync.hs b/src/Share/BackgroundJobs/Search/DefinitionSync.hs index 3fa30ec..aa8bb50 100644 --- a/src/Share/BackgroundJobs/Search/DefinitionSync.hs +++ b/src/Share/BackgroundJobs/Search/DefinitionSync.hs @@ -43,6 +43,7 @@ import Unison.Debug qualified as Debug import Unison.HashQualifiedPrime qualified as HQ' import Unison.LabeledDependency qualified as LD import Unison.Name (Name) +import Unison.Name qualified as Name import Unison.PrettyPrintEnv qualified as PPE import Unison.PrettyPrintEnvDecl qualified as PPED import Unison.PrettyPrintEnvDecl.Postgres qualified as PPEPostgres @@ -123,7 +124,14 @@ syncTerms namesPerspective bhId projectId releaseId termsCursor = do PG.timeTransaction "Building terms" $ terms & foldMapM \(fqn, ref) -> fmap (either (\err -> ([err], [])) (\doc -> ([], [doc]))) . runExceptT $ do typ <- lift (Codebase.loadTypeOfReferent ref) `whenNothingM` throwError (NoTypeSigForTerm fqn ref) - termSummary <- lift $ Summary.termSummaryForReferent ref typ (Just fqn) bhId Nothing Nothing + let displayName = + fqn + & Name.reverseSegments + -- For now we treat the display name for search as just the last 2 segments of the name. 
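              -- e.g. a fully-qualified name such as base.data.List.map has
              -- reverse segments map :| [List, data, base]; keeping the head
              -- plus one more segment and rebuilding with
              -- Name.fromReverseSegments yields the display name List.map.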
+ & \case + (ns :| rest) -> ns :| take 1 rest + & Name.fromReverseSegments + termSummary <- lift $ Summary.termSummaryForReferent ref typ (Just displayName) bhId Nothing Nothing let sh = Referent.toShortHash ref let (refTokens, arity) = tokensForTerm fqn ref typ termSummary let dd = diff --git a/src/Share/Web/Share/Types.hs b/src/Share/Web/Share/Types.hs index 5ac6c82..1c2764e 100644 --- a/src/Share/Web/Share/Types.hs +++ b/src/Share/Web/Share/Types.hs @@ -16,6 +16,7 @@ import Share.Utils.API (NullableUpdate, parseNullableUpdate) import Share.Utils.URI import Unison.Name (Name) import Unison.Server.Doc (Doc) +import Unison.Server.Share.DefinitionSummary.Types (TermSummary (..), TypeSummary (..)) data UpdateUserRequest = UpdateUserRequest { name :: NullableUpdate Text, @@ -189,8 +190,29 @@ data DefinitionSearchResult instance ToJSON DefinitionSearchResult where toJSON DefinitionSearchResult {..} = Aeson.object - [ "fqn" .= fqn, - "summary" .= summary, - "project" .= project, - "release" .= release + [ "fqn" Aeson..= fqn, + "projectRef" Aeson..= project, + "branchRef" Aeson..= release, + "kind" Aeson..= kind, + "definition" Aeson..= definition ] + where + (kind, definition) = case summary of + DefSync.ToTTermSummary TermSummary {displayName, hash, summary, tag} -> + ( Aeson.String "term", + Aeson.object + [ "displayName" Aeson..= displayName, + "hash" Aeson..= hash, + "summary" Aeson..= summary, + "tag" Aeson..= tag + ] + ) + DefSync.ToTTypeSummary TypeSummary {displayName, hash, summary, tag} -> + ( Aeson.String "type", + Aeson.object + [ "displayName" Aeson..= displayName, + "hash" Aeson..= hash, + "summary" Aeson..= summary, + "tag" Aeson..= tag + ] + ) From 10416b9f18bd99f54dbb426a1295f238e8bb0508 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Thu, 25 Jul 2024 14:38:28 -0700 Subject: [PATCH 46/48] Fix project/user searches --- package.yaml | 2 +- share-api.cabal | 4 ++-- src/Share/App.hs | 4 ++-- src/Share/Postgres/Queries.hs | 9 ++++---- src/Share/Web/Share/Impl.hs | 22 ++++++++++++++----- .../project-search-with-user.json | 15 +++++++++++++ transcripts/share-apis/projects-flow/run.zsh | 3 +++ 7 files changed, 45 insertions(+), 14 deletions(-) create mode 100644 transcripts/share-apis/projects-flow/project-search-with-user.json diff --git a/package.yaml b/package.yaml index 8bdb42e..158703f 100644 --- a/package.yaml +++ b/package.yaml @@ -20,7 +20,7 @@ description: Please see the README on GitHub at = minSeverity) $ do timestamp <- asks timeCache >>= liftIO - liftIO . log . Logging.logFmtFormatter timestamp $ msg + liftIO . log' . Logging.logFmtFormatter timestamp $ msg instance Cryptonite.MonadRandom (AppM reqCtx) where getRandomBytes = diff --git a/src/Share/Postgres/Queries.hs b/src/Share/Postgres/Queries.hs index 1f822c6..18c6b71 100644 --- a/src/Share/Postgres/Queries.hs +++ b/src/Share/Postgres/Queries.hs @@ -39,7 +39,7 @@ import Share.Web.Share.Releases.Types (ReleaseStatusFilter (..), StatusUpdate (. 
import Unison.Util.List qualified as Utils import Unison.Util.Monoid (intercalateMap) -userByUserId :: PG.QueryM m => UserId -> m (Maybe User) +userByUserId :: (PG.QueryM m) => UserId -> m (Maybe User) userByUserId uid = do PG.query1Row [PG.sql| @@ -260,8 +260,8 @@ searchUsersByNameOrHandlePrefix (Query prefix) (Limit limit) = do -- -- The PG.queryListRows accepts strings as web search queries, see -- https://www.postgresql.org/docs/current/textsearch-controls.html -searchProjectsByUserQuery :: Maybe UserId -> Query -> Limit -> PG.Transaction e [(Project, UserHandle)] -searchProjectsByUserQuery caller (Query query) limit = do +searchProjects :: Maybe UserId -> Maybe UserId -> Query -> Limit -> PG.Transaction e [(Project, UserHandle)] +searchProjects caller userIdFilter (Query query) limit = do let prefixQuery = query -- Remove any chars with special meaning for tsqueries. @@ -280,6 +280,7 @@ searchProjectsByUserQuery caller (Query query) limit = do JOIN users AS owner ON p.owner_user_id = owner.id WHERE (webquery @@ p.project_text_document OR prefixquery @@ p.project_text_document) AND (NOT p.private OR (#{caller} IS NOT NULL AND EXISTS (SELECT FROM accessible_private_projects WHERE user_id = #{caller} AND project_id = p.id))) + AND (#{userIdFilter} IS NULL OR p.owner_user_id = #{userIdFilter}) ORDER BY (ts_rank_cd(p.project_text_document, webquery), ts_rank_cd(p.project_text_document, prefixquery)) DESC LIMIT #{limit} |] @@ -698,7 +699,7 @@ createBranch !_nlReceipt projectId branchName contributorId causalId mergeTarget |] createRelease :: - PG.QueryM m => + (PG.QueryM m) => NameLookupReceipt -> ProjectId -> ReleaseVersion -> diff --git a/src/Share/Web/Share/Impl.hs b/src/Share/Web/Share/Impl.hs index 0648348..d929e94 100644 --- a/src/Share/Web/Share/Impl.hs +++ b/src/Share/Web/Share/Impl.hs @@ -6,9 +6,11 @@ module Share.Web.Share.Impl where +import Data.Text qualified as Text +import Servant import Share.Codebase qualified as Codebase import Share.Codebase.Types qualified as Codebase -import Share.IDs (TourId, UserHandle) +import Share.IDs (TourId, UserHandle (..)) import Share.IDs qualified as IDs import Share.JWT qualified as JWT import Share.OAuth.Session @@ -21,6 +23,7 @@ import Share.Postgres.Users.Queries qualified as UsersQ import Share.Prelude import Share.Project (Project (..)) import Share.User (User (..)) +import Share.User qualified as User import Share.UserProfile (UserProfile (..)) import Share.Utils.API import Share.Utils.Caching @@ -35,7 +38,6 @@ import Share.Web.Share.CodeBrowsing.API (CodeBrowseAPI) import Share.Web.Share.Contributions.Impl qualified as Contributions import Share.Web.Share.Projects.Impl qualified as Projects import Share.Web.Share.Types -import Servant import Unison.Codebase.Path qualified as Path import Unison.HashQualified qualified as HQ import Unison.Name (Name) @@ -332,13 +334,23 @@ getUserReadmeEndpoint (AuthN.MaybeAuthedUserID callerUserId) userHandle = do -- all private users in the PG query itself. 
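The owner filter threaded through searchProjects above relies on Maybe values
interpolating to SQL NULL, so a single statement serves both the filtered and
unfiltered cases. A minimal sketch of the idiom, using a hypothetical
projectsOwnedBy helper (not part of this patch) with the same
hasql-interpolate machinery:

    projectsOwnedBy :: Maybe UserId -> PG.Transaction e [ProjectId]
    projectsOwnedBy mayOwner =
      PG.queryListCol
        [PG.sql|
          SELECT p.id FROM projects p
          -- When mayOwner is Nothing it interpolates as NULL, the IS NULL
          -- branch is true for every row, and no owner filter is applied.
          WHERE #{mayOwner} IS NULL OR p.owner_user_id = #{mayOwner}
        |]

Passing Nothing lists every project id; passing Just uid restricts the result
to that owner, mirroring the #{userIdFilter} clause in searchProjects.
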
searchEndpoint :: Maybe Session -> Query -> Maybe Limit -> WebApp [SearchResult] searchEndpoint _caller (Query "") _limit = pure [] -searchEndpoint (MaybeAuthedUserID callerUserId) query (fromMaybe (Limit 20) -> limit) = do +searchEndpoint (MaybeAuthedUserID callerUserId) (Query query) (fromMaybe (Limit 20) -> limit) = do + (userQuery :: Query, (projectUserFilter :: Maybe UserId, projectQuery :: Query)) <- + fromMaybe query (Text.stripPrefix "@" query) + & Text.splitOn "/" + & \case + (userQuery : projectQueryText : _rest) -> do + mayUserId <- PG.runTransaction $ fmap User.user_id <$> Q.userByHandle (UserHandle userQuery) + pure (Query query, (mayUserId, Query projectQueryText)) + [projectOrUserQuery] -> pure (Query projectOrUserQuery, (Nothing, Query projectOrUserQuery)) + -- This is impossible + [] -> pure (Query query, (Nothing, Query query)) -- We don't have a great way to order users and projects together, so we just limit to a max -- of 5 users (who match the query as a prefix), then return the rest of the results from -- projects. (users, projects) <- PG.runTransaction $ do - users <- Q.searchUsersByNameOrHandlePrefix query (Limit 5) - projects <- Q.searchProjectsByUserQuery callerUserId query limit + users <- Q.searchUsersByNameOrHandlePrefix userQuery (Limit 5) + projects <- Q.searchProjects callerUserId projectUserFilter projectQuery limit pure (users, projects) let userResults = users diff --git a/transcripts/share-apis/projects-flow/project-search-with-user.json b/transcripts/share-apis/projects-flow/project-search-with-user.json new file mode 100644 index 0000000..d395ae3 --- /dev/null +++ b/transcripts/share-apis/projects-flow/project-search-with-user.json @@ -0,0 +1,15 @@ +{ + "body": [ + { + "projectRef": "@test/publictestproject", + "summary": "test project summary", + "tag": "Project", + "visibility": "public" + } + ], + "status": [ + { + "status_code": 200 + } + ] +} diff --git a/transcripts/share-apis/projects-flow/run.zsh b/transcripts/share-apis/projects-flow/run.zsh index 08e1ae0..902da5d 100755 --- a/transcripts/share-apis/projects-flow/run.zsh +++ b/transcripts/share-apis/projects-flow/run.zsh @@ -78,6 +78,9 @@ fetch "$transcript_user" GET project-catalog-get '/catalog' # Should find projects we have access to (e.g. Unison's private project), but none that we don't. 
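The query-splitting rule in searchEndpoint above, reduced to a standalone
sketch (hypothetical splitQuery helper; the real endpoint additionally
resolves the handle to a UserId via Q.userByHandle):

    splitQuery :: Text -> (Text, Maybe Text)
    splitQuery query =
      case Text.splitOn "/" (fromMaybe query (Text.stripPrefix "@" query)) of
        (userPart : projectPart : _) -> (userPart, Just projectPart)
        [single] -> (single, Nothing)
        [] -> (query, Nothing) -- unreachable: splitOn never returns []

So "@test/public" yields ("test", Just "public"), which is the shape the
project-search-with-user transcript below exercises, while a bare "test"
falls through to the combined user-and-project search.
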
fetch "$transcript_user" GET project-search '/search?query=test' +# Should filter project search by user if provided a full valid handle: +fetch "$transcript_user" GET project-search-with-user '/search?query=@test/public' + # Transcript user should not find 'test' user's private project fetch "$transcript_user" GET project-search-inaccessible '/search?query=privatetestproject' From 158b64cfc5f1865db977416b54f13b1275c535aa Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Thu, 25 Jul 2024 11:20:22 -0700 Subject: [PATCH 47/48] Add definition search transcripts --- .gitignore | 3 + transcripts/run-transcripts.zsh | 1 + .../search/complex-type-mention-search.json | 159 +++++++++++++ .../share-apis/search/create-release.json | 21 ++ .../share-apis/search/name-search-prefix.json | 13 ++ .../share-apis/search/name-search-suffix.json | 17 ++ transcripts/share-apis/search/prelude.md | 20 ++ transcripts/share-apis/search/run.zsh | 47 ++++ .../share-apis/search/type-var-search.json | 221 ++++++++++++++++++ transcripts/transcript_helpers.sh | 26 ++- 10 files changed, 520 insertions(+), 8 deletions(-) create mode 100644 transcripts/share-apis/search/complex-type-mention-search.json create mode 100644 transcripts/share-apis/search/create-release.json create mode 100644 transcripts/share-apis/search/name-search-prefix.json create mode 100644 transcripts/share-apis/search/name-search-suffix.json create mode 100644 transcripts/share-apis/search/prelude.md create mode 100755 transcripts/share-apis/search/run.zsh create mode 100644 transcripts/share-apis/search/type-var-search.json diff --git a/.gitignore b/.gitignore index 890ba1e..58d7006 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,6 @@ docker/tmp *.prof *.prof.html prelude.output.md + +# Scratch files +*.u diff --git a/transcripts/run-transcripts.zsh b/transcripts/run-transcripts.zsh index 0f2ed0a..f41e984 100755 --- a/transcripts/run-transcripts.zsh +++ b/transcripts/run-transcripts.zsh @@ -11,6 +11,7 @@ source "$(realpath "$(dirname "$0")")/transcript_helpers.sh" typeset -A transcripts transcripts=( + search transcripts/share-apis/search/ users transcripts/share-apis/users/ contribution-diffs transcripts/share-apis/contribution-diffs/ definition-diffs transcripts/share-apis/definition-diffs/ diff --git a/transcripts/share-apis/search/complex-type-mention-search.json b/transcripts/share-apis/search/complex-type-mention-search.json new file mode 100644 index 0000000..c47a5aa --- /dev/null +++ b/transcripts/share-apis/search/complex-type-mention-search.json @@ -0,0 +1,159 @@ +{ + "body": { + "results": [ + { + "branchRef": "releases/1.2.3", + "definition": { + "displayName": "List.map", + "hash": "#53u6nne5tneggsh2ngr76khq4kfdpvuf4l7kv6sq39kr9hjvh3qg4midcc5b69qhjlfii3io7pe2rn1on7rr6h76qsmjbc66n2ivpeo", + "summary": { + "contents": [ + { + "annotation": null, + "segment": "(" + }, + { + "annotation": { + "tag": "Var" + }, + "segment": "a" + }, + { + "annotation": null, + "segment": " " + }, + { + "annotation": { + "tag": "TypeOperator" + }, + "segment": "->" + }, + { + "annotation": { + "tag": "AbilityBraces" + }, + "segment": "{" + }, + { + "annotation": { + "tag": "Var" + }, + "segment": "g" + }, + { + "annotation": { + "tag": "AbilityBraces" + }, + "segment": "}" + }, + { + "annotation": null, + "segment": " " + }, + { + "annotation": { + "tag": "Var" + }, + "segment": "b" + }, + { + "annotation": null, + "segment": ")" + }, + { + "annotation": null, + "segment": " " + }, + { + "annotation": { + "tag": "TypeOperator" + }, + "segment": "->" + }, 
+ { + "annotation": null, + "segment": " " + }, + { + "annotation": { + "contents": "#22ffou62u91ks8av7bdmhq10jct5ulot40c84j0k3kfdrh5rj2o6a3ditsfpo6sv6mkde2p13um06mkrsdckudmeh6k4oa7v53887f8", + "tag": "TypeReference" + }, + "segment": "List" + }, + { + "annotation": null, + "segment": " " + }, + { + "annotation": { + "tag": "Var" + }, + "segment": "a" + }, + { + "annotation": null, + "segment": " " + }, + { + "annotation": { + "tag": "TypeOperator" + }, + "segment": "->" + }, + { + "annotation": { + "tag": "AbilityBraces" + }, + "segment": "{" + }, + { + "annotation": { + "tag": "Var" + }, + "segment": "g" + }, + { + "annotation": { + "tag": "AbilityBraces" + }, + "segment": "}" + }, + { + "annotation": null, + "segment": " " + }, + { + "annotation": { + "contents": "#22ffou62u91ks8av7bdmhq10jct5ulot40c84j0k3kfdrh5rj2o6a3ditsfpo6sv6mkde2p13um06mkrsdckudmeh6k4oa7v53887f8", + "tag": "TypeReference" + }, + "segment": "List" + }, + { + "annotation": null, + "segment": " " + }, + { + "annotation": { + "tag": "Var" + }, + "segment": "b" + } + ], + "tag": "UserObject" + }, + "tag": "Plain" + }, + "fqn": "List.map", + "kind": "term", + "projectRef": "@transcripts/search" + } + ] + }, + "status": [ + { + "status_code": 200 + } + ] +} diff --git a/transcripts/share-apis/search/create-release.json b/transcripts/share-apis/search/create-release.json new file mode 100644 index 0000000..792cd50 --- /dev/null +++ b/transcripts/share-apis/search/create-release.json @@ -0,0 +1,21 @@ +{ + "body": { + "causalHashSquashed": "#ih4vtflqotqkapjcmcfsbn7ohs94rp5mr4lo4tddlbja0hf6n0bp964dbqumfli49jbvu40uu9i3257aq406kt3v0nt6jrei9ajf9j0", + "causalHashUnsquashed": "#ih4vtflqotqkapjcmcfsbn7ohs94rp5mr4lo4tddlbja0hf6n0bp964dbqumfli49jbvu40uu9i3257aq406kt3v0nt6jrei9ajf9j0", + "createdAt": "", + "createdBy": "@transcripts", + "projectRef": "@transcripts/search", + "status": { + "publishedAt": "", + "publishedBy": "@transcripts", + "status": "published" + }, + "updatedAt": "", + "version": "1.2.3" + }, + "status": [ + { + "status_code": 201 + } + ] +} diff --git a/transcripts/share-apis/search/name-search-prefix.json b/transcripts/share-apis/search/name-search-prefix.json new file mode 100644 index 0000000..4be7867 --- /dev/null +++ b/transcripts/share-apis/search/name-search-prefix.json @@ -0,0 +1,13 @@ +{ + "body": [ + { + "tag": "plain", + "token": "function.const" + } + ], + "status": [ + { + "status_code": 200 + } + ] +} diff --git a/transcripts/share-apis/search/name-search-suffix.json b/transcripts/share-apis/search/name-search-suffix.json new file mode 100644 index 0000000..e710bbb --- /dev/null +++ b/transcripts/share-apis/search/name-search-suffix.json @@ -0,0 +1,17 @@ +{ + "body": [ + { + "tag": "plain", + "token": "function.const" + }, + { + "tag": "data-constructor", + "token": "List.Cons" + } + ], + "status": [ + { + "status_code": 200 + } + ] +} diff --git a/transcripts/share-apis/search/prelude.md b/transcripts/share-apis/search/prelude.md new file mode 100644 index 0000000..3367312 --- /dev/null +++ b/transcripts/share-apis/search/prelude.md @@ -0,0 +1,20 @@ +```unison +structural type List a = Nil | Cons a (List a) + +function.const : a -> b -> a +function.const a b = a + +structural ability Throw e where + throw : e -> a + +List.map : (a -> {g} b) -> List a -> {g} List b +List.map f = cases + (Cons a rest) -> Cons (f a) (List.map f rest) + Nil -> Nil +``` + + +```ucm +scratch/main> add +scratch/main> push @transcripts/search/main +``` diff --git a/transcripts/share-apis/search/run.zsh 
b/transcripts/share-apis/search/run.zsh new file mode 100755 index 0000000..9c16ec6 --- /dev/null +++ b/transcripts/share-apis/search/run.zsh @@ -0,0 +1,47 @@ +#!/usr/bin/env zsh + +set -e +source ../../transcript_helpers.sh + +# Reset DB to a known state +pg_reset_fixtures + +login_user_for_ucm 'transcripts' +transcript_ucm transcript prelude.md + +echo 'get-causal-hash' +causalHash="$(fetch_data "$transcript_user" GET 'get-causal-hash' '/users/transcripts/projects/search/branches/main/browse' 2>/dev/null | jq -r '.namespaceListingHash')" + +echo 'create-release' +# Create a release so it will be indexed +fetch "$transcript_user" POST create-release '/users/transcripts/projects/search/releases' "{ + \"causalHash\": \"${causalHash}\", + \"major\": 1, + \"minor\": 2, + \"patch\": 3 +}" + +echo 'check-indexed' +# We have to wait for it to be indexed +for i in {1..10}; do + if fetch_data "$transcript_user" GET 'check-indexed' '/search-names?query=const' | jq -e '(. | length) > 0' 2>/dev/null >/dev/null; then + echo 'Found definition search results, continuing...'; + break; + # If we're on the last iteration fail the transcript + elif [[ "$i" -ge 10 ]] then + echo 'Failed to find any definition search results before timeout.'; + exit 1; + fi + sleep 3; +done + +# Name searches +fetch "$transcript_user" GET 'name-search-suffix' '/search-names?query=const' +fetch "$transcript_user" GET 'name-search-prefix' '/search-names?query=Func' + +# Type searches +# "b -> a -> a" +fetch "$transcript_user" GET 'type-var-search' '/search-definitions?query=b%20-%3E%20a%20-%3E%20a' + +# (a -> b) -> List a -> List b +fetch "$transcript_user" GET 'complex-type-mention-search' '/search-definitions?query=(a%20-%3E%20b)%20-%3E%20List%20a%20-%3E%20List%20b' diff --git a/transcripts/share-apis/search/type-var-search.json b/transcripts/share-apis/search/type-var-search.json new file mode 100644 index 0000000..4163433 --- /dev/null +++ b/transcripts/share-apis/search/type-var-search.json @@ -0,0 +1,221 @@ +{ + "body": { + "results": [ + { + "branchRef": "releases/1.2.3", + "definition": { + "displayName": "function.const", + "hash": "#20991ok5ht19nlsedaet8das0lq77c0hjjthlbmicvued7s733uhj7jmeao4is1mu380u98qlf1iosf15b8atsfl1mo4bv3kl3i7hdo", + "summary": { + "contents": [ + { + "annotation": { + "tag": "Var" + }, + "segment": "a" + }, + { + "annotation": null, + "segment": " " + }, + { + "annotation": { + "tag": "TypeOperator" + }, + "segment": "->" + }, + { + "annotation": null, + "segment": " " + }, + { + "annotation": { + "tag": "Var" + }, + "segment": "b" + }, + { + "annotation": null, + "segment": " " + }, + { + "annotation": { + "tag": "TypeOperator" + }, + "segment": "->" + }, + { + "annotation": null, + "segment": " " + }, + { + "annotation": { + "tag": "Var" + }, + "segment": "a" + } + ], + "tag": "UserObject" + }, + "tag": "Plain" + }, + "fqn": "function.const", + "kind": "term", + "projectRef": "@transcripts/search" + }, + { + "branchRef": "releases/1.2.3", + "definition": { + "displayName": "List.map", + "hash": "#53u6nne5tneggsh2ngr76khq4kfdpvuf4l7kv6sq39kr9hjvh3qg4midcc5b69qhjlfii3io7pe2rn1on7rr6h76qsmjbc66n2ivpeo", + "summary": { + "contents": [ + { + "annotation": null, + "segment": "(" + }, + { + "annotation": { + "tag": "Var" + }, + "segment": "a" + }, + { + "annotation": null, + "segment": " " + }, + { + "annotation": { + "tag": "TypeOperator" + }, + "segment": "->" + }, + { + "annotation": { + "tag": "AbilityBraces" + }, + "segment": "{" + }, + { + "annotation": { + "tag": "Var" + }, + "segment": "g" + 
}, + { + "annotation": { + "tag": "AbilityBraces" + }, + "segment": "}" + }, + { + "annotation": null, + "segment": " " + }, + { + "annotation": { + "tag": "Var" + }, + "segment": "b" + }, + { + "annotation": null, + "segment": ")" + }, + { + "annotation": null, + "segment": " " + }, + { + "annotation": { + "tag": "TypeOperator" + }, + "segment": "->" + }, + { + "annotation": null, + "segment": " " + }, + { + "annotation": { + "contents": "#22ffou62u91ks8av7bdmhq10jct5ulot40c84j0k3kfdrh5rj2o6a3ditsfpo6sv6mkde2p13um06mkrsdckudmeh6k4oa7v53887f8", + "tag": "TypeReference" + }, + "segment": "List" + }, + { + "annotation": null, + "segment": " " + }, + { + "annotation": { + "tag": "Var" + }, + "segment": "a" + }, + { + "annotation": null, + "segment": " " + }, + { + "annotation": { + "tag": "TypeOperator" + }, + "segment": "->" + }, + { + "annotation": { + "tag": "AbilityBraces" + }, + "segment": "{" + }, + { + "annotation": { + "tag": "Var" + }, + "segment": "g" + }, + { + "annotation": { + "tag": "AbilityBraces" + }, + "segment": "}" + }, + { + "annotation": null, + "segment": " " + }, + { + "annotation": { + "contents": "#22ffou62u91ks8av7bdmhq10jct5ulot40c84j0k3kfdrh5rj2o6a3ditsfpo6sv6mkde2p13um06mkrsdckudmeh6k4oa7v53887f8", + "tag": "TypeReference" + }, + "segment": "List" + }, + { + "annotation": null, + "segment": " " + }, + { + "annotation": { + "tag": "Var" + }, + "segment": "b" + } + ], + "tag": "UserObject" + }, + "tag": "Plain" + }, + "fqn": "List.map", + "kind": "term", + "projectRef": "@transcripts/search" + } + ] + }, + "status": [ + { + "status_code": 200 + } + ] +} diff --git a/transcripts/transcript_helpers.sh b/transcripts/transcript_helpers.sh index 3602678..622f641 100755 --- a/transcripts/transcript_helpers.sh +++ b/transcripts/transcript_helpers.sh @@ -88,6 +88,22 @@ clean_for_transcript() { } fetch() { + testname="$3" + result_file="$(mktemp)" + status_code_file="$(mktemp)" + api_path="$4" + echo "${testname}" "${api_path}" + fetch_data "$@" 2> "${status_code_file}" | clean_for_transcript > "${result_file}" + # Try embedding the json response as-is, but if it's not valid json (e.g. it's an error message instead), embed it as a string. 
+  jq --sort-keys -n --slurpfile status "${status_code_file}" --slurpfile body "${result_file}" '{"status": $status, "body": ($body | .[0])}' > "./$testname.json" 2> /dev/null || {
+    jq --sort-keys -n --slurpfile status "${status_code_file}" --rawfile body "${result_file}" '{"status": $status, "body": $body}' > "./$testname.json"
+  }
+}
+
+# Like fetch, but returns the result instead of writing a transcript file:
+# stderr gets '{"status_code":xxx}'
+# stdout gets the body
+fetch_data() {
   if [ "$#" -lt 4 ]; then
     echo "fetch requires at least 4 arguments: user_id, method, testname, api_path, [data]" >&2
     exit 1
@@ -101,22 +117,16 @@ fetch() {
 
   result_file="$(mktemp)"
   status_code_file="$(mktemp)"
-  echo "${testname}" "${api_path}"
 
   case $method in
     GET)
-      curl --request "GET" -L -s --cookie "$cookie_jar" -H "Accept: application/json" -w '%{stderr} {"status_code":%{http_code}}' "$url" 2> "${status_code_file}" | clean_for_transcript > "${result_file}"
+      curl --request "GET" -L -s --cookie "$cookie_jar" -H "Accept: application/json" -w '%{stderr} {"status_code":%{http_code}}' "$url"
       ;;
     *)
-      curl --request "$method" -L -s --cookie "$cookie_jar" -H "Accept: application/json" -H "Content-Type: application/json" --data-raw "$data" -w '%{stderr} {"status_code":%{http_code}}' "$url" 2> "${status_code_file}" | clean_for_transcript > "${result_file}"
+      curl --request "$method" -L -s --cookie "$cookie_jar" -H "Accept: application/json" -H "Content-Type: application/json" --data-raw "$data" -w '%{stderr} {"status_code":%{http_code}}' "$url"
      ;;
   esac
-  # Try embedding the json response as-is, but if it's not valid json (e.g. it's an error message instead), embed it as a string.
-  jq --sort-keys -n --slurpfile status "${status_code_file}" --slurpfile body "${result_file}" '{"status": $status, "body": ($body | .[0])}' > "./$testname.json" 2> /dev/null || {
-    jq --sort-keys -n --slurpfile status "${status_code_file}" --rawfile body "${result_file}" '{"status": $status, "body": $body}' > "./$testname.json"
-  }
 }
-
 # Credentials setup
 login_user_for_ucm() {

From 15161e1543bbc06f42db0d578b2f78f48f7f14b0 Mon Sep 17 00:00:00 2001
From: Chris Penner
Date: Thu, 25 Jul 2024 15:30:17 -0700
Subject: [PATCH 48/48] If user is provided but project query is empty, just
 list projects by the user

---
 src/Share/Postgres/Queries.hs                    | 16 ++++++++++++++++
 ...r.json => project-search-with-only-user.json} |  0
 ...oject-search-with-user-and-project-query.json | 15 +++++++++++++++
 transcripts/share-apis/projects-flow/run.zsh     |  5 ++++-
 4 files changed, 35 insertions(+), 1 deletion(-)
 rename transcripts/share-apis/projects-flow/{project-search-with-user.json => project-search-with-only-user.json} (100%)
 create mode 100644 transcripts/share-apis/projects-flow/project-search-with-user-and-project-query.json

diff --git a/src/Share/Postgres/Queries.hs b/src/Share/Postgres/Queries.hs
index 18c6b71..6d8c90b 100644
--- a/src/Share/Postgres/Queries.hs
+++ b/src/Share/Postgres/Queries.hs
@@ -261,6 +261,22 @@ searchUsersByNameOrHandlePrefix (Query prefix) (Limit limit) = do
 -- The PG.queryListRows accepts strings as web search queries, see
 -- https://www.postgresql.org/docs/current/textsearch-controls.html
 searchProjects :: Maybe UserId -> Maybe UserId -> Query -> Limit -> PG.Transaction e [(Project, UserHandle)]
+-- Don't search with an empty query
+searchProjects _caller Nothing (Query "") _limit = pure []
+searchProjects caller (Just userId) (Query "") limit = do
+  -- If we have a userId filter but no query, just return all the projects owned by that user
+  -- which the caller has access to.
+  PG.queryListRows @(Project PG.:. PG.Only UserHandle)
+    [PG.sql|
+    SELECT p.id, p.owner_user_id, p.slug, p.summary, p.tags, p.private, p.created_at, p.updated_at, owner.handle
+      FROM projects p
+      JOIN users owner ON p.owner_user_id = owner.id
+      WHERE p.owner_user_id = #{userId}
+        AND (NOT p.private OR (#{caller} IS NOT NULL AND EXISTS (SELECT FROM accessible_private_projects WHERE user_id = #{caller} AND project_id = p.id)))
+      ORDER BY p.created_at DESC
+      LIMIT #{limit}
+    |]
+    <&> fmap \(project PG.:. PG.Only handle) -> (project, handle)
 searchProjects caller userIdFilter (Query query) limit = do
   let prefixQuery = query
diff --git a/transcripts/share-apis/projects-flow/project-search-with-user.json b/transcripts/share-apis/projects-flow/project-search-with-only-user.json
similarity index 100%
rename from transcripts/share-apis/projects-flow/project-search-with-user.json
rename to transcripts/share-apis/projects-flow/project-search-with-only-user.json
diff --git a/transcripts/share-apis/projects-flow/project-search-with-user-and-project-query.json b/transcripts/share-apis/projects-flow/project-search-with-user-and-project-query.json
new file mode 100644
index 0000000..d395ae3
--- /dev/null
+++ b/transcripts/share-apis/projects-flow/project-search-with-user-and-project-query.json
@@ -0,0 +1,15 @@
+{
+  "body": [
+    {
+      "projectRef": "@test/publictestproject",
+      "summary": "test project summary",
+      "tag": "Project",
+      "visibility": "public"
+    }
+  ],
+  "status": [
+    {
+      "status_code": 200
+    }
+  ]
+}
diff --git a/transcripts/share-apis/projects-flow/run.zsh b/transcripts/share-apis/projects-flow/run.zsh
index 902da5d..b58084d 100755
--- a/transcripts/share-apis/projects-flow/run.zsh
+++ b/transcripts/share-apis/projects-flow/run.zsh
@@ -79,7 +79,10 @@ fetch "$transcript_user" GET project-catalog-get '/catalog'
 fetch "$transcript_user" GET project-search '/search?query=test'
 
 # Should filter project search by user if provided a full valid handle:
-fetch "$transcript_user" GET project-search-with-user '/search?query=@test/public'
+fetch "$transcript_user" GET project-search-with-user-and-project-query '/search?query=@test/public'
+
+# Should return all of the user's projects if provided a full valid handle but no project query:
+fetch "$transcript_user" GET project-search-with-only-user '/search?query=@test/'
 
 # Transcript user should not find 'test' user's private project
 fetch "$transcript_user" GET project-search-inaccessible '/search?query=privatetestproject'
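
The handle parsing that feeds searchProjects is not part of this patch; as a rough sketch of the step the new run.zsh cases exercise, a query like '@test/public' is split into a user-handle filter plus a project-slug query, while '@test/' leaves the project query empty and now falls through to the list-by-owner case above. The helper below (splitSearchQuery) is hypothetical, not Share's actual API:

-- Hypothetical sketch only: splitting a raw search query into a handle
-- filter and a remaining project query before calling searchProjects.
import Data.Text (Text)
import Data.Text qualified as Text

splitSearchQuery :: Text -> (Maybe Text, Text)
splitSearchQuery q =
  case Text.stripPrefix "@" q of
    -- "@handle/slug-prefix": everything up to the first '/' names the user
    Just rest ->
      let (handle, rest') = Text.breakOn "/" rest
       in (Just handle, Text.drop 1 rest')
    -- No leading '@': a plain project text search
    Nothing -> (Nothing, q)

-- splitSearchQuery "@test/public" == (Just "test", "public")  -- user filter + project query
-- splitSearchQuery "@test/"       == (Just "test", "")        -- user only: list their projects
-- splitSearchQuery "nat"          == (Nothing, "nat")         -- plain search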