diff --git a/sql/2025-01-31_dependencies-of-causal.sql b/sql/2025-01-31_dependencies-of-causal.sql new file mode 100644 index 0000000..458183b --- /dev/null +++ b/sql/2025-01-31_dependencies-of-causal.sql @@ -0,0 +1,131 @@ +-- Takes a causal_id and returns a table of ALL hashes which are dependencies of that causal. +CREATE OR REPLACE FUNCTION dependencies_of_causals(the_causal_ids INTEGER[]) RETURNS TABLE (hash TEXT) AS $$ + WITH RECURSIVE all_causals(causal_id, causal_hash, causal_namespace_hash_id) AS ( + -- Base causal + SELECT causal.id, causal.hash, causal.namespace_hash_id + FROM UNNEST(the_causal_ids) AS causal_id + JOIN causals causal ON causal.id = causal_id + UNION + -- This nested CTE is required because RECURSIVE CTEs can't refer + -- to the recursive table more than once. + -- I don't fully understand why or how this works, but it does + ( WITH rec AS ( + SELECT tc.causal_id, tc.causal_namespace_hash_id + FROM all_causals tc + ) + SELECT ancestor_causal.id, ancestor_causal.hash, ancestor_causal.namespace_hash_id + FROM causal_ancestors ca + JOIN rec tc ON ca.causal_id = tc.causal_id + JOIN causals ancestor_causal ON ca.ancestor_id = ancestor_causal.id + UNION + SELECT child_causal.id, child_causal.hash, child_causal.namespace_hash_id + FROM rec tc + JOIN namespace_children nc ON tc.causal_namespace_hash_id = nc.parent_namespace_hash_id + JOIN causals child_causal ON nc.child_causal_id = child_causal.id + ) + ), all_namespaces(namespace_hash_id, namespace_hash) AS ( + SELECT DISTINCT tc.causal_namespace_hash_id AS namespace_hash_id, bh.base32 as namespace_hash + FROM all_causals tc + JOIN branch_hashes bh ON tc.causal_namespace_hash_id = bh.id + ), all_patches(patch_id, patch_hash) AS ( + SELECT DISTINCT patch.id, patch.hash + FROM all_namespaces an + JOIN namespace_patches np ON an.namespace_hash_id = np.namespace_hash_id + JOIN patches patch ON np.patch_id = patch.id + ), + -- term components to start transitively joining dependencies to + base_term_components(component_hash_id) AS ( + SELECT DISTINCT term.component_hash_id + FROM all_namespaces an + JOIN namespace_terms nt ON an.namespace_hash_id = nt.namespace_hash_id + JOIN terms term ON nt.term_id = term.id + UNION + SELECT DISTINCT term.component_hash_id + FROM all_patches ap + JOIN patch_term_mappings ptm ON ap.patch_id = ptm.patch_id + JOIN terms term ON ptm.to_term_id = term.id + UNION + -- term metadata + SELECT DISTINCT term.component_hash_id + FROM all_namespaces an + JOIN namespace_terms nt ON an.namespace_hash_id = nt.namespace_hash_id + JOIN namespace_term_metadata meta ON nt.id = meta.named_term + JOIN terms term ON meta.metadata_term_id = term.id + UNION + -- type metadata + SELECT DISTINCT term.component_hash_id + FROM all_namespaces an + JOIN namespace_types nt ON an.namespace_hash_id = nt.namespace_hash_id + JOIN namespace_type_metadata meta ON nt.id = meta.named_type + JOIN terms term ON meta.metadata_term_id = term.id + ), + -- type components to start transitively joining dependencies to + base_type_components(component_hash_id) AS ( + SELECT DISTINCT typ.component_hash_id + FROM all_namespaces an + JOIN namespace_types nt ON an.namespace_hash_id = nt.namespace_hash_id + JOIN types typ ON nt.type_id = typ.id + UNION + SELECT DISTINCT typ.component_hash_id + FROM all_namespaces an + JOIN namespace_terms nt ON an.namespace_hash_id = nt.namespace_hash_id + JOIN constructors con ON nt.constructor_id = con.id + JOIN types typ ON con.type_id = typ.id + UNION + SELECT DISTINCT typ.component_hash_id + FROM all_patches ap + JOIN patch_type_mappings ptm ON ap.patch_id = ptm.patch_id + JOIN types typ ON ptm.to_type_id = typ.id + UNION + SELECT DISTINCT typ.component_hash_id + FROM all_patches ap + JOIN patch_constructor_mappings pcm ON ap.patch_id = pcm.patch_id + JOIN constructors con ON pcm.to_constructor_id = con.id + JOIN types typ ON con.type_id = typ.id + ), + -- All the dependencies we join in transitively from the known term & type components we depend on. + all_components(component_hash_id) AS ( + SELECT DISTINCT btc.component_hash_id + FROM base_term_components btc + UNION + SELECT DISTINCT btc.component_hash_id + FROM base_type_components btc + UNION + ( WITH rec AS ( + SELECT DISTINCT ac.component_hash_id + FROM all_components ac + ) + -- recursively union in term dependencies + SELECT DISTINCT ref.component_hash_id + FROM rec atc + -- This joins in ALL the terms from the component, not just the one that caused the dependency on the + -- component + JOIN terms term ON atc.component_hash_id = term.component_hash_id + JOIN term_local_component_references ref ON term.id = ref.term_id + UNION + -- recursively union in type dependencies + SELECT DISTINCT ref.component_hash_id + FROM rec atc + -- This joins in ALL the types from the component, not just the one that caused the dependency on the + -- component + JOIN types typ ON atc.component_hash_id = typ.component_hash_id + JOIN type_local_component_references ref ON typ.id = ref.type_id + ) + ) + (SELECT ch.base32 AS hash + FROM all_components ac + JOIN component_hashes ch ON ac.component_hash_id = ch.id + ) + UNION ALL + (SELECT ap.patch_hash AS hash + FROM all_patches ap + ) + UNION ALL + (SELECT an.namespace_hash AS hash + FROM all_namespaces an + ) + UNION ALL + (SELECT ac.causal_hash AS hash + FROM all_causals ac + ) +$$ LANGUAGE SQL; diff --git a/src/Share/Web/UCM/SyncV2/Impl.hs b/src/Share/Web/UCM/SyncV2/Impl.hs index dfef906..87d7e2c 100644 --- a/src/Share/Web/UCM/SyncV2/Impl.hs +++ b/src/Share/Web/UCM/SyncV2/Impl.hs @@ -37,6 +37,7 @@ import Share.Web.UCM.Sync.HashJWT qualified as HashJWT import Share.Web.UCM.SyncV2.Queries qualified as SSQ import Share.Web.UCM.SyncV2.Types (IsCausalSpine (..), IsLibRoot (..)) import U.Codebase.Sqlite.Orphans () +import Unison.Debug qualified as Debug import Unison.Hash32 (Hash32) import Unison.Share.API.Hash (HashJWTClaims (..)) import Unison.SyncV2.API qualified as SyncV2 @@ -108,10 +109,14 @@ causalDependenciesStreamImpl mayCallerUserId (SyncV2.CausalDependenciesRequest { codebase <- codebaseForBranchRef branchRef q <- UnliftIO.atomically $ STM.newTBMQueue 10 streamResults <- lift $ UnliftIO.toIO do + Logging.logInfoText "Starting causal dependencies stream" Codebase.runCodebaseTransaction codebase $ do (_bhId, causalId) <- CausalQ.expectCausalIdsOf id (hash32ToCausalHash causalHash) + Debug.debugLogM Debug.Temp "Getting cursor" cursor <- SSQ.spineAndLibDependenciesOfCausalCursor causalId + Debug.debugLogM Debug.Temp "Folding cursor" Cursor.foldBatched cursor batchSize \batch -> do + Debug.debugLogM Debug.Temp "Got batch" let depBatch = batch <&> \(causalHash, isCausalSpine, isLibRoot) -> let dependencyType = case (isCausalSpine, isLibRoot) of diff --git a/src/Share/Web/UCM/SyncV2/Queries.hs b/src/Share/Web/UCM/SyncV2/Queries.hs index c78ac2f..ca92b01 100644 --- a/src/Share/Web/UCM/SyncV2/Queries.hs +++ b/src/Share/Web/UCM/SyncV2/Queries.hs @@ -13,6 +13,7 @@ import Share.Postgres.IDs import Share.Prelude import Share.Web.UCM.SyncV2.Types (IsCausalSpine (..), IsLibRoot (..)) import U.Codebase.Sqlite.TempEntity (TempEntity) +import Unison.Debug qualified as Debug import Unison.Hash32 (Hash32) import Unison.SyncV2.Types (CBORBytes) @@ -186,17 +187,23 @@ import Unison.SyncV2.Types (CBORBytes) allSerializedDependenciesOfCausalCursor :: CausalId -> Set CausalHash -> CodebaseM e (PGCursor (CBORBytes TempEntity, Hash32)) allSerializedDependenciesOfCausalCursor cid exceptCausalHashes = do ownerUserId <- asks codebaseOwner - execute_ [sql| CREATE TEMP TABLE except_causals ( causal_id INTEGER NOT NULL ) ON COMMIT DROP |] + Debug.debugLogM Debug.Temp "created except_hashes temp table." + -- Create a temp table for storing the dependencies we know the calling client already has. + execute_ [sql| CREATE TEMP TABLE except_hashes ( hash TEXT NOT NULL PRIMARY KEY ) ON COMMIT DROP |] + Debug.debugLogM Debug.Temp "filling in except_hashes temp table." execute_ - [sql| INSERT INTO except_causals (causal_id) - WITH the_causal_hashes(hash) AS (SELECT * FROM ^{singleColumnTable (toList exceptCausalHashes)}) - SELECT c.id + [sql| + WITH the_causal_hashes(hash) AS ( + SELECT * FROM ^{singleColumnTable (toList exceptCausalHashes)} + ), known_causal_ids(causal_id) AS ( + SELECT c.id FROM the_causal_hashes tch JOIN causals c ON tch.hash = c.hash + ) INSERT INTO except_hashes(hash) + SELECT DISTINCT deps.hash FROM dependencies_of_causals((SELECT ARRAY_AGG(kci.causal_id) FROM known_causal_ids kci)) AS deps + ON CONFLICT DO NOTHING |] - execute_ - [sql| - |] + Debug.debugLogM Debug.Temp "Running cursor query" cursor <- PGCursor.newRowCursor "serialized_entities" @@ -206,7 +213,7 @@ allSerializedDependenciesOfCausalCursor cid exceptCausalHashes = do FROM causals causal WHERE causal.id = #{cid} AND EXISTS (SELECT FROM causal_ownership co WHERE co.user_id = #{ownerUserId} AND co.causal_id = causal.id) - AND NOT EXISTS (SELECT FROM except_causals ec WHERE ec.causal_id = causal.id) + AND NOT EXISTS (SELECT FROM except_hashes ec WHERE ec.causal_id = causal.id) UNION -- This nested CTE is required because RECURSIVE CTEs can't refer -- to the recursive table more than once. @@ -218,23 +225,25 @@ allSerializedDependenciesOfCausalCursor cid exceptCausalHashes = do FROM causal_ancestors ca JOIN rec tc ON ca.causal_id = tc.causal_id JOIN causals ancestor_causal ON ca.ancestor_id = ancestor_causal.id - WHERE NOT EXISTS (SELECT FROM except_causals ec WHERE ec.causal_id = ancestor_causal.id) + WHERE NOT EXISTS (SELECT FROM except_hashes ec WHERE ec.causal_id = ancestor_causal.id) UNION SELECT child_causal.id, child_causal.hash, child_causal.namespace_hash_id FROM rec tc JOIN namespace_children nc ON tc.causal_namespace_hash_id = nc.parent_namespace_hash_id JOIN causals child_causal ON nc.child_causal_id = child_causal.id - WHERE NOT EXISTS (SELECT FROM except_causals ec WHERE ec.causal_id = child_causal.id) + WHERE NOT EXISTS (SELECT FROM except_hashes ec WHERE ec.causal_id = child_causal.id) ) ), all_namespaces(namespace_hash_id, namespace_hash) AS ( SELECT DISTINCT tc.causal_namespace_hash_id AS namespace_hash_id, bh.base32 as namespace_hash FROM transitive_causals tc JOIN branch_hashes bh ON tc.causal_namespace_hash_id = bh.id + WHERE NOT EXISTS (SELECT FROM except_hashes eh WHERE eh.hash = bh.base32) ), all_patches(patch_id, patch_hash) AS ( SELECT DISTINCT patch.id, patch.hash FROM all_namespaces an JOIN namespace_patches np ON an.namespace_hash_id = np.namespace_hash_id JOIN patches patch ON np.patch_id = patch.id + WHERE NOT EXISTS (SELECT FROM except_hashes eh WHERE eh.hash = patch.hash) ), -- term components to start transitively joining dependencies to base_term_components(component_hash_id) AS ( @@ -321,6 +330,9 @@ allSerializedDependenciesOfCausalCursor cid exceptCausalHashes = do JOIN serialized_components sc ON sc.user_id = #{ownerUserId} AND tc.component_hash_id = sc.component_hash_id JOIN bytes ON sc.bytes_id = bytes.id JOIN component_hashes ch ON tc.component_hash_id = ch.id + WHERE NOT EXISTS (SELECT FROM except_hashes eh WHERE eh.hash = ch.base32) + -- TODO: Filter out components we know we already have, + -- We should do this earlier in the process if possible. ) UNION ALL (SELECT bytes.bytes, ap.patch_hash