Skip to content

Commit

Permalink
Exclude dependencies of known hashes as well - This is hella slow
Browse files Browse the repository at this point in the history
  • Loading branch information
ChrisPenner committed Feb 3, 2025
1 parent 2bc1d49 commit af361db
Show file tree
Hide file tree
Showing 3 changed files with 158 additions and 10 deletions.
131 changes: 131 additions & 0 deletions sql/2025-01-31_dependencies-of-causal.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
-- Takes a causal_id and returns a table of ALL hashes which are dependencies of that causal.
CREATE OR REPLACE FUNCTION dependencies_of_causals(the_causal_ids INTEGER[]) RETURNS TABLE (hash TEXT) AS $$
WITH RECURSIVE all_causals(causal_id, causal_hash, causal_namespace_hash_id) AS (
-- Base causal
SELECT causal.id, causal.hash, causal.namespace_hash_id
FROM UNNEST(the_causal_ids) AS causal_id
JOIN causals causal ON causal.id = causal_id
UNION
-- This nested CTE is required because RECURSIVE CTEs can't refer
-- to the recursive table more than once.
-- I don't fully understand why or how this works, but it does
( WITH rec AS (
SELECT tc.causal_id, tc.causal_namespace_hash_id
FROM all_causals tc
)
SELECT ancestor_causal.id, ancestor_causal.hash, ancestor_causal.namespace_hash_id
FROM causal_ancestors ca
JOIN rec tc ON ca.causal_id = tc.causal_id
JOIN causals ancestor_causal ON ca.ancestor_id = ancestor_causal.id
UNION
SELECT child_causal.id, child_causal.hash, child_causal.namespace_hash_id
FROM rec tc
JOIN namespace_children nc ON tc.causal_namespace_hash_id = nc.parent_namespace_hash_id
JOIN causals child_causal ON nc.child_causal_id = child_causal.id
)
), all_namespaces(namespace_hash_id, namespace_hash) AS (
SELECT DISTINCT tc.causal_namespace_hash_id AS namespace_hash_id, bh.base32 as namespace_hash
FROM all_causals tc
JOIN branch_hashes bh ON tc.causal_namespace_hash_id = bh.id
), all_patches(patch_id, patch_hash) AS (
SELECT DISTINCT patch.id, patch.hash
FROM all_namespaces an
JOIN namespace_patches np ON an.namespace_hash_id = np.namespace_hash_id
JOIN patches patch ON np.patch_id = patch.id
),
-- term components to start transitively joining dependencies to
base_term_components(component_hash_id) AS (
SELECT DISTINCT term.component_hash_id
FROM all_namespaces an
JOIN namespace_terms nt ON an.namespace_hash_id = nt.namespace_hash_id
JOIN terms term ON nt.term_id = term.id
UNION
SELECT DISTINCT term.component_hash_id
FROM all_patches ap
JOIN patch_term_mappings ptm ON ap.patch_id = ptm.patch_id
JOIN terms term ON ptm.to_term_id = term.id
UNION
-- term metadata
SELECT DISTINCT term.component_hash_id
FROM all_namespaces an
JOIN namespace_terms nt ON an.namespace_hash_id = nt.namespace_hash_id
JOIN namespace_term_metadata meta ON nt.id = meta.named_term
JOIN terms term ON meta.metadata_term_id = term.id
UNION
-- type metadata
SELECT DISTINCT term.component_hash_id
FROM all_namespaces an
JOIN namespace_types nt ON an.namespace_hash_id = nt.namespace_hash_id
JOIN namespace_type_metadata meta ON nt.id = meta.named_type
JOIN terms term ON meta.metadata_term_id = term.id
),
-- type components to start transitively joining dependencies to
base_type_components(component_hash_id) AS (
SELECT DISTINCT typ.component_hash_id
FROM all_namespaces an
JOIN namespace_types nt ON an.namespace_hash_id = nt.namespace_hash_id
JOIN types typ ON nt.type_id = typ.id
UNION
SELECT DISTINCT typ.component_hash_id
FROM all_namespaces an
JOIN namespace_terms nt ON an.namespace_hash_id = nt.namespace_hash_id
JOIN constructors con ON nt.constructor_id = con.id
JOIN types typ ON con.type_id = typ.id
UNION
SELECT DISTINCT typ.component_hash_id
FROM all_patches ap
JOIN patch_type_mappings ptm ON ap.patch_id = ptm.patch_id
JOIN types typ ON ptm.to_type_id = typ.id
UNION
SELECT DISTINCT typ.component_hash_id
FROM all_patches ap
JOIN patch_constructor_mappings pcm ON ap.patch_id = pcm.patch_id
JOIN constructors con ON pcm.to_constructor_id = con.id
JOIN types typ ON con.type_id = typ.id
),
-- All the dependencies we join in transitively from the known term & type components we depend on.
all_components(component_hash_id) AS (
SELECT DISTINCT btc.component_hash_id
FROM base_term_components btc
UNION
SELECT DISTINCT btc.component_hash_id
FROM base_type_components btc
UNION
( WITH rec AS (
SELECT DISTINCT ac.component_hash_id
FROM all_components ac
)
-- recursively union in term dependencies
SELECT DISTINCT ref.component_hash_id
FROM rec atc
-- This joins in ALL the terms from the component, not just the one that caused the dependency on the
-- component
JOIN terms term ON atc.component_hash_id = term.component_hash_id
JOIN term_local_component_references ref ON term.id = ref.term_id
UNION
-- recursively union in type dependencies
SELECT DISTINCT ref.component_hash_id
FROM rec atc
-- This joins in ALL the types from the component, not just the one that caused the dependency on the
-- component
JOIN types typ ON atc.component_hash_id = typ.component_hash_id
JOIN type_local_component_references ref ON typ.id = ref.type_id
)
)
(SELECT ch.base32 AS hash
FROM all_components ac
JOIN component_hashes ch ON ac.component_hash_id = ch.id
)
UNION ALL
(SELECT ap.patch_hash AS hash
FROM all_patches ap
)
UNION ALL
(SELECT an.namespace_hash AS hash
FROM all_namespaces an
)
UNION ALL
(SELECT ac.causal_hash AS hash
FROM all_causals ac
)
$$ LANGUAGE SQL;
5 changes: 5 additions & 0 deletions src/Share/Web/UCM/SyncV2/Impl.hs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import Share.Web.UCM.Sync.HashJWT qualified as HashJWT
import Share.Web.UCM.SyncV2.Queries qualified as SSQ
import Share.Web.UCM.SyncV2.Types (IsCausalSpine (..), IsLibRoot (..))
import U.Codebase.Sqlite.Orphans ()
import Unison.Debug qualified as Debug
import Unison.Hash32 (Hash32)
import Unison.Share.API.Hash (HashJWTClaims (..))
import Unison.SyncV2.API qualified as SyncV2
Expand Down Expand Up @@ -108,10 +109,14 @@ causalDependenciesStreamImpl mayCallerUserId (SyncV2.CausalDependenciesRequest {
codebase <- codebaseForBranchRef branchRef
q <- UnliftIO.atomically $ STM.newTBMQueue 10
streamResults <- lift $ UnliftIO.toIO do
Logging.logInfoText "Starting causal dependencies stream"
Codebase.runCodebaseTransaction codebase $ do
(_bhId, causalId) <- CausalQ.expectCausalIdsOf id (hash32ToCausalHash causalHash)
Debug.debugLogM Debug.Temp "Getting cursor"
cursor <- SSQ.spineAndLibDependenciesOfCausalCursor causalId
Debug.debugLogM Debug.Temp "Folding cursor"
Cursor.foldBatched cursor batchSize \batch -> do
Debug.debugLogM Debug.Temp "Got batch"
let depBatch =
batch <&> \(causalHash, isCausalSpine, isLibRoot) ->
let dependencyType = case (isCausalSpine, isLibRoot) of
Expand Down
32 changes: 22 additions & 10 deletions src/Share/Web/UCM/SyncV2/Queries.hs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import Share.Postgres.IDs
import Share.Prelude
import Share.Web.UCM.SyncV2.Types (IsCausalSpine (..), IsLibRoot (..))
import U.Codebase.Sqlite.TempEntity (TempEntity)
import Unison.Debug qualified as Debug
import Unison.Hash32 (Hash32)
import Unison.SyncV2.Types (CBORBytes)

Expand Down Expand Up @@ -186,17 +187,23 @@ import Unison.SyncV2.Types (CBORBytes)
allSerializedDependenciesOfCausalCursor :: CausalId -> Set CausalHash -> CodebaseM e (PGCursor (CBORBytes TempEntity, Hash32))
allSerializedDependenciesOfCausalCursor cid exceptCausalHashes = do
ownerUserId <- asks codebaseOwner
execute_ [sql| CREATE TEMP TABLE except_causals ( causal_id INTEGER NOT NULL ) ON COMMIT DROP |]
Debug.debugLogM Debug.Temp "created except_hashes temp table."
-- Create a temp table for storing the dependencies we know the calling client already has.
execute_ [sql| CREATE TEMP TABLE except_hashes ( hash TEXT NOT NULL PRIMARY KEY ) ON COMMIT DROP |]
Debug.debugLogM Debug.Temp "filling in except_hashes temp table."
execute_
[sql| INSERT INTO except_causals (causal_id)
WITH the_causal_hashes(hash) AS (SELECT * FROM ^{singleColumnTable (toList exceptCausalHashes)})
SELECT c.id
[sql|
WITH the_causal_hashes(hash) AS (
SELECT * FROM ^{singleColumnTable (toList exceptCausalHashes)}
), known_causal_ids(causal_id) AS (
SELECT c.id
FROM the_causal_hashes tch
JOIN causals c ON tch.hash = c.hash
) INSERT INTO except_hashes(hash)
SELECT DISTINCT deps.hash FROM dependencies_of_causals((SELECT ARRAY_AGG(kci.causal_id) FROM known_causal_ids kci)) AS deps
ON CONFLICT DO NOTHING
|]
execute_
[sql|
|]
Debug.debugLogM Debug.Temp "Running cursor query"
cursor <-
PGCursor.newRowCursor
"serialized_entities"
Expand All @@ -206,7 +213,7 @@ allSerializedDependenciesOfCausalCursor cid exceptCausalHashes = do
FROM causals causal
WHERE causal.id = #{cid}
AND EXISTS (SELECT FROM causal_ownership co WHERE co.user_id = #{ownerUserId} AND co.causal_id = causal.id)
AND NOT EXISTS (SELECT FROM except_causals ec WHERE ec.causal_id = causal.id)
AND NOT EXISTS (SELECT FROM except_hashes ec WHERE ec.causal_id = causal.id)
UNION
-- This nested CTE is required because RECURSIVE CTEs can't refer
-- to the recursive table more than once.
Expand All @@ -218,23 +225,25 @@ allSerializedDependenciesOfCausalCursor cid exceptCausalHashes = do
FROM causal_ancestors ca
JOIN rec tc ON ca.causal_id = tc.causal_id
JOIN causals ancestor_causal ON ca.ancestor_id = ancestor_causal.id
WHERE NOT EXISTS (SELECT FROM except_causals ec WHERE ec.causal_id = ancestor_causal.id)
WHERE NOT EXISTS (SELECT FROM except_hashes ec WHERE ec.causal_id = ancestor_causal.id)
UNION
SELECT child_causal.id, child_causal.hash, child_causal.namespace_hash_id
FROM rec tc
JOIN namespace_children nc ON tc.causal_namespace_hash_id = nc.parent_namespace_hash_id
JOIN causals child_causal ON nc.child_causal_id = child_causal.id
WHERE NOT EXISTS (SELECT FROM except_causals ec WHERE ec.causal_id = child_causal.id)
WHERE NOT EXISTS (SELECT FROM except_hashes ec WHERE ec.causal_id = child_causal.id)
)
), all_namespaces(namespace_hash_id, namespace_hash) AS (
SELECT DISTINCT tc.causal_namespace_hash_id AS namespace_hash_id, bh.base32 as namespace_hash
FROM transitive_causals tc
JOIN branch_hashes bh ON tc.causal_namespace_hash_id = bh.id
WHERE NOT EXISTS (SELECT FROM except_hashes eh WHERE eh.hash = bh.base32)
), all_patches(patch_id, patch_hash) AS (
SELECT DISTINCT patch.id, patch.hash
FROM all_namespaces an
JOIN namespace_patches np ON an.namespace_hash_id = np.namespace_hash_id
JOIN patches patch ON np.patch_id = patch.id
WHERE NOT EXISTS (SELECT FROM except_hashes eh WHERE eh.hash = patch.hash)
),
-- term components to start transitively joining dependencies to
base_term_components(component_hash_id) AS (
Expand Down Expand Up @@ -321,6 +330,9 @@ allSerializedDependenciesOfCausalCursor cid exceptCausalHashes = do
JOIN serialized_components sc ON sc.user_id = #{ownerUserId} AND tc.component_hash_id = sc.component_hash_id
JOIN bytes ON sc.bytes_id = bytes.id
JOIN component_hashes ch ON tc.component_hash_id = ch.id
WHERE NOT EXISTS (SELECT FROM except_hashes eh WHERE eh.hash = ch.base32)
-- TODO: Filter out components we know we already have,
-- We should do this earlier in the process if possible.
)
UNION ALL
(SELECT bytes.bytes, ap.patch_hash
Expand Down

0 comments on commit af361db

Please sign in to comment.