Skip to content

Commit 77ca8bc

Browse files
committed
wb: short-circuit hash timeline
1 parent 5165d49 commit 77ca8bc

File tree

4 files changed

+70
-42
lines changed

4 files changed

+70
-42
lines changed

bench/locli/src/Cardano/Analysis/BlockProp.hs

Lines changed: 26 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
module Cardano.Analysis.BlockProp
1212
( summariseMultiBlockProp
1313
, MachView
14-
, buildForgeTimeline
1514
, buildMachViews
1615
, rebuildChain
1716
, blockProp
@@ -28,10 +27,9 @@ import Control.Arrow ((***), (&&&))
2827
import Data.Aeson (ToJSON(..), FromJSON(..))
2928
import Data.Bifunctor
3029
import Data.Function (on)
31-
import Data.List (dropWhileEnd, intercalate, partition)
30+
import Data.List (break, dropWhileEnd, intercalate, partition, span)
3231
import Data.Map.Strict (Map)
3332
import Data.Map.Strict qualified as Map
34-
import Data.Set qualified as Set
3533
import Data.Maybe (catMaybes, mapMaybe, isNothing)
3634
import Data.Set (Set)
3735
import Data.Set qualified as Set
@@ -290,13 +288,6 @@ beForgedAt :: BlockEvents -> UTCTime
290288
beForgedAt BlockEvents{beForge=BlockForge{..}} =
291289
bfForged `afterSlot` bfSlotStart
292290

293-
buildForgeTimeline :: [(JsonLogfile, [LogObject])] -> IO [LogObject]
294-
buildForgeTimeline los = do
295-
loAllForges <- concat <$> mapConcurrentlyPure (filter match . snd) los
296-
pure $! sortOn loAt loAllForges
297-
where match LogObject{loBody = LOBlockForged{}} = True
298-
match _ = False
299-
300291
buildMachViews :: Run -> [(JsonLogfile, [LogObject])] -> IO [(JsonLogfile, MachView)]
301292
buildMachViews run = mapConcurrentlyPure (fst &&& blockEventMapsFromLogObjects run)
302293

@@ -844,15 +835,29 @@ collectEventErrors mbe phases =
844835
]
845836

846837
-- | Expects a log object stream of or including block forges.
847-
-- Returns those log objects where block hash doesn't reference a known forge.
848-
checkAllForgersKnown :: [LogObject] -> [LogObject]
849-
checkAllForgersKnown = go (Set.singleton $ Hash "GenesisHash")
838+
-- Returns the first log object + hash that we don't know a forge for.
839+
checkAllForgersKnown :: [LogObject] -> Maybe (LogObject, Hash)
840+
checkAllForgersKnown logobjs =
841+
getFirst $ First pass1 <> First pass2
850842
where
851-
go forged = \case
852-
[] -> []
853-
lo@LogObject{loBody = LOBlockForged{..}}:los ->
854-
let next = go (loBlock `Set.insert` forged)
855-
in if loPrev `Set.member` forged
856-
then next los
857-
else lo : next los
858-
lo:_ -> error $ "checkAllForgersKnown: unexpected LogObject: " ++ show lo
843+
(pass1, forged) = go1 (Set.singleton $ Hash "GenesisHash") logobjs
844+
pass2 = go2 forged logobjs
845+
846+
go1 forged = \case
847+
[] -> (Nothing, forged)
848+
lo@LogObject{loBody}:los -> case loBody of
849+
LOBlockForged{..}
850+
| loPrev `Set.member` forged -> go1 (loBlock `Set.insert` forged) los
851+
| otherwise -> (Just (lo, loPrev), forged)
852+
_ -> go1 forged los
853+
854+
go2 forged = \case
855+
[] -> Nothing
856+
lo@LogObject{loBody}:los -> case loBody of
857+
LOChainSyncClientSeenHeader{loBlock}
858+
| loBlock `Set.member` forged -> go2 forged los
859+
| otherwise -> Just (lo, loBlock)
860+
LOChainSyncServerSendHeader{loBlock}
861+
| loBlock `Set.member` forged -> go2 forged los
862+
| otherwise -> Just (lo, loBlock)
863+
_ -> go2 forged los

bench/locli/src/Cardano/Command.hs

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import Data.Tuple.Extra (both)
1919
import Options.Applicative
2020
import Options.Applicative qualified as Opt
2121

22+
import System.Directory (doesFileExist)
2223
import System.FilePath
2324
import System.Posix.Files qualified as IO
2425

@@ -55,7 +56,7 @@ data ChainCommand
5556
| Unlog (JsonInputFile (RunLogs ())) Bool (Maybe [LOAnyType])
5657
| DumpLogObjects
5758

58-
| BuildForgeTimeline
59+
| ValidateHashTimeline (JsonInputFile [LogObject])
5960

6061
| BuildMachViews
6162
| DumpMachViews
@@ -132,6 +133,9 @@ parseChainCommand =
132133
)
133134
, op "dump-logobjects" "Dump lifted log object streams, alongside input files"
134135
(DumpLogObjects & pure)
136+
, op "hash-timeline" "Quickly validate timeline by hashes"
137+
(ValidateHashTimeline
138+
<$> optJsonInputFile "timeline" "Hash timeline (JSON from prepare step)")
135139
]) <|>
136140

137141
subparser (mconcat [ commandGroup "Block propagation: machine views"
@@ -146,8 +150,6 @@ parseChainCommand =
146150
]) <|>
147151

148152
subparser (mconcat [ commandGroup "Block propagation"
149-
, op "forge-timeline" "Validate timeline of all forge events"
150-
(BuildForgeTimeline & pure)
151153
, op "rebuild-chain" "Rebuild chain"
152154
(RebuildChain
153155
<$> many
@@ -447,19 +449,22 @@ runChainCommand _ c@DumpLogObjects = missingCommandData c
447449

448450
-- runChainCommand s c@(ReadMachViews _ _) -- () -> [(JsonLogfile, MachView)]
449451

450-
runChainCommand s@State{sRunLogs=Just (rlLogs -> objs)}
451-
c@BuildForgeTimeline = do
452-
progress "machviews" (Q $ printf "gathering forges from %d machines" $ length objs)
453-
allForges <- buildForgeTimeline objs & liftIO
454-
case checkAllForgersKnown allForges of
455-
[] -> progress "machviews" (Q $ printf "all forgers known")
456-
xs -> throwE $ CommandError c $ LT.toStrict $ LT.unlines $
457-
"unknown forger for previous block hash of:" : map Aeson.encodeToLazyText xs
452+
runChainCommand s
453+
c@(ValidateHashTimeline timelineJson) = do
454+
progress "logs" (Q $ printf "validating hash timeline")
455+
let f = unJsonInputFile timelineJson
456+
hashTimeline <- liftIO $
457+
doesFileExist f >>= bool
458+
(return [])
459+
(readLogObjectStream f False Nothing)
460+
case (hashTimeline, checkAllForgersKnown hashTimeline) of
461+
([], _) -> progress "logs" (Q $ printf "%s not found - skipping" f)
462+
(_, Nothing) -> progress "logs" (Q $ printf "all forgers known")
463+
(_, Just (x, h)) -> throwE $ CommandError c $
464+
"unknown forger for block hash " <> (toText . unHash) h
465+
<> " in:\n" <> LT.toStrict (Aeson.encodeToLazyText x)
458466
pure s
459467

460-
runChainCommand _ c@BuildForgeTimeline = missingCommandData c
461-
["lifted logobjects"]
462-
463468
runChainCommand s@State{sRun=Just run, sRunLogs=Just (rlLogs -> objs)}
464469
BuildMachViews = do
465470
progress "machviews" (Q $ printf "building %d machviews" $ length objs)

bench/locli/src/Cardano/Unlog/LogObject.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ module Cardano.Unlog.LogObject
2525
, logObjectStreamInterpreterKeys
2626
, LOBody (..)
2727
, LOAnyType (..)
28+
, readLogObjectStream
2829
, textRefEquals
2930
)
3031
where
@@ -65,6 +66,7 @@ data TextRef
6566
deriving Generic
6667
deriving anyclass NFData
6768

69+
{-# NOINLINE lookupTextRef #-}
6870
lookupTextRef :: Int -> Text
6971
lookupTextRef ref = Map.findWithDefault Text.empty ref dict
7072
where

nix/workbench/analyse/analyse.sh

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -221,9 +221,9 @@ EOF
221221

222222
standard | full | std )
223223
local script=(
224+
hash-timeline
224225
logs $(test -n "$dump_logobjects" && echo 'dump-logobjects')
225226
read-context
226-
forge-timeline
227227

228228
build-mach-views $(test -n "$dump_machviews" && echo 'dump-mach-views')
229229
rebuild-chain
@@ -428,9 +428,9 @@ EOF
428428
vq=("${vp[@]/#read-summaries/ 'read-summaries' --summary \"$adir\"/summary.json }")
429429
vr=("${vq[@]/#summary-json/ 'render-summary' --json \"$adir\"/summary.json }")
430430
vs=("${vr[@]/#summary-report/ 'render-summary' --org-report \"$adir\"/summary.org ${locli_render[*]}}")
431-
vt=("${vs[@]/#forge-timeline/ 'forge-timeline'}")
431+
vt=("${vs[@]/#hash-timeline/ 'hash-timeline' --timeline \"$adir\"/hash-timeline.json }")
432432
local ops_final=()
433-
for v in "${vs[@]}"
433+
for v in "${vt[@]}"
434434
do eval ops_final+=($v); done
435435

436436
call_locli "$rtsmode" "${ops_final[@]}"
@@ -439,6 +439,7 @@ EOF
439439
fgrep -v -e '.flt.json' \
440440
-e '.logobjs.json' \
441441
-e 'chain.json' \
442+
-e 'hash-timeline.json' \
442443
-e 'log-manifest.json' \
443444
-e 'mach-views.json' \
444445
-e 'prof.json' \
@@ -573,7 +574,7 @@ EOF
573574
fi
574575

575576
if test ${#remanifest_reasons[*]} = 0
576-
then progress "analyse" "log manifest exists and is up to date"
577+
then progress "analyse" "log manifest up to date for raw logs"
577578
else progress "analyse" "assembling log manifest: ${remanifest_reasons[*]}"
578579
echo '{}' > $run_logs
579580
time {
@@ -622,6 +623,7 @@ EOF
622623
done
623624
wait
624625

626+
# in case consolidated logs have to be truncated, we document the actual timestamps from the raw logs
625627
for mach in $(jq_tolist '.rlHostLogs | keys' $run_logs)
626628
do jq_fmutate "$run_logs" '
627629
.rlHostLogs["'"$mach"'"].hlRawFirstAt = '"$(cat $adir/logs-$mach.flt.json | head -n1 | jq .at)"'
@@ -631,6 +633,7 @@ EOF
631633
}
632634
fi
633635

636+
progress "analyse" "log manifest updating for consolidated logs"
634637
for mach in $(jq_tolist '.rlHostLogs | keys' $run_logs)
635638
do jq_fmutate "$run_logs" '
636639
.rlHostLogs[$mach].hlRawSha256 = $raw_sha256
@@ -643,16 +646,29 @@ EOF
643646
- ($keypairs | map (.[1])) # old tracing .kinds
644647
- ["", "unknown0", "unknown1"]
645648
| unique)
646-
| .rlHostLogs[$mach].hlFilteredSha256 = $filtered_sha256
647649
' --sort-keys \
648650
--arg mach $mach \
649651
--rawfile raw_sha256 "$adir"/logs-$mach.sha256 \
650-
--arg filtered_sha256 $(sha256sum < $adir/logs-$mach.flt.json | \
651-
cut -d' ' -f1 | xargs echo -n) \
652652
--slurpfile freqs "$adir"/logs-$mach.tracefreq.json \
653653
--rawfile keys $keyfile
654654
done
655655

656+
local ht_json=$adir/hash-timeline.json
657+
if test "$(ls 2>/dev/null --sort=time $adir/logs-*.flt.json $ht_json | head -n1)" = "$ht_json"
658+
then progress "analyse" "hash timeline up to date"
659+
else progress "analyse" "building hash timeline"
660+
grep -h 'TraceForgedBlock\|DownloadedHeader' $adir/logs-*.flt.json | sort > $ht_json
661+
662+
# skip checksumming consolidated logs for now, to facilitate fast truncation of long runs
663+
#for mach in $(jq_tolist '.rlHostLogs | keys' $run_logs)
664+
#do jq_fmutate "$run_logs" '
665+
# .rlHostLogs[$mach].hlFilteredSha256 = $filtered_sha256
666+
# ' --arg mach $mach \
667+
# --arg filtered_sha256 $(sha256sum < $adir/logs-$mach.flt.json | \
668+
# cut -d' ' -f1 | xargs echo -n)
669+
#done
670+
fi
671+
656672
jq_fmutate "$run_logs" '
657673
.rlMissingTraces =
658674
( .rlHostLogs

0 commit comments

Comments
 (0)