Skip to content

Commit 3f50047

Browse files
authored
Merge pull request #5448 from input-output-hk/bench-master
Workbench | locli: analysis improvements and fixes
2 parents fa98c84 + 77ca8bc commit 3f50047

File tree

9 files changed

+163
-25
lines changed

9 files changed

+163
-25
lines changed

bench/locli/locli.cabal

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
cabal-version: 3.0
22

33
name: locli
4-
version: 1.30
4+
version: 1.31
55
synopsis: Cardano log analysis CLI
66
description: Cardano log analysis CLI.
77
category: Cardano,
@@ -113,6 +113,7 @@ library
113113
, file-embed
114114
, filepath
115115
, fingertree
116+
, hashable
116117
, ghc
117118
, gnuplot
118119
, iohk-monitoring

bench/locli/src/Cardano/Analysis/API/Context.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ import Cardano.Prelude
99
import Control.Monad (fail)
1010

1111
import Data.Aeson ( FromJSON (..), ToJSON (..), Value
12-
, withObject, object, (.:), (.:?), (.=))
12+
, withObject, object, (.:), (.:?), (.=), (.!=))
1313
import Data.Aeson.Key qualified as AE
1414
import Data.Aeson.KeyMap qualified as AE
1515
import Data.Aeson.Types qualified as AE
@@ -108,7 +108,7 @@ unknownComponent ciName = ComponentInfo
108108
instance FromJSON ComponentInfo where
109109
parseJSON = withObject "Component" $ \v -> do
110110
ciName <- v .: "name"
111-
ciCommit <- v .: "commit"
111+
ciCommit <- v .:? "commit" .!= Commit "unknown" -- workaround for commit hash missing from manifest
112112
ciBranch <- v .:? "branch"
113113
ciStatus <- v .:? "status"
114114
ciVersion <- v .: "version"

bench/locli/src/Cardano/Analysis/BlockProp.hs

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ module Cardano.Analysis.BlockProp
1616
, blockProp
1717
, BlockPropError(..)
1818
, renderBlockPropError
19+
, checkAllForgersKnown
1920
)
2021
where
2122

@@ -26,7 +27,7 @@ import Control.Arrow ((***), (&&&))
2627
import Data.Aeson (ToJSON(..), FromJSON(..))
2728
import Data.Bifunctor
2829
import Data.Function (on)
29-
import Data.List (dropWhileEnd, intercalate, partition)
30+
import Data.List (break, dropWhileEnd, intercalate, partition, span)
3031
import Data.Map.Strict (Map)
3132
import Data.Map.Strict qualified as Map
3233
import Data.Maybe (catMaybes, mapMaybe, isNothing)
@@ -832,3 +833,31 @@ collectEventErrors mbe phases =
832833
, let neg = ((< 0) <$> proj) == SJust True
833834
, miss || neg
834835
]
836+
837+
-- | Expects a log object stream of or including block forges.
838+
-- Returns the first log object + hash that we don't know a forge for.
839+
checkAllForgersKnown :: [LogObject] -> Maybe (LogObject, Hash)
840+
checkAllForgersKnown logobjs =
841+
getFirst $ First pass1 <> First pass2
842+
where
843+
(pass1, forged) = go1 (Set.singleton $ Hash "GenesisHash") logobjs
844+
pass2 = go2 forged logobjs
845+
846+
go1 forged = \case
847+
[] -> (Nothing, forged)
848+
lo@LogObject{loBody}:los -> case loBody of
849+
LOBlockForged{..}
850+
| loPrev `Set.member` forged -> go1 (loBlock `Set.insert` forged) los
851+
| otherwise -> (Just (lo, loPrev), forged)
852+
_ -> go1 forged los
853+
854+
go2 forged = \case
855+
[] -> Nothing
856+
lo@LogObject{loBody}:los -> case loBody of
857+
LOChainSyncClientSeenHeader{loBlock}
858+
| loBlock `Set.member` forged -> go2 forged los
859+
| otherwise -> Just (lo, loBlock)
860+
LOChainSyncServerSendHeader{loBlock}
861+
| loBlock `Set.member` forged -> go2 forged los
862+
| otherwise -> Just (lo, loBlock)
863+
_ -> go2 forged los

bench/locli/src/Cardano/Analysis/MachPerf.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ timelineFromLogObjects run@Run{genesis} (f, xs') =
4141
$ foldl' (timelineStep run f) zeroTimelineAccum xs
4242
& (aRunScalars &&& reverse . aSlotStats)
4343
where
44-
xs = filter ((/= "DecodeError") . loKind) xs'
44+
xs = filter (not . (`textRefEquals` "DecodeError") . loKind) xs'
4545

4646
firstRelevantLogObjectTime :: UTCTime
4747
firstRelevantLogObjectTime = loAt (head xs) `max` systemStart genesis

bench/locli/src/Cardano/Analysis/Summary.hs

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -172,16 +172,20 @@ computeSummary sumAnalysisTime
172172
where
173173
cdfLogLineRate = cdf stdCentiles lineRates
174174

175+
hostLogs =
176+
rlHostLogs
177+
& Map.elems
178+
175179
(,) logObjectsEmitted textLinesEmitted =
176-
rlHostLogs
177-
& Map.toList
178-
& fmap ((hlRawLogObjects &&& hlRawLines) . snd)
180+
hostLogs
181+
& fmap (hlRawLogObjects &&& hlRawLines)
179182
& unzip
183+
180184
objLists = rlLogs rl <&> snd
181185

182186
losFirsts, losLasts :: [UTCTime]
183-
losFirsts = objLists <&> loAt . Prelude.head
184-
losLasts = objLists <&> loAt . Prelude.last
187+
losFirsts = hostLogs <&> (\HostLogs{hlLogs, hlRawFirstAt} -> fromMaybe (loAt $ Prelude.head $ snd hlLogs) hlRawFirstAt)
188+
losLasts = hostLogs <&> (\HostLogs{hlLogs, hlRawLastAt} -> fromMaybe (loAt $ Prelude.last $ snd hlLogs) hlRawLastAt)
185189
runtimes :: [NominalDiffTime]
186190
runtimes = zipWith diffUTCTime losLasts losFirsts
187191
lineRates = zipWith (/) (textLinesEmitted <&> fromIntegral)

bench/locli/src/Cardano/Command.hs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import Data.Tuple.Extra (both)
1919
import Options.Applicative
2020
import Options.Applicative qualified as Opt
2121

22+
import System.Directory (doesFileExist)
2223
import System.FilePath
2324
import System.Posix.Files qualified as IO
2425

@@ -55,6 +56,8 @@ data ChainCommand
5556
| Unlog (JsonInputFile (RunLogs ())) Bool (Maybe [LOAnyType])
5657
| DumpLogObjects
5758

59+
| ValidateHashTimeline (JsonInputFile [LogObject])
60+
5861
| BuildMachViews
5962
| DumpMachViews
6063
| ReadMachViews [JsonLogfile]
@@ -130,6 +133,9 @@ parseChainCommand =
130133
)
131134
, op "dump-logobjects" "Dump lifted log object streams, alongside input files"
132135
(DumpLogObjects & pure)
136+
, op "hash-timeline" "Quickly validate timeline by hashes"
137+
(ValidateHashTimeline
138+
<$> optJsonInputFile "timeline" "Hash timeline (JSON from prepare step)")
133139
]) <|>
134140

135141
subparser (mconcat [ commandGroup "Block propagation: machine views"
@@ -443,6 +449,22 @@ runChainCommand _ c@DumpLogObjects = missingCommandData c
443449

444450
-- runChainCommand s c@(ReadMachViews _ _) -- () -> [(JsonLogfile, MachView)]
445451

452+
runChainCommand s
453+
c@(ValidateHashTimeline timelineJson) = do
454+
progress "logs" (Q $ printf "validating hash timeline")
455+
let f = unJsonInputFile timelineJson
456+
hashTimeline <- liftIO $
457+
doesFileExist f >>= bool
458+
(return [])
459+
(readLogObjectStream f False Nothing)
460+
case (hashTimeline, checkAllForgersKnown hashTimeline) of
461+
([], _) -> progress "logs" (Q $ printf "%s not found - skipping" f)
462+
(_, Nothing) -> progress "logs" (Q $ printf "all forgers known")
463+
(_, Just (x, h)) -> throwE $ CommandError c $
464+
"unknown forger for block hash " <> (toText . unHash) h
465+
<> " in:\n" <> LT.toStrict (Aeson.encodeToLazyText x)
466+
pure s
467+
446468
runChainCommand s@State{sRun=Just run, sRunLogs=Just (rlLogs -> objs)}
447469
BuildMachViews = do
448470
progress "machviews" (Q $ printf "building %d machviews" $ length objs)

bench/locli/src/Cardano/Unlog/LogObject.hs

Lines changed: 60 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ module Cardano.Unlog.LogObject
2525
, logObjectStreamInterpreterKeys
2626
, LOBody (..)
2727
, LOAnyType (..)
28+
, readLogObjectStream
29+
, textRefEquals
2830
)
2931
where
3032

@@ -36,6 +38,7 @@ import qualified Data.Aeson.Key as Aeson
3638
import qualified Data.Aeson.KeyMap as KeyMap
3739
import Data.Aeson.Types (Parser)
3840
import qualified Data.ByteString.Lazy as LBS
41+
import Data.Hashable (hash)
3942
import qualified Data.Map.Strict as Map
4043
import qualified Data.Text as LText
4144
import Data.Text.Short (ShortText, fromText, toText)
@@ -54,6 +57,54 @@ import Cardano.Util
5457

5558
type Text = ShortText
5659

60+
-- | Us of the a TextRef replaces commonly expected string parses with references
61+
-- into a Map, reducing memory footprint - given that large runs can contain
62+
-- >25mio log objects.
63+
data TextRef
64+
= TextRef {-# UNPACK #-} !Int
65+
| TextLit {-# UNPACK #-} !Text
66+
deriving Generic
67+
deriving anyclass NFData
68+
69+
{-# NOINLINE lookupTextRef #-}
70+
lookupTextRef :: Int -> Text
71+
lookupTextRef ref = Map.findWithDefault Text.empty ref dict
72+
where
73+
dict = Map.fromList [(hash t, t) | t <- concat [allKeys, kinds, legacy]]
74+
kinds = map ("Cardano.Node." <>) allKeys
75+
legacy = map ("cardano.node." <>)
76+
[ "BlockFetchClient"
77+
, "BlockFetchServer"
78+
, "ChainDB"
79+
, "ChainSyncClient"
80+
, "ChainSyncHeaderServer"
81+
, "DnsSubscription"
82+
, "Forge"
83+
, "IpSubscription"
84+
, "LeadershipCheck"
85+
, "Mempool"
86+
, "resources"
87+
, "TxInbound"
88+
]
89+
allKeys =
90+
concatMap Map.keys [fst3 interpreters, snd3 interpreters, thd3 interpreters]
91+
& filter (not . Text.null)
92+
93+
toTextRef :: Text -> TextRef
94+
toTextRef t = let h = hash t in if Text.null (lookupTextRef h) then TextLit t else TextRef h
95+
96+
textRefEquals :: TextRef -> Text -> Bool
97+
textRefEquals (TextRef i) = (== lookupTextRef i)
98+
textRefEquals (TextLit t) = (== t)
99+
100+
instance Show TextRef where
101+
show (TextRef i) = show $ lookupTextRef i
102+
show (TextLit t) = show t
103+
104+
instance ToJSON TextRef where
105+
toJSON (TextRef i) = toJSON $ lookupTextRef i
106+
toJSON (TextLit t) = toJSON t
107+
57108
-- | Input data.
58109
data HostLogs a
59110
= HostLogs
@@ -65,6 +116,8 @@ data HostLogs a
65116
, hlLogs :: (JsonLogfile, a)
66117
, hlFilteredSha256 :: Hash
67118
, hlProfile :: [ProfileEntry I]
119+
, hlRawFirstAt :: Maybe UTCTime
120+
, hlRawLastAt :: Maybe UTCTime
68121
}
69122
deriving (Generic)
70123

@@ -128,7 +181,7 @@ readLogObjectStream f okDErr loAnyLimit =
128181
fmap (\bs ->
129182
AE.eitherDecode bs &
130183
either
131-
(LogObject zeroUTCTime "Cardano.Analysis.DecodeError" "DecodeError" "" (TId "0")
184+
(LogObject zeroUTCTime (TextLit "Cardano.Analysis.DecodeError") (TextLit "DecodeError") "" (TId "0")
132185
. LODecodeError (Text.fromByteString (LBS.toStrict bs)
133186
& fromMaybe "#<ERROR decoding input fromByteString>")
134187
. Text.fromText
@@ -143,8 +196,8 @@ readLogObjectStream f okDErr loAnyLimit =
143196
data LogObject
144197
= LogObject
145198
{ loAt :: !UTCTime
146-
, loNS :: !Text
147-
, loKind :: !Text
199+
, loNS :: !TextRef
200+
, loKind :: !TextRef
148201
, loHost :: !Host
149202
, loTid :: !TId
150203
, loBody :: !LOBody
@@ -348,6 +401,8 @@ interpreters = map3ple Map.fromList . unzip3 . fmap ent $
348401
map3ple :: (a -> b) -> (a,a,a) -> (b,b,b)
349402
map3ple f (x,y,z) = (f x, f y, f z)
350403

404+
405+
351406
logObjectStreamInterpreterKeysLegacy, logObjectStreamInterpreterKeys :: [Text]
352407
logObjectStreamInterpreterKeysLegacy =
353408
logObjectStreamInterpreterKeysLegacy1 <> logObjectStreamInterpreterKeysLegacy2
@@ -457,8 +512,8 @@ instance FromJSON LogObject where
457512
"The 'ns' field must be either a string, or a singleton-String vector, was: " <> show x
458513
LogObject
459514
<$> v .: "at"
460-
<*> pure ns
461-
<*> pure kind
515+
<*> pure (toTextRef ns)
516+
<*> pure (toTextRef kind)
462517
<*> v .: "host"
463518
<*> v .: "thread"
464519
<*> case Map.lookup ns (thd3 interpreters)

nix/workbench/analyse/analyse.sh

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,7 @@ EOF
221221

222222
standard | full | std )
223223
local script=(
224+
hash-timeline
224225
logs $(test -n "$dump_logobjects" && echo 'dump-logobjects')
225226
read-context
226227

@@ -400,9 +401,9 @@ EOF
400401
local v0 v1 v2 v3 v4 v5 v6 v7 v8 v9 va vb vc vd ve vf vg vh vi vj vk vl vm vn vo
401402
v0=( $* )
402403
v1=("${v0[@]/#logs/ 'unlog' --run-logs \"$adir\"/log-manifest.json ${analysis_allowed_loanys[*]/#/--ok-loany } }")
403-
v2=("${v1[@]/#read-context/ 'read-meta-genesis' --run-metafile \"$dir\"/meta.json --shelley-genesis \"$dir\"/genesis-shelley.json }")
404+
v2=("${v1[@]/#read-context/ 'read-meta-genesis' --run-metafile \"$dir\"/meta.json --shelley-genesis \"$dir\"/genesis-shelley.json }")
404405
v3=("${v2[@]/#write-context/ 'write-meta-genesis' --run-metafile \"$dir\"/meta.json --shelley-genesis \"$dir\"/genesis-shelley.json }")
405-
v4=("${v3[@]/#read-chain/ 'read-chain' --chain \"$adir\"/chain.json}")
406+
v4=("${v3[@]/#read-chain/ 'read-chain' --chain \"$adir\"/chain.json}")
406407
v5=("${v4[@]/#rebuild-chain/ 'rebuild-chain' ${filters[@]}}")
407408
v6=("${v5[@]/#dump-chain/ 'dump-chain' --chain \"$adir\"/chain.json --chain-rejecta \"$adir\"/chain-rejecta.json }")
408409
v7=("${v6[@]/#chain-timeline/ 'timeline-chain' --timeline \"$adir\"/chain.txt ${locli_render[*]} ${locli_timeline[*]} }")
@@ -427,8 +428,9 @@ EOF
427428
vq=("${vp[@]/#read-summaries/ 'read-summaries' --summary \"$adir\"/summary.json }")
428429
vr=("${vq[@]/#summary-json/ 'render-summary' --json \"$adir\"/summary.json }")
429430
vs=("${vr[@]/#summary-report/ 'render-summary' --org-report \"$adir\"/summary.org ${locli_render[*]}}")
431+
vt=("${vs[@]/#hash-timeline/ 'hash-timeline' --timeline \"$adir\"/hash-timeline.json }")
430432
local ops_final=()
431-
for v in "${vs[@]}"
433+
for v in "${vt[@]}"
432434
do eval ops_final+=($v); done
433435

434436
call_locli "$rtsmode" "${ops_final[@]}"
@@ -437,6 +439,7 @@ EOF
437439
fgrep -v -e '.flt.json' \
438440
-e '.logobjs.json' \
439441
-e 'chain.json' \
442+
-e 'hash-timeline.json' \
440443
-e 'log-manifest.json' \
441444
-e 'mach-views.json' \
442445
-e 'prof.json' \
@@ -564,13 +567,14 @@ EOF
564567
if test -z "$(ls 2>/dev/null $dir/node-*/*.json)"
565568
then remanifest_reasons+=("$(blue consolidated logs missing)")
566569
elif test ! -f "$run_logs"
567-
then remanifest_reasons+=("$(green logs-modified-after-manifest)")
568-
elif test "$(ls 2>/dev/null --sort=time $dir/node-*/*.json analysis/log-manifest.json | head -n1)" != "$run_logs"
569-
then remanifest_reasons+=("$(red logs-modified-after-manifest)")
570+
then remanifest_reasons+=("$(green missing $run_logs)")
571+
# with workbench runs, a node's log files are just called 'stdout'
572+
elif test "$(ls 2>/dev/null --sort=time $dir/node-*/*.json $dir/node-*/stdout $run_logs | head -n1)" != "$run_logs"
573+
then remanifest_reasons+=("$(red logs modified after manifest)")
570574
fi
571575

572576
if test ${#remanifest_reasons[*]} = 0
573-
then progress "analyse" "log manifest exist and is up to date"
577+
then progress "analyse" "log manifest up to date for raw logs"
574578
else progress "analyse" "assembling log manifest: ${remanifest_reasons[*]}"
575579
echo '{}' > $run_logs
576580
time {
@@ -618,9 +622,18 @@ EOF
618622

619623
done
620624
wait
625+
626+
# in case consolidated logs have to be truncated, we document the actual timestamps from the raw logs
627+
for mach in $(jq_tolist '.rlHostLogs | keys' $run_logs)
628+
do jq_fmutate "$run_logs" '
629+
.rlHostLogs["'"$mach"'"].hlRawFirstAt = '"$(cat $adir/logs-$mach.flt.json | head -n1 | jq .at)"'
630+
| .rlHostLogs["'"$mach"'"].hlRawLastAt = '"$(cat $adir/logs-$mach.flt.json | tail -n1 | jq .at)"'
631+
'
632+
done
621633
}
622634
fi
623635

636+
progress "analyse" "log manifest updating for consolidated logs"
624637
for mach in $(jq_tolist '.rlHostLogs | keys' $run_logs)
625638
do jq_fmutate "$run_logs" '
626639
.rlHostLogs[$mach].hlRawSha256 = $raw_sha256
@@ -633,16 +646,29 @@ EOF
633646
- ($keypairs | map (.[1])) # old tracing .kinds
634647
- ["", "unknown0", "unknown1"]
635648
| unique)
636-
| .rlHostLogs[$mach].hlFilteredSha256 = $filtered_sha256
637649
' --sort-keys \
638650
--arg mach $mach \
639651
--rawfile raw_sha256 "$adir"/logs-$mach.sha256 \
640-
--arg filtered_sha256 $(sha256sum < $adir/logs-$mach.flt.json | \
641-
cut -d' ' -f1 | xargs echo -n) \
642652
--slurpfile freqs "$adir"/logs-$mach.tracefreq.json \
643653
--rawfile keys $keyfile
644654
done
645655

656+
local ht_json=$adir/hash-timeline.json
657+
if test "$(ls 2>/dev/null --sort=time $adir/logs-*.flt.json $ht_json | head -n1)" = "$ht_json"
658+
then progress "analyse" "hash timeline up to date"
659+
else progress "analyse" "building hash timeline"
660+
grep -h 'TraceForgedBlock\|DownloadedHeader' $adir/logs-*.flt.json | sort > $ht_json
661+
662+
# skip checksumming consolidated logs for now, to facilitate fast truncation of long runs
663+
#for mach in $(jq_tolist '.rlHostLogs | keys' $run_logs)
664+
#do jq_fmutate "$run_logs" '
665+
# .rlHostLogs[$mach].hlFilteredSha256 = $filtered_sha256
666+
# ' --arg mach $mach \
667+
# --arg filtered_sha256 $(sha256sum < $adir/logs-$mach.flt.json | \
668+
# cut -d' ' -f1 | xargs echo -n)
669+
#done
670+
fi
671+
646672
jq_fmutate "$run_logs" '
647673
.rlMissingTraces =
648674
( .rlHostLogs

0 commit comments

Comments
 (0)