Skip to content

Commit 18c3e80

Browse files
authored
Merge pull request #4154 from SirTyson/archival-stats
Adds CLI tool to print BucketList archival stats Reviewed-by: marta-lokhova
2 parents aa2ab4e + 70d0f7c commit 18c3e80

File tree

4 files changed

+199
-0
lines changed

4 files changed

+199
-0
lines changed

docs/software/commands.md

+1
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ Command options can only be placed after command.
8484
See more examples in [ledger_query_examples.md](ledger_query_examples.md).
8585

8686
* **dump-xdr <FILE-NAME>**: Dumps the given XDR file and then exits.
87+
* **dump-archival-stats**: Logs state archival statistics about the BucketList.
8788
* **encode-asset**: Prints a base-64 encoded asset built from `--code <CODE>` and `--issuer <ISSUER>`. Prints the native asset if neither `--code` nor `--issuer` is given.
8889
* **fuzz <FILE-NAME>**: Run a single fuzz input and exit.
8990
* **gen-fuzz <FILE-NAME>**: Generate a random fuzzer input file.

src/main/ApplicationUtils.cpp

+181
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
#include "main/ApplicationUtils.h"
66
#include "bucket/Bucket.h"
7+
#include "bucket/BucketList.h"
78
#include "bucket/BucketManager.h"
89
#include "catchup/ApplyBucketsWork.h"
910
#include "catchup/CatchupConfiguration.h"
@@ -555,6 +556,186 @@ mergeBucketList(Config cfg, std::string const& outputDir)
555556
}
556557
}
557558

559+
// Byte accounting for a single LedgerKey, gathered while walking the
// BucketList to report state archival statistics.
struct StateArchivalMetric
{
    // Set when the most recent version of the entry is a DEADENTRY.
    bool isDead = false;

    // Size, in bytes, of the most recent version of the entry in the
    // BucketList.
    uint64_t newestBytes = 0;

    // Combined size, in bytes, of every older (shadowed) version of the
    // entry still present in the BucketList.
    uint64_t outdatedBytes = 0;
};
573+
574+
static void
575+
processArchivalMetrics(
576+
std::shared_ptr<Bucket const> const b,
577+
UnorderedMap<LedgerKey, StateArchivalMetric>& ledgerEntries,
578+
UnorderedMap<LedgerKey, std::pair<StateArchivalMetric, uint32_t>>& ttls)
579+
{
580+
for (BucketInputIterator in(b); in; ++in)
581+
{
582+
auto const& be = *in;
583+
bool isDead = be.type() == DEADENTRY;
584+
LedgerKey k = isDead ? be.deadEntry() : LedgerEntryKey(be.liveEntry());
585+
bool isTTL = k.type() == TTL;
586+
587+
if (!isTemporaryEntry(k) && !isTTL)
588+
{
589+
continue;
590+
}
591+
592+
if (isTTL)
593+
{
594+
auto iter = ttls.find(k);
595+
if (iter == ttls.end())
596+
{
597+
StateArchivalMetric metric;
598+
metric.isDead = isDead;
599+
metric.newestBytes = xdr::xdr_size(be);
600+
if (isDead)
601+
{
602+
ttls.emplace(k, std::make_pair(metric, 0));
603+
}
604+
else
605+
{
606+
ttls.emplace(
607+
k, std::make_pair(
608+
metric,
609+
be.liveEntry().data.ttl().liveUntilLedgerSeq));
610+
}
611+
}
612+
else
613+
{
614+
iter->second.first.outdatedBytes += xdr::xdr_size(be);
615+
}
616+
}
617+
else
618+
{
619+
auto iter = ledgerEntries.find(k);
620+
if (iter == ledgerEntries.end())
621+
{
622+
StateArchivalMetric metric;
623+
metric.isDead = isDead;
624+
metric.newestBytes = xdr::xdr_size(be);
625+
ledgerEntries.emplace(k, metric);
626+
}
627+
else
628+
{
629+
iter->second.outdatedBytes += xdr::xdr_size(be);
630+
}
631+
}
632+
}
633+
}
634+
635+
int
636+
dumpStateArchivalStatistics(Config cfg)
637+
{
638+
ZoneScoped;
639+
VirtualClock clock;
640+
cfg.setNoListen();
641+
Application::pointer app = Application::create(clock, cfg, false);
642+
app->getLedgerManager().loadLastKnownLedger(/* restoreBucketlist */ false,
643+
/* isLedgerStateReady */ true);
644+
auto& lm = app->getLedgerManager();
645+
auto& bm = app->getBucketManager();
646+
HistoryArchiveState has = lm.getLastClosedLedgerHAS();
647+
648+
std::vector<Hash> hashes;
649+
for (uint32_t i = 0; i < BucketList::kNumLevels; ++i)
650+
{
651+
HistoryStateBucket const& hsb = has.currentBuckets.at(i);
652+
hashes.emplace_back(hexToBin256(hsb.curr));
653+
hashes.emplace_back(hexToBin256(hsb.snap));
654+
}
655+
656+
UnorderedMap<LedgerKey, StateArchivalMetric> ledgerEntries;
657+
658+
// key -> (metric, liveUntilLedger)
659+
UnorderedMap<LedgerKey, std::pair<StateArchivalMetric, uint32_t>> ttls;
660+
float blSize = 0;
661+
for (auto const& hash : hashes)
662+
{
663+
if (isZero(hash))
664+
{
665+
continue;
666+
}
667+
auto b = bm.getBucketByHash(hash);
668+
if (!b)
669+
{
670+
throw std::runtime_error(std::string("missing bucket: ") +
671+
binToHex(hash));
672+
}
673+
processArchivalMetrics(b, ledgerEntries, ttls);
674+
blSize += b->getSize();
675+
}
676+
677+
// *BytesNewest == bytes consumed only by newest version of BucketEntry
678+
// *BytesOutdated == bytes consumed only by outdated version of BucketEntry
679+
// live -> liveUntilLedger >= ledgerSeq
680+
// expired -> liveUntilLedger < ledgerSeq, but not yet evicted
681+
uint64_t liveBytesNewest{};
682+
uint64_t liveBytesOutdated{};
683+
uint64_t expiredBytesNewest{};
684+
uint64_t expiredBytesOutdated{};
685+
uint64_t evictedBytes{}; // All evicted bytes considered "outdated"
686+
687+
for (auto const& [k, leMetric] : ledgerEntries)
688+
{
689+
auto ttlIter = ttls.find(getTTLKey(k));
690+
releaseAssertOrThrow(ttlIter != ttls.end());
691+
auto const& [ttlMetric, liveUntilLedger] = ttlIter->second;
692+
693+
auto newestBytes = ttlMetric.newestBytes + leMetric.newestBytes;
694+
auto outdatedBytes = ttlMetric.outdatedBytes + leMetric.outdatedBytes;
695+
696+
if (ttlMetric.isDead)
697+
{
698+
releaseAssertOrThrow(leMetric.isDead);
699+
700+
// All bytes considered outdated for evicted entries
701+
evictedBytes += newestBytes + outdatedBytes;
702+
}
703+
else
704+
{
705+
releaseAssertOrThrow(!leMetric.isDead);
706+
707+
// If entry is live
708+
if (liveUntilLedger >=
709+
app->getLedgerManager().getLastClosedLedgerNum())
710+
{
711+
liveBytesNewest += newestBytes;
712+
liveBytesOutdated += outdatedBytes;
713+
}
714+
else
715+
{
716+
expiredBytesNewest += newestBytes;
717+
expiredBytesOutdated += outdatedBytes;
718+
}
719+
}
720+
}
721+
722+
CLOG_INFO(Bucket, "BucketList total bytes: {}", blSize);
723+
CLOG_INFO(Bucket,
724+
"Live Temporary Entries: Newest bytes {} ({}%), Outdated bytes "
725+
"{} ({}%)",
726+
liveBytesNewest, (liveBytesNewest / blSize) * 100,
727+
liveBytesOutdated, (liveBytesOutdated / blSize) * 100);
728+
CLOG_INFO(Bucket,
729+
"Expired but not evicted Temporary: Newest bytes {} ({}%), "
730+
"Outdated bytes {} ({}%)",
731+
expiredBytesNewest, (expiredBytesNewest / blSize) * 100,
732+
expiredBytesOutdated, (expiredBytesOutdated / blSize) * 100);
733+
CLOG_INFO(Bucket, "Evicted Temporary Entries: Outdated bytes {} ({}%)",
734+
evictedBytes, (evictedBytes / blSize) * 100);
735+
736+
return 0;
737+
}
738+
558739
int
559740
dumpLedger(Config cfg, std::string const& outputFile,
560741
std::optional<std::string> filterQuery,

src/main/ApplicationUtils.h

+5
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@ void initializeDatabase(Config cfg);
2424
void httpCommand(std::string const& command, unsigned short port);
2525
int selfCheck(Config cfg);
2626
int mergeBucketList(Config cfg, std::string const& outputDir);
27+
28+
// Logs state archival statistics about the BucketList: the bytes occupied by
29+
// live, expired-but-not-yet-evicted, and evicted temporary entries.
30+
int dumpStateArchivalStatistics(Config cfg);
31+
2732
int dumpLedger(Config cfg, std::string const& outputFile,
2833
std::optional<std::string> filterQuery,
2934
std::optional<uint32_t> lastModifiedLedgerCount,

src/main/CommandLine.cpp

+12
Original file line numberDiff line numberDiff line change
@@ -1168,6 +1168,15 @@ runMergeBucketList(CommandLineArgs const& args)
11681168
[&] { return mergeBucketList(configOption.getConfig(), outputDir); });
11691169
}
11701170

1171+
int
1172+
runDumpStateArchivalStatistics(CommandLineArgs const& args)
1173+
{
1174+
CommandLine::ConfigOption configOption;
1175+
return runWithHelp(args, {configurationParser(configOption)}, [&] {
1176+
return dumpStateArchivalStatistics(configOption.getConfig());
1177+
});
1178+
}
1179+
11711180
int
11721181
runDumpLedger(CommandLineArgs const& args)
11731182
{
@@ -1836,6 +1845,9 @@ handleCommandLine(int argc, char* const* argv)
18361845
{"self-check", "performs diagnostic checks", runSelfCheck},
18371846
{"merge-bucketlist", "writes diagnostic merged bucket list",
18381847
runMergeBucketList},
1848+
{"dump-archival-stats",
1849+
"prints statistics about expired/evicted entries in the BucketList",
1850+
runDumpStateArchivalStatistics},
18391851
{"new-db", "creates or restores the DB to the genesis ledger",
18401852
runNewDB},
18411853
{"new-hist", "initialize history archives", runNewHist},

0 commit comments

Comments
 (0)