Skip to content

Commit 1f515ee

Browse files
committed
Support reusable module cache with multithreaded pre-compilation
1 parent 211c7e0 commit 1f515ee

20 files changed

+1085
-26
lines changed

docs/metrics.md

+5-1
Original file line numberDiff line numberDiff line change
@@ -238,4 +238,8 @@ soroban.config.ledger-max-read-entry | counter | soroban config settin
238238
soroban.config.ledger-max-read-ledger-byte | counter | soroban config setting `ledger_max_read_bytes`
239239
soroban.config.ledger-max-write-entry | counter | soroban config setting `ledger_max_write_ledger_entries`
240240
soroban.config.ledger-max-write-ledger-byte | counter | soroban config setting `ledger_max_write_bytes`
241-
soroban.config.bucket-list-target-size-byte | counter | soroban config setting `bucket_list_target_size_bytes`
241+
soroban.config.bucket-list-target-size-byte | counter | soroban config setting `bucket_list_target_size_bytes`
242+
soroban.module-cache.num-entries | counter | current number of entries in module cache
243+
soroban.module-cache.compilation-time | timer | times each contract compilation when adding to module cache
244+
soroban.module-cache.rebuild-time | timer | times each rebuild of module cache (including all compilations)
245+
soroban.module-cache.rebuild-bytes | counter | bytes of WASM bytecode compiled in last rebuild of module cache

docs/stellar-core_example.cfg

+6
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,12 @@ CATCHUP_RECENT=0
382382
# merging and verification.
383383
WORKER_THREADS=11
384384

385+
# COMPILATION_THREADS (integer) default 6
386+
# Number of threads launched temporarily when compiling contracts at
387+
# startup. These are short lived, CPU-bound threads that are not
388+
# in competition with the worker threads.
389+
COMPILATION_THREADS=6
390+
385391
# QUORUM_INTERSECTION_CHECKER (boolean) default true
386392
# Enable/disable computation of quorum intersection monitoring
387393
QUORUM_INTERSECTION_CHECKER=true

src/crypto/ByteSlice.h

+9
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
// under the Apache License, Version 2.0. See the COPYING file at the root
55
// of this distribution or at http://www.apache.org/licenses/LICENSE-2.0
66

7+
#include "rust/RustBridge.h"
8+
#include <cstdint>
79
#include <string>
810
#include <vector>
911
#include <xdrpp/message.h>
@@ -71,6 +73,13 @@ class ByteSlice
7173
: mData(bytes.data()), mSize(bytes.size())
7274
{
7375
}
76+
// Builds a slice over the contents of a Rust-side byte vector: records the
// vector's data pointer and its size. No bytes are copied by this constructor.
ByteSlice(::rust::Vec<uint8_t> const& bytes)
77+
: mData(bytes.data()), mSize(bytes.size())
78+
{
79+
}
80+
// Builds a slice from a RustBuf by delegating to another ByteSlice
// constructor with the buffer's `data` member.
// NOTE(review): presumably this selects the ::rust::Vec<uint8_t> overload --
// confirm against the declaration of RustBuf::data.
ByteSlice(RustBuf const& bytes) : ByteSlice(bytes.data)
81+
{
82+
}
7483
// Builds a slice over a C string by delegating to the (pointer, length)
// constructor. The length is strlen(str), so the terminating NUL is not part
// of the slice; str must be a valid NUL-terminated string since strlen is
// called on it unconditionally.
ByteSlice(char const* str) : ByteSlice((void const*)str, strlen(str))
7584
{
7685
}

src/ledger/LedgerManager.h

+7
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "catchup/LedgerApplyManager.h"
88
#include "history/HistoryManager.h"
99
#include "ledger/NetworkConfig.h"
10+
#include "rust/RustBridge.h"
1011
#include <memory>
1112

1213
namespace stellar
@@ -200,6 +201,12 @@ class LedgerManager
200201
virtual void manuallyAdvanceLedgerHeader(LedgerHeader const& header) = 0;
201202

202203
virtual SorobanMetrics& getSorobanMetrics() = 0;
204+
virtual ::rust::Box<rust_bridge::SorobanModuleCache> getModuleCache() = 0;
205+
206+
// Compiles all contracts in the current ledger, for ledger protocols
207+
// starting at minLedgerVersion and running through to
208+
// Config::CURRENT_LEDGER_PROTOCOL_VERSION (to enable upgrades).
209+
virtual void compileAllContractsInLedger(uint32_t minLedgerVersion) = 0;
203210

204211
virtual ~LedgerManager()
205212
{

src/ledger/LedgerManagerImpl.cpp

+227-3
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,15 @@
2020
#include "history/HistoryManager.h"
2121
#include "ledger/FlushAndRotateMetaDebugWork.h"
2222
#include "ledger/LedgerHeaderUtils.h"
23+
#include "ledger/LedgerManager.h"
2324
#include "ledger/LedgerTxn.h"
2425
#include "ledger/LedgerTxnEntry.h"
2526
#include "ledger/LedgerTxnHeader.h"
27+
#include "ledger/SharedModuleCacheCompiler.h"
2628
#include "main/Application.h"
2729
#include "main/Config.h"
2830
#include "main/ErrorMessages.h"
31+
#include "rust/RustBridge.h"
2932
#include "transactions/MutableTransactionResult.h"
3033
#include "transactions/OperationFrame.h"
3134
#include "transactions/TransactionFrameBase.h"
@@ -41,8 +44,10 @@
4144
#include "util/XDRCereal.h"
4245
#include "util/XDRStream.h"
4346
#include "work/WorkScheduler.h"
47+
#include "xdr/Stellar-ledger-entries.h"
4448
#include "xdrpp/printer.h"
4549

50+
#include <cstdint>
4651
#include <fmt/format.h>
4752

4853
#include "xdr/Stellar-ledger-entries.h"
@@ -58,6 +63,8 @@
5863
#include <Tracy.hpp>
5964

6065
#include <chrono>
66+
#include <memory>
67+
#include <mutex>
6168
#include <optional>
6269
#include <regex>
6370
#include <sstream>
@@ -125,6 +132,27 @@ LedgerManager::ledgerAbbrev(LedgerHeaderHistoryEntry const& he)
125132
return ledgerAbbrev(he.header, he.hash);
126133
}
127134

135+
static std::vector<uint32_t>
136+
getModuleCacheProtocols()
137+
{
138+
std::vector<uint32_t> ledgerVersions;
139+
for (uint32_t i = (uint32_t)REUSABLE_SOROBAN_MODULE_CACHE_PROTOCOL_VERSION;
140+
i <= Config::CURRENT_LEDGER_PROTOCOL_VERSION; i++)
141+
{
142+
ledgerVersions.push_back(i);
143+
}
144+
auto extra = getenv("SOROBAN_TEST_EXTRA_PROTOCOL");
145+
if (extra)
146+
{
147+
uint32_t proto = static_cast<uint32_t>(atoi(extra));
148+
if (proto > 0)
149+
{
150+
ledgerVersions.push_back(proto);
151+
}
152+
}
153+
return ledgerVersions;
154+
}
155+
128156
LedgerManagerImpl::LedgerManagerImpl(Application& app)
129157
: mApp(app)
130158
, mSorobanMetrics(app.getMetrics())
@@ -157,6 +185,8 @@ LedgerManagerImpl::LedgerManagerImpl(Application& app)
157185
, mCatchupDuration(
158186
app.getMetrics().NewTimer({"ledger", "catchup", "duration"}))
159187
, mState(LM_BOOTING_STATE)
188+
, mModuleCache(::rust_bridge::new_module_cache())
189+
, mModuleCacheProtocols(getModuleCacheProtocols())
160190

161191
{
162192
setupLedgerCloseMetaStream();
@@ -405,6 +435,9 @@ LedgerManagerImpl::loadLastKnownLedger(bool restoreBucketlist)
405435
updateNetworkConfig(ltx);
406436
mSorobanNetworkConfigReadOnly = mSorobanNetworkConfigForApply;
407437
}
438+
439+
// Prime module cache with ledger content.
440+
compileAllContractsInLedger(latestLedgerHeader->ledgerVersion);
408441
}
409442

410443
Database&
@@ -560,6 +593,118 @@ LedgerManagerImpl::getSorobanMetrics()
560593
return mSorobanMetrics;
561594
}
562595

596+
// Returns a shallow clone of the current module cache. Any compilation that
// is still pending is finished first, so the clone is taken from the cache
// that compilation produced. Runs under mLedgerStateMutex.
::rust::Box<rust_bridge::SorobanModuleCache>
LedgerManagerImpl::getModuleCache()
{
    std::lock_guard<std::recursive_mutex> lock(mLedgerStateMutex);
    finishAnyPendingCompilation();
    auto cacheHandle = mModuleCache->shallow_clone();
    return cacheHandle;
}
603+
604+
void
605+
LedgerManagerImpl::finishAnyPendingCompilation()
606+
{
607+
std::lock_guard<std::recursive_mutex> guard(mLedgerStateMutex);
608+
if (mCompiler)
609+
{
610+
auto newCache = mCompiler->wait();
611+
mSorobanMetrics.mModuleCacheRebuildBytes.set_count(
612+
(int64)mCompiler->getBytesCompiled());
613+
mSorobanMetrics.mModuleCacheNumEntries.set_count(
614+
(int64)mCompiler->getContractsCompiled());
615+
mSorobanMetrics.mModuleCacheRebuildTime.Update(
616+
mCompiler->getCompileTime());
617+
mModuleCache.swap(newCache);
618+
mCompiler.reset();
619+
mApp.getAppConnector().setModuleCache(mModuleCache->shallow_clone());
620+
}
621+
}
622+
623+
void
624+
LedgerManagerImpl::compileAllContractsInLedger(uint32_t minLedgerVersion)
625+
{
626+
startCompilingAllContracts(minLedgerVersion);
627+
finishAnyPendingCompilation();
628+
}
629+
630+
void
631+
LedgerManagerImpl::startCompilingAllContracts(uint32_t minLedgerVersion)
632+
{
633+
std::lock_guard<std::recursive_mutex> guard(mLedgerStateMutex);
634+
// Always stop a previous compilation before starting a new one. Can only
635+
// have one running at any time.
636+
finishAnyPendingCompilation();
637+
std::vector<uint32_t> versions;
638+
for (auto const& v : mModuleCacheProtocols)
639+
{
640+
if (v >= minLedgerVersion)
641+
{
642+
versions.push_back(v);
643+
}
644+
}
645+
mCompiler = std::make_unique<SharedModuleCacheCompiler>(mApp, versions);
646+
mCompiler->start();
647+
}
648+
649+
void
650+
LedgerManagerImpl::maybeRebuildModuleCache(uint32_t minLedgerVersion)
651+
{
652+
std::lock_guard<std::recursive_mutex> guard(mLedgerStateMutex);
653+
// There is (currently) a grow-only arena underlying the module cache, so as
654+
// entries are uploaded and evicted that arena will still grow. To cap this
655+
// growth, we periodically rebuild the module cache from scratch.
656+
//
657+
// We could pick various size caps, but we want to avoid rebuilding
658+
// spuriously when there just happens to be "a fairly large" cache due to
659+
// having a fairly large live BL. I.e. we want to allow it to get as big as
660+
// we can -- or as big as the "natural" BL-limits-dictated size -- while
661+
// still rebuilding fairly often in DoS-attempt scenarios or just generally
662+
// if there's regular upload/expiry churn that would otherwise cause
663+
// unbounded growth.
664+
//
665+
// Unfortunately we do not know exactly how much memory is used by each byte
666+
// of contract we compile, and the size estimates from the cost model have
667+
// to assume a worst case which is almost a factor of _40_ larger than the
668+
// byte-size of the contracts. So for example if we assume 100MB of
669+
// contracts, the cost model says we ought to budget for 4GB of memory, just
670+
// in case _all 100MB of contracts_ are "the worst case contract" that's
671+
// just a continuous stream of function definitions.
672+
//
673+
// So: we take this multiplier, times the size of the contracts we _last_
674+
// drew from the BL when doing a full recompile, times two, as a cap on the
675+
// _current_ (post-rebuild, currently-growing) cache's budget-tracked
676+
// memory. This should avoid rebuilding spuriously, while still treating
677+
// events that double the size of the contract-set in the live BL as an
678+
// event that warrants a rebuild.
679+
680+
// We try to fish the current cost multiplier out of the soroban network
681+
// config's memory cost model, but fall back to a conservative default in
682+
// case there is no mem cost param for VmInstantiation (This should never
683+
// happen but just in case).
684+
uint64_t linearTerm = 5000;
685+
686+
// linearTerm is in 1/128ths in the cost model, to reduce rounding error.
687+
uint64_t scale = 128;
688+
689+
auto const& cfg = getSorobanNetworkConfigForApply();
690+
auto const& memParams = cfg.memCostParams();
691+
if (memParams.size() > (size_t)stellar::VmInstantiation)
692+
{
693+
auto const& param = memParams[(size_t)stellar::VmInstantiation];
694+
linearTerm = param.linearTerm;
695+
}
696+
auto lastBytesCompiled = mSorobanMetrics.mModuleCacheRebuildBytes.count();
697+
uint64_t limit = 2 * lastBytesCompiled * linearTerm / scale;
698+
if (mModuleCache->get_mem_bytes_consumed() > limit)
699+
{
700+
CLOG_DEBUG(Ledger,
701+
"Rebuilding module cache: worst-case estimate {} "
702+
"model-bytes consumed of {} limit",
703+
mModuleCache->get_mem_bytes_consumed(), limit);
704+
startCompilingAllContracts(minLedgerVersion);
705+
}
706+
}
707+
563708
void
564709
LedgerManagerImpl::publishSorobanMetrics()
565710
{
@@ -803,6 +948,9 @@ LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData,
803948
return;
804949
}
805950

951+
// Complete any pending wasm-module-compilation before closing the ledger.
952+
finishAnyPendingCompilation();
953+
806954
#ifdef BUILD_TESTS
807955
mLastLedgerTxMeta.clear();
808956
#endif
@@ -1149,11 +1297,14 @@ LedgerManagerImpl::setLastClosedLedger(
11491297
advanceLedgerPointers(advanceLedgerStateSnapshot(lastClosed.header, has));
11501298

11511299
LedgerTxn ltx2(mApp.getLedgerTxnRoot());
1152-
if (protocolVersionStartsFrom(ltx2.loadHeader().current().ledgerVersion,
1153-
SOROBAN_PROTOCOL_VERSION))
1300+
auto lv = ltx2.loadHeader().current().ledgerVersion;
1301+
if (protocolVersionStartsFrom(lv, SOROBAN_PROTOCOL_VERSION))
11541302
{
1155-
mApp.getLedgerManager().updateNetworkConfig(ltx2);
1303+
updateNetworkConfig(ltx2);
11561304
}
1305+
// This should not be additionally conditionalized on lv >= anything,
1306+
// since we want to support SOROBAN_TEST_EXTRA_PROTOCOL > lv.
1307+
compileAllContractsInLedger(lv);
11571308
}
11581309

11591310
void
@@ -1804,6 +1955,8 @@ LedgerManagerImpl::transferLedgerEntriesToBucketList(
18041955
ledgerCloseMeta->populateEvictedEntries(evictedState);
18051956
}
18061957

1958+
evictFromModuleCache(lh.ledgerVersion, evictedState);
1959+
18071960
ltxEvictions.commit();
18081961
}
18091962

@@ -1814,6 +1967,8 @@ LedgerManagerImpl::transferLedgerEntriesToBucketList(
18141967
ltx.getAllEntries(initEntries, liveEntries, deadEntries);
18151968
if (blEnabled)
18161969
{
1970+
addAnyContractsToModuleCache(lh.ledgerVersion, initEntries);
1971+
addAnyContractsToModuleCache(lh.ledgerVersion, liveEntries);
18171972
mApp.getBucketManager().addLiveBatch(mApp, lh, initEntries, liveEntries,
18181973
deadEntries);
18191974
}
@@ -1872,6 +2027,75 @@ LedgerManagerImpl::ledgerClosed(
18722027
res = advanceLedgerStateSnapshot(lh, has);
18732028
});
18742029

2030+
if (protocolVersionStartsFrom(
2031+
initialLedgerVers, REUSABLE_SOROBAN_MODULE_CACHE_PROTOCOL_VERSION))
2032+
{
2033+
maybeRebuildModuleCache(initialLedgerVers);
2034+
}
2035+
18752036
return res;
18762037
}
2038+
2039+
void
2040+
LedgerManagerImpl::evictFromModuleCache(uint32_t ledgerVersion,
2041+
EvictedStateVectors const& evictedState)
2042+
{
2043+
std::vector<Hash> keys;
2044+
for (auto const& key : evictedState.deletedKeys)
2045+
{
2046+
if (key.type() == CONTRACT_CODE)
2047+
{
2048+
keys.emplace_back(key.contractCode().hash);
2049+
}
2050+
}
2051+
for (auto const& entry : evictedState.archivedEntries)
2052+
{
2053+
if (entry.data.type() == CONTRACT_CODE)
2054+
{
2055+
Hash const& hash = entry.data.contractCode().hash;
2056+
keys.emplace_back(hash);
2057+
}
2058+
}
2059+
if (keys.size() > 0)
2060+
{
2061+
CLOG_DEBUG(Ledger, "evicting {} modules from module cache",
2062+
keys.size());
2063+
for (auto const& hash : keys)
2064+
{
2065+
CLOG_DEBUG(Ledger, "evicting {} from module cache", binToHex(hash));
2066+
::rust::Slice<uint8_t const> slice{hash.data(), hash.size()};
2067+
mModuleCache->evict_contract_code(slice);
2068+
mSorobanMetrics.mModuleCacheNumEntries.dec();
2069+
}
2070+
}
2071+
}
2072+
2073+
void
2074+
LedgerManagerImpl::addAnyContractsToModuleCache(
2075+
uint32_t ledgerVersion, std::vector<LedgerEntry> const& le)
2076+
{
2077+
for (auto const& e : le)
2078+
{
2079+
if (e.data.type() == CONTRACT_CODE)
2080+
{
2081+
for (auto const& v : mModuleCacheProtocols)
2082+
{
2083+
if (v >= ledgerVersion)
2084+
{
2085+
auto const& wasm = e.data.contractCode().code;
2086+
CLOG_DEBUG(Ledger,
2087+
"compiling wasm {} for protocol {} module cache",
2088+
binToHex(sha256(wasm)), v);
2089+
auto slice =
2090+
rust::Slice<const uint8_t>(wasm.data(), wasm.size());
2091+
mSorobanMetrics.mModuleCacheNumEntries.inc();
2092+
auto timer =
2093+
mSorobanMetrics.mModuleCompilationTime.TimeScope();
2094+
mModuleCache->compile(v, slice);
2095+
}
2096+
}
2097+
}
2098+
}
2099+
}
2100+
18772101
}

0 commit comments

Comments
 (0)