@@ -44,56 +44,42 @@ struct BuilderTx
{
    size_t mId = 0;
    uint32_t mInstructions = 0;
-   BitSet mReadOnlyFootprint;
-   BitSet mReadWriteFootprint;
+   // Set of ids of transactions that conflict with this transaction.
+   BitSet mConflictTxs;

-   BuilderTx(size_t txId, TransactionFrameBase const& tx,
-             UnorderedMap<LedgerKey, size_t> const& entryIdMap)
+   BuilderTx(size_t txId, TransactionFrameBase const& tx)
        : mId(txId), mInstructions(tx.sorobanResources().instructions)
    {
-       auto const& footprint = tx.sorobanResources().footprint;
-       for (auto const& key : footprint.readOnly)
-       {
-           mReadOnlyFootprint.set(entryIdMap.at(key));
-       }
-       for (auto const& key : footprint.readWrite)
-       {
-           mReadWriteFootprint.set(entryIdMap.at(key));
-       }
    }
};

-// Cluster of (potentialy transitively) dependent transactions.
+// Cluster of (potentially transitively) dependent transactions.
// Transactions are considered to be dependent if the have the same key in
// their footprints and for at least one of them this key belongs to read-write
// footprint.
struct Cluster
{
    // Total number of instructions in the cluster. Since transactions are
-   // dependenent, these are always 'sequential' instructions.
+   // dependent, these are always 'sequential' instructions.
    uint64_t mInstructions = 0;
-   // Union of read-only footprints of all transactions in the cluster.
-   BitSet mReadOnlyEntries;
-   // Union of read-write footprints of all transactions in the cluster.
-   BitSet mReadWriteEntries;
+   // Set of ids of transactions that conflict with this cluster.
+   BitSet mConflictTxs;
    // Set of transaction ids in the cluster.
    BitSet mTxIds;
    // Id of the bin within a stage in which the cluster is packed.
-   size_t mBinId = 0;
+   size_t mutable mBinId = 0;

    explicit Cluster(BuilderTx const& tx) : mInstructions(tx.mInstructions)
    {
-       mReadOnlyEntries.inplaceUnion(tx.mReadOnlyFootprint);
-       mReadWriteEntries.inplaceUnion(tx.mReadWriteFootprint);
+       mConflictTxs.inplaceUnion(tx.mConflictTxs);
        mTxIds.set(tx.mId);
    }

    void
    merge(Cluster const& other)
    {
        mInstructions += other.mInstructions;
-       mReadOnlyEntries.inplaceUnion(other.mReadOnlyEntries);
-       mReadWriteEntries.inplaceUnion(other.mReadWriteEntries);
+       mConflictTxs.inplaceUnion(other.mConflictTxs);
        mTxIds.inplaceUnion(other.mTxIds);
    }
};
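
The invariant the new fields rely on: a transaction conflicts with a cluster exactly when it conflicts with at least one of the cluster's member transactions, so keeping mConflictTxs as the union of the members' conflict sets turns the later cluster-vs-transaction check into a single bit lookup. A minimal standalone sketch of that idea, using std::bitset as a stand-in for the project's BitSet type and made-up transaction ids:

#include <bitset>
#include <cassert>
#include <cstddef>

int main()
{
    constexpr std::size_t kMaxTxs = 64; // fixed capacity for the sketch; BitSet grows dynamically
    // Tx 0 conflicts with tx 2 (e.g. a shared read-write key); tx 1 conflicts with nothing.
    std::bitset<kMaxTxs> conflicts0;
    std::bitset<kMaxTxs> conflicts1;
    conflicts0.set(2);

    // A cluster built from txs 0 and 1 stores the union of their conflict sets,
    // mirroring the Cluster(BuilderTx const&) constructor and Cluster::merge.
    std::bitset<kMaxTxs> clusterConflicts = conflicts0 | conflicts1;

    // Checking a candidate tx against the cluster is now one bit test instead of
    // three footprint intersections.
    assert(clusterConflicts.test(2));  // tx 2 conflicts with the cluster
    assert(!clusterConflicts.test(3)); // tx 3 does not
    return 0;
}
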
@@ -129,14 +115,12 @@ class Stage
        auto conflictingClusters = getConflictingClusters(tx);

        bool packed = false;
-       // Then, create new clusters by merging the conflicting clusters
+       // Then, try creating new clusters by merging the conflicting clusters
        // together and adding the new transaction to the resulting cluster.
        auto newClusters = createNewClusters(tx, conflictingClusters, packed);
-       releaseAssert(!newClusters.empty());
-
-       // If the new cluster exceeds the limit of instructions per cluster,
-       // we can't add the transaction.
-       if (newClusters.back().mInstructions > mConfig.mInstructionsPerCluster)
+       // Fail fast if a new cluster will end up too large to fit into the
+       // stage.
+       if (newClusters.empty())
        {
            return false;
        }
@@ -175,9 +159,9 @@ class Stage
        for (auto const& cluster : mClusters)
        {
            size_t txId = 0;
-           while (cluster.mTxIds.nextSet(txId))
+           while (cluster->mTxIds.nextSet(txId))
            {
-               visitor(cluster.mBinId, txId);
+               visitor(cluster->mBinId, txId);
                ++txId;
            }
        }
@@ -188,49 +172,38 @@ class Stage
    getConflictingClusters(BuilderTx const& tx) const
    {
        std::unordered_set<Cluster const*> conflictingClusters;
-       for (Cluster const& cluster : mClusters)
+       for (auto const& cluster : mClusters)
        {
-           bool isConflicting = tx.mReadOnlyFootprint.intersectionCount(
-                                    cluster.mReadWriteEntries) > 0 ||
-                                tx.mReadWriteFootprint.intersectionCount(
-                                    cluster.mReadOnlyEntries) > 0 ||
-                                tx.mReadWriteFootprint.intersectionCount(
-                                    cluster.mReadWriteEntries) > 0;
-           if (isConflicting)
+           if (cluster->mConflictTxs.get(tx.mId))
            {
-               conflictingClusters.insert(&cluster);
+               conflictingClusters.insert(cluster.get());
            }
        }
        return conflictingClusters;
    }

-   std::vector<Cluster>
+   std::vector<std::shared_ptr<Cluster const>>
    createNewClusters(BuilderTx const& tx,
                      std::unordered_set<Cluster const*> const& txConflicts,
                      bool& packed)
    {
-       std::vector<Cluster> newClusters;
-       newClusters.reserve(mClusters.size());
-       for (auto const& cluster : mClusters)
-       {
-           if (txConflicts.find(&cluster) == txConflicts.end())
-           {
-               newClusters.push_back(cluster);
-           }
-       }
-
-       newClusters.emplace_back(tx);
+       int64_t newInstructions = tx.mInstructions;
        for (auto const* cluster : txConflicts)
        {
-           newClusters.back().merge(*cluster);
+           newInstructions += cluster->mInstructions;
        }
+
        // Fast-fail condition to ensure that the new cluster doesn't exceed
        // the instructions limit.
-       if (newClusters.back().mInstructions > mConfig.mInstructionsPerCluster)
+       if (newInstructions > mConfig.mInstructionsPerCluster)
        {
-           return newClusters;
+           return {};
+       }
+       auto newCluster = std::make_shared<Cluster>(tx);
+       for (auto const* cluster : txConflicts)
+       {
+           newCluster->merge(*cluster);
        }
-
        // Remove the clusters that were merged from their respective bins.
        for (auto const& cluster : txConflicts)
        {
@@ -244,16 +217,27 @@ class Stage
        // the bin-packing from scratch.
        for (size_t binId = 0; binId < mConfig.mClustersPerStage; ++binId)
        {
-           if (mBinInstructions[binId] + newClusters.back().mInstructions <=
+           if (mBinInstructions[binId] + newCluster->mInstructions <=
                mConfig.mInstructionsPerCluster)
            {
-               mBinInstructions[binId] += newClusters.back().mInstructions;
-               mBinPacking[binId].inplaceUnion(newClusters.back().mTxIds);
-               newClusters.back().mBinId = binId;
+               mBinInstructions[binId] += newCluster->mInstructions;
+               mBinPacking[binId].inplaceUnion(newCluster->mTxIds);
+               newCluster->mBinId = binId;
                packed = true;
                break;
            }
        }
+
+       std::vector<std::shared_ptr<Cluster const>> newClusters;
+       newClusters.reserve(mClusters.size() + 1 - txConflicts.size());
+       for (auto const& cluster : mClusters)
+       {
+           if (txConflicts.find(cluster.get()) == txConflicts.end())
+           {
+               newClusters.push_back(cluster);
+           }
+       }
+       newClusters.push_back(newCluster);
        // If we couldn't pack the new cluster without full bin-packing, we
        // recover the state of the bins (so that the transaction is not
        // considered to have been added yet).
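
A toy illustration of the fast-fail path in createNewClusters above, with made-up instruction counts and a made-up per-cluster limit: the merged size is computed as a plain sum before any cluster is merged or copied, so an oversized candidate is rejected cheaply and the bins are never touched.

#include <cassert>
#include <cstdint>

int main()
{
    int64_t const instructionsPerCluster = 100'000'000; // hypothetical mConfig.mInstructionsPerCluster
    int64_t newInstructions = 40'000'000;               // candidate transaction
    newInstructions += 30'000'000;                      // first conflicting cluster
    newInstructions += 50'000'000;                      // second conflicting cluster

    // 120M > 100M: createNewClusters would return {} and tryAdd would return
    // false without building the merged cluster.
    assert(newInstructions > instructionsPerCluster);
    return 0;
}
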
@@ -273,31 +257,33 @@ class Stage
    // This has around 11/9 maximum approximation ratio, which probably has
    // the best complexity/performance tradeoff out of all the heuristics.
    std::vector<BitSet>
-   binPacking(std::vector<Cluster>& clusters,
+   binPacking(std::vector<std::shared_ptr<Cluster const>>& clusters,
               std::vector<uint64_t>& binInsns) const
    {
        // We could consider dropping the sort here in order to save some time
        // and using just the first-fit heuristic, but that also raises the
        // approximation ratio to 1.7.
        std::sort(clusters.begin(), clusters.end(),
                  [](auto const& a, auto const& b) {
-                     return a.mInstructions > b.mInstructions;
+                     return a->mInstructions > b->mInstructions;
                  });
        size_t const binCount = mConfig.mClustersPerStage;
        std::vector<BitSet> bins(binCount);
        binInsns.resize(binCount);
+       std::vector<size_t> newBinId(clusters.size());
        // Just add every cluster into the first bin it fits into.
-       for (auto& cluster : clusters)
+       for (size_t clusterId = 0; clusterId < clusters.size(); ++clusterId)
        {
+           auto const& cluster = clusters[clusterId];
            bool packed = false;
            for (size_t i = 0; i < binCount; ++i)
            {
-               if (binInsns[i] + cluster.mInstructions <=
+               if (binInsns[i] + cluster->mInstructions <=
                    mConfig.mInstructionsPerCluster)
                {
-                   binInsns[i] += cluster.mInstructions;
-                   bins[i].inplaceUnion(cluster.mTxIds);
-                   cluster.mBinId = i;
+                   binInsns[i] += cluster->mInstructions;
+                   bins[i].inplaceUnion(cluster->mTxIds);
+                   newBinId[clusterId] = i;
                    packed = true;
                    break;
                }
@@ -307,10 +293,14 @@ class Stage
                return std::vector<BitSet>();
            }
        }
+       for (size_t clusterId = 0; clusterId < clusters.size(); ++clusterId)
+       {
+           clusters[clusterId]->mBinId = newBinId[clusterId];
+       }
        return bins;
    }

-   std::vector<Cluster> mClusters;
+   std::vector<std::shared_ptr<Cluster const>> mClusters;
    std::vector<BitSet> mBinPacking;
    std::vector<uint64_t> mBinInstructions;
    int64_t mInstructions = 0;
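
For reference, the routine above is the classic first-fit-decreasing heuristic: sort the clusters by instruction count in decreasing order, then place each one into the first bin that still has enough capacity. A free-standing sketch of the same heuristic over plain integers (function and parameter names are illustrative, not part of the codebase):

#include <algorithm>
#include <cstdint>
#include <cstddef>
#include <functional>
#include <vector>

// Returns per-bin loads, or an empty vector if some item does not fit anywhere.
std::vector<uint64_t>
firstFitDecreasing(std::vector<uint64_t> items, std::size_t binCount,
                   uint64_t binCapacity)
{
    // Decreasing order: largest items are placed first.
    std::sort(items.begin(), items.end(), std::greater<uint64_t>());
    std::vector<uint64_t> bins(binCount, 0);
    for (uint64_t item : items)
    {
        bool packed = false;
        for (auto& bin : bins)
        {
            if (bin + item <= binCapacity)
            {
                bin += item;
                packed = true;
                break;
            }
        }
        if (!packed)
        {
            return {};
        }
    }
    return bins;
}

First-fit decreasing is known to use at most 11/9 of the optimal number of bins plus a small additive constant, which is the ratio the comment refers to; plain first-fit without the sort degrades to roughly 1.7.
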
@@ -327,35 +317,79 @@ buildSurgePricedParallelSorobanPhase(
    std::vector<bool>& hadTxNotFittingLane)
{
    ZoneScoped;
-   // Map all the entries in the footprints to integers in order to be able to
-   // use the bitset operations.
-   UnorderedMap<LedgerKey, size_t> entryIdMap;
-   auto addToMap = [&entryIdMap](LedgerKey const& key) {
-       auto sz = entryIdMap.size();
-       entryIdMap.emplace(key, sz);
-   };
-   for (auto const& txFrame : txFrames)
+   // Simplify the transactions to the minimum necessary amount of data.
+   std::unordered_map<TransactionFrameBaseConstPtr, BuilderTx const*>
+       builderTxForTx;
+   std::vector<std::unique_ptr<BuilderTx>> builderTxs;
+   builderTxs.reserve(txFrames.size());
+   for (size_t i = 0; i < txFrames.size(); ++i)
    {
+       auto const& txFrame = txFrames[i];
+       builderTxs.emplace_back(std::make_unique<BuilderTx>(i, *txFrame));
+       builderTxForTx.emplace(txFrame, builderTxs.back().get());
+   }
+
+   // Before trying to include any transactions, find all the pairs of the
+   // conflicting transactions and mark the conflicts in the builderTxs.
+   //
+   // In order to find the conflicts, we build the maps from the footprint
+   // keys to transactions, then mark the conflicts between the transactions
+   // that share RW key, or between the transactions that share RO and RW key.
+   //
+   // The approach here is optimized towards the low number of conflicts,
+   // specifically when there are no conflicts at all, the complexity is just
+   // O(total_footprint_entry_count). The worst case is roughly
+   // O(max_tx_footprint_size * transaction_count ^ 2), which is equivalent
+   // to the complexity of the straightforward approach of iterating over all
+   // the transaction pairs.
+   //
+   // This also has the further optimization potential: we could populate the
+   // key maps and even the conflicting transactions eagerly in tx queue, thus
+   // amortizing the costs across the whole ledger duration.
+   UnorderedMap<LedgerKey, std::vector<size_t>> txsWithRoKey;
+   UnorderedMap<LedgerKey, std::vector<size_t>> txsWithRwKey;
+   for (size_t i = 0; i < txFrames.size(); ++i)
+   {
+       auto const& txFrame = txFrames[i];
        auto const& footprint = txFrame->sorobanResources().footprint;
        for (auto const& key : footprint.readOnly)
        {
-           addToMap(key);
+           txsWithRoKey[key].push_back(i);
        }
        for (auto const& key : footprint.readWrite)
        {
-           addToMap(key);
+           txsWithRwKey[key].push_back(i);
        }
    }

-   // Simplify the transactions to the minimum necessary amount of data.
-   std::unordered_map<TransactionFrameBaseConstPtr, BuilderTx> builderTxForTx;
-   for (size_t i = 0; i < txFrames.size(); ++i)
+   for (auto const& [key, rwTxIds] : txsWithRwKey)
    {
-       auto const& txFrame = txFrames[i];
-       builderTxForTx.emplace(txFrame, BuilderTx(i, *txFrame, entryIdMap));
+       // RW-RW conflicts
+       for (size_t i = 0; i < rwTxIds.size(); ++i)
+       {
+           for (size_t j = i + 1; j < rwTxIds.size(); ++j)
+           {
+               builderTxs[rwTxIds[i]]->mConflictTxs.set(rwTxIds[j]);
+               builderTxs[rwTxIds[j]]->mConflictTxs.set(rwTxIds[i]);
+           }
+       }
+       // RO-RW conflicts
+       auto roIt = txsWithRoKey.find(key);
+       if (roIt != txsWithRoKey.end())
+       {
+           auto const& roTxIds = roIt->second;
+           for (size_t i = 0; i < roTxIds.size(); ++i)
+           {
+               for (size_t j = 0; j < rwTxIds.size(); ++j)
+               {
+                   builderTxs[roTxIds[i]]->mConflictTxs.set(rwTxIds[j]);
+                   builderTxs[rwTxIds[j]]->mConflictTxs.set(roTxIds[i]);
+               }
+           }
+       }
    }

-   // Process the transactions in the surge pricing (drecreasing fee) order.
+   // Process the transactions in the surge pricing (decreasing fee) order.
    // This also automatically ensures that the resource limits are respected
    // for all the dimensions besides instructions.
    SurgePricingPriorityQueue queue(
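
A toy walkthrough of the two marking passes above, with made-up transaction ids: tx 0 only reads key K while txs 1 and 2 write it, so the RW-RW pass links 1 and 2, the RO-RW pass links 0 with each writer, and two transactions that merely read the same key are never linked.

#include <bitset>
#include <cassert>
#include <cstddef>
#include <vector>

int main()
{
    constexpr std::size_t kMaxTxs = 8; // fixed capacity for the sketch
    std::vector<std::bitset<kMaxTxs>> conflicts(3);

    // Key K appears in tx 0's read-only footprint and in the read-write
    // footprints of txs 1 and 2.
    std::vector<std::size_t> roTxIds = {0};
    std::vector<std::size_t> rwTxIds = {1, 2};

    // RW-RW pass: every pair of writers of the same key conflicts.
    for (std::size_t i = 0; i < rwTxIds.size(); ++i)
    {
        for (std::size_t j = i + 1; j < rwTxIds.size(); ++j)
        {
            conflicts[rwTxIds[i]].set(rwTxIds[j]);
            conflicts[rwTxIds[j]].set(rwTxIds[i]);
        }
    }
    // RO-RW pass: every reader of the key conflicts with every writer.
    for (std::size_t ro : roTxIds)
    {
        for (std::size_t rw : rwTxIds)
        {
            conflicts[ro].set(rw);
            conflicts[rw].set(ro);
        }
    }

    assert(conflicts[1].test(2) && conflicts[2].test(1)); // writer vs writer
    assert(conflicts[0].test(1) && conflicts[0].test(2)); // reader vs writers
    assert(!conflicts[1].test(1));                        // no self-conflict is marked
    return 0;
}
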
@@ -378,7 +412,7 @@ buildSurgePricedParallelSorobanPhase(
        releaseAssert(builderTxIt != builderTxForTx.end());
        for (auto& stage : stages)
        {
-           if (stage.tryAdd(builderTxIt->second))
+           if (stage.tryAdd(*builderTxIt->second))
            {
                added = true;
                break;