Skip to content

Commit 49281ab

Browse files
holiman and karalabe authored
core/state/snapshot, trie: reuse dirty data instead of hitting disk when generating (ethereum#22667)
* core/state/snapshot: reuse memory data instead of hitting disk when generating
* trie: minor nitpicks wrt the resolver optimization
* core/state/snapshot, trie: use key/value store for resolver
* trie: fix linter

Co-authored-by: Péter Szilágyi <[email protected]>
1 parent ea54c58 commit 49281ab

File tree

2 files changed

+64
-6
lines changed

2 files changed

+64
-6
lines changed

core/state/snapshot/generate.go

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import (
3131
"github.com/ethereum/go-ethereum/core/rawdb"
3232
"github.com/ethereum/go-ethereum/crypto"
3333
"github.com/ethereum/go-ethereum/ethdb"
34+
"github.com/ethereum/go-ethereum/ethdb/memorydb"
3435
"github.com/ethereum/go-ethereum/log"
3536
"github.com/ethereum/go-ethereum/metrics"
3637
"github.com/ethereum/go-ethereum/rlp"
@@ -434,6 +435,20 @@ func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string,
434435
}
435436
meter.Mark(1)
436437
}
438+
439+
// We use the snap data to build up a cache which can be used by the
440+
// main account trie as a primary lookup when resolving hashes
441+
var snapNodeCache ethdb.KeyValueStore
442+
if len(result.keys) > 0 {
443+
snapNodeCache = memorydb.New()
444+
snapTrieDb := trie.NewDatabase(snapNodeCache)
445+
snapTrie, _ := trie.New(common.Hash{}, snapTrieDb)
446+
for i, key := range result.keys {
447+
snapTrie.Update(key, result.vals[i])
448+
}
449+
root, _ := snapTrie.Commit(nil)
450+
snapTrieDb.Commit(root, false, nil)
451+
}
437452
tr := result.tr
438453
if tr == nil {
439454
tr, err = trie.New(root, dl.triedb)
@@ -442,9 +457,11 @@ func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string,
442457
return false, nil, errMissingTrie
443458
}
444459
}
460+
445461
var (
446462
trieMore bool
447-
iter = trie.NewIterator(tr.NodeIterator(origin))
463+
nodeIt = tr.NodeIterator(origin)
464+
iter = trie.NewIterator(nodeIt)
448465
kvkeys, kvvals = result.keys, result.vals
449466

450467
// counters
@@ -458,6 +475,7 @@ func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string,
458475
start = time.Now()
459476
internal time.Duration
460477
)
478+
nodeIt.AddResolver(snapNodeCache)
461479
for iter.Next() {
462480
if last != nil && bytes.Compare(iter.Key, last) > 0 {
463481
trieMore = true

trie/iterator.go

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"errors"
2323

2424
"github.com/ethereum/go-ethereum/common"
25+
"github.com/ethereum/go-ethereum/ethdb"
2526
"github.com/ethereum/go-ethereum/rlp"
2627
)
2728

@@ -102,6 +103,19 @@ type NodeIterator interface {
102103
// iterator is not positioned at a leaf. Callers must not retain references
103104
// to the value after calling Next.
104105
LeafProof() [][]byte
106+
107+
// AddResolver sets an intermediate database to use for looking up trie nodes
108+
// before reaching into the real persistent layer.
109+
//
110+
// This is not required for normal operation, rather is an optimization for
111+
// cases where trie nodes can be recovered from some external mechanism without
112+
// reading from disk. In those cases, this resolver allows short circuiting
113+
// accesses and returning them from memory.
114+
//
115+
// Before adding a similar mechanism to any other place in Geth, consider
116+
// making trie.Database an interface and wrapping at that level. It's a huge
117+
// refactor, but it could be worth it if another occurrence arises.
118+
AddResolver(ethdb.KeyValueStore)
105119
}
106120

107121
// nodeIteratorState represents the iteration state at one particular node of the
@@ -119,6 +133,8 @@ type nodeIterator struct {
119133
stack []*nodeIteratorState // Hierarchy of trie nodes persisting the iteration state
120134
path []byte // Path to the current node
121135
err error // Failure set in case of an internal error in the iterator
136+
137+
resolver ethdb.KeyValueStore // Optional intermediate resolver above the disk layer
122138
}
123139

124140
// errIteratorEnd is stored in nodeIterator.err when iteration is done.
@@ -143,6 +159,10 @@ func newNodeIterator(trie *Trie, start []byte) NodeIterator {
143159
return it
144160
}
145161

162+
func (it *nodeIterator) AddResolver(resolver ethdb.KeyValueStore) {
163+
it.resolver = resolver
164+
}
165+
146166
func (it *nodeIterator) Hash() common.Hash {
147167
if len(it.stack) == 0 {
148168
return common.Hash{}
@@ -262,7 +282,7 @@ func (it *nodeIterator) init() (*nodeIteratorState, error) {
262282
if root != emptyRoot {
263283
state.hash = root
264284
}
265-
return state, state.resolve(it.trie, nil)
285+
return state, state.resolve(it, nil)
266286
}
267287

268288
// peek creates the next state of the iterator.
@@ -286,7 +306,7 @@ func (it *nodeIterator) peek(descend bool) (*nodeIteratorState, *int, []byte, er
286306
}
287307
state, path, ok := it.nextChild(parent, ancestor)
288308
if ok {
289-
if err := state.resolve(it.trie, path); err != nil {
309+
if err := state.resolve(it, path); err != nil {
290310
return parent, &parent.index, path, err
291311
}
292312
return state, &parent.index, path, nil
@@ -319,7 +339,7 @@ func (it *nodeIterator) peekSeek(seekKey []byte) (*nodeIteratorState, *int, []by
319339
}
320340
state, path, ok := it.nextChildAt(parent, ancestor, seekKey)
321341
if ok {
322-
if err := state.resolve(it.trie, path); err != nil {
342+
if err := state.resolve(it, path); err != nil {
323343
return parent, &parent.index, path, err
324344
}
325345
return state, &parent.index, path, nil
@@ -330,9 +350,21 @@ func (it *nodeIterator) peekSeek(seekKey []byte) (*nodeIteratorState, *int, []by
330350
return nil, nil, nil, errIteratorEnd
331351
}
332352

333-
func (st *nodeIteratorState) resolve(tr *Trie, path []byte) error {
353+
func (it *nodeIterator) resolveHash(hash hashNode, path []byte) (node, error) {
354+
if it.resolver != nil {
355+
if blob, err := it.resolver.Get(hash); err == nil && len(blob) > 0 {
356+
if resolved, err := decodeNode(hash, blob); err == nil {
357+
return resolved, nil
358+
}
359+
}
360+
}
361+
resolved, err := it.trie.resolveHash(hash, path)
362+
return resolved, err
363+
}
364+
365+
func (st *nodeIteratorState) resolve(it *nodeIterator, path []byte) error {
334366
if hash, ok := st.node.(hashNode); ok {
335-
resolved, err := tr.resolveHash(hash, path)
367+
resolved, err := it.resolveHash(hash, path)
336368
if err != nil {
337369
return err
338370
}
@@ -517,6 +549,10 @@ func (it *differenceIterator) Path() []byte {
517549
return it.b.Path()
518550
}
519551

552+
func (it *differenceIterator) AddResolver(resolver ethdb.KeyValueStore) {
553+
panic("not implemented")
554+
}
555+
520556
func (it *differenceIterator) Next(bool) bool {
521557
// Invariants:
522558
// - We always advance at least one element in b.
@@ -624,6 +660,10 @@ func (it *unionIterator) Path() []byte {
624660
return (*it.items)[0].Path()
625661
}
626662

663+
func (it *unionIterator) AddResolver(resolver ethdb.KeyValueStore) {
664+
panic("not implemented")
665+
}
666+
627667
// Next returns the next node in the union of tries being iterated over.
628668
//
629669
// It does this by maintaining a heap of iterators, sorted by the iteration

0 commit comments

Comments
 (0)