Skip to content

Commit c95d131

Browse files
authored
snapshots: fewer copies in BTree index using BytesOrByteView (#2813)
1 parent 30da9a6 commit c95d131

File tree

9 files changed

+53
-48
lines changed

9 files changed

+53
-48
lines changed

Diff for: silkworm/db/datastore/snapshots/btree/btree.cpp

+4-4
Original file line number | Diff line number | Diff line change
@@ -35,15 +35,15 @@ BTree::BTree(
3535
cache_{decode_nodes(encoded_nodes)} {
3636
}
3737

38-
using CompareResult = std::pair<int, Bytes>;
38+
using CompareResult = std::pair<int, BytesOrByteView>;
3939

4040
static CompareResult compare_key(
4141
ByteView key,
4242
BTree::DataIndex key_index,
4343
const BTree::KeyValueIndex& index) {
4444
auto data_key = index.lookup_key(key_index);
4545
ensure(data_key.has_value(), [&] { return "out-of-bounds key=" + to_hex(key) + " data_index=" + std::to_string(key_index); });
46-
int cmp = data_key->compare(key);
46+
int cmp = ByteView{*data_key}.compare(key);
4747
return {cmp, std::move(*data_key)};
4848
}
4949

@@ -106,7 +106,7 @@ BTree::SeekResult BTree::seek(ByteView seek_key, const KeyValueIndex& index) {
106106
return {found, std::move(kv_pair->first), std::move(kv_pair->second), left_index};
107107
}
108108

109-
std::optional<Bytes> BTree::get(ByteView key, const KeyValueIndex& index) {
109+
std::optional<BytesOrByteView> BTree::get(ByteView key, const KeyValueIndex& index) {
110110
if (key.empty() && num_nodes_ > 0) {
111111
auto kv_pair = index.lookup_key_value(0);
112112
if (!kv_pair) {
@@ -172,7 +172,7 @@ void BTree::warmup(const KeyValueIndex& index) {
172172
const size_t data_index = i - 1;
173173
auto [_, key] = compare_key({}, data_index, index);
174174
cache_.emplace_back(Node{data_index, Bytes{key}});
175-
cached_bytes += sizeof(Node) + key.length();
175+
cached_bytes += sizeof(Node) + ByteView{key}.length();
176176
}
177177
SILK_DEBUG << "BTree::warmup finished M=" << fanout_ << " N=" << num_nodes_ << " cache_size=" << cached_bytes;
178178
}

Diff for: silkworm/db/datastore/snapshots/btree/btree.hpp

+9-9
Original file line number | Diff line number | Diff line change
@@ -27,23 +27,23 @@ namespace silkworm::snapshots::btree {
2727
class BTree {
2828
public:
2929
using DataIndex = uint64_t;
30-
using KeyValue = std::pair<Bytes, Bytes>;
30+
using KeyValue = std::pair<BytesOrByteView, BytesOrByteView>;
3131

3232
struct KeyValueIndex {
3333
virtual ~KeyValueIndex() = default;
3434
virtual std::optional<KeyValue> lookup_key_value(DataIndex) const = 0;
35-
virtual std::optional<Bytes> lookup_key(DataIndex) const = 0;
35+
virtual std::optional<BytesOrByteView> lookup_key(DataIndex) const = 0;
3636

37-
using LookupResult = std::pair<int, std::optional<Bytes>>;
37+
using LookupResult = std::pair<int, std::optional<BytesOrByteView>>;
3838
virtual std::optional<LookupResult> lookup_key_value(DataIndex, ByteView) const = 0;
39-
virtual std::optional<Bytes> advance_key_value(DataIndex, ByteView, size_t skip_max_count) const = 0;
39+
virtual std::optional<BytesOrByteView> advance_key_value(DataIndex, ByteView, size_t skip_max_count) const = 0;
4040
};
4141

4242
struct SeekResult {
43-
bool found;
44-
Bytes key;
45-
Bytes value;
46-
DataIndex key_index;
43+
bool found{false};
44+
BytesOrByteView key;
45+
BytesOrByteView value;
46+
DataIndex key_index{0};
4747
};
4848

4949
BTree(
@@ -68,7 +68,7 @@ class BTree {
6868
//! \brief Search and return key equal to the given \p key
6969
//! \param key the key to look for
7070
//! \param index the key-value data sequence
71-
std::optional<Bytes> get(ByteView key, const KeyValueIndex& index);
71+
std::optional<BytesOrByteView> get(ByteView key, const KeyValueIndex& index);
7272

7373
void check_against_data_keys(const KeyValueIndex& index);
7474

Diff for: silkworm/db/datastore/snapshots/btree/btree_index.cpp

+13-14
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,7 @@ MemoryMappedRegion BTreeIndex::memory_file_region() const {
7373
std::optional<BTreeIndex::Cursor> BTreeIndex::seek(ByteView seek_key, const KVSegmentReader& kv_segment) const {
7474
KeyValueIndex index{kv_segment, data_offsets_, file_path_};
7575
auto [found, key, value, data_index] = btree_->seek(seek_key, index);
76-
if (key.compare(seek_key) >= 0) {
76+
if (ByteView{key}.compare(seek_key) >= 0) {
7777
return BTreeIndex::Cursor{
7878
this,
7979
std::move(key),
@@ -85,7 +85,7 @@ std::optional<BTreeIndex::Cursor> BTreeIndex::seek(ByteView seek_key, const KVSe
8585
return std::nullopt;
8686
}
8787

88-
std::optional<Bytes> BTreeIndex::get(ByteView key, const KVSegmentReader& kv_segment) const {
88+
std::optional<BytesOrByteView> BTreeIndex::get(ByteView key, const KVSegmentReader& kv_segment) const {
8989
const KeyValueIndex index{kv_segment, data_offsets_, file_path_};
9090
return btree_->get(key, index);
9191
}
@@ -96,30 +96,29 @@ std::optional<BTree::KeyValue> BTreeIndex::KeyValueIndex::lookup_key_value(DataI
9696
}
9797
const auto data_offset = data_offsets_->at(data_index);
9898

99-
segment::KVSegmentReader<RawDecoder<Bytes>, RawDecoder<Bytes>> reader{kv_segment_};
99+
segment::KVSegmentReader<RawDecoder<BytesOrByteView>, RawDecoder<BytesOrByteView>> reader{kv_segment_};
100100
auto data_it = reader.seek(data_offset);
101101
if (data_it == reader.end()) {
102102
throw std::runtime_error{"key/value not found data_index=" + std::to_string(data_index) + " for " + file_path_.string()};
103103
}
104104
auto kv_pair = *data_it;
105105

106-
return BTree::KeyValue{std::move(kv_pair.first), std::move(kv_pair.second)};
106+
return BTree::KeyValue{kv_pair.first, kv_pair.second};
107107
}
108108

109-
std::optional<Bytes> BTreeIndex::KeyValueIndex::lookup_key(DataIndex data_index) const {
109+
std::optional<BytesOrByteView> BTreeIndex::KeyValueIndex::lookup_key(DataIndex data_index) const {
110110
if (data_index >= data_offsets_->size()) {
111111
return std::nullopt;
112112
}
113113
const auto data_offset = data_offsets_->at(data_index);
114114

115-
segment::KVSegmentKeysReader<RawDecoder<Bytes>> reader{kv_segment_};
116-
auto data_it = reader.seek(data_offset);
115+
segment::KVSegmentKeysReader<RawDecoder<BytesOrByteView>> reader{kv_segment_};
116+
const auto data_it = reader.seek(data_offset);
117117
if (data_it == reader.end()) {
118118
throw std::runtime_error{"key not found data_index=" + std::to_string(data_index) + " for " + file_path_.string()};
119119
}
120-
Bytes key = std::move(*data_it);
121120

122-
return key;
121+
return *data_it;
123122
}
124123

125124
std::optional<BTreeIndex::KeyValueIndex::LookupResult> BTreeIndex::KeyValueIndex::lookup_key_value(DataIndex data_index, ByteView k) const {
@@ -130,15 +129,15 @@ std::optional<BTreeIndex::KeyValueIndex::LookupResult> BTreeIndex::KeyValueIndex
130129

131130
const auto& decompressor = kv_segment_.decompressor();
132131
auto it = decompressor.seek(data_offset);
133-
if ((it == decompressor.end()) || !it.has_next()) {
132+
if (it == decompressor.end() || !it.has_next()) {
134133
throw std::runtime_error{"key not found data_index=" + std::to_string(data_index) + " for " + file_path_.string()};
135134
}
136135

137-
const int key_compare = ByteView{*it}.compare(k);
138-
if (key_compare != 0) {
136+
if (const int key_compare = ByteView{*it}.compare(k); key_compare != 0) {
139137
return LookupResult{key_compare, std::nullopt};
140138
}
141139

140+
// Key matches: advance and read value
142141
++it;
143142
return LookupResult{0, std::move(*it)};
144143
}
@@ -178,7 +177,7 @@ static seg::Decompressor::Iterator kv_decompressor_seek_to_key(
178177
return it;
179178
}
180179

181-
std::optional<Bytes> BTreeIndex::KeyValueIndex::advance_key_value(const DataIndex data_index, const ByteView k, const size_t skip_max_count) const {
180+
std::optional<BytesOrByteView> BTreeIndex::KeyValueIndex::advance_key_value(const DataIndex data_index, const ByteView k, const size_t skip_max_count) const {
182181
if (data_index >= data_offsets_->size()) {
183182
return std::nullopt;
184183
}
@@ -191,7 +190,7 @@ std::optional<Bytes> BTreeIndex::KeyValueIndex::advance_key_value(const DataInde
191190
}
192191

193192
++it;
194-
return std::optional<Bytes>{std::move(*it)};
193+
return std::move(*it);
195194
}
196195

197196
bool BTreeIndex::Cursor::next() {

Diff for: silkworm/db/datastore/snapshots/btree/btree_index.hpp

+6-6
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ class BTreeIndex {
4242

4343
class Cursor {
4444
public:
45-
using value_type = std::pair<Bytes, Bytes>;
45+
using value_type = std::pair<BytesOrByteView, BytesOrByteView>;
4646
using iterator_category [[maybe_unused]] = std::input_iterator_tag;
4747
using difference_type = std::ptrdiff_t;
4848
using pointer = value_type*;
@@ -86,8 +86,8 @@ class BTreeIndex {
8686

8787
Cursor(
8888
const BTreeIndex* index,
89-
Bytes key,
90-
Bytes value,
89+
BytesOrByteView key,
90+
BytesOrByteView value,
9191
DataIndex data_index,
9292
const KVSegmentReader* kv_segment)
9393
: index_{index},
@@ -130,7 +130,7 @@ class BTreeIndex {
130130
//! \param key the data key to match exactly
131131
//! \param kv_segment reader of the key-value data sequence
132132
//! \return the value associated at \p key or std::nullopt if not found
133-
std::optional<Bytes> get(ByteView key, const KVSegmentReader& kv_segment) const;
133+
std::optional<BytesOrByteView> get(ByteView key, const KVSegmentReader& kv_segment) const;
134134

135135
private:
136136
class KeyValueIndex : public BTree::KeyValueIndex {
@@ -145,9 +145,9 @@ class BTreeIndex {
145145
~KeyValueIndex() override = default;
146146

147147
std::optional<BTree::KeyValue> lookup_key_value(DataIndex data_index) const override;
148-
std::optional<Bytes> lookup_key(DataIndex data_index) const override;
148+
std::optional<BytesOrByteView> lookup_key(DataIndex data_index) const override;
149149
std::optional<LookupResult> lookup_key_value(DataIndex, ByteView) const override;
150-
std::optional<Bytes> advance_key_value(DataIndex, ByteView, size_t skip_max_count) const override;
150+
std::optional<BytesOrByteView> advance_key_value(DataIndex, ByteView, size_t skip_max_count) const override;
151151

152152
private:
153153
const KVSegmentReader& kv_segment_;

Diff for: silkworm/db/datastore/snapshots/btree/btree_index_test.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -122,7 +122,7 @@ TEST_CASE("BTreeIndex", "[snapshots][btree]") {
122122
SECTION("BTreeIndex::get") {
123123
// Check that all values retrieved through BT index match
124124
size_t key_count{0};
125-
segment::KVSegmentReader<RawDecoder<Bytes>, RawDecoder<Bytes>> reader{kv_segment};
125+
segment::KVSegmentReader<RawDecoder<ByteView>, RawDecoder<ByteView>> reader{kv_segment};
126126
for (const auto [key, value] : reader) {
127127
const auto v = bt_index.get(key, kv_segment);
128128
CHECK(v == value);

Diff for: silkworm/db/datastore/snapshots/common/raw_codec.hpp

+11-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
namespace silkworm::snapshots {
2424

2525
template <class TBytes>
26-
concept BytesOrByteViewConcept = std::same_as<TBytes, Bytes> || std::same_as<TBytes, ByteView>;
26+
concept BytesOrByteViewConcept = std::same_as<TBytes, Bytes> || std::same_as<TBytes, ByteView> || std::same_as<TBytes, BytesOrByteView>;
2727

2828
template <BytesOrByteViewConcept TBytes>
2929
struct RawDecoder : public Decoder {
@@ -41,8 +41,18 @@ struct RawDecoder : public Decoder {
4141
}
4242
};
4343

44+
template <>
45+
struct RawDecoder<BytesOrByteView> : public Decoder {
46+
BytesOrByteView value;
47+
~RawDecoder() override = default;
48+
void decode_word(Word& word) override {
49+
value = std::move(word);
50+
}
51+
};
52+
4453
static_assert(DecoderConcept<RawDecoder<Bytes>>);
4554
static_assert(DecoderConcept<RawDecoder<ByteView>>);
55+
static_assert(DecoderConcept<RawDecoder<BytesOrByteView>>);
4656

4757
template <BytesOrByteViewConcept TBytes>
4858
struct RawEncoder : public Encoder {

Diff for: silkworm/db/datastore/snapshots/domain_get_latest_query.hpp

+2-3
Original file line number | Diff line number | Diff line change
@@ -48,14 +48,13 @@ struct DomainGetLatestSegmentQuery {
4848
return std::nullopt;
4949
}
5050

51-
std::optional<Bytes> value_data = entity_.btree_index.get(key_data, entity_.kv_segment);
51+
std::optional<Word> value_data = entity_.btree_index.get(key_data, entity_.kv_segment);
5252
if (!value_data) {
5353
return std::nullopt;
5454
}
5555

5656
TValueDecoder value_decoder;
57-
Word value{std::move(*value_data)};
58-
value_decoder.decode_word(value);
57+
value_decoder.decode_word(*value_data);
5958
return std::move(value_decoder.value);
6059
}
6160

Diff for: silkworm/db/datastore/snapshots/domain_range_latest_query.hpp

+7-9
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,7 @@ struct DomainRangeLatestSegmentQuery {
5252
auto begin_it = entity_.btree_index.seek(key_start, entity_.kv_segment).value_or(btree::BTreeIndex::Cursor{});
5353

5454
return std::ranges::subrange{std::move(begin_it), std::default_sentinel} |
55-
std::views::take_while([key_end = std::move(key_end)](const auto& kv_pair) { return kv_pair.first < key_end; });
55+
std::views::take_while([key_end = std::move(key_end)](const auto& kv_pair) { return ByteView{kv_pair.first} < key_end; });
5656
}
5757

5858
auto exec(Bytes key_start, Bytes key_end, bool ascending) {
@@ -77,27 +77,25 @@ struct DomainRangeLatestQuery {
7777
using ResultItemKey = decltype(TKeyDecoder::value);
7878
using ResultItemValue = decltype(TValueDecoder::value);
7979
using ResultItem = std::pair<ResultItemKey, ResultItemValue>;
80-
using Word = snapshots::Decoder::Word;
80+
using Word = Decoder::Word;
8181

82-
static ResultItem decode_kv_pair(std::pair<Bytes, Bytes>&& kv_pair) {
83-
if constexpr (std::same_as<ResultItem, std::pair<Bytes, Bytes>>) {
82+
static ResultItem decode_kv_pair(std::pair<Word, Word>&& kv_pair) {
83+
if constexpr (std::same_as<ResultItem, std::pair<Word, Word>>) {
8484
return std::move(kv_pair);
8585
}
8686

8787
TKeyDecoder key_decoder;
88-
Word key_byte_word{std::move(kv_pair.first)};
89-
key_decoder.decode_word(key_byte_word);
88+
key_decoder.decode_word(kv_pair.first);
9089
ResultItemKey& key = key_decoder.value;
9190

9291
TValueDecoder value_decoder;
93-
Word value_byte_word{std::move(kv_pair.second)};
94-
value_decoder.decode_word(value_byte_word);
92+
value_decoder.decode_word(kv_pair.second);
9593
ResultItemValue& value = value_decoder.value;
9694

9795
return ResultItem{std::move(key), std::move(value)};
9896
}
9997

100-
static constexpr auto kDecodeKVPairFunc = [](std::pair<Bytes, Bytes>& kv_pair) -> ResultItem {
98+
static constexpr auto kDecodeKVPairFunc = [](std::pair<Word, Word>& kv_pair) -> ResultItem {
10199
return decode_kv_pair(std::move(kv_pair));
102100
};
103101

Diff for: silkworm/db/datastore/snapshots/segment/kv_segment_reader.hpp

-1
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,6 @@
3333

3434
#include "../common/codec.hpp"
3535
#include "../common/snapshot_path.hpp"
36-
#include "../common/util/iterator/iterator_read_into_vector.hpp"
3736
#include "seg/decompressor.hpp"
3837

3938
namespace silkworm::snapshots::segment {

0 commit comments

Comments
 (0)