Skip to content

Commit 4084072

Browse files
committed
chore: prefetch keys in opmget
Also, stop deduplicating keys by default, but keep it available as a run-time flag. All in all, this PR improves MGET (100% miss) throughput by at least 7%: from 1.23M to 1.32M qps. (100%-miss traffic allows measuring the impact of this code more effectively.) Also, finally fix the TieredStorageTest.FlushAll test. Signed-off-by: Roman Gershman <[email protected]>
1 parent 8acf849 commit 4084072

File tree

2 files changed

+36
-11
lines changed

2 files changed

+36
-11
lines changed

src/server/string_family.cc

+32-9
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@
3535
#include "server/transaction.h"
3636
#include "util/fibers/future.h"
3737

38+
ABSL_FLAG(bool, mget_prefetch_keys, true, "If true, MGET will prefetch keys before reading them");
39+
40+
ABSL_FLAG(bool, mget_dedup_keys, false, "If true, MGET will deduplicate keys");
41+
3842
namespace dfly {
3943

4044
namespace {
@@ -547,23 +551,42 @@ MGetResponse OpMGet(fb2::BlockingCounter wait_bc, uint8_t fetch_mask, const Tran
547551

548552
absl::InlinedVector<Item, 32> items(keys.Size());
549553

554+
// First, fetch all iterators and count total size ahead
555+
size_t total_size = 0;
556+
unsigned index = 0;
557+
static bool mget_prefetch_keys = absl::GetFlag(FLAGS_mget_prefetch_keys);
558+
static bool mget_dedup_keys = absl::GetFlag(FLAGS_mget_dedup_keys);
559+
550560
// We can not make it thread-local because we may preempt during the Find loop due to
551561
// serialization with the bumpup calls.
552562

553563
// TODO: consider separating BumpUps from finds because it becomes too complicated
554564
// to reason about.
555565
absl::flat_hash_map<string_view, unsigned> key_index;
566+
if (mget_dedup_keys) {
567+
key_index.reserve(keys.Size());
568+
}
556569

557-
// First, fetch all iterators and count total size ahead
558-
size_t total_size = 0;
559-
unsigned index = 0;
560-
key_index.reserve(keys.Size());
570+
PrimeTable& pt = db_slice.GetDBTable(t->GetDbIndex())->prime;
571+
572+
constexpr unsigned kPrefetchLimit = 32;
573+
if (mget_prefetch_keys) {
574+
unsigned prefetched = 0;
575+
for (string_view key : keys) {
576+
pt.Prefetch(key);
577+
if (++prefetched >= kPrefetchLimit) {
578+
break;
579+
}
580+
}
581+
}
561582

562583
for (string_view key : keys) {
563-
auto [it, inserted] = key_index.try_emplace(key, index);
564-
if (!inserted) { // duplicate -> point to the first occurrence.
565-
items[index++].source_index = it->second;
566-
continue;
584+
if (mget_dedup_keys) {
585+
auto [it, inserted] = key_index.try_emplace(key, index);
586+
if (!inserted) { // duplicate -> point to the first occurrence.
587+
items[index++].source_index = it->second;
588+
continue;
589+
}
567590
}
568591

569592
auto it_res = db_slice.FindReadOnly(t->GetDbContext(), key, OBJ_STRING);
@@ -618,8 +641,8 @@ MGetResponse OpMGet(fb2::BlockingCounter wait_bc, uint8_t fetch_mask, const Tran
618641
}
619642
}
620643
}
621-
key_index.clear();
622644

645+
key_index.clear();
623646
return response;
624647
}
625648

src/server/tiered_storage_test.cc

+4-2
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,10 @@ TEST_F(TieredStorageTest, FlushAll) {
276276
Metrics metrics;
277277
ExpectConditionWithinTimeout([&] {
278278
metrics = GetMetrics();
279-
return metrics.events.hits > 2;
279+
280+
// Note that metrics.events.hits is not consistent with total_fetches
281+
// and it can happen that hits is greater than total_fetches due to in-progress reads.
282+
return metrics.tiered_stats.total_fetches > 2;
280283
});
281284
LOG(INFO) << FormatMetrics(metrics);
282285

@@ -290,7 +293,6 @@ TEST_F(TieredStorageTest, FlushAll) {
290293
LOG(INFO) << FormatMetrics(metrics);
291294

292295
EXPECT_EQ(metrics.db_stats.front().tiered_entries, 0u);
293-
EXPECT_GT(metrics.tiered_stats.total_fetches, 2u);
294296
}
295297

296298
TEST_F(TieredStorageTest, FlushPending) {

0 commit comments

Comments
 (0)