Skip to content

Commit 40c6fc3

Browse files
committed
Add power and mementone, adapt balance for non random removals
1 parent 07df6bb commit 40c6fc3

File tree

8 files changed

+489
-7
lines changed

8 files changed

+489
-7
lines changed

CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,20 +30,24 @@ add_executable(speed_test speed_test.cpp
3030
vcpkg.json
3131
memento/memento.h
3232
memento/mementoengine.h
33+
memento/mementoneengine.h
3334
anchor/AnchorHashQre.cpp anchor/AnchorHashQre.hpp anchor/misc/crc32c_sse42_u64.h
3435
anchor/anchorengine.h
3536
memento/mashtable.h
3637
jump/jumpengine.h
38+
power/powerengine.h
3739
)
3840

3941
add_executable(balance balance.cpp
4042
vcpkg.json
4143
memento/memento.h
4244
memento/mementoengine.h
45+
memento/mementoneengine.h
4346
anchor/AnchorHashQre.cpp anchor/AnchorHashQre.hpp anchor/misc/crc32c_sse42_u64.h
4447
anchor/anchorengine.h
4548
memento/mashtable.h
4649
jump/jumpengine.h
50+
power/powerengine.h
4751
)
4852

4953
add_executable(mashtable_test mashtable_test.cpp memento/mashtable.h)

anchor/anchorengine.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ class AnchorEngine final {
2828
* This version uses the same hash function as Anchor
2929
*
3030
* @param key the key to map
31-
* @param key the initial seed for CRC32c
31+
* @param seed the initial seed for CRC32c
3232
* @return the related bucket
3333
*/
3434
uint32_t getBucketCRC32c(uint64_t key, uint64_t seed) noexcept

balance.cpp

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
#include "memento/mashtable.h"
2626
#include "memento/mementoengine.h"
2727
#include "jump/jumpengine.h"
28+
#include "power/powerengine.h"
29+
#include "memento/mementoneengine.h"
2830
#include <fmt/core.h>
2931
#include <fstream>
3032
#include <unordered_map>
@@ -63,8 +65,8 @@ int bench(const std::string_view name, const std::string &filename,
6365
uint32_t removed = rand() % working_set;
6466
#endif
6567
if (bucket_status[removed] == 1) {
66-
engine.removeBucket(removed);
67-
bucket_status[removed] = 0;
68+
auto rnode = engine.removeBucket(removed);
69+
bucket_status[rnode] = 0; // Remove the actually removed node
6870
i++;
6971
}
7072
}
@@ -128,7 +130,7 @@ int main(int argc, char *argv[]) {
128130
cxxopts::Options options("speed_test", "MementoHash vs AnchorHash benchmark");
129131
options.add_options()(
130132
"Algorithm",
131-
"Algorithm (null|baseline|anchor|memento|mementoboost|mementomash|mementostd|mementogtl|jump)",
133+
"Algorithm (null|baseline|anchor|memento|mementoboost|mementomash|mementostd|mementogtl|jump|power|mementone)",
132134
cxxopts::value<std::string>())(
133135
"AnchorSet", "Size of the AnchorSet (ignored by Memento)",
134136
cxxopts::value<int>())("WorkingSet", "Size of the WorkingSet",
@@ -207,6 +209,14 @@ int main(int argc, char *argv[]) {
207209
return bench<JumpEngine>("JumpEngine", filename,
208210
anchor_set, working_set,
209211
num_removals, num_keys);
212+
} else if (algorithm == "power") {
213+
return bench<PowerEngine>("PowerEngine", filename,
214+
anchor_set, working_set,
215+
num_removals, num_keys);
216+
} else if (algorithm == "mementone") {
217+
return bench<MementoneEngine<boost::unordered_flat_map>>(
218+
"Mementone<boost::unordered_flat_map>", filename, anchor_set, working_set,
219+
num_removals, num_keys);
210220
} else {
211221
fmt::println("Unknown algorithm {}", algorithm);
212222
return 2;

jump/jumpengine.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class JumpEngine final {
3636
* This implementation is the same as the one provided by the Jump authors
3737
*
3838
* @param key the key to map
39-
* @param key the initial seed for CRC32c
39+
* @param seed the initial seed for CRC32c
4040
* @return the related bucket
4141
*/
4242
uint32_t getBucketCRC32c(uint64_t key, uint64_t seed) noexcept

memento/mementoengine.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ class MementoEngine final {
8585
* This version uses the same hash function as Anchor
8686
*
8787
* @param key the key to map
88-
* @param key the initial seed for CRC32c
88+
* @param seed the initial seed for CRC32c
8989
* @return the related bucket
9090
*/
9191
uint32_t getBucketCRC32c(uint64_t key, uint64_t seed) const noexcept {

memento/mementoneengine.h

Lines changed: 282 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
/*
2+
* Copyright (c) 2023 Amos Brocco.
3+
*
4+
* This program is free software: you can redistribute it and/or modify
5+
* it under the terms of the GNU General Public License as published by
6+
* the Free Software Foundation, version 3.
7+
*
8+
* This program is distributed in the hope that it will be useful, but
9+
* WITHOUT ANY WARRANTY; without even the implied warranty of
10+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11+
* General Public License for more details.
12+
*
13+
* You should have received a copy of the GNU General Public License
14+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
15+
*/
16+
#ifndef MEMENTONEENGINE_H
17+
#define MEMENTONEENGINE_H
18+
#include "memento.h"
19+
#include "pcg_random.hpp"
20+
#include <cmath>
21+
#include <xxhash.h>
22+
23+
template <template <typename...> class MementoMap, typename... Args>
24+
class MementoneEngine final {
25+
public:
26+
/**
27+
* Creates a new MementoHash engine.
28+
*
29+
* @param size initial number of working buckets (0 < size)
30+
*/
31+
MementoneEngine(uint32_t, uint32_t size)
32+
: m_bArraySize{size}, m_m{smallestPow2(size)}, m_mH{m_m >> 1}, m_mHm1{m_mH - 1}, m_mm1{m_m - 1}, m_lastRemoved{size} {
33+
}
34+
35+
/**
36+
* Returns the bucket where the given key should be mapped.
37+
* This version uses the same hash function as Anchor
38+
*
39+
* @param key the key to map
40+
* @param seed the initial seed for CRC32c
41+
* @return the related bucket
42+
*/
43+
uint32_t getBucketCRC32c(uint64_t key, uint64_t seed) const noexcept {
44+
const auto hash = crc32c_sse42_u64(key, seed);
45+
46+
/*
47+
* We invoke PowerHash to get a bucket
48+
* in the range [0,bArraySize-1].
49+
*/
50+
auto b = powerConsistentHash(hash, m_bArraySize, m_mm1, m_mHm1);
51+
52+
/*
53+
* We check if the bucket was removed, if not we are done.
54+
* If the bucket was removed the replacing bucket is >= 0,
55+
* otherwise it is -1.
56+
*/
57+
auto replacer = m_memento.replacer(b);
58+
while (replacer >= 0) {
59+
60+
/*
61+
* If the bucket was removed, we must re-hash and find
62+
* a new bucket in the remaining slots. To know the
63+
* remaining slots, we look at 'replacer' that also
64+
* represents the size of the working set when the bucket
65+
* was removed and get a new bucket in [0,replacer-1].
66+
*/
67+
const auto h = crc32c_sse42_u64(key, b);
68+
b = h % replacer;
69+
70+
/*
71+
* If we hit a removed bucket we follow the replacements
72+
* until we get a working bucket or a bucket in the range
73+
* [0,replacer-1]
74+
*/
75+
auto r = m_memento.replacer(b);
76+
while (r >= replacer) {
77+
b = r;
78+
r = m_memento.replacer(b);
79+
}
80+
81+
/* Finally we update the entry of the external loop. */
82+
replacer = r;
83+
}
84+
85+
return b;
86+
}
87+
88+
/**
89+
* Adds a new bucket to the engine.
90+
*
91+
* @return the added bucket
92+
*/
93+
uint32_t addBucket() noexcept {
94+
/* The new bucket to add is the last removed one. */
95+
auto bucket = m_lastRemoved;
96+
97+
/**
98+
* We restore the bucket from the replacement set
99+
* and update the value of the last removed bucket.
100+
*/
101+
m_lastRemoved = m_memento.restore(bucket);
102+
103+
/**
104+
* If the restored bucket is 'bArraySize'
105+
* we must update the actual size by 1.
106+
*/
107+
m_bArraySize = m_bArraySize > bucket ? m_bArraySize : bucket + 1;
108+
109+
updatePowerParameters();
110+
111+
return bucket;
112+
}
113+
114+
/**
115+
* Removes the given bucket from the engine.
116+
*
117+
* @param bucket the bucket to remove
118+
* @return the removed bucket
119+
*/
120+
uint32_t removeBucket(uint32_t bucket) noexcept {
121+
/*
122+
* If the lookup table is empty and the bucket to remove is the last one,
123+
* we are in the same use case as JumpHash. In this case we don't need
124+
* to remember the bucket, we just need to reduce the size of the b-array.
125+
*/
126+
if ((m_lastRemoved == m_bArraySize) && bucket == m_bArraySize - 1) {
127+
m_lastRemoved = m_bArraySize = bucket;
128+
updatePowerParameters();
129+
return bucket;
130+
}
131+
132+
/* Otherwise, we add the entry to the memento table using the removed bucket
133+
* as the key. */
134+
m_lastRemoved = m_memento.remember(bucket, size() - 1, m_lastRemoved);
135+
136+
updatePowerParameters();
137+
138+
return bucket;
139+
}
140+
141+
/**
142+
* Returns the size of the working set.
143+
*
144+
* @return size of the working set.
145+
*/
146+
uint32_t size() const noexcept { return m_bArraySize - m_memento.size(); }
147+
148+
/**
149+
* Returns the size of the b-array.
150+
*
151+
* @return the size of the b-array.
152+
*/
153+
uint32_t bArraySize() const noexcept { return m_bArraySize; }
154+
155+
private:
156+
157+
void updatePowerParameters() noexcept {
158+
m_m = smallestPow2(m_bArraySize);
159+
m_mH = m_m >> 1;
160+
m_mHm1 = m_mH - 1;
161+
m_mm1 = m_m - 1;
162+
}
163+
164+
// From AnchorHash
165+
static uint32_t crc32c_sse42_u64(uint64_t key, uint64_t seed) {
166+
__asm__ volatile("crc32q %[key], %[seed];"
167+
: [seed] "+r"(seed)
168+
: [key] "rm"(key));
169+
return seed;
170+
}
171+
172+
static uint32_t smallestPow2(uint32_t x) {
173+
--x;
174+
x |= x >> 1;
175+
x |= x >> 2;
176+
x |= x >> 4;
177+
x |= x >> 8;
178+
x |= x >> 16;
179+
return x+1;
180+
}
181+
182+
/**
183+
* Powerhash algorithm
184+
*
185+
* @param key
186+
* @param n
187+
* @param m-1
188+
* @param m/2-1
189+
* @return
190+
*/
191+
static uint32_t powerConsistentHash(uint32_t k, uint32_t n, uint32_t mm1, uint mHm1) noexcept {
192+
pcg32 rng;
193+
// r1 = f (key, m) (we pass m-1 because f expects that)
194+
auto r1 = f(k, mm1, rng);
195+
if (r1 < n) {
196+
return r1;
197+
}
198+
// r2 = g(key, n, m/2 − 1)
199+
auto r2 = g(k, n, mHm1, rng);
200+
if (r2 > mHm1) {
201+
return r2;
202+
}
203+
// f (key, m/2) (we pass m/2-1 because f expects that)
204+
return f(k, mHm1, rng);
205+
}
206+
207+
/**
208+
* Algorithm-f, described in Section VI.A, pages 7 and 8
209+
*
210+
* @param key
211+
* @param m
212+
* @return
213+
*/
214+
static uint32_t f(uint32_t key, uint32_t mm1, pcg32& rng) {
215+
// (...) extracts log2(m) bits from the given key
216+
auto kBits = (key & mm1);
217+
if (kBits == 0) {
218+
return 0;
219+
}
220+
// [Computes] the bit index of the most significant bit set
221+
// to 1 in kBits
222+
auto j = (sizeof(kBits)<<3) - __builtin_clz(kBits) - 1;
223+
// (...) computes 2^j
224+
auto h = static_cast<uint32_t>(1) << j;
225+
// [must return] a pseudo-random integer deterministacally based on
226+
// the values of key and j
227+
rng.seed(key, j);
228+
// (...) produces a random integer r in the ranger [h,2h-1] with equal
229+
// probability
230+
auto r = h + (rng() & (h - 1));
231+
return r;
232+
}
233+
234+
/**
235+
* Algorithm-g, described in Section VI.B, pages 8 and 9
236+
*
237+
* @param key
238+
* @param n
239+
* @param s
240+
* @return
241+
*/
242+
static uint32_t g(uint32_t key, uint32_t n, uint32_t s, pcg32& rng) {
243+
auto x = s; // (...) Initially, x is set to the value of s
244+
for (;;) {
245+
// (...) 1. Generate U
246+
// U denotes the next random number from a generator U (0, 1)
247+
// that generates random numbers
248+
// uniformly over range (0, 1) and deterministically based on the given key.
249+
auto u = (static_cast<double>(rng())/static_cast<double>(rng.max()));
250+
// (...) 2. Compute r = min{j: U>(x+1)/(j+1)
251+
auto r = (uint32_t) ceil((static_cast<uint64_t>(x) + 1) / u) - 1;
252+
// (...) 3. Set x = r if r < n
253+
if (r < n) {
254+
x = r;
255+
} else {
256+
// (...) Otherwise, the algorithm returns the current
257+
// value of x as the result
258+
return x;
259+
}
260+
}
261+
}
262+
263+
/* Number of nodes in the cluster */
264+
uint32_t m_bArraySize;
265+
266+
/* Smallest power of 2 greater or equal to n */
267+
uint32_t m_m;
268+
269+
/* Smallest power of 2 greater or equal to n minus 1 */
270+
uint32_t m_mm1;
271+
272+
/* Half of m */
273+
uint32_t m_mH;
274+
275+
/* Half of m minus 1 */
276+
uint32_t m_mHm1;
277+
278+
Memento<MementoMap> m_memento;
279+
uint32_t m_lastRemoved;
280+
};
281+
282+
#endif // MEMENTONEENGINE_H

0 commit comments

Comments
 (0)