|
| 1 | +/* |
| 2 | + * Copyright (c) 2023 Amos Brocco. |
| 3 | + * |
| 4 | + * This program is free software: you can redistribute it and/or modify |
| 5 | + * it under the terms of the GNU General Public License as published by |
| 6 | + * the Free Software Foundation, version 3. |
| 7 | + * |
| 8 | + * This program is distributed in the hope that it will be useful, but |
| 9 | + * WITHOUT ANY WARRANTY; without even the implied warranty of |
| 10 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 11 | + * General Public License for more details. |
| 12 | + * |
| 13 | + * You should have received a copy of the GNU General Public License |
| 14 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 15 | + */ |
| 16 | +#ifndef MEMENTONEENGINE_H |
| 17 | +#define MEMENTONEENGINE_H |
| 18 | +#include "memento.h" |
| 19 | +#include "pcg_random.hpp" |
| 20 | +#include <cmath> |
| 21 | +#include <xxhash.h> |
| 22 | + |
| 23 | +template <template <typename...> class MementoMap, typename... Args> |
| 24 | +class MementoneEngine final { |
| 25 | +public: |
| 26 | + /** |
| 27 | + * Creates a new MementoHash engine. |
| 28 | + * |
| 29 | + * @param size initial number of working buckets (0 < size) |
| 30 | + */ |
| 31 | + MementoneEngine(uint32_t, uint32_t size) |
| 32 | + : m_bArraySize{size}, m_m{smallestPow2(size)}, m_mH{m_m >> 1}, m_mHm1{m_mH - 1}, m_mm1{m_m - 1}, m_lastRemoved{size} { |
| 33 | + } |
| 34 | + |
| 35 | + /** |
| 36 | + * Returns the bucket where the given key should be mapped. |
| 37 | + * This version uses the same hash function as Anchor |
| 38 | + * |
| 39 | + * @param key the key to map |
| 40 | + * @param seed the initial seed for CRC32c |
| 41 | + * @return the related bucket |
| 42 | + */ |
| 43 | + uint32_t getBucketCRC32c(uint64_t key, uint64_t seed) const noexcept { |
| 44 | + const auto hash = crc32c_sse42_u64(key, seed); |
| 45 | + |
| 46 | + /* |
| 47 | + * We invoke PowerHash to get a bucket |
| 48 | + * in the range [0,bArraySize-1]. |
| 49 | + */ |
| 50 | + auto b = powerConsistentHash(hash, m_bArraySize, m_mm1, m_mHm1); |
| 51 | + |
| 52 | + /* |
| 53 | + * We check if the bucket was removed, if not we are done. |
| 54 | + * If the bucket was removed the replacing bucket is >= 0, |
| 55 | + * otherwise it is -1. |
| 56 | + */ |
| 57 | + auto replacer = m_memento.replacer(b); |
| 58 | + while (replacer >= 0) { |
| 59 | + |
| 60 | + /* |
| 61 | + * If the bucket was removed, we must re-hash and find |
| 62 | + * a new bucket in the remaining slots. To know the |
| 63 | + * remaining slots, we look at 'replacer' that also |
| 64 | + * represents the size of the working set when the bucket |
| 65 | + * was removed and get a new bucket in [0,replacer-1]. |
| 66 | + */ |
| 67 | + const auto h = crc32c_sse42_u64(key, b); |
| 68 | + b = h % replacer; |
| 69 | + |
| 70 | + /* |
| 71 | + * If we hit a removed bucket we follow the replacements |
| 72 | + * until we get a working bucket or a bucket in the range |
| 73 | + * [0,replacer-1] |
| 74 | + */ |
| 75 | + auto r = m_memento.replacer(b); |
| 76 | + while (r >= replacer) { |
| 77 | + b = r; |
| 78 | + r = m_memento.replacer(b); |
| 79 | + } |
| 80 | + |
| 81 | + /* Finally we update the entry of the external loop. */ |
| 82 | + replacer = r; |
| 83 | + } |
| 84 | + |
| 85 | + return b; |
| 86 | + } |
| 87 | + |
| 88 | + /** |
| 89 | + * Adds a new bucket to the engine. |
| 90 | + * |
| 91 | + * @return the added bucket |
| 92 | + */ |
| 93 | + uint32_t addBucket() noexcept { |
| 94 | + /* The new bucket to add is the last removed one. */ |
| 95 | + auto bucket = m_lastRemoved; |
| 96 | + |
| 97 | + /** |
| 98 | + * We restore the bucket from the replacement set |
| 99 | + * and update the value of the last removed bucket. |
| 100 | + */ |
| 101 | + m_lastRemoved = m_memento.restore(bucket); |
| 102 | + |
| 103 | + /** |
| 104 | + * If the restored bucket is 'bArraySize' |
| 105 | + * we must update the actual size by 1. |
| 106 | + */ |
| 107 | + m_bArraySize = m_bArraySize > bucket ? m_bArraySize : bucket + 1; |
| 108 | + |
| 109 | + updatePowerParameters(); |
| 110 | + |
| 111 | + return bucket; |
| 112 | + } |
| 113 | + |
| 114 | + /** |
| 115 | + * Removes the given bucket from the engine. |
| 116 | + * |
| 117 | + * @param bucket the bucket to remove |
| 118 | + * @return the removed bucket |
| 119 | + */ |
| 120 | + uint32_t removeBucket(uint32_t bucket) noexcept { |
| 121 | + /* |
| 122 | + * If the lookup table is empty and the bucket to remove is the last one, |
| 123 | + * we are in the same use case as JumpHash. In this case we don't need |
| 124 | + * to remember the bucket, we just need to reduce the size of the b-array. |
| 125 | + */ |
| 126 | + if ((m_lastRemoved == m_bArraySize) && bucket == m_bArraySize - 1) { |
| 127 | + m_lastRemoved = m_bArraySize = bucket; |
| 128 | + updatePowerParameters(); |
| 129 | + return bucket; |
| 130 | + } |
| 131 | + |
| 132 | + /* Otherwise, we add the entry to the memento table using the removed bucket |
| 133 | + * as the key. */ |
| 134 | + m_lastRemoved = m_memento.remember(bucket, size() - 1, m_lastRemoved); |
| 135 | + |
| 136 | + updatePowerParameters(); |
| 137 | + |
| 138 | + return bucket; |
| 139 | + } |
| 140 | + |
| 141 | + /** |
| 142 | + * Returns the size of the working set. |
| 143 | + * |
| 144 | + * @return size of the working set. |
| 145 | + */ |
| 146 | + uint32_t size() const noexcept { return m_bArraySize - m_memento.size(); } |
| 147 | + |
| 148 | + /** |
| 149 | + * Returns the size of the b-array. |
| 150 | + * |
| 151 | + * @return the size of the b-array. |
| 152 | + */ |
| 153 | + uint32_t bArraySize() const noexcept { return m_bArraySize; } |
| 154 | + |
| 155 | +private: |
| 156 | + |
| 157 | + void updatePowerParameters() noexcept { |
| 158 | + m_m = smallestPow2(m_bArraySize); |
| 159 | + m_mH = m_m >> 1; |
| 160 | + m_mHm1 = m_mH - 1; |
| 161 | + m_mm1 = m_m - 1; |
| 162 | + } |
| 163 | + |
/**
 * CRC32c step over one 64-bit word (taken from AnchorHash).
 * Issues the x86-64 'crc32q' instruction, which folds 'key' into the
 * running CRC held in 'seed'.
 *
 * @param key the 64-bit value to hash
 * @param seed the initial CRC value
 * @return the updated CRC, narrowed to 32 bits
 */
static uint32_t crc32c_sse42_u64(uint64_t key, uint64_t seed) {
    uint64_t crc = seed;
    __asm__ volatile("crc32q %[key], %[crc];"
                     : [crc] "+r"(crc)
                     : [key] "rm"(key));
    return static_cast<uint32_t>(crc);
}
| 171 | + |
/**
 * Returns the smallest power of two greater than or equal to x.
 *
 * x == 0 yields 1 (2^0); without the explicit check the decrement
 * below would wrap around and the function would return 0, which is
 * not a power of two.
 *
 * For x > 2^31 the result (2^32) does not fit in 32 bits and the
 * function returns 0 due to unsigned wrap-around.
 *
 * @param x the lower bound
 * @return the smallest power of two >= x
 */
static uint32_t smallestPow2(uint32_t x) noexcept {
    if (x == 0) {
        return 1;
    }
    /* Smear the highest set bit of x-1 into every lower position, then add 1. */
    --x;
    x |= x >> 1;
    x |= x >> 2;
    x |= x >> 4;
    x |= x >> 8;
    x |= x >> 16;
    return x + 1;
}
| 181 | + |
| 182 | + /** |
| 183 | + * Powerhash algorithm |
| 184 | + * |
| 185 | + * @param key |
| 186 | + * @param n |
| 187 | + * @param m-1 |
| 188 | + * @param m/2-1 |
| 189 | + * @return |
| 190 | + */ |
| 191 | + static uint32_t powerConsistentHash(uint32_t k, uint32_t n, uint32_t mm1, uint mHm1) noexcept { |
| 192 | + pcg32 rng; |
| 193 | + // r1 = f (key, m) (we pass m-1 because f expects that) |
| 194 | + auto r1 = f(k, mm1, rng); |
| 195 | + if (r1 < n) { |
| 196 | + return r1; |
| 197 | + } |
| 198 | + // r2 = g(key, n, m/2 − 1) |
| 199 | + auto r2 = g(k, n, mHm1, rng); |
| 200 | + if (r2 > mHm1) { |
| 201 | + return r2; |
| 202 | + } |
| 203 | + // f (key, m/2) (we pass m/2-1 because f expects that) |
| 204 | + return f(k, mHm1, rng); |
| 205 | + } |
| 206 | + |
| 207 | + /** |
| 208 | + * Algorithm-f, described in Section VI.A, pages 7 and 8 |
| 209 | + * |
| 210 | + * @param key |
| 211 | + * @param m |
| 212 | + * @return |
| 213 | + */ |
| 214 | + static uint32_t f(uint32_t key, uint32_t mm1, pcg32& rng) { |
| 215 | + // (...) extracts log2(m) bits from the given key |
| 216 | + auto kBits = (key & mm1); |
| 217 | + if (kBits == 0) { |
| 218 | + return 0; |
| 219 | + } |
| 220 | + // [Computes] the bit index of the most significant bit set |
| 221 | + // to 1 in kBits |
| 222 | + auto j = (sizeof(kBits)<<3) - __builtin_clz(kBits) - 1; |
| 223 | + // (...) computes 2^j |
| 224 | + auto h = static_cast<uint32_t>(1) << j; |
| 225 | + // [must return] a pseudo-random integer deterministacally based on |
| 226 | + // the values of key and j |
| 227 | + rng.seed(key, j); |
| 228 | + // (...) produces a random integer r in the ranger [h,2h-1] with equal |
| 229 | + // probability |
| 230 | + auto r = h + (rng() & (h - 1)); |
| 231 | + return r; |
| 232 | + } |
| 233 | + |
| 234 | + /** |
| 235 | + * Algorithm-g, described in Section VI.B, pages 8 and 9 |
| 236 | + * |
| 237 | + * @param key |
| 238 | + * @param n |
| 239 | + * @param s |
| 240 | + * @return |
| 241 | + */ |
| 242 | + static uint32_t g(uint32_t key, uint32_t n, uint32_t s, pcg32& rng) { |
| 243 | + auto x = s; // (...) Initially, x is set to the value of s |
| 244 | + for (;;) { |
| 245 | + // (...) 1. Generate U |
| 246 | + // U denotes the next random number from a generator U (0, 1) |
| 247 | + // that generates random numbers |
| 248 | + // uniformly over range (0, 1) and deterministically based on the given key. |
| 249 | + auto u = (static_cast<double>(rng())/static_cast<double>(rng.max())); |
| 250 | + // (...) 2. Compute r = min{j: U>(x+1)/(j+1) |
| 251 | + auto r = (uint32_t) ceil((static_cast<uint64_t>(x) + 1) / u) - 1; |
| 252 | + // (...) 3. Set x = r if r < n |
| 253 | + if (r < n) { |
| 254 | + x = r; |
| 255 | + } else { |
| 256 | + // (...) Otherwise, the algorithm returns the current |
| 257 | + // value of x as the result |
| 258 | + return x; |
| 259 | + } |
| 260 | + } |
| 261 | + } |
| 262 | + |
| 263 | + /* Size of the b-array: working buckets plus removed buckets still tracked by the memento */ |
| 264 | + uint32_t m_bArraySize; |
| 265 | + |
| 266 | + /* Smallest power of 2 greater or equal to n */ |
| 267 | + uint32_t m_m; |
| 268 | + |
| 269 | + /* m - 1, where m is the smallest power of 2 greater or equal to n */ |
| 270 | + uint32_t m_mm1; |
| 271 | + |
| 272 | + /* Half of m */ |
| 273 | + uint32_t m_mH; |
| 274 | + |
| 275 | + /* Half of m minus 1 */ |
| 276 | + uint32_t m_mHm1; |
| 277 | + |
| 278 | + Memento<MementoMap> m_memento; |
| 279 | + uint32_t m_lastRemoved; |
| 280 | +}; |
| 281 | + |
| 282 | +#endif // MEMENTONEENGINE_H |
0 commit comments