Skip to content

Commit 53f7e9e

Browse files
committed
save state
1 parent 0e2d990 commit 53f7e9e

File tree

6 files changed

+256
-44
lines changed

6 files changed

+256
-44
lines changed

include/config.hpp

+15-9
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,13 @@ using position_t = uint32_t;
1111
using label_t = uint8_t;
1212
static const position_t kFanout = 256;
1313

14-
using suffix_t = uint8_t;
15-
static const suffix_t kSuffixHashMask = 0xFF;
14+
//using suffix_t = uint8_t;
15+
//static const suffix_t kSuffixHashMask = 0xFF;
1616

1717
using word_t = uint64_t;
1818
static const unsigned kWordSize = 64;
1919
static const word_t kMsbMask = 0x8000000000000000;
20+
static const word_t kOneMask = 0xFFFFFFFFFFFFFFFF;
2021

2122
static const bool kIncludeDense = true;
2223
static const uint32_t kSparseDenseRatio = 64;
@@ -28,15 +29,20 @@ enum SuffixType {
2829
kReal = 2
2930
};
3031

31-
std::string uint64ToString(uint64_t key) {
32-
uint64_t endian_swapped_key = __builtin_bswap64(key);
33-
return std::string(reinterpret_cast<const char*>(&endian_swapped_key), 8);
32+
// Serializes a 64-bit word into an 8-byte string, most-significant byte first.
//
// The bytes are extracted with shifts instead of __builtin_bswap64 plus a
// reinterpret_cast of the integer's storage, so the output is the same on
// any host byte order and does not require a compiler-specific builtin.
//
// @param word  value to serialize
// @return      string of exactly 8 bytes; byte 0 holds bits 63..56 of word
static std::string uint64ToString(const uint64_t word) {
    const size_t num_bytes = sizeof(word);
    std::string bytes(num_bytes, '\0');
    for (size_t i = 0; i < num_bytes; i++) {
        const unsigned shift = 8 * static_cast<unsigned>(num_bytes - 1 - i);
        bytes[i] = static_cast<char>((word >> shift) & 0xFF);
    }
    return bytes;
}
3536

36-
uint64_t stringToUint64(std::string str_key) {
37-
uint64_t int_key = 0;
38-
memcpy(reinterpret_cast<char*>(&int_key), str_key.data(), 8);
39-
return __builtin_bswap64(int_key);
37+
// Parses up to the first 8 bytes of str_word as a 64-bit word, treating
// byte 0 as the most-significant byte.
//
// Strings shorter than 8 bytes are handled as if padded with trailing zero
// bytes; the previous memcpy of a fixed 8 bytes read past the end of such
// strings. Bytes beyond the eighth are ignored.
//
// @param str_word  byte string to decode
// @return          decoded word
static uint64_t stringToUint64(const std::string& str_word) {
    uint64_t int_word = 0;
    const size_t num_bytes = sizeof(int_word);
    for (size_t i = 0; i < num_bytes; i++) {
        int_word <<= 8;
        if (i < str_word.size()) {
            // Go through unsigned char so bytes >= 0x80 are not sign-extended.
            int_word |= static_cast<unsigned char>(str_word[i]);
        }
    }
    return int_word;
}
42+
43+
// Keeps only the content_len least-significant bits of word and zeroes
// every bit above them.
//
// @param word         word to mask in place
// @param content_len  number of low-order bits to keep
//
// content_len == 0 clears the whole word and content_len >= kWordSize leaves
// it untouched. Both cases are handled explicitly because shifting a 64-bit
// value by 64 or more bits is undefined behaviour.
static void clearMSBits(word_t& word, const position_t content_len) {
    if (content_len >= kWordSize)
        return;
    if (content_len == 0) {
        word = 0;
        return;
    }
    const position_t num_cleared_bits = kWordSize - content_len;
    word = (word << num_cleared_bits) >> num_cleared_bits;
}
4147

4248
} // namespace surf

include/hash.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,4 @@ inline uint32_t suffixHash(const char* key, const int keylen) {
5656
} // namespace surf
5757

5858
#endif // HASH_H_
59+

include/louds_dense.hpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ LoudsDense::LoudsDense(const SuRFBuilder* builder) {
114114
label_bitmaps_ = new BitvectorRank(kRankBasicBlockSize, builder->getBitmapLabels(), num_bits_per_level, 0, height_);
115115
child_indicator_bitmaps_ = new BitvectorRank(kRankBasicBlockSize, builder->getBitmapChildIndicatorBits(), num_bits_per_level, 0, height_);
116116
prefixkey_indicator_bits_ = new BitvectorRank(kRankBasicBlockSize, builder->getPrefixkeyIndicatorBits(), builder->getNodeCounts(), 0, height_);
117-
suffixes_ = new SuffixVector(builder->getSuffixConfig(), builder->getSuffixes(), 0, height_);
117+
suffixes_ = new SuffixVector(builder->getSuffixType(), builder->getSuffixes(), 0, height_);
118118
}
119119

120120
bool LoudsDense::lookupKey(const std::string& key, position_t& out_node_num) const {
@@ -214,7 +214,7 @@ inline position_t LoudsDense::getNextPos(const position_t pos) const {
214214
inline void LoudsDense::compareSuffixGreaterThan(const position_t pos, const std::string& key, const level_t level, const bool inclusive, LoudsDense::Iter& iter) const {
215215
if (suffixes_->getType() == kReal) {
216216
position_t suffix_pos = getSuffixPos(pos, false);
217-
int compare = suffixes_->compare(suffix_pos, key[level]);
217+
int compare = suffixes_->compare(suffix_pos, key, level);
218218
if ((compare < 0) || (compare == 0 && !inclusive))
219219
return iter++;
220220
} else {

include/louds_sparse.hpp

+28-4
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
#include "label_vector.hpp"
88
#include "rank.hpp"
99
#include "select.hpp"
10-
#include "suffix_vector.hpp"
10+
#include "suffix.hpp"
1111
#include "surf_builder.hpp"
1212

1313
namespace surf {
@@ -27,6 +27,7 @@ class LoudsSparse {
2727
}
2828

2929
bool isValid() const { return is_valid_; };
30+
int compare(const std::string& key);
3031
std::string getKey() const;
3132

3233
position_t getStartNodeNum() const { return start_node_num_; };
@@ -100,7 +101,7 @@ class LoudsSparse {
100101
LabelVector* labels_;
101102
BitvectorRank* child_indicator_bits_;
102103
BitvectorSelect* louds_bits_;
103-
SuffixVector* suffixes_;
104+
BitvectorSuffix* suffixes_;
104105
};
105106

106107

@@ -121,10 +122,15 @@ LoudsSparse::LoudsSparse(const SuRFBuilder* builder) {
121122
for (level_t level = 0; level < height_; level++)
122123
num_items_per_level.push_back(builder->getLabels()[level].size());
123124

125+
position_t suffix_len = builder->getSuffixLen();
126+
std::vector<position_t> num_suffix_bits_per_level;
127+
for (level_t level = 0; level < height_; level++)
128+
num_suffix_bits_per_level.push_back(builder->getSuffixCounts()[level] * suffix_len);
129+
124130
labels_ = new LabelVector(builder->getLabels(), start_level_, height_);
125131
child_indicator_bits_ = new BitvectorRank(kRankBasicBlockSize, builder->getChildIndicatorBits(), num_items_per_level, start_level_, height_);
126132
louds_bits_ = new BitvectorSelect(kSelectSampleInterval, builder->getLoudsBits(), num_items_per_level, start_level_, height_);
127-
suffixes_ = new SuffixVector(builder->getSuffixConfig(), builder->getSuffixes(), start_level_, height_);
133+
// suffixes_ is declared as BitvectorSuffix*, and this argument list (suffix type, suffix length in bits, per-level words, per-level bit counts, level range) is the BitvectorSuffix constructor's.
suffixes_ = new BitvectorSuffix(builder->getSuffixType(), suffix_len, builder->getSuffixes(), num_suffix_bits_per_level, start_level_, height_);
128134
}
129135

130136
bool LoudsSparse::lookupKey(const std::string& key, const position_t in_node_num) const {
@@ -222,7 +228,7 @@ inline void LoudsSparse::moveToLeftInNextSubtrie(position_t pos, const position_
222228
inline void LoudsSparse::compareSuffixGreaterThan(const position_t pos, const std::string& key, const level_t level, const bool inclusive, LoudsSparse::Iter& iter) const {
223229
if (suffixes_->getType() == kReal) {
224230
position_t suffix_pos = getSuffixPos(pos);
225-
int compare = suffixes_->compare(suffix_pos, key[level]);
231+
int compare = suffixes_->compare(suffix_pos, key, level);
226232
if ((compare < 0) || (compare == 0 && !inclusive))
227233
return iter++;
228234
} else {
@@ -234,7 +240,24 @@ inline void LoudsSparse::compareSuffixGreaterThan(const position_t pos, const st
234240

235241
//============================================================================
236242

243+
int LoudsSparse::Iter::compare(const std::string& key) {
244+
std::string key_sparse = key.substr(start_level_);
245+
std::string iter_key = getKey();
246+
int compare = iter_key.compare(key_sparse);
247+
if (compare != 0)
248+
return compare;
249+
position_t suffix_pos = trie_->getSuffixPos(pos_in_trie_[key_len_ - 1]);
250+
return trie_->suffixes_->compare(suffix_pos, key, key_len_);
251+
}
252+
237253
std::string LoudsSparse::Iter::getKey() const {
254+
if (!is_valid_)
255+
return std::string();
256+
level_t len = key_len_;
257+
if (is_at_terminator)
258+
len--;
259+
return std::string((const char*)key_.data(), (size_t)len);
260+
/*
238261
if (!is_valid_)
239262
return std::string();
240263
level_t len = key_len_;
@@ -245,6 +268,7 @@ std::string LoudsSparse::Iter::getKey() const {
245268
if (trie_->suffixes_->getType() == kReal && trie_->suffixes_->read(suffix_pos) > 0)
246269
ret_str += std::string((const char*)(trie_->suffixes_->move(suffix_pos)), sizeof(suffix_t));
247270
return ret_str;
271+
*/
248272
}
249273

250274
inline void LoudsSparse::Iter::append(const position_t pos) {

include/suffix.hpp

+143
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
#ifndef SUFFIX_H_
2+
#define SUFFIX_H_
3+
4+
#include "bitvector.hpp"
5+
6+
#include <assert.h>
7+
8+
#include <vector>
9+
10+
#include "config.hpp"
11+
#include "hash.hpp"
12+
13+
namespace surf {
14+
15+
// Max suffix_len_ = 64 bits
16+
// For kReal suffixes, if the stored key is not long enough to provide
17+
// suffix_len_ suffix bits, its suffix field is cleared (i.e., all 0's)
18+
// to indicate that there is no suffix info associated with the key.
19+
class BitvectorSuffix : public Bitvector {
20+
public:
21+
BitvectorSuffix() : type_(kNone), suffix_len_(0) {};
22+
23+
BitvectorSuffix(const SuffixType type, const position_t suffix_len,
24+
const std::vector<std::vector<word_t> >& bitvector_per_level,
25+
const std::vector<position_t>& num_bits_per_level,
26+
const level_t start_level = 0,
27+
level_t end_level = 0/* non-inclusive */)
28+
: Bitvector(bitvector_per_level, num_bits_per_level, start_level, end_level) {
29+
assert(suffix_len <= kWordSize);
30+
type_ = type;
31+
suffix_len_ = suffix_len;
32+
}
33+
34+
static word_t constructSuffix(const std::string& key, const level_t level, const position_t len);
35+
36+
SuffixType getType() const {
37+
return type_;
38+
}
39+
40+
position_t getSuffixLen() const {
41+
return suffix_len_;
42+
}
43+
44+
position_t size() const {
45+
position_t bitvector_mem = (num_bits_ / kWordSize) * (kWordSize / 8);
46+
if (num_bits_ % kWordSize == 0)
47+
bitvector_mem += (kWordSize / 8);
48+
return (sizeof(BitvectorSuffix) + bitvector_mem);
49+
}
50+
51+
word_t read(const position_t idx) const;
52+
bool checkEquality(const position_t idx, const std::string& key, const level_t level) const;
53+
54+
// Compare stored suffix to querying suffix.
55+
// kReal suffix type only.
56+
int compare(const position_t idx, const std::string& key, const level_t level) const;
57+
58+
private:
59+
static inline word_t constructHashSuffix(const std::string& key, const position_t len);
60+
static inline word_t constructRealSuffix(const std::string& key, const level_t level, const position_t len);
61+
62+
private:
63+
SuffixType type_;
64+
position_t suffix_len_; // in bits
65+
};
66+
67+
static word_t BitvectorSuffix::constructSuffix(const std::string& key, const level_t level, const position_t len) {
68+
switch (type_) {
69+
case kHash:
70+
return constructHashSuffix(key, len);
71+
case kReal:
72+
return constructRealSuffix(key, level, len);
73+
default:
74+
return 0;
75+
}
76+
}
77+
78+
word_t BitvectorSuffix::read(const position_t idx) const {
79+
assert(idx * suffix_len_ < num_bits_);
80+
position_t bit_pos = idx * suffix_len_;
81+
position_t word_id = bit_pos / kWordSize;
82+
position_t offset = bit_pos & (kWordSize - 1);
83+
return (bits_[word_id] << offset) >> (kWordSize - suffix_len_);
84+
}
85+
86+
bool BitvectorSuffix::checkEquality(const position_t idx, const std::string& key, const level_t level) const {
87+
if (type_ == kNone) return true;
88+
assert(idx * suffix_len_ < num_bits_);
89+
word_t stored_suffix = read(idx);
90+
// if no suffix info for the stored key
91+
if (type_ == kReal && stored_suffix == 0) return true;
92+
// if the querying key is shorter than the stored key
93+
if (type_ == kReal && ((key.length() - level) * 8) < suffix_len_) return false;
94+
word_t querying_suffix = constructSuffix(key, level, suffix_len_);
95+
if (stored_suffix == querying_suffix) return true;
96+
return false;
97+
}
98+
99+
int BitvectorSuffix::compare(const position_t pos, const std::string& key, const level_t level) const {
100+
assert(type_ == kReal);
101+
assert(idx * suffix_len_ < num_bits_);
102+
word_t stored_suffix = read(idx);
103+
if (stored_suffix == 0) return -1;
104+
word_t querying_suffix = constructSuffixFromKey(key, level);
105+
if (stored_suffix < querying_suffix) return -1;
106+
else if (stored_suffix == querying_suffix) return 0;
107+
else return 1;
108+
}
109+
110+
static inline word_t BitvectorSuffix::constructHashSuffix(const std::string& key, const position_t len) {
111+
word_t suffix = suffixHash(key);
112+
clearMSBits(suffix, len);
113+
return suffix;
114+
}
115+
116+
static inline word_t BitvectorSuffix::constructRealSuffix(const std::string& key, const level_t level, const position_t len) {
117+
word_t suffix = 0;
118+
level_t key_len = (level_t)key.length();
119+
position_t num_complete_bytes = suffix_len_ / 8;
120+
if (num_complete_bytes > 0) {
121+
if (level < key_len)
122+
suffix += (word_t)key[level];
123+
for (position_t i = 1; i < num_complete_bytes; i++) {
124+
suffix <<= 8;
125+
if ((level + i) < key_len)
126+
suffix += (word_t)key[level + i];
127+
}
128+
}
129+
position_t offset = suffix_len_ % 8;
130+
if (offset > 0) {
131+
suffix << offset;
132+
word_t remaining_bits = 0;
133+
if ((level + num_complete_bytes) < key_len)
134+
remaining_bits = (word_t)key[level + num_complete_bytes];
135+
remaining_bits >>= (8 - offset);
136+
suffix += remaining_bits;
137+
}
138+
return suffix;
139+
}
140+
141+
} // namespace surf
142+
143+
#endif // SUFFIX_H_

0 commit comments

Comments
 (0)