Skip to content

Commit eccd132

Browse files
authored
Some modifications for vineyard (#8)
* Add template specialization to std::string_view. Add inline keyword to get_available_memory. Fix bug of inline failed when call _mm_popcnt_u64. Signed-off-by: vegetableysm <[email protected]>
1 parent 2d75490 commit eccd132

File tree

7 files changed

+210
-24
lines changed

7 files changed

+210
-24
lines changed

grape/graph/hashmap_indexer_impl.h

+60
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ limitations under the License.
1818

1919
#include <vector>
2020

21+
#include "grape/communication/sync_comm.h"
2122
#include "grape/utils/ref_vector.h"
2223
#include "grape/utils/string_view_vector.h"
2324

@@ -43,6 +44,11 @@ inline int8_t log2(size_t value) {
4344
return table[((value - (value >> 1)) * 0x07EDD5E59A4E28C2) >> 58];
4445
}
4546

47+
/**
 * @brief Number of bytes needed to serialize `vec` with a dumper's dump_vec().
 *
 * The size is a length prefix plus the raw element bytes.
 *
 * @tparam T Contiguous container type exposing size() and front().
 * @param vec Container whose serialized size is computed; it may be empty
 *            (front() is only named inside sizeof and is never evaluated).
 * @return Serialized size in bytes.
 */
template <typename T>
size_t vec_dump_bytes(T const& vec) {
  // The dumpers write the element count as a size_t, so the prefix is sized
  // with size_t rather than the container's own size_type.
  return vec.size() * sizeof(vec.front()) + sizeof(size_t);
}
51+
4652
template <typename T>
4753
struct KeyBuffer {
4854
public:
@@ -96,6 +102,8 @@ struct KeyBuffer {
96102
dumper.dump_vec(inner_);
97103
}
98104

105+
size_t dump_size() { return vec_dump_bytes(inner_); }
106+
99107
private:
100108
std::vector<T, Allocator<T>> inner_;
101109
};
@@ -144,9 +152,61 @@ struct KeyBuffer<nonstd::string_view> {
144152
dumper.dump_vec(inner_.offset_buffer());
145153
}
146154

155+
size_t dump_size() {
156+
return vec_dump_bytes(inner_.content_buffer()) +
157+
vec_dump_bytes(inner_.offset_buffer());
158+
}
159+
160+
private:
161+
StringViewVector inner_;
162+
};
163+
164+
#if __cplusplus >= 201703L
165+
template <>
166+
struct KeyBuffer<std::string_view> {
167+
KeyBuffer() = default;
168+
~KeyBuffer() = default;
169+
170+
std::string_view get(size_t idx) const {
171+
std::string_view view(inner_[idx].data(), inner_[idx].size());
172+
return view;
173+
}
174+
175+
void push_back(const std::string_view& val) {
176+
nonstd::string_view view(val.data(), val.size());
177+
inner_.push_back(view);
178+
}
179+
180+
size_t size() const { return inner_.size(); }
181+
182+
StringViewVector& buffer() { return inner_; }
183+
const StringViewVector& buffer() const { return inner_; }
184+
185+
void swap(KeyBuffer& rhs) { inner_.swap(rhs.inner_); }
186+
187+
void clear() { inner_.clear(); }
188+
189+
template <typename Loader>
190+
void load(Loader& loader) {
191+
loader.load_vec(inner_.content_buffer());
192+
loader.load_vec(inner_.offset_buffer());
193+
}
194+
195+
template <typename Dumper>
196+
void dump(Dumper& dumper) const {
197+
dumper.dump_vec(inner_.content_buffer());
198+
dumper.dump_vec(inner_.offset_buffer());
199+
}
200+
201+
size_t dump_size() {
202+
return vec_dump_bytes(inner_.content_buffer()) +
203+
vec_dump_bytes(inner_.offset_buffer());
204+
}
205+
147206
private:
148207
StringViewVector inner_;
149208
};
209+
#endif
150210

151211
template <typename T>
152212
struct KeyBufferView {

grape/graph/perfect_hash_indexer.h

+29-22
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@
1717
#define GRAPE_GRAPH_PERFECT_HASH_INDEXER_H_
1818

1919
#include "grape/graph/hashmap_indexer_impl.h"
20+
#include "grape/util.h"
2021
#include "grape/utils/pthash_utils/single_phf_view.h"
22+
#include "grape/utils/string_view_vector.h"
2123

2224
namespace grape {
2325

@@ -69,6 +71,10 @@ class ImmPHIdxer {
6971
idxer_.init(buffer_.data(), buffer_.size());
7072
}
7173

74+
// Initializes the indexer view directly over `size` bytes starting at `buf`.
// Nothing is stored in buffer_ by this overload.
// NOTE(review): presumably the caller must keep `buf` alive for as long as
// this indexer is used - confirm against idxer_.init().
void Init(const char* buf, size_t size) { idxer_.init(buf, size); }
77+
7278
size_t entry_num() const { return idxer_.entry_num(); }
7379

7480
bool empty() const { return idxer_.empty(); }
@@ -119,33 +125,34 @@ class PHIdxerViewBuilder {
119125

120126
void add(KEY_T&& oid) { keys_.push_back(std::move(oid)); }
121127

122-
ImmPHIdxer<KEY_T, INDEX_T> finish() {
123-
mem_dumper dumper;
124-
{
125-
SinglePHFView<murmurhasher>::build(keys_.begin(), keys_.size(), dumper,
126-
1);
127-
mem_loader loader(dumper.buffer().data(), dumper.buffer().size());
128-
SinglePHFView<murmurhasher> phf;
129-
phf.load(loader);
130-
hashmap_indexer_impl::KeyBuffer<KEY_T> key_buffer;
131-
132-
std::vector<KEY_T> ordered_keys(keys_.size());
133-
for (auto& key : keys_) {
134-
size_t idx = phf(key);
135-
ordered_keys[idx] = key;
136-
}
137-
for (auto& key : ordered_keys) {
138-
key_buffer.push_back(key);
139-
}
140-
key_buffer.dump(dumper);
128+
void buildPhf() {
129+
SinglePHFView<murmurhasher>::build(keys_.begin(),
130+
keys_.size(), phf, 1);
131+
std::vector<KEY_T> ordered_keys(keys_.size());
132+
for (auto& key : keys_) {
133+
size_t idx = phf(key);
134+
ordered_keys[idx] = key;
141135
}
142-
ImmPHIdxer<KEY_T, INDEX_T> idxer;
143-
idxer.Init(std::move(dumper.buffer()));
144-
return idxer;
136+
for (auto& key : ordered_keys) {
137+
key_buffer.push_back(key);
138+
}
139+
}
140+
141+
void finish(void *buffer, size_t size, ImmPHIdxer<KEY_T, INDEX_T> &idxer) {
142+
external_mem_dumper dumper(buffer, size);
143+
phf.dump(dumper);
144+
key_buffer.dump(dumper);
145+
idxer.Init(static_cast<const char*>(dumper.buffer()), dumper.size());
146+
}
147+
148+
size_t getSerializeSize() {
149+
return phf.num_bits() / 8 + key_buffer.dump_size();
145150
}
146151

147152
private:
148153
std::vector<KEY_T> keys_;
154+
hashmap_indexer_impl::KeyBuffer<KEY_T> key_buffer;
155+
pthash::single_phf<murmurhasher, pthash::dictionary_dictionary, true> phf;
149156
};
150157

151158
} // namespace grape

grape/types.h

+6
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,12 @@ struct murmurhasher {
156156
static inline hash_type hash(const nonstd::string_view& val, uint64_t seed) {
157157
return pthash::MurmurHash2_64(val.data(), val.size(), seed);
158158
}
159+
160+
#if __cplusplus >= 201703L
161+
// Hashes the bytes viewed by `val` with MurmurHash2_64 under `seed`; same
// computation as the nonstd::string_view overload.
static inline hash_type hash(std::string_view const& val, uint64_t seed) {
  const char* bytes = val.data();
  const size_t length = val.size();
  return pthash::MurmurHash2_64(bytes, length, seed);
}
164+
#endif
159165
};
160166

161167
} // namespace grape

grape/util.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ inline std::map<std::string, size_t> parse_meminfo() {
190190
return ret;
191191
}
192192

193-
size_t get_available_memory() {
193+
inline size_t get_available_memory() {
194194
auto meminfo = parse_meminfo();
195195
#ifdef USE_HUGEPAGES
196196
return meminfo.at("HugePages_Free") * meminfo.at("Hugepagesize");

grape/utils/pthash_utils/single_phf_view.h

+56
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,46 @@ struct mem_dumper {
5555
std::vector<char> buf_;
5656
};
5757

58+
/**
 * @brief Dumper that serializes into caller-owned memory.
 *
 * Values are appended back to back into the region [buf, buf + size) given to
 * the constructor. The dumper never allocates and never frees that region.
 *
 * A write that does not fit entirely is skipped as a whole and leaves both the
 * buffer and the write position untouched; overflowed() then reports true so
 * callers can detect a truncated dump.
 */
struct external_mem_dumper {
 public:
  /// @param buf  Destination memory.
  /// @param size Capacity of `buf` in bytes.
  external_mem_dumper(void* buf, size_t size) : buf_(buf), size_(size) {}

  ~external_mem_dumper() = default;

  /// Appends the raw bytes of `val`; skipped if fewer than sizeof(T) bytes
  /// remain.
  template <typename T>
  void dump(const T& val) {
    // The message keeps this valid before C++17, where it is mandatory.
    static_assert(std::is_pod<T>::value, "dump() requires a POD type");
    // pos_ never exceeds size_, so the subtraction cannot wrap around
    // (unlike pos_ + sizeof(T), which could).
    if (sizeof(T) > size_ - pos_) {
      overflowed_ = true;
      return;
    }
    memcpy(static_cast<char*>(buf_) + pos_, &val, sizeof(T));
    pos_ += sizeof(T);
  }

  /// Appends the element count as a size_t followed by the raw elements.
  /// Skipped entirely (count included) if the whole record does not fit.
  template <typename T, typename ALLOC_T>
  void dump_vec(const std::vector<T, ALLOC_T>& vec) {
    static_assert(std::is_pod<T>::value, "dump_vec() requires POD elements");
    const size_t n = vec.size();
    const size_t remaining = size_ - pos_;
    // Same condition as pos_ + sizeof(T) * n + sizeof(size_t) > size_, written
    // so that neither the product nor the sum can wrap around.
    if (remaining < sizeof(size_t) ||
        n > (remaining - sizeof(size_t)) / sizeof(T)) {
      overflowed_ = true;
      return;
    }
    dump(n);
    // data() of an empty vector may be null, which must not reach memcpy.
    if (n != 0) {
      memcpy(static_cast<char*>(buf_) + pos_, vec.data(), sizeof(T) * n);
      pos_ += sizeof(T) * n;
    }
  }

  /// Start of the destination memory.
  const void* buffer() const { return buf_; }

  /// Capacity given to the constructor; NOT the number of bytes written.
  size_t size() const { return size_; }

  /// True once any dump()/dump_vec() call was skipped for lack of space.
  bool overflowed() const { return overflowed_; }

 private:
  void* buf_ = nullptr;
  size_t pos_ = 0;
  size_t size_ = 0;
  bool overflowed_ = false;
};
97+
5898
struct mem_loader {
5999
public:
60100
mem_loader(const char* buf, size_t size)
@@ -147,6 +187,22 @@ struct SinglePHFView {
147187
phf.dump(dumper);
148188
}
149189

190+
template <typename Iterator>
191+
static void build(
192+
Iterator keys, uint64_t n,
193+
pthash::single_phf<murmurhasher, pthash::dictionary_dictionary, true>&
194+
phf,
195+
int thread_num) {
196+
pthash::build_configuration config;
197+
config.c = 7.0;
198+
config.alpha = 0.94;
199+
config.num_threads = thread_num;
200+
config.minimal_output = true;
201+
config.verbose_output = false;
202+
203+
phf.build_in_internal_memory(keys, n, config);
204+
}
205+
150206
private:
151207
uint64_t m_seed;
152208
uint64_t m_num_keys;

grape/utils/string_view_vector.h

+44
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,50 @@ struct ref_vector<nonstd::string_view> {
161161
ref_vector<size_t> offsets_;
162162
};
163163

164+
#if __cplusplus >= 201703L
165+
template <>
166+
struct ref_vector<std::string_view> {
167+
ref_vector() {}
168+
~ref_vector() {}
169+
170+
size_t init(const void* buffer, size_t size) {
171+
size_t buffer_size = buffer_.init(buffer, size);
172+
const void* ptr = reinterpret_cast<const char*>(buffer) + buffer_size;
173+
size_t offset_size = offsets_.init(ptr, size - buffer_size);
174+
return buffer_size + offset_size;
175+
}
176+
177+
ref_vector<char>& buffer() { return buffer_; }
178+
ref_vector<size_t>& offsets() { return offsets_; }
179+
180+
const ref_vector<char>& buffer() const { return buffer_; }
181+
const ref_vector<size_t>& offsets() const { return offsets_; }
182+
183+
size_t size() const {
184+
if (offsets_.size() == 0) {
185+
return 0;
186+
}
187+
return offsets_.size() - 1;
188+
}
189+
190+
std::string_view get(size_t idx) const {
191+
size_t from = offsets_.get(idx);
192+
size_t to = offsets_.get(idx + 1);
193+
return std::string_view(buffer_.data() + from, to - from);
194+
}
195+
196+
template <typename Loader>
197+
void load(Loader& loader) {
198+
loader.load_ref_vec(buffer_);
199+
loader.load_ref_vec(offsets_);
200+
}
201+
202+
private:
203+
ref_vector<char> buffer_;
204+
ref_vector<size_t> offsets_;
205+
};
206+
#endif
207+
164208
} // namespace grape
165209

166210
#endif // GRAPE_UTILS_STRING_VIEW_VECTOR_H_

thirdparty/pthash/encoders/util.hpp

+14-1
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,21 @@
2121

2222
#pragma once
2323

24+
#include <cassert>
#include <cstdint>

#if __cplusplus >= 202002L
#include <bit>
#endif

#if defined(__x86_64__) && __SSE4_2__
#include <immintrin.h>
#endif
2530

2631
namespace pthash::util {
2732

33+
// Hints that the memory at `ptr` is about to be read. This is purely a
// performance hint; it does not read or modify the pointee.
//
// The intrinsic is used only under the same condition that pulls in
// <immintrin.h>. Other GCC/Clang targets use the compiler builtin, and any
// remaining compiler gets a no-op, so the function exists on every platform.
#if defined(__x86_64__) && __SSE4_2__
template <typename T>
inline void prefetch(T const* ptr) {
  _mm_prefetch(reinterpret_cast<const char*>(ptr), _MM_HINT_T0);
}
#elif defined(__GNUC__) || defined(__clang__)
template <typename T>
inline void prefetch(T const* ptr) {
  // rw = 0 (read), locality = 3 (keep in all cache levels), matching T0.
  __builtin_prefetch(ptr, 0, 3);
}
#else
template <typename T>
inline void prefetch(T const*) {}
#endif
3239

3340
inline uint8_t msb(uint64_t x) {
3441
assert(x);
@@ -66,7 +73,13 @@ inline uint8_t lsb(uint64_t x) {
6673
}
6774

6875
// Returns the number of set bits in `x`.
//
// The hardware intrinsic is used under exactly the condition that includes
// <immintrin.h>; otherwise std::popcount (C++20) or the compiler builtin is
// used, so the function compiles on targets without SSE4.2.
inline uint64_t popcount(uint64_t x) {
#if defined(__x86_64__) && __SSE4_2__
    return static_cast<uint64_t>(_mm_popcnt_u64(x));
#elif __cplusplus >= 202002L
    // std::popcount returns int; cast explicitly to the declared return type.
    return static_cast<uint64_t>(std::popcount(x));
#else
    return static_cast<uint64_t>(__builtin_popcountll(x));
#endif
}
7184

7285
inline uint64_t select64_pdep_tzcnt(uint64_t x, const uint64_t k) {

0 commit comments

Comments
 (0)