-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathebr.h
More file actions
323 lines (259 loc) · 9.55 KB
/
ebr.h
File metadata and controls
323 lines (259 loc) · 9.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
////////////////////////////////
// EBR - Epoch-based reclamation
////////////////////////////////
#ifndef EBR_H
#define EBR_H
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <threads.h>
#include <time.h>
// Used by both data structures for tracking frozen values
#define EBR_PRIME_BIT (1ull << 63ull)
// Virtual memory allocation (since the tables are generally nicely page-size friendly)
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#define EBR_VIRTUAL_ALLOC(size) VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE)
#define EBR_VIRTUAL_FREE(ptr, size) VirtualFree(ptr, size, MEM_RELEASE)
#else
#include <sys/mman.h>
#define EBR_VIRTUAL_ALLOC(size) mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)
#define EBR_VIRTUAL_FREE(ptr, size) munmap(ptr, size)
#endif
// traditional heap ops
#ifndef EBR_REALLOC
#define EBR_REALLOC(ptr, size) realloc(ptr, size)
#endif // EBR_REALLOC
#define EBR_DEBOOGING 0
#if EBR_DEBOOGING
#define EBR__BEGIN(name) spall_auto_buffer_begin(name, sizeof(name) - 1, NULL, 0)
#define EBR__END() spall_auto_buffer_end()
#else
#define EBR__BEGIN(name)
#define EBR__END()
#endif
void ebr_init(void);
void ebr_deinit(void);
// Annotates the critical sections in the mutators
void ebr_enter_cs(void);
void ebr_exit_cs(void);
void ebr_free(void* ptr, size_t size);
#endif // EBR_H
#ifdef EBR_IMPL
#undef EBR_IMPL
// for the time in the ebr entry
#define EBR_PINNED_BIT (1ull << 63ull)
#ifdef _WIN32
#include <tlhelp32.h>
#else
#include <pthread.h>
#endif
typedef struct EBR_Entry EBR_Entry;
struct EBR_Entry {
_Atomic(EBR_Entry*) next;
_Atomic(uint64_t) time;
// OS-specific thread handle
#ifdef _WIN32
uint32_t os_handle;
#endif
bool gc_mark;
// keep on a separate cacheline to avoid false sharing
_Alignas(64) uint64_t checkpoint_t;
};
typedef struct EBR_FreeNode EBR_FreeNode;
struct EBR_FreeNode {
EBR_FreeNode* next;
// space to reclaim
void* ptr;
size_t size;
};
static _Thread_local bool ebr_thread_init;
static _Thread_local EBR_Entry* ebr_thread_entry;
// concurrent free-list
static _Atomic(EBR_FreeNode*) ebr_free_list;
static _Atomic(EBR_Entry*) ebr_list;
static thrd_t ebr_thread;
static _Atomic bool ebr_running;
static mtx_t ebr_mutex;
static cnd_t ebr_exit_signal;
void ebr_deinit(void) {
if (ebr_running) {
ebr_running = false;
cnd_broadcast(&ebr_exit_signal);
int res;
thrd_join(ebr_thread, &res);
}
}
static int ebr_thread_fn(void* arg) {
EBR_FreeNode* last_free_list = NULL;
#if EBR_DEBOOGING
spall_auto_thread_init(37373737, SPALL_DEFAULT_BUFFER_SIZE);
#endif
ebr_running = true;
atexit(ebr_deinit);
while (ebr_running) {
EBR__BEGIN("cycle");
// clear the free list, then we wait for the checkpoint before
// freeing the memory.
EBR_FreeNode* free_list = atomic_exchange(&ebr_free_list, NULL);
// must be loaded after, we need "at least" all the mutator
// threads visible who could've written to free_list, we can scan more.
EBR_Entry* thread_list = atomic_load(&ebr_list);
EBR__BEGIN("Checkpoint");
// wait for the critical sections to advance, once
// that happens we can choose to free things.
for (EBR_Entry* list = thread_list; list; list = list->next) {
list->checkpoint_t = list->time;
}
for (EBR_Entry* list = thread_list; list; list = list->next) {
uint64_t before_t = list->checkpoint_t;
if (before_t & EBR_PINNED_BIT) {
uint64_t now_t, tries = 0;
do {
// once we're at this a bunch of times, we start to yield
if (tries > 30) {
thrd_yield();
}
// idk, maybe this should be a better spinlock
now_t = list->time;
tries++;
} while (before_t == now_t);
list->checkpoint_t = now_t;
}
list->gc_mark = false;
}
EBR__END();
EBR__BEGIN("Reclaim memory");
if (last_free_list) {
EBR_FreeNode* list = last_free_list;
while (list) {
EBR_FreeNode* next = list->next;
(void) EBR_REALLOC(list, 0);
list = next;
}
last_free_list = NULL;
}
// empty the free list, it's possible that the mutators are still watching it
// so we can't free it until the next iteration.
for (; free_list; free_list = free_list->next) {
EBR_VIRTUAL_FREE(free_list->ptr, free_list->size);
// printf("FREE %p %zu\n", free_list->ptr, free_list->size);
}
last_free_list = free_list;
EBR__END();
EBR__BEGIN("Thread GC");
// mark the dead threads and then remove them from the list
#ifdef _WIN32
uint32_t pid = GetCurrentProcessId();
HANDLE snapshot = CreateToolhelp32Snapshot(TH32CS_SNAPTHREAD, pid);
// mark phase
THREADENTRY32 te32 = { .dwSize = sizeof(THREADENTRY32) };
EBR__BEGIN("Thread32First");
if (Thread32First(snapshot, &te32)) {
do {
EBR__END();
if (!ebr_running) {
break;
}
if (te32.th32OwnerProcessID == pid) {
// probably slow, don't care, in practice there's not enough threads to worry about it
for (EBR_Entry* list = thread_list; list; list = list->next) {
if (list->os_handle == te32.th32ThreadID) {
list->gc_mark = true;
break;
}
}
}
EBR__BEGIN("Thread32Next");
} while (Thread32Next(snapshot, &te32));
EBR__END();
}
CloseHandle(snapshot);
// once we've advanced, we can acknowledge any thread deaths
EBR_Entry* list = thread_list;
EBR_Entry* prev = NULL;
while (list) {
EBR_Entry* next = list->next;
// we can't free the first entry in the list without causing issues so we just
// mark it as dead for now, next cycle we'll try again
if (!list->gc_mark && prev != NULL) {
// printf("THREAD %u DIED!\n", list->os_handle);
// object was marked as dead, let's remove the entry
atomic_store_explicit(&prev->next, next, memory_order_release);
// we can free this immediately, it begin reclaimed early is actually ok.
// if reclaimed early AND it's at the start of the list then things work
// out as if the original never died. since we don't read the ebr_list root
// node at all (just the pointer) then we wouldn't crash from holding this
// potentially invalid ptr.
EBR_REALLOC(list, 0);
} else {
prev = list;
}
list = next;
}
#endif
EBR__END();
EBR__END();
struct timespec w_time;
if (clock_gettime(CLOCK_REALTIME, &w_time) == 0) {
w_time.tv_nsec += 500000000;
if (w_time.tv_nsec >= 1000000000) {
w_time.tv_nsec -= 1000000000;
w_time.tv_sec += 1;
}
// we're in no rush to free to might as well yield some time
mtx_lock(&ebr_mutex);
cnd_timedwait(&ebr_exit_signal, &ebr_mutex, &w_time);
mtx_unlock(&ebr_mutex);
}
}
#if EBR_DEBOOGING
spall_auto_thread_quit();
#endif
return 0;
}
static void ebr_once_init(void) {
mtx_init(&ebr_mutex, mtx_plain);
cnd_init(&ebr_exit_signal);
thrd_create(&ebr_thread, ebr_thread_fn, NULL);
}
static once_flag ebr_init_flag = ONCE_FLAG_INIT;
void ebr_init(void) {
call_once(&ebr_init_flag, ebr_once_init);
}
void ebr_free(void* ptr, size_t size) {
EBR_FreeNode* node = EBR_REALLOC(NULL, sizeof(EBR_FreeNode));
node->ptr = ptr;
node->size = size;
EBR_FreeNode* list = ebr_free_list;
do {
node->next = list;
} while (!atomic_compare_exchange_strong(&ebr_free_list, &list, node));
// printf("FREE @ %p %#zx %p\n", ebr_thread_entry, ebr_thread_entry->time, ptr);
}
void ebr_enter_cs(void) {
if (ebr_thread_entry == NULL) {
EBR_Entry* new_node = EBR_REALLOC(NULL, sizeof(EBR_Entry));
new_node->time = 0;
#ifdef _WIN32
new_node->os_handle = GetCurrentThreadId();
#endif
new_node->checkpoint_t = 0;
ebr_thread_entry = new_node;
// add to ebr list, we never free this because i don't care
// TODO(NeGate): i do care, this is a nightmare when threads die figure it out
EBR_Entry* old = atomic_load_explicit(&ebr_list, memory_order_relaxed);
do {
ebr_thread_entry->next = old;
} while (!atomic_compare_exchange_strong(&ebr_list, &old, new_node));
}
// flips the top bit on
uint64_t t = atomic_load_explicit(&ebr_thread_entry->time, memory_order_relaxed);
atomic_store_explicit(&ebr_thread_entry->time, t + EBR_PINNED_BIT, memory_order_release);
}
// flips the top bit off AND increments time by one
void ebr_exit_cs(void) {
uint64_t t = atomic_load_explicit(&ebr_thread_entry->time, memory_order_relaxed);
atomic_store_explicit(&ebr_thread_entry->time, t + EBR_PINNED_BIT + 1, memory_order_release);
}
#endif // EBR_IMPL