Skip to content

Commit 92da225

Browse files
authored
Merge pull request #1147 from bratpiorka/rrudnick_dp_bit_chunk
optimize Disjoint Pool chunks
2 parents 2ed7caf + d693070 commit 92da225

10 files changed

+117
-123
lines changed

src/base_alloc/base_alloc_global.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ static void umf_ba_create_global(void) {
7171
}
7272

7373
size_t smallestSize = BASE_ALLOC.ac_sizes[0];
74-
BASE_ALLOC.smallest_ac_size_log2 = log2Utils(smallestSize);
74+
BASE_ALLOC.smallest_ac_size_log2 = utils_msb64(smallestSize);
7575

7676
LOG_DEBUG("UMF base allocator created");
7777
}
@@ -83,8 +83,8 @@ static int size_to_idx(size_t size) {
8383
}
8484

8585
int isPowerOf2 = (0 == (size & (size - 1)));
86-
int index =
87-
(int)(log2Utils(size) + !isPowerOf2 - BASE_ALLOC.smallest_ac_size_log2);
86+
int index = (int)(utils_msb64(size) + !isPowerOf2 -
87+
BASE_ALLOC.smallest_ac_size_log2);
8888

8989
assert(index >= 0);
9090
return index;

src/critnib/critnib.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
#include "utils_assert.h"
6565
#include "utils_common.h"
6666
#include "utils_concurrency.h"
67+
#include "utils_math.h"
6768

6869
/*
6970
* A node that has been deleted is left untouched for this many delete
@@ -367,7 +368,7 @@ int critnib_insert(struct critnib *c, word key, void *value, int update) {
367368
}
368369

369370
/* and convert that to an index. */
370-
sh_t sh = utils_mssb_index(at) & (sh_t) ~(SLICE - 1);
371+
sh_t sh = utils_msb64(at) & (sh_t) ~(SLICE - 1);
371372

372373
struct critnib_node *m = alloc_node(c);
373374
if (!m) {

src/pool/pool_disjoint.c

Lines changed: 32 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -75,28 +75,36 @@ static slab_t *create_slab(bucket_t *bucket) {
7575
umf_result_t res = UMF_RESULT_SUCCESS;
7676
umf_memory_provider_handle_t provider = bucket->pool->provider;
7777

78-
slab_t *slab = umf_ba_global_alloc(sizeof(*slab));
78+
size_t num_chunks_total =
79+
utils_max(bucket_slab_min_size(bucket) / bucket->size, 1);
80+
81+
// Calculate the number of 64-bit words needed.
82+
size_t num_words =
83+
(num_chunks_total + CHUNK_BITMAP_SIZE - 1) / CHUNK_BITMAP_SIZE;
84+
85+
slab_t *slab = umf_ba_global_alloc(sizeof(*slab) +
86+
num_words * sizeof(slab->chunks[0]));
7987
if (slab == NULL) {
8088
LOG_ERR("allocation of new slab failed!");
8189
return NULL;
8290
}
8391

8492
slab->num_chunks_allocated = 0;
85-
slab->first_free_chunk_idx = 0;
8693
slab->bucket = bucket;
8794

8895
slab->iter.val = slab;
8996
slab->iter.prev = slab->iter.next = NULL;
9097

91-
slab->num_chunks_total =
92-
utils_max(bucket_slab_min_size(bucket) / bucket->size, 1);
93-
slab->chunks =
94-
umf_ba_global_alloc(sizeof(*slab->chunks) * slab->num_chunks_total);
95-
if (slab->chunks == NULL) {
96-
LOG_ERR("allocation of slab chunks failed!");
97-
goto free_slab;
98+
slab->num_chunks_total = num_chunks_total;
99+
slab->num_words = num_words;
100+
101+
// set all chunks as free
102+
memset(slab->chunks, ~0, num_words * sizeof(slab->chunks[0]));
103+
if (num_chunks_total % CHUNK_BITMAP_SIZE) {
104+
// clear the unused high bits of the last word so that only existing chunks are marked as free
105+
slab->chunks[num_words - 1] =
106+
((1ULL << (num_chunks_total % CHUNK_BITMAP_SIZE)) - 1);
98107
}
99-
memset(slab->chunks, 0, sizeof(*slab->chunks) * slab->num_chunks_total);
100108

101109
// if slab_min_size is not a multiple of bucket size, we would have some
102110
// padding at the end of the slab
@@ -108,7 +116,7 @@ static slab_t *create_slab(bucket_t *bucket) {
108116
res = umfMemoryProviderAlloc(provider, slab->slab_size, 0, &slab->mem_ptr);
109117
if (res != UMF_RESULT_SUCCESS) {
110118
LOG_ERR("allocation of slab data failed!");
111-
goto free_slab_chunks;
119+
goto free_slab;
112120
}
113121

114122
// raw allocation is not available for user so mark it as inaccessible
@@ -117,9 +125,6 @@ static slab_t *create_slab(bucket_t *bucket) {
117125
LOG_DEBUG("bucket: %p, slab_size: %zu", (void *)bucket, slab->slab_size);
118126
return slab;
119127

120-
free_slab_chunks:
121-
umf_ba_global_free(slab->chunks);
122-
123128
free_slab:
124129
umf_ba_global_free(slab);
125130
return NULL;
@@ -136,25 +141,21 @@ static void destroy_slab(slab_t *slab) {
136141
LOG_ERR("deallocation of slab data failed!");
137142
}
138143

139-
umf_ba_global_free(slab->chunks);
140144
umf_ba_global_free(slab);
141145
}
142146

143-
// return the index of the first available chunk, SIZE_MAX otherwise
144147
static size_t slab_find_first_available_chunk_idx(const slab_t *slab) {
145-
// use the first free chunk index as a hint for the search
146-
for (bool *chunk = slab->chunks + slab->first_free_chunk_idx;
147-
chunk != slab->chunks + slab->num_chunks_total; chunk++) {
148-
149-
// false means not used
150-
if (*chunk == false) {
151-
size_t idx = chunk - slab->chunks;
152-
LOG_DEBUG("idx: %zu", idx);
153-
return idx;
148+
for (size_t i = 0; i < slab->num_words; i++) {
149+
// NOTE: free chunks are represented as set bits
150+
uint64_t word = slab->chunks[i];
151+
if (word != 0) {
152+
size_t bit_index = utils_lsb64(word);
153+
size_t free_chunk = i * CHUNK_BITMAP_SIZE + bit_index;
154+
return free_chunk;
154155
}
155156
}
156157

157-
LOG_DEBUG("idx: SIZE_MAX");
158+
// No free chunk was found.
158159
return SIZE_MAX;
159160
}
160161

@@ -167,12 +168,9 @@ static void *slab_get_chunk(slab_t *slab) {
167168
(void *)((uintptr_t)slab->mem_ptr + chunk_idx * slab->bucket->size);
168169

169170
// mark chunk as used
170-
slab->chunks[chunk_idx] = true;
171+
slab_set_chunk_bit(slab, chunk_idx, false);
171172
slab->num_chunks_allocated += 1;
172173

173-
// use the found index as the next hint
174-
slab->first_free_chunk_idx = chunk_idx + 1;
175-
176174
return free_chunk;
177175
}
178176

@@ -195,18 +193,9 @@ static void slab_free_chunk(slab_t *slab, void *ptr) {
195193
size_t chunk_idx = ptr_diff / slab->bucket->size;
196194

197195
// Make sure that the chunk was allocated
198-
assert(slab->chunks[chunk_idx] && "double free detected");
199-
slab->chunks[chunk_idx] = false;
196+
assert(slab_read_chunk_bit(slab, chunk_idx) == 0 && "double free detected");
197+
slab_set_chunk_bit(slab, chunk_idx, true);
200198
slab->num_chunks_allocated -= 1;
201-
202-
if (chunk_idx < slab->first_free_chunk_idx) {
203-
slab->first_free_chunk_idx = chunk_idx;
204-
}
205-
206-
LOG_DEBUG("chunk_idx: %zu, num_chunks_allocated: %zu, "
207-
"first_free_chunk_idx: %zu",
208-
chunk_idx, slab->num_chunks_allocated,
209-
slab->first_free_chunk_idx);
210199
}
211200

212201
static bool slab_has_avail(const slab_t *slab) {
@@ -466,7 +455,7 @@ static size_t size_to_idx(disjoint_pool_t *pool, size_t size) {
466455
}
467456

468457
// get the position of the leftmost set bit
469-
size_t position = getLeftmostSetBitPos(size);
458+
size_t position = utils_msb64(size);
470459

471460
bool is_power_of_2 = 0 == (size & (size - 1));
472461
bool larger_than_halfway_between_powers_of_2 =
@@ -622,7 +611,7 @@ umf_result_t disjoint_pool_initialize(umf_memory_provider_handle_t provider,
622611
Size1 = utils_max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE);
623612

624613
// Calculate the exponent for min_bucket_size used for finding buckets.
625-
disjoint_pool->min_bucket_size_exp = (size_t)log2Utils(Size1);
614+
disjoint_pool->min_bucket_size_exp = (size_t)utils_msb64(Size1);
626615
disjoint_pool->default_shared_limits =
627616
umfDisjointPoolSharedLimitsCreate(SIZE_MAX);
628617

src/pool/pool_disjoint_internal.h

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
#include "critnib/critnib.h"
1616
#include "utils_concurrency.h"
1717

18+
#define CHUNK_BITMAP_SIZE 64
19+
1820
typedef struct bucket_t bucket_t;
1921
typedef struct slab_t slab_t;
2022
typedef struct slab_list_item_t slab_list_item_t;
@@ -81,23 +83,24 @@ typedef struct slab_t {
8183
void *mem_ptr;
8284
size_t slab_size;
8385

84-
// Represents the current state of each chunk: if the bit is set, the
85-
// chunk is allocated; otherwise, the chunk is free for allocation
86-
bool *chunks;
8786
size_t num_chunks_total;
8887

88+
// Number of 64-bit words needed to store the chunk state bitmap
89+
size_t num_words;
90+
8991
// Total number of allocated chunks at the moment.
9092
size_t num_chunks_allocated;
9193

9294
// The bucket which the slab belongs to
9395
bucket_t *bucket;
9496

95-
// Hints where to start search for free chunk in a slab
96-
size_t first_free_chunk_idx;
97-
9897
// Store iterator to the corresponding node in avail/unavail list
9998
// to achieve O(1) removal
10099
slab_list_item_t iter;
100+
101+
// Represents the current state of each chunk: if the bit is clear, the
102+
// chunk is allocated; otherwise, the chunk is free for allocation
103+
uint64_t chunks[];
101104
} slab_t;
102105

103106
typedef struct umf_disjoint_pool_shared_limits_t {
@@ -158,4 +161,24 @@ typedef struct disjoint_pool_t {
158161
size_t provider_min_page_size;
159162
} disjoint_pool_t;
160163

164+
static inline void slab_set_chunk_bit(slab_t *slab, size_t index, bool value) {
165+
assert(index < slab->num_chunks_total && "Index out of range");
166+
167+
size_t word_index = index / CHUNK_BITMAP_SIZE;
168+
unsigned bit_index = index % CHUNK_BITMAP_SIZE;
169+
if (value) {
170+
slab->chunks[word_index] |= (1ULL << bit_index);
171+
} else {
172+
slab->chunks[word_index] &= ~(1ULL << bit_index);
173+
}
174+
}
175+
176+
static inline int slab_read_chunk_bit(const slab_t *slab, size_t index) {
177+
assert(index < slab->num_chunks_total && "Index out of range");
178+
179+
size_t word_index = index / CHUNK_BITMAP_SIZE;
180+
unsigned bit_index = index % CHUNK_BITMAP_SIZE;
181+
return (slab->chunks[word_index] >> bit_index) & 1;
182+
}
183+
161184
#endif // UMF_POOL_DISJOINT_INTERNAL_H

src/utils/CMakeLists.txt

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (C) 2023-2024 Intel Corporation
1+
# Copyright (C) 2023-2025 Intel Corporation
22
# Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
33
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
44

@@ -7,15 +7,14 @@ include(FindThreads)
77

88
set(UMF_UTILS_SOURCES_COMMON utils_common.c utils_log.c utils_load_library.c)
99

10-
set(UMF_UTILS_SOURCES_POSIX utils_posix_common.c utils_posix_concurrency.c
11-
utils_posix_math.c)
10+
set(UMF_UTILS_SOURCES_POSIX utils_posix_common.c utils_posix_concurrency.c)
1211

1312
set(UMF_UTILS_SOURCES_LINUX utils_linux_common.c)
1413

1514
set(UMF_UTILS_SOURCES_MACOSX utils_macosx_common.c)
1615

1716
set(UMF_UTILS_SOURCES_WINDOWS utils_windows_common.c
18-
utils_windows_concurrency.c utils_windows_math.c)
17+
utils_windows_concurrency.c)
1918

2019
if(UMF_USE_VALGRIND)
2120
if(UMF_USE_ASAN

src/utils/utils_concurrency.h

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -89,18 +89,6 @@ void utils_init_once(UTIL_ONCE_FLAG *flag, void (*onceCb)(void));
8989

9090
#if defined(_WIN32)
9191

92-
static inline unsigned char utils_lssb_index(long long value) {
93-
unsigned long ret;
94-
_BitScanForward64(&ret, value);
95-
return (unsigned char)ret;
96-
}
97-
98-
static inline unsigned char utils_mssb_index(long long value) {
99-
unsigned long ret;
100-
_BitScanReverse64(&ret, value);
101-
return (unsigned char)ret;
102-
}
103-
10492
// There is no good way to do atomic_load on Windows...
10593
static inline void utils_atomic_load_acquire_u64(uint64_t *ptr, uint64_t *out) {
10694
// NOTE: Windows cl complains about direct accessing 'ptr' which is next
@@ -166,9 +154,6 @@ static inline bool utils_compare_exchange_u64(uint64_t *ptr, uint64_t *expected,
166154

167155
#else // !defined(_WIN32)
168156

169-
#define utils_lssb_index(x) ((unsigned char)__builtin_ctzll(x))
170-
#define utils_mssb_index(x) ((unsigned char)(63 - __builtin_clzll(x)))
171-
172157
static inline void utils_atomic_load_acquire_u64(uint64_t *ptr, uint64_t *out) {
173158
ASSERT_IS_ALIGNED((uintptr_t)ptr, 8);
174159
ASSERT_IS_ALIGNED((uintptr_t)out, 8);

src/utils/utils_math.h

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
*
3-
* Copyright (C) 2023-2024 Intel Corporation
3+
* Copyright (C) 2023-2025 Intel Corporation
44
*
55
* Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
66
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -11,16 +11,58 @@
1111
#define UMF_MATH_H 1
1212

1313
#include <assert.h>
14+
#include <limits.h>
1415
#include <stddef.h>
16+
#include <stdint.h>
1517

1618
#ifdef __cplusplus
1719
extern "C" {
1820
#endif
1921

20-
size_t getLeftmostSetBitPos(size_t num);
22+
#if defined(_WIN32)
2123

22-
// Logarithm is an index of the most significant non-zero bit.
23-
static inline size_t log2Utils(size_t num) { return getLeftmostSetBitPos(num); }
24+
#include "utils_windows_intrin.h"
25+
26+
#pragma intrinsic(_BitScanReverse64)
27+
#pragma intrinsic(_BitScanForward64)
28+
29+
// Retrieves the position of the leftmost set bit.
30+
// The position of the bit is counted from 0
31+
// e.g. for 01000011110 the position equals 9.
32+
static inline size_t utils_msb64(uint64_t num) {
33+
assert(num != 0 &&
34+
"Finding leftmost set bit when number equals zero is undefined");
35+
unsigned long index = 0;
36+
_BitScanReverse64(&index, num);
37+
return (size_t)index;
38+
}
39+
40+
static inline size_t utils_lsb64(uint64_t num) {
41+
assert(num != 0 &&
42+
"Finding rightmost set bit when number equals zero is undefined");
43+
unsigned long index = 0;
44+
_BitScanForward64(&index, num);
45+
return (size_t)index;
46+
}
47+
48+
#else // !defined(_WIN32)
49+
50+
// Retrieves the position of the leftmost set bit.
51+
// The position of the bit is counted from 0
52+
// e.g. for 01000011110 the position equals 9.
53+
static inline size_t utils_msb64(uint64_t num) {
54+
assert(num != 0 &&
55+
"Finding leftmost set bit when number equals zero is undefined");
56+
return 63 - __builtin_clzll(num);
57+
}
58+
59+
static inline size_t utils_lsb64(uint64_t num) {
60+
assert(num != 0 &&
61+
"Finding rightmost set bit when number equals zero is undefined");
62+
return __builtin_ctzll(num);
63+
}
64+
65+
#endif // !defined(_WIN32)
2466

2567
#ifdef __cplusplus
2668
}

src/utils/utils_posix_math.c

Lines changed: 0 additions & 20 deletions
This file was deleted.

0 commit comments

Comments
 (0)