Skip to content

Commit 553bdc1

Browse files
committed
Fix and improve tests and docs.
1 parent ec50156 commit 553bdc1

File tree

13 files changed

+114
-76
lines changed

13 files changed

+114
-76
lines changed

include/roaring/containers/array.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -472,14 +472,13 @@ inline int array_container_index_equalorlarger(const array_container_t *arr,
472472
* @param ac The array container to read from
473473
* @param it Iterator state (index into the array)
474474
* @param buf Boolean buffer to write to
475-
* @param max_value Stop reading when reaching this value. If it is null, read
476-
* the whole container.
475+
* @param max_value Stop reading when it->current_value >= this value.
477476
* @param value_out Output parameter for the next value
478477
* @return true if there are more values to read, false otherwise
479478
*/
480479
bool array_container_iterator_read_into_bool(
481480
const array_container_t *ac, struct roaring_container_iterator_s *it,
482-
bool *buf, const uint16_t *max_value, uint16_t *value_out);
481+
bool *buf, uint32_t max_value, uint16_t *value_out);
483482

484483
/*
485484
* Adds all values in range [min,max] using hint:

include/roaring/containers/bitset.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -511,14 +511,13 @@ int bitset_container_index_equalorlarger(const bitset_container_t *container,
511511
* @param bc The bitset container to read from
512512
* @param it Iterator state (index into the bitset)
513513
* @param buf Boolean buffer to write to
514-
* @param max_value Stop reading when reaching this value. If it is null, read
515-
* the whole container.
514+
* @param max_value Stop reading when it->current_value >= this value.
516515
* @param value_out Output parameter for the next value
517516
* @return true if there are more values to read, false otherwise
518517
*/
519518
bool bitset_container_iterator_read_into_bool(
520519
const bitset_container_t *bc, struct roaring_container_iterator_s *it,
521-
bool *buf, const uint16_t *max_value, uint16_t *value_out);
520+
bool *buf, uint32_t max_value, uint16_t *value_out);
522521

523522
#ifdef __cplusplus
524523
}

include/roaring/containers/containers.h

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2478,10 +2478,8 @@ bool container_iterator_read_into_uint64(const container_t *c, uint8_t typecode,
24782478
uint16_t *value_out);
24792479

24802480
/**
2481-
* Reads entries until the the last entry whose value is strictly smaller than
2482-
* `*max_value` from the container (*max_value is excluded), and sets
2483-
* corresponding positions in `buf` to true. If `max_value` is null, then all
2484-
* entries are read.
2481+
* Iterate all entries within [it->current_value, max_value), and sets
2482+
* corresponding positions in `buf` to true.
24852483
*
24862484
* The `buf` array is filled starting from index 0, which corresponds to the
24872485
* initial iterator position `it`. For subsequent iterator positions `it_new`,
@@ -2494,7 +2492,7 @@ bool container_iterator_read_into_uint64(const container_t *c, uint8_t typecode,
24942492
*/
24952493
bool container_iterator_read_into_bool(const container_t *c, uint8_t typecode,
24962494
roaring_container_iterator_t *it,
2497-
bool *buf, const uint16_t *max_value,
2495+
bool *buf, uint32_t max_value,
24982496
uint16_t *value_out);
24992497

25002498
/**

include/roaring/containers/run.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -715,14 +715,13 @@ static inline void run_container_remove_range(run_container_t *run,
715715
* @param rc The run container to read from
716716
* @param it Iterator state (index into the runs array)
717717
* @param buf Boolean buffer to write to
718-
* @param max_value Stop reading when reaching this value. If it is null, read
719-
* the whole container.
718+
* @param max_value Stop reading when it->current_value >= this value.
720719
* @param value_out Output parameter for the current/next value
721720
* @return true if there are more values to read, false otherwise
722721
*/
723722
bool run_container_iterator_read_into_bool(
724723
const run_container_t *rc, struct roaring_container_iterator_s *it,
725-
bool *buf, const uint16_t *max_value, uint16_t *value_out);
724+
bool *buf, uint32_t max_value, uint16_t *value_out);
726725

727726
#ifdef __cplusplus
728727
}

include/roaring/roaring.h

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1227,16 +1227,30 @@ uint32_t roaring_uint32_iterator_read(roaring_uint32_iterator_t *it,
12271227
uint32_t *buf, uint32_t count);
12281228

12291229
/**
1230-
* Reads until the last value that is strictly smaller than `max_value` and
1231-
* fill the bool array `buf`.
1230+
* Iterate over `it` in range [it->current_value, max_value) and fill bool array
1231+
* `buf` from its beginning.
12321232
*
12331233
* This function satisfies semantics of iteration and can be used together with
12341234
* other iterator functions.
12351235
*
1236-
* Let `it1` be the initial iterator and it has value, then for every iterated
1237-
* `it`, buf[it1.current_value - it.current_value] will be set to true; other
1238-
* positions will remain to be false.
1239-
* - after function returns, iterator is positioned at the next element
1236+
* Let `init_it` be the initial iterator and it has value, then for every
1237+
* iterated `it`, buf[init_it.current_value - it.current_value] will be set to
1238+
* true; other positions will remain to be false. The final `it` will be invalid
1239+
* or point to the first value >= max_value.
1240+
*
1241+
* User should ensure that `buf` has enough space for holding the bool values.
1242+
*
1243+
* Here is an example:
1244+
* final_it(8)
1245+
* init_it(4) max_value(8)
1246+
* │ │
1247+
* ▼ ▼
1248+
* Values: 1 2 3 4 5 6 7 8 9
1249+
* Roaring: x x x x x
1250+
* The result bool array: [1 0 0 1]
1251+
* Size of the bool array: 4 ▲
1252+
* │
1253+
* Beginning of the bool array
12401254
*/
12411255
void roaring_uint32_iterator_read_into_bool(roaring_uint32_iterator_t *it,
12421256
bool *buf, uint32_t max_value);

src/containers/array.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -565,12 +565,11 @@ bool array_container_iterate64(const array_container_t *cont, uint32_t base,
565565
CROARING_ALLOW_UNALIGNED
566566
bool array_container_iterator_read_into_bool(const array_container_t *ac,
567567
roaring_container_iterator_t *it,
568-
bool *buf,
569-
const uint16_t *max_value,
568+
bool *buf, uint32_t max_value,
570569
uint16_t *value_out) {
571570
int32_t initial_index = it->index;
572571

573-
if (max_value == NULL) {
572+
if (max_value > UINT16_MAX) {
574573
// TODO: SIMD optimization
575574
while (it->index < ac->cardinality) {
576575
buf[ac->array[it->index] - ac->array[initial_index]] = true;
@@ -579,7 +578,8 @@ bool array_container_iterator_read_into_bool(const array_container_t *ac,
579578
return false;
580579
}
581580

582-
while (it->index < ac->cardinality && ac->array[it->index] < *max_value) {
581+
while (it->index < ac->cardinality &&
582+
(uint32_t)ac->array[it->index] < max_value) {
583583
buf[ac->array[it->index] - ac->array[initial_index]] = true;
584584
it->index++;
585585
}

src/containers/bitset.c

Lines changed: 30 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -248,47 +248,49 @@ bool bitset_container_intersect(const bitset_container_t *src_1,
248248
CROARING_ALLOW_UNALIGNED
249249
bool bitset_container_iterator_read_into_bool(const bitset_container_t *bc,
250250
roaring_container_iterator_t *it,
251-
bool *buf,
252-
const uint16_t *max_value,
251+
bool *buf, uint32_t max_value,
253252
uint16_t *value_out) {
254-
uint32_t max_wordindex = BITSET_CONTAINER_SIZE_IN_WORDS;
255-
// If max_value is not NULL, get the wordindex of the max_value.
256-
if (max_value != NULL) {
257-
max_wordindex = *max_value / 64;
258-
assert(max_wordindex < BITSET_CONTAINER_SIZE_IN_WORDS);
253+
uint32_t max_wordindex = max_value / 64;
254+
if (max_wordindex >= BITSET_CONTAINER_SIZE_IN_WORDS) {
255+
max_wordindex = BITSET_CONTAINER_SIZE_IN_WORDS - 1;
259256
}
260257
uint32_t wordindex = it->index / 64;
261258
uint64_t word = bc->words[wordindex] & (UINT64_MAX << (it->index % 64));
262259
uint16_t initial_value = it->index;
263-
if (max_wordindex > 0) {
264-
while (wordindex < max_wordindex) {
265-
// TODO: SIMD optimization
266-
while (word != 0) {
267-
*value_out = wordindex * 64 + roaring_trailing_zeroes(word);
268-
buf[*value_out - initial_value] = true;
269-
word = word & (word - 1);
270-
}
271-
wordindex++;
272-
if (wordindex < BITSET_CONTAINER_SIZE_IN_WORDS) {
273-
word = bc->words[wordindex];
274-
}
260+
// Remain the last word to process out of loop for reducing `if` branches
261+
while (wordindex < max_wordindex) {
262+
// TODO: SIMD optimization
263+
while (word != 0) {
264+
it->index = wordindex * 64 + roaring_trailing_zeroes(word);
265+
buf[it->index - initial_value] = true;
266+
word = word & (word - 1);
267+
}
268+
wordindex++;
269+
if (wordindex < BITSET_CONTAINER_SIZE_IN_WORDS) {
270+
word = bc->words[wordindex];
275271
}
276272
}
277-
// All the words are processed.
278-
if (max_value == NULL) return false;
279273
// Process the last word (which is at max_wordindex)
280274
while (word != 0) {
281-
*value_out = wordindex * 64 + roaring_trailing_zeroes(word);
282-
if (*value_out >= *max_value) {
283-
it->index = *value_out;
275+
it->index = wordindex * 64 + roaring_trailing_zeroes(word);
276+
if ((uint32_t)it->index >= max_value) {
277+
*value_out = it->index;
284278
return true;
285279
}
286-
buf[*value_out - initial_value] = true;
280+
buf[it->index - initial_value] = true;
287281
word = word & (word - 1);
288282
}
289-
// If max_value is not NULL, its wordindex must be less than
290-
// BITSET_CONTAINER_SIZE_IN_WORDS. So if reach this line, the bitset must be
291-
// drained.
283+
284+
/// If the bitset is not drained, iterate to the next set bit.
285+
while (word == 0 && (wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS)) {
286+
wordindex++;
287+
word = bc->words[wordindex];
288+
}
289+
if (word != 0) {
290+
it->index = wordindex * 64 + roaring_trailing_zeroes(word);
291+
*value_out = it->index;
292+
return true;
293+
}
292294
return false;
293295
}
294296

src/containers/containers.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -708,7 +708,7 @@ bool container_iterator_read_into_uint64(const container_t *c, uint8_t typecode,
708708

709709
bool container_iterator_read_into_bool(const container_t *c, uint8_t typecode,
710710
roaring_container_iterator_t *it,
711-
bool *buf, const uint16_t *max_value,
711+
bool *buf, uint32_t max_value,
712712
uint16_t *value_out) {
713713
c = container_unwrap_shared(c, &typecode);
714714
switch (typecode) {

src/containers/run.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1130,12 +1130,12 @@ int run_container_to_uint32_array(void *vout, const run_container_t *cont,
11301130
CROARING_ALLOW_UNALIGNED
11311131
bool run_container_iterator_read_into_bool(const run_container_t *rc,
11321132
roaring_container_iterator_t *it,
1133-
bool *buf, const uint16_t *max_value,
1133+
bool *buf, uint32_t max_value,
11341134
uint16_t *value_out) {
11351135
uint16_t initial_value = *value_out;
11361136

11371137
// TODO: SIMD optimization
1138-
if (max_value == NULL) {
1138+
if (max_value > UINT16_MAX) {
11391139
while (it->index < rc->n_runs) {
11401140
uint16_t run_start = rc->runs[it->index].value;
11411141
uint16_t run_end = run_start + rc->runs[it->index].length;
@@ -1155,14 +1155,14 @@ bool run_container_iterator_read_into_bool(const run_container_t *rc,
11551155
// Start from current value if we're in the middle of a run
11561156
uint16_t start = (*value_out >= run_start) ? *value_out : run_start;
11571157
// max_value .. [start .. run_end]
1158-
if (*max_value <= start) {
1158+
if (max_value <= start) {
11591159
*value_out = start;
11601160
return true;
11611161
}
11621162
// [start .. max_value .. run_end]
1163-
if (*max_value <= run_end) {
1164-
memset(buf + start - initial_value, true, *max_value - start);
1165-
*value_out = *max_value;
1163+
if (max_value <= run_end) {
1164+
memset(buf + start - initial_value, true, max_value - start);
1165+
*value_out = max_value;
11661166
return true;
11671167
}
11681168
// [start .. run_end] .. max_value

src/roaring.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1888,12 +1888,12 @@ void roaring_uint32_iterator_read_into_bool(roaring_uint32_iterator_t *it,
18881888
while (it->has_value && it->current_value < max_value) {
18891889
buf += (it->current_value - initial_value);
18901890
uint16_t low16 = (uint16_t)it->current_value;
1891-
uint16_t *max_value_ptr = it->highbits == highbits_of_max_value
1892-
? &lowbits_of_max_value
1893-
: NULL;
1891+
uint32_t max_value_for_container = it->highbits == highbits_of_max_value
1892+
? lowbits_of_max_value
1893+
: UINT16_MAX + 1;
18941894
bool has_value = container_iterator_read_into_bool(
1895-
it->container, it->typecode, &it->container_it, buf, max_value_ptr,
1896-
&low16);
1895+
it->container, it->typecode, &it->container_it, buf,
1896+
max_value_for_container, &low16);
18971897
if (has_value) {
18981898
it->has_value = true;
18991899
it->current_value = it->highbits | low16;

0 commit comments

Comments
 (0)