Skip to content

Commit fc1835d

Browse files
Chen-Yuan-LaiIan Laialamb
authored
refactor: switch BooleanBufferBuilder to NullBufferBuilder in MaybeNullBufferBuilder (#14504)
* refactor: switch BooleanBufferBuilder to NullBufferBuilder in MaybeNullBufferBuilder * fix: cargo test failed * fix: typo --------- Co-authored-by: Ian Lai <[email protected]> Co-authored-by: Andrew Lamb <[email protected]>
1 parent c0d4ae7 commit fc1835d

File tree

1 file changed

+30
-72
lines changed

1 file changed

+30
-72
lines changed

datafusion/physical-plan/src/aggregates/group_values/null_builder.rs

Lines changed: 30 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -15,120 +15,78 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use arrow::array::BooleanBufferBuilder;
18+
use arrow::array::NullBufferBuilder;
1919
use arrow::buffer::NullBuffer;
2020

2121
/// Builder for an (optional) null mask
2222
///
2323
/// Optimized for avoid creating the bitmask when all values are non-null
2424
#[derive(Debug)]
25-
pub(crate) enum MaybeNullBufferBuilder {
26-
/// seen `row_count` rows but no nulls yet
27-
NoNulls { row_count: usize },
28-
/// have at least one null value
29-
///
25+
pub(crate) struct MaybeNullBufferBuilder {
3026
/// Note this is an Arrow *VALIDITY* buffer (so it is false for nulls, true
3127
/// for non-nulls)
32-
Nulls(BooleanBufferBuilder),
28+
nulls: NullBufferBuilder,
3329
}
3430

3531
impl MaybeNullBufferBuilder {
3632
/// Create a new builder
3733
pub fn new() -> Self {
38-
Self::NoNulls { row_count: 0 }
34+
Self {
35+
nulls: NullBufferBuilder::new(0),
36+
}
3937
}
4038

4139
/// Return true if the row at index `row` is null
4240
pub fn is_null(&self, row: usize) -> bool {
43-
match self {
44-
Self::NoNulls { .. } => false,
41+
match self.nulls.as_slice() {
4542
// validity mask means a unset bit is NULL
46-
Self::Nulls(builder) => !builder.get_bit(row),
43+
Some(_) => !self.nulls.is_valid(row),
44+
None => false,
4745
}
4846
}
4947

5048
/// Set the nullness of the next row to `is_null`
5149
///
52-
/// num_values is the current length of the rows being tracked
53-
///
5450
/// If `value` is true, the row is null.
5551
/// If `value` is false, the row is non null
5652
pub fn append(&mut self, is_null: bool) {
57-
match self {
58-
Self::NoNulls { row_count } if is_null => {
59-
// have seen no nulls so far, this is the first null,
60-
// need to create the nulls buffer for all currently valid values
61-
// alloc 2x the need given we push a new but immediately
62-
let mut nulls = BooleanBufferBuilder::new(*row_count * 2);
63-
nulls.append_n(*row_count, true);
64-
nulls.append(false);
65-
*self = Self::Nulls(nulls);
66-
}
67-
Self::NoNulls { row_count } => {
68-
*row_count += 1;
69-
}
70-
Self::Nulls(builder) => builder.append(!is_null),
71-
}
53+
self.nulls.append(!is_null)
7254
}
7355

7456
pub fn append_n(&mut self, n: usize, is_null: bool) {
75-
match self {
76-
Self::NoNulls { row_count } if is_null => {
77-
// have seen no nulls so far, this is the first null,
78-
// need to create the nulls buffer for all currently valid values
79-
// alloc 2x the need given we push a new but immediately
80-
let mut nulls = BooleanBufferBuilder::new(*row_count * 2);
81-
nulls.append_n(*row_count, true);
82-
nulls.append_n(n, false);
83-
*self = Self::Nulls(nulls);
84-
}
85-
Self::NoNulls { row_count } => {
86-
*row_count += n;
87-
}
88-
Self::Nulls(builder) => builder.append_n(n, !is_null),
57+
if is_null {
58+
self.nulls.append_n_nulls(n);
59+
} else {
60+
self.nulls.append_n_non_nulls(n);
8961
}
9062
}
9163

9264
/// return the number of heap allocated bytes used by this structure to store boolean values
9365
pub fn allocated_size(&self) -> usize {
94-
match self {
95-
Self::NoNulls { .. } => 0,
96-
// BooleanBufferBuilder builder::capacity returns capacity in bits (not bytes)
97-
Self::Nulls(builder) => builder.capacity() / 8,
98-
}
66+
// NullBufferBuilder builder::allocated_size returns capacity in bits
67+
self.nulls.allocated_size() / 8
9968
}
10069

10170
/// Return a NullBuffer representing the accumulated nulls so far
102-
pub fn build(self) -> Option<NullBuffer> {
103-
match self {
104-
Self::NoNulls { .. } => None,
105-
Self::Nulls(mut builder) => Some(NullBuffer::from(builder.finish())),
106-
}
71+
pub fn build(mut self) -> Option<NullBuffer> {
72+
self.nulls.finish()
10773
}
10874

10975
/// Returns a NullBuffer representing the first `n` rows accumulated so far
11076
/// shifting any remaining down by `n`
11177
pub fn take_n(&mut self, n: usize) -> Option<NullBuffer> {
112-
match self {
113-
Self::NoNulls { row_count } => {
114-
*row_count -= n;
115-
None
116-
}
117-
Self::Nulls(builder) => {
118-
// Copy over the values at n..len-1 values to the start of a
119-
// new builder and leave it in self
120-
//
121-
// TODO: it would be great to use something like `set_bits` from arrow here.
122-
let mut new_builder = BooleanBufferBuilder::new(builder.len());
123-
for i in n..builder.len() {
124-
new_builder.append(builder.get_bit(i));
125-
}
126-
std::mem::swap(&mut new_builder, builder);
127-
128-
// take only first n values from the original builder
129-
new_builder.truncate(n);
130-
Some(NullBuffer::from(new_builder.finish()))
131-
}
78+
// Copy over the values at n..len-1 values to the start of a
79+
// new builder and leave it in self
80+
//
81+
// TODO: it would be great to use something like `set_bits` from arrow here.
82+
let mut new_builder = NullBufferBuilder::new(self.nulls.len());
83+
for i in n..self.nulls.len() {
84+
new_builder.append(self.nulls.is_valid(i));
13285
}
86+
std::mem::swap(&mut new_builder, &mut self.nulls);
87+
88+
// take only first n values from the original builder
89+
new_builder.truncate(n);
90+
new_builder.finish()
13391
}
13492
}

0 commit comments

Comments
 (0)