|
15 | 15 | // specific language governing permissions and limitations
|
16 | 16 | // under the License.
|
17 | 17 |
|
18 |
| -use arrow::array::BooleanBufferBuilder; |
| 18 | +use arrow::array::NullBufferBuilder; |
19 | 19 | use arrow::buffer::NullBuffer;
|
20 | 20 |
|
21 | 21 | /// Builder for an (optional) null mask
|
22 | 22 | ///
|
23 | 23 | /// Optimized to avoid creating the bitmask when all values are non-null
|
24 | 24 | #[derive(Debug)]
|
25 |
| -pub(crate) enum MaybeNullBufferBuilder { |
26 |
| - /// seen `row_count` rows but no nulls yet |
27 |
| - NoNulls { row_count: usize }, |
28 |
| - /// have at least one null value |
29 |
| - /// |
| 25 | +pub(crate) struct MaybeNullBufferBuilder { |
30 | 26 | /// Note this is an Arrow *VALIDITY* buffer (so it is false for nulls, true
|
31 | 27 | /// for non-nulls)
|
32 |
| - Nulls(BooleanBufferBuilder), |
| 28 | + nulls: NullBufferBuilder, |
33 | 29 | }
|
34 | 30 |
|
35 | 31 | impl MaybeNullBufferBuilder {
|
36 | 32 | /// Create a new builder
|
37 | 33 | pub fn new() -> Self {
|
38 |
| - Self::NoNulls { row_count: 0 } |
| 34 | + Self { |
| 35 | + nulls: NullBufferBuilder::new(0), |
| 36 | + } |
39 | 37 | }
|
40 | 38 |
|
41 | 39 | /// Return true if the row at index `row` is null
|
42 | 40 | pub fn is_null(&self, row: usize) -> bool {
|
43 |
| - match self { |
44 |
| - Self::NoNulls { .. } => false, |
| 41 | + match self.nulls.as_slice() { |
45 | 42 | // validity mask means a unset bit is NULL
|
46 |
| - Self::Nulls(builder) => !builder.get_bit(row), |
| 43 | + Some(_) => !self.nulls.is_valid(row), |
| 44 | + None => false, |
47 | 45 | }
|
48 | 46 | }
|
49 | 47 |
|
50 | 48 | /// Set the nullness of the next row to `is_null`
|
51 | 49 | ///
|
52 |
| - /// num_values is the current length of the rows being tracked |
53 |
| - /// |
54 | 50 | /// If `is_null` is true, the row is null.
|
55 | 51 | /// If `is_null` is false, the row is non null
|
56 | 52 | pub fn append(&mut self, is_null: bool) {
|
57 |
| - match self { |
58 |
| - Self::NoNulls { row_count } if is_null => { |
59 |
| - // have seen no nulls so far, this is the first null, |
60 |
| - // need to create the nulls buffer for all currently valid values |
61 |
| - // alloc 2x the need given we push a new but immediately |
62 |
| - let mut nulls = BooleanBufferBuilder::new(*row_count * 2); |
63 |
| - nulls.append_n(*row_count, true); |
64 |
| - nulls.append(false); |
65 |
| - *self = Self::Nulls(nulls); |
66 |
| - } |
67 |
| - Self::NoNulls { row_count } => { |
68 |
| - *row_count += 1; |
69 |
| - } |
70 |
| - Self::Nulls(builder) => builder.append(!is_null), |
71 |
| - } |
| 53 | + self.nulls.append(!is_null) |
72 | 54 | }
|
73 | 55 |
|
74 | 56 | pub fn append_n(&mut self, n: usize, is_null: bool) {
|
75 |
| - match self { |
76 |
| - Self::NoNulls { row_count } if is_null => { |
77 |
| - // have seen no nulls so far, this is the first null, |
78 |
| - // need to create the nulls buffer for all currently valid values |
79 |
| - // alloc 2x the need given we push a new but immediately |
80 |
| - let mut nulls = BooleanBufferBuilder::new(*row_count * 2); |
81 |
| - nulls.append_n(*row_count, true); |
82 |
| - nulls.append_n(n, false); |
83 |
| - *self = Self::Nulls(nulls); |
84 |
| - } |
85 |
| - Self::NoNulls { row_count } => { |
86 |
| - *row_count += n; |
87 |
| - } |
88 |
| - Self::Nulls(builder) => builder.append_n(n, !is_null), |
| 57 | + if is_null { |
| 58 | + self.nulls.append_n_nulls(n); |
| 59 | + } else { |
| 60 | + self.nulls.append_n_non_nulls(n); |
89 | 61 | }
|
90 | 62 | }
|
91 | 63 |
|
92 | 64 | /// return the number of heap allocated bytes used by this structure to store boolean values
|
93 | 65 | pub fn allocated_size(&self) -> usize {
|
94 |
| - match self { |
95 |
| - Self::NoNulls { .. } => 0, |
96 |
| - // BooleanBufferBuilder builder::capacity returns capacity in bits (not bytes) |
97 |
| - Self::Nulls(builder) => builder.capacity() / 8, |
98 |
| - } |
| 66 | + // `NullBufferBuilder::allocated_size` returns capacity in bits, so divide by 8 for bytes |
| 67 | + self.nulls.allocated_size() / 8 |
99 | 68 | }
|
100 | 69 |
|
101 | 70 | /// Return a NullBuffer representing the accumulated nulls so far
|
102 |
| - pub fn build(self) -> Option<NullBuffer> { |
103 |
| - match self { |
104 |
| - Self::NoNulls { .. } => None, |
105 |
| - Self::Nulls(mut builder) => Some(NullBuffer::from(builder.finish())), |
106 |
| - } |
| 71 | + pub fn build(mut self) -> Option<NullBuffer> { |
| 72 | + self.nulls.finish() |
107 | 73 | }
|
108 | 74 |
|
109 | 75 | /// Returns a NullBuffer representing the first `n` rows accumulated so far
|
110 | 76 | /// shifting any remaining down by `n`
|
111 | 77 | pub fn take_n(&mut self, n: usize) -> Option<NullBuffer> {
|
112 |
| - match self { |
113 |
| - Self::NoNulls { row_count } => { |
114 |
| - *row_count -= n; |
115 |
| - None |
116 |
| - } |
117 |
| - Self::Nulls(builder) => { |
118 |
| - // Copy over the values at n..len-1 values to the start of a |
119 |
| - // new builder and leave it in self |
120 |
| - // |
121 |
| - // TODO: it would be great to use something like `set_bits` from arrow here. |
122 |
| - let mut new_builder = BooleanBufferBuilder::new(builder.len()); |
123 |
| - for i in n..builder.len() { |
124 |
| - new_builder.append(builder.get_bit(i)); |
125 |
| - } |
126 |
| - std::mem::swap(&mut new_builder, builder); |
127 |
| - |
128 |
| - // take only first n values from the original builder |
129 |
| - new_builder.truncate(n); |
130 |
| - Some(NullBuffer::from(new_builder.finish())) |
131 |
| - } |
| 78 | + // Copy over the values at n..len-1 to the start of a |
| 79 | + // new builder and leave it in self |
| 80 | + // |
| 81 | + // TODO: it would be great to use something like `set_bits` from arrow here. |
| 82 | + let mut new_builder = NullBufferBuilder::new(self.nulls.len()); |
| 83 | + for i in n..self.nulls.len() { |
| 84 | + new_builder.append(self.nulls.is_valid(i)); |
132 | 85 | }
|
| 86 | + std::mem::swap(&mut new_builder, &mut self.nulls); |
| 87 | + |
| 88 | + // take only first n values from the original builder |
| 89 | + new_builder.truncate(n); |
| 90 | + new_builder.finish() |
133 | 91 | }
|
134 | 92 | }
|
0 commit comments