Skip to content

Commit 19e3e8c

Browse files
authored
Implement Extend for ArrayBuilder (#1841) (#3563)
* Implement Extend for ArrayBuilder (#1841) * Add dictionaries * Add tests
1 parent a1cedb4 commit 19e3e8c

7 files changed

+207
-15
lines changed

arrow-array/src/array/dictionary_array.rs

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -481,18 +481,7 @@ impl<'a, T: ArrowDictionaryKeyType> FromIterator<Option<&'a str>> for Dictionary
481481
let it = iter.into_iter();
482482
let (lower, _) = it.size_hint();
483483
let mut builder = StringDictionaryBuilder::with_capacity(lower, 256, 1024);
484-
it.for_each(|i| {
485-
if let Some(i) = i {
486-
// Note: impl ... for Result<DictionaryArray<T>> fails with
487-
// error[E0117]: only traits defined in the current crate can be implemented for arbitrary types
488-
builder
489-
.append(i)
490-
.expect("Unable to append a value to a dictionary array.");
491-
} else {
492-
builder.append_null();
493-
}
494-
});
495-
484+
builder.extend(it);
496485
builder.finish()
497486
}
498487
}

arrow-array/src/builder/boolean_builder.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,15 @@ impl ArrayBuilder for BooleanBuilder {
211211
}
212212
}
213213

214+
impl Extend<Option<bool>> for BooleanBuilder {
215+
#[inline]
216+
fn extend<T: IntoIterator<Item = Option<bool>>>(&mut self, iter: T) {
217+
for v in iter {
218+
self.append_option(v)
219+
}
220+
}
221+
}
222+
214223
#[cfg(test)]
215224
mod tests {
216225
use super::*;
@@ -304,4 +313,17 @@ mod tests {
304313
assert_eq!(0, array.null_count());
305314
assert!(array.data().null_buffer().is_none());
306315
}
316+
317+
#[test]
318+
fn test_extend() {
319+
let mut builder = BooleanBuilder::new();
320+
builder.extend([false, false, true, false, false].into_iter().map(Some));
321+
builder.extend([true, true, false].into_iter().map(Some));
322+
let array = builder.finish();
323+
let values = array.iter().map(|x| x.unwrap()).collect::<Vec<_>>();
324+
assert_eq!(
325+
&values,
326+
&[false, false, true, false, false, true, true, false]
327+
)
328+
}
307329
}

arrow-array/src/builder/generic_bytes_builder.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,10 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
8888
}
8989

9090
/// Appends a value into the builder.
91+
///
92+
/// # Panics
93+
///
94+
/// Panics if the resulting length of [`Self::values_slice`] would exceed `T::Offset::MAX`
9195
#[inline]
9296
pub fn append_value(&mut self, value: impl AsRef<T::Native>) {
9397
self.value_builder.append_slice(value.as_ref().as_ref());
@@ -219,6 +223,15 @@ impl<T: ByteArrayType> ArrayBuilder for GenericByteBuilder<T> {
219223
}
220224
}
221225

226+
impl<T: ByteArrayType, V: AsRef<T::Native>> Extend<Option<V>> for GenericByteBuilder<T> {
227+
#[inline]
228+
fn extend<I: IntoIterator<Item = Option<V>>>(&mut self, iter: I) {
229+
for v in iter {
230+
self.append_option(v)
231+
}
232+
}
233+
}
234+
222235
/// Array builder for [`GenericStringArray`][crate::GenericStringArray]
223236
pub type GenericStringBuilder<O> = GenericByteBuilder<GenericStringType<O>>;
224237

@@ -420,4 +433,14 @@ mod tests {
420433
fn test_large_string_array_builder_finish_cloned() {
421434
_test_generic_string_array_builder_finish_cloned::<i64>()
422435
}
436+
437+
#[test]
438+
fn test_extend() {
439+
let mut builder = GenericStringBuilder::<i32>::new();
440+
builder.extend(["a", "b", "c", "", "a", "b", "c"].into_iter().map(Some));
441+
builder.extend(["d", "cupcakes", "hello"].into_iter().map(Some));
442+
let array = builder.finish();
443+
assert_eq!(array.value_offsets(), &[0, 1, 2, 3, 3, 4, 5, 6, 7, 15, 20]);
444+
assert_eq!(array.value_data(), b"abcabcdcupcakeshello");
445+
}
423446
}

arrow-array/src/builder/generic_bytes_dictionary_builder.rs

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ where
214214
K: ArrowDictionaryKeyType,
215215
T: ByteArrayType,
216216
{
217-
/// Append a primitive value to the array. Return an existing index
217+
/// Append a value to the array. Return an existing index
218218
/// if already present in the values array or a new index if the
219219
/// value is appended to the values array.
220220
///
@@ -255,12 +255,34 @@ where
255255
Ok(key)
256256
}
257257

258+
/// Infallibly append a value to this builder
259+
///
260+
/// # Panics
261+
///
262+
/// Panics if appending `value` would overflow the dictionary key type `K`, i.e. the key of a newly added dictionary value cannot be represented by `K::Native`
263+
pub fn append_value(&mut self, value: impl AsRef<T::Native>) {
264+
self.append(value).expect("dictionary key overflow");
265+
}
266+
258267
/// Appends a null slot into the builder
259268
#[inline]
260269
pub fn append_null(&mut self) {
261270
self.keys_builder.append_null()
262271
}
263272

273+
/// Append an `Option` value into the builder
274+
///
275+
/// # Panics
276+
///
277+
/// Panics if `value` is `Some` and appending it would overflow the dictionary key type `K`, i.e. the key of a newly added dictionary value cannot be represented by `K::Native`
278+
#[inline]
279+
pub fn append_option(&mut self, value: Option<impl AsRef<T::Native>>) {
280+
match value {
281+
None => self.append_null(),
282+
Some(v) => self.append_value(v),
283+
};
284+
}
285+
264286
/// Builds the `DictionaryArray` and reset this builder.
265287
pub fn finish(&mut self) -> DictionaryArray<K> {
266288
self.dedup.clear();
@@ -297,6 +319,17 @@ where
297319
}
298320
}
299321

322+
impl<K: ArrowDictionaryKeyType, T: ByteArrayType, V: AsRef<T::Native>> Extend<Option<V>>
323+
for GenericByteDictionaryBuilder<K, T>
324+
{
325+
#[inline]
326+
fn extend<I: IntoIterator<Item = Option<V>>>(&mut self, iter: I) {
327+
for v in iter {
328+
self.append_option(v)
329+
}
330+
}
331+
}
332+
300333
fn get_bytes<'a, K: ArrowNativeType, T: ByteArrayType>(
301334
values: &'a GenericByteBuilder<T>,
302335
key: &K,
@@ -405,7 +438,7 @@ mod tests {
405438

406439
use crate::array::Array;
407440
use crate::array::Int8Array;
408-
use crate::types::{Int16Type, Int8Type};
441+
use crate::types::{Int16Type, Int32Type, Int8Type, Utf8Type};
409442
use crate::{BinaryArray, StringArray};
410443

411444
fn test_bytes_dictionary_builder<T>(values: Vec<&T::Native>)
@@ -622,4 +655,14 @@ mod tests {
622655
vec![b"abc", b"def"],
623656
);
624657
}
658+
659+
#[test]
660+
fn test_extend() {
661+
let mut builder = GenericByteDictionaryBuilder::<Int32Type, Utf8Type>::new();
662+
builder.extend(["a", "b", "c", "a", "b", "c"].into_iter().map(Some));
663+
builder.extend(["c", "d", "a"].into_iter().map(Some));
664+
let dict = builder.finish();
665+
assert_eq!(dict.keys().values(), &[0, 1, 2, 0, 1, 2, 2, 3, 0]);
666+
assert_eq!(dict.values().len(), 4);
667+
}
625668
}

arrow-array/src/builder/generic_list_builder.rs

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,10 @@ where
111111
}
112112

113113
/// Finish the current variable-length list array slot
114+
///
115+
/// # Panics
116+
///
117+
/// Panics if the length of [`Self::values`] exceeds `OffsetSize::MAX`
114118
#[inline]
115119
pub fn append(&mut self, is_valid: bool) {
116120
self.offsets_builder
@@ -178,10 +182,32 @@ where
178182
}
179183
}
180184

185+
impl<O, B, V, E> Extend<Option<V>> for GenericListBuilder<O, B>
186+
where
187+
O: OffsetSizeTrait,
188+
B: ArrayBuilder + Extend<E>,
189+
V: IntoIterator<Item = E>,
190+
{
191+
#[inline]
192+
fn extend<T: IntoIterator<Item = Option<V>>>(&mut self, iter: T) {
193+
for v in iter {
194+
match v {
195+
Some(elements) => {
196+
self.values_builder.extend(elements);
197+
self.append(true);
198+
}
199+
None => self.append(false),
200+
}
201+
}
202+
}
203+
}
204+
181205
#[cfg(test)]
182206
mod tests {
183207
use super::*;
184208
use crate::builder::{Int32Builder, ListBuilder};
209+
use crate::cast::as_primitive_array;
210+
use crate::types::Int32Type;
185211
use crate::{Array, Int32Array};
186212
use arrow_buffer::Buffer;
187213
use arrow_schema::DataType;
@@ -364,4 +390,25 @@ mod tests {
364390
list_array.values().data().child_data()[0].buffers()[0].clone()
365391
);
366392
}
393+
394+
#[test]
395+
fn test_extend() {
396+
let mut builder = ListBuilder::new(Int32Builder::new());
397+
builder.extend([
398+
Some(vec![Some(1), Some(2), Some(7), None]),
399+
Some(vec![]),
400+
Some(vec![Some(4), Some(5)]),
401+
None,
402+
]);
403+
404+
let array = builder.finish();
405+
assert_eq!(array.value_offsets(), [0, 4, 4, 6, 6]);
406+
assert_eq!(array.null_count(), 1);
407+
assert!(array.is_null(3));
408+
let a_values = array.values();
409+
let elements = as_primitive_array::<Int32Type>(a_values.as_ref());
410+
assert_eq!(elements.values(), &[1, 2, 7, 0, 4, 5]);
411+
assert_eq!(elements.null_count(), 1);
412+
assert!(elements.is_null(3));
413+
}
367414
}

arrow-array/src/builder/primitive_builder.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,10 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
238238
}
239239

240240
/// Appends values from a slice of type `T` and a validity boolean slice
241+
///
242+
/// # Panics
243+
///
244+
/// Panics if `values` and `is_valid` have different lengths
241245
#[inline]
242246
pub fn append_values(&mut self, values: &[T::Native], is_valid: &[bool]) {
243247
assert_eq!(
@@ -328,6 +332,15 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
328332
}
329333
}
330334

335+
impl<P: ArrowPrimitiveType> Extend<Option<P::Native>> for PrimitiveBuilder<P> {
336+
#[inline]
337+
fn extend<T: IntoIterator<Item = Option<P::Native>>>(&mut self, iter: T) {
338+
for v in iter {
339+
self.append_option(v)
340+
}
341+
}
342+
}
343+
331344
#[cfg(test)]
332345
mod tests {
333346
use super::*;
@@ -578,4 +591,13 @@ mod tests {
578591
fn test_invalid_with_data_type() {
579592
Int32Builder::new().with_data_type(DataType::Int64);
580593
}
594+
595+
#[test]
596+
fn test_extend() {
597+
let mut builder = PrimitiveBuilder::<Int16Type>::new();
598+
builder.extend([1, 2, 3, 5, 2, 4, 4].into_iter().map(Some));
599+
builder.extend([2, 4, 6, 2].into_iter().map(Some));
600+
let array = builder.finish();
601+
assert_eq!(array.values(), &[1, 2, 3, 5, 2, 4, 4, 2, 4, 6, 2]);
602+
}
581603
}

arrow-array/src/builder/primitive_dictionary_builder.rs

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,12 +193,34 @@ where
193193
Ok(key)
194194
}
195195

196+
/// Infallibly append a value to this builder
197+
///
198+
/// # Panics
199+
///
200+
/// Panics if appending `value` would overflow the dictionary key type `K`, i.e. the key of a newly added dictionary value cannot be represented by `K::Native`
201+
pub fn append_value(&mut self, value: V::Native) {
202+
self.append(value).expect("dictionary key overflow");
203+
}
204+
196205
/// Appends a null slot into the builder
197206
#[inline]
198207
pub fn append_null(&mut self) {
199208
self.keys_builder.append_null()
200209
}
201210

211+
/// Append an `Option` value into the builder
212+
///
213+
/// # Panics
214+
///
215+
/// Panics if `value` is `Some` and appending it would overflow the dictionary key type `K`, i.e. the key of a newly added dictionary value cannot be represented by `K::Native`
216+
#[inline]
217+
pub fn append_option(&mut self, value: Option<V::Native>) {
218+
match value {
219+
None => self.append_null(),
220+
Some(v) => self.append_value(v),
221+
};
222+
}
223+
202224
/// Builds the `DictionaryArray` and reset this builder.
203225
pub fn finish(&mut self) -> DictionaryArray<K> {
204226
self.map.clear();
@@ -235,14 +257,25 @@ where
235257
}
236258
}
237259

260+
impl<K: ArrowPrimitiveType, P: ArrowPrimitiveType> Extend<Option<P::Native>>
261+
for PrimitiveDictionaryBuilder<K, P>
262+
{
263+
#[inline]
264+
fn extend<T: IntoIterator<Item = Option<P::Native>>>(&mut self, iter: T) {
265+
for v in iter {
266+
self.append_option(v)
267+
}
268+
}
269+
}
270+
238271
#[cfg(test)]
239272
mod tests {
240273
use super::*;
241274

242275
use crate::array::Array;
243276
use crate::array::UInt32Array;
244277
use crate::array::UInt8Array;
245-
use crate::types::{UInt32Type, UInt8Type};
278+
use crate::types::{Int32Type, UInt32Type, UInt8Type};
246279

247280
#[test]
248281
fn test_primitive_dictionary_builder() {
@@ -270,6 +303,19 @@ mod tests {
270303
assert_eq!(avs, &[12345678, 22345678]);
271304
}
272305

306+
#[test]
307+
fn test_extend() {
308+
let mut builder = PrimitiveDictionaryBuilder::<Int32Type, Int32Type>::new();
309+
builder.extend([1, 2, 3, 1, 2, 3, 1, 2, 3].into_iter().map(Some));
310+
builder.extend([4, 5, 1, 3, 1].into_iter().map(Some));
311+
let dict = builder.finish();
312+
assert_eq!(
313+
dict.keys().values(),
314+
&[0, 1, 2, 0, 1, 2, 0, 1, 2, 3, 4, 0, 2, 0]
315+
);
316+
assert_eq!(dict.values().len(), 5);
317+
}
318+
273319
#[test]
274320
#[should_panic(expected = "DictionaryKeyOverflowError")]
275321
fn test_primitive_dictionary_overflow() {

0 commit comments

Comments
 (0)