Skip to content

Commit 0c3732f

Browse files
a10y and alamb authored
Optimize take kernel for BinaryViewArray and StringViewArray (#6168)
* improve speed of view take kernel
* ArrayData -> new_unchecked
* Update arrow-select/src/take.rs

Co-authored-by: Andrew Lamb <[email protected]>

---------

Co-authored-by: Andrew Lamb <[email protected]>
1 parent ede5a64 commit 0c3732f

File tree

3 files changed

+68
-5
lines changed

3 files changed

+68
-5
lines changed

arrow-select/src/take.rs

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -487,11 +487,10 @@ fn take_byte_view<T: ByteViewType, IndexType: ArrowPrimitiveType>(
487487
) -> Result<GenericByteViewArray<T>, ArrowError> {
488488
let new_views = take_native(array.views(), indices);
489489
let new_nulls = take_nulls(array.nulls(), indices);
490-
Ok(GenericByteViewArray::new(
491-
new_views,
492-
array.data_buffers().to_vec(),
493-
new_nulls,
494-
))
490+
// SAFETY: the views in `array` were already valid for `array.data_buffers()`; `take_native` copies only valid values and verifies bounds, and the data buffers are reused unchanged
491+
Ok(unsafe {
492+
GenericByteViewArray::new_unchecked(new_views, array.data_buffers().to_vec(), new_nulls)
493+
})
495494
}
496495

497496
/// `take` implementation for list arrays

arrow/benches/take_kernels.rs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,42 @@ fn add_benchmark(c: &mut Criterion) {
149149
b.iter(|| bench_take(&values, &indices))
150150
});
151151

152+
let values = create_string_view_array(512, 0.0);
153+
let indices = create_random_index(512, 0.0);
154+
c.bench_function("take stringview 512", |b| {
155+
b.iter(|| bench_take(&values, &indices))
156+
});
157+
158+
let values = create_string_view_array(1024, 0.0);
159+
let indices = create_random_index(1024, 0.0);
160+
c.bench_function("take stringview 1024", |b| {
161+
b.iter(|| bench_take(&values, &indices))
162+
});
163+
164+
let values = create_string_view_array(512, 0.0);
165+
let indices = create_random_index(512, 0.5);
166+
c.bench_function("take stringview null indices 512", |b| {
167+
b.iter(|| bench_take(&values, &indices))
168+
});
169+
170+
let values = create_string_view_array(1024, 0.0);
171+
let indices = create_random_index(1024, 0.5);
172+
c.bench_function("take stringview null indices 1024", |b| {
173+
b.iter(|| bench_take(&values, &indices))
174+
});
175+
176+
let values = create_string_view_array(1024, 0.5);
177+
let indices = create_random_index(1024, 0.0);
178+
c.bench_function("take stringview null values 1024", |b| {
179+
b.iter(|| bench_take(&values, &indices))
180+
});
181+
182+
let values = create_string_view_array(1024, 0.5);
183+
let indices = create_random_index(1024, 0.5);
184+
c.bench_function("take stringview null values null indices 1024", |b| {
185+
b.iter(|| bench_take(&values, &indices))
186+
});
187+
152188
let values = create_primitive_run_array::<Int32Type, Int32Type>(1024, 512);
153189
let indices = create_random_index(1024, 0.0);
154190
c.bench_function(

arrow/src/util/bench_util.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,34 @@ pub fn create_string_array_with_len<Offset: OffsetSizeTrait>(
160160
.collect()
161161
}
162162

163+
/// Creates a random (but fixed-seeded) string view array of a given size and null density.
164+
///
165+
/// See `create_string_array` above for more details.
166+
pub fn create_string_view_array(size: usize, null_density: f32) -> StringViewArray {
167+
create_string_view_array_with_max_len(size, null_density, 400)
168+
}
169+
170+
/// Creates a random (but fixed-seeded) string view array with `size` elements and the given null density, where each non-null string has a random length in `0..max_str_len`
171+
fn create_string_view_array_with_max_len(
172+
size: usize,
173+
null_density: f32,
174+
max_str_len: usize,
175+
) -> StringViewArray {
176+
let rng = &mut seedable_rng();
177+
(0..size)
178+
.map(|_| {
179+
if rng.gen::<f32>() < null_density {
180+
None
181+
} else {
182+
let str_len = rng.gen_range(0..max_str_len);
183+
let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
184+
let value = String::from_utf8(value).unwrap();
185+
Some(value)
186+
}
187+
})
188+
.collect()
189+
}
190+
163191
/// Creates a random (but fixed-seeded) array of a given size, null density and length
164192
pub fn create_string_view_array_with_len(
165193
size: usize,

0 commit comments

Comments
 (0)