Skip to content

Commit 4f52a25

Browse files
authored
Add divide_opt kernel which produces null values on division by zero (#2710)
* Add divide_opt kernel * Add fast-path for non-null arrays * Add doc
1 parent 7e47fa6 commit 4f52a25

File tree

2 files changed

+102
-4
lines changed

2 files changed

+102
-4
lines changed

arrow/src/compute/kernels/arithmetic.rs

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ use crate::buffer::Buffer;
3232
use crate::buffer::MutableBuffer;
3333
use crate::compute::kernels::arity::unary;
3434
use crate::compute::util::combine_option_bitmap;
35-
use crate::compute::{binary, try_binary, try_unary, unary_dyn};
35+
use crate::compute::{binary, binary_opt, try_binary, try_unary, unary_dyn};
3636
use crate::datatypes::{
3737
native_op::ArrowNativeTypeOp, ArrowNumericType, DataType, Date32Type, Date64Type,
3838
IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit, IntervalYearMonthType,
@@ -711,7 +711,7 @@ where
711711
}
712712

713713
/// Perform `left + right` operation on two arrays. If either left or right value is null
714-
/// then the result is also null. Once
714+
/// then the result is also null.
715715
///
716716
/// This detects overflow and returns an `Err` for that. For a non-overflow-checking variant,
717717
/// use `add` instead.
@@ -1118,6 +1118,32 @@ where
11181118
return math_checked_divide_op(left, right, |a, b| a.div_checked(b));
11191119
}
11201120

1121+
/// Perform `left / right` operation on two arrays. If either left or right value is null
1122+
/// then the result is also null.
1123+
///
1124+
/// If any right hand value is zero, the operation value will be replaced with null in the
1125+
/// result.
1126+
///
1127+
/// Unlike `divide` or `divide_checked`, division by zero will get a null value instead
1128+
/// returning an `Err`, this also doesn't check overflowing, overflowing will just wrap
1129+
/// the result around.
1130+
pub fn divide_opt<T>(
1131+
left: &PrimitiveArray<T>,
1132+
right: &PrimitiveArray<T>,
1133+
) -> Result<PrimitiveArray<T>>
1134+
where
1135+
T: ArrowNumericType,
1136+
T::Native: ArrowNativeTypeOp + Zero + One,
1137+
{
1138+
Ok(binary_opt(left, right, |a, b| {
1139+
if b.is_zero() {
1140+
None
1141+
} else {
1142+
Some(a.div_wrapping(b))
1143+
}
1144+
}))
1145+
}
1146+
11211147
/// Perform `left / right` operation on two arrays. If either left or right value is null
11221148
/// then the result is also null. If any right hand value is zero then the result of this
11231149
/// operation will be `Err(ArrowError::DivideByZero)`.
@@ -1152,7 +1178,7 @@ pub fn divide<T>(
11521178
right: &PrimitiveArray<T>,
11531179
) -> Result<PrimitiveArray<T>>
11541180
where
1155-
T: datatypes::ArrowNumericType,
1181+
T: ArrowNumericType,
11561182
T::Native: ArrowNativeTypeOp,
11571183
{
11581184
math_op(left, right, |a, b| a.div_wrapping(b))
@@ -2195,4 +2221,23 @@ mod tests {
21952221
let overflow = multiply_scalar_checked(&a, i32::MAX);
21962222
overflow.expect_err("overflow should be detected");
21972223
}
2224+
2225+
#[test]
fn test_primitive_div_opt_overflow_division_by_zero() {
    let dividend = Int32Array::from(vec![i32::MIN]);
    let minus_one = Int32Array::from(vec![-1]);

    // `i32::MIN / -1` overflows; both `divide` and `divide_opt` wrap back to `i32::MIN`.
    let wrapped = Int32Array::from(vec![i32::MIN]);
    assert_eq!(wrapped, divide(&dividend, &minus_one).unwrap());
    assert_eq!(wrapped, divide_opt(&dividend, &minus_one).unwrap());

    // A zero divisor yields a null slot instead of an error.
    let zero = Int32Array::from(vec![0]);
    let all_null = Int32Array::from(vec![None]);
    assert_eq!(all_null, divide_opt(&dividend, &zero).unwrap());
}
21982243
}

arrow/src/compute/kernels/arity.rs

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
//! Defines kernels suitable to perform operations to primitive arrays.
1919
2020
use crate::array::{
21-
Array, ArrayData, ArrayRef, BufferBuilder, DictionaryArray, PrimitiveArray,
21+
Array, ArrayData, ArrayIter, ArrayRef, BufferBuilder, DictionaryArray, PrimitiveArray,
2222
};
2323
use crate::buffer::Buffer;
2424
use crate::compute::util::combine_option_bitmap;
@@ -257,6 +257,59 @@ where
257257
Ok(unsafe { build_primitive_array(len, buffer.finish(), null_count, null_buffer) })
258258
}
259259

260+
/// Applies the provided binary operation across `a` and `b`, collecting the optional results
261+
/// into a [`PrimitiveArray`]. If any index is null in either `a` or `b`, the corresponding
262+
/// index in the result will also be null. The binary operation could return `None` which
263+
/// results in a new null in the collected [`PrimitiveArray`].
264+
///
265+
/// The function is only evaluated for non-null indices
266+
///
267+
/// # Panic
268+
///
269+
/// Panics if the arrays have different lengths
270+
pub(crate) fn binary_opt<A, B, F, O>(
271+
a: &PrimitiveArray<A>,
272+
b: &PrimitiveArray<B>,
273+
op: F,
274+
) -> PrimitiveArray<O>
275+
where
276+
A: ArrowPrimitiveType,
277+
B: ArrowPrimitiveType,
278+
O: ArrowPrimitiveType,
279+
F: Fn(A::Native, B::Native) -> Option<O::Native>,
280+
{
281+
assert_eq!(a.len(), b.len());
282+
283+
if a.is_empty() {
284+
return PrimitiveArray::from(ArrayData::new_empty(&O::DATA_TYPE));
285+
}
286+
287+
if a.null_count() == 0 && b.null_count() == 0 {
288+
a.values()
289+
.iter()
290+
.zip(b.values().iter())
291+
.map(|(a, b)| op(*a, *b))
292+
.collect()
293+
} else {
294+
let iter_a = ArrayIter::new(a);
295+
let iter_b = ArrayIter::new(b);
296+
297+
let values =
298+
iter_a
299+
.into_iter()
300+
.zip(iter_b.into_iter())
301+
.map(|(item_a, item_b)| {
302+
if let (Some(a), Some(b)) = (item_a, item_b) {
303+
op(a, b)
304+
} else {
305+
None
306+
}
307+
});
308+
309+
values.collect()
310+
}
311+
}
312+
260313
#[cfg(test)]
261314
mod tests {
262315
use super::*;

0 commit comments

Comments
 (0)