Skip to content

Update Arrow 45.0.0 And Datum Arithmetic, change Decimal Division semantics #6832

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 24 commits into from
Aug 8, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
68a89b3
Datum based arithmetic
tustvold Jul 3, 2023
47c4fe1
Update scalar timestamp arithmetic tests
tustvold Jul 4, 2023
5180300
Clippy
tustvold Jul 4, 2023
c189ab8
Merge remote-tracking branch 'upstream/main' into datum-arithmetic
tustvold Jul 12, 2023
3fa6236
Update negation
tustvold Jul 12, 2023
36205a2
Update sqllogictests
tustvold Jul 13, 2023
152408b
Merge remote-tracking branch 'upstream/main' into datum-arithmetic
tustvold Jul 13, 2023
be29b4c
Update arrow 44.0.0
tustvold Jul 18, 2023
9af340c
Merge remote-tracking branch 'upstream/main' into datum-arithmetic
tustvold Jul 18, 2023
87cf899
Update for fixed size binary comparisons
tustvold Jul 18, 2023
632cc2c
Clippy
tustvold Jul 18, 2023
a2dda89
Merge remote-tracking branch 'upstream/main' into datum-arithmetic
tustvold Jul 29, 2023
eebfc28
Update pin
tustvold Jul 30, 2023
001a747
Override formatting
tustvold Jul 30, 2023
b774150
Merge remote-tracking branch 'upstream/master' into datum-arithmetic
tustvold Jul 30, 2023
88217bb
Make test stable
tustvold Jul 30, 2023
4cfb92b
Remove temporary file
tustvold Jul 30, 2023
5d2bd3b
Update datafusion-cli lockfile
tustvold Jul 30, 2023
6a6048e
Update pin
tustvold Aug 2, 2023
027d2de
Merge remote-tracking branch 'upstream/main' into datum-arithmetic
tustvold Aug 2, 2023
ec92746
Format
tustvold Aug 2, 2023
3b9b605
Move DEFAULT_FORMAT_OPTIONS to datafusion_common
tustvold Aug 2, 2023
735d315
Merge remote-tracking branch 'apache/main' into datum-arithmetic
alamb Aug 8, 2023
e0c4b1f
Merge remote-tracking branch 'apache/main' into datum-arithmetic
alamb Aug 8, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,9 @@ panic = 'unwind'
rpath = false

[patch.crates-io]
arrow = { git = "https://github.com/tustvold/arrow-rs.git", rev = "9c461f7027871b3d1a1b30de7fd26b3ac01cb096" }
arrow-array = { git = "https://github.com/tustvold/arrow-rs.git", rev = "9c461f7027871b3d1a1b30de7fd26b3ac01cb096" }
arrow-buffer = { git = "https://github.com/tustvold/arrow-rs.git", rev = "9c461f7027871b3d1a1b30de7fd26b3ac01cb096" }
arrow-flight = { git = "https://github.com/tustvold/arrow-rs.git", rev = "9c461f7027871b3d1a1b30de7fd26b3ac01cb096" }
arrow-schema = { git = "https://github.com/tustvold/arrow-rs.git", rev = "9c461f7027871b3d1a1b30de7fd26b3ac01cb096" }
parquet = { git = "https://github.com/tustvold/arrow-rs.git", rev = "9c461f7027871b3d1a1b30de7fd26b3ac01cb096" }
arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "8bcb3fc4ec458f7fdf4a98de199620e1164281df" }
arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "8bcb3fc4ec458f7fdf4a98de199620e1164281df" }
arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "8bcb3fc4ec458f7fdf4a98de199620e1164281df" }
arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "8bcb3fc4ec458f7fdf4a98de199620e1164281df" }
arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "8bcb3fc4ec458f7fdf4a98de199620e1164281df" }
parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "8bcb3fc4ec458f7fdf4a98de199620e1164281df" }
31 changes: 16 additions & 15 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions datafusion-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ predicates = "3.0"
rstest = "0.17"

[patch.crates-io]
arrow = { git = "https://github.com/tustvold/arrow-rs.git", rev = "9c461f7027871b3d1a1b30de7fd26b3ac01cb096" }
arrow-array = { git = "https://github.com/tustvold/arrow-rs.git", rev = "9c461f7027871b3d1a1b30de7fd26b3ac01cb096" }
arrow-buffer = { git = "https://github.com/tustvold/arrow-rs.git", rev = "9c461f7027871b3d1a1b30de7fd26b3ac01cb096" }
arrow-schema = { git = "https://github.com/tustvold/arrow-rs.git", rev = "9c461f7027871b3d1a1b30de7fd26b3ac01cb096" }
parquet = { git = "https://github.com/tustvold/arrow-rs.git", rev = "9c461f7027871b3d1a1b30de7fd26b3ac01cb096" }
arrow = { git = "https://github.com/tustvold/arrow-rs.git", rev = "8bcb3fc4ec458f7fdf4a98de199620e1164281df" }
arrow-array = { git = "https://github.com/tustvold/arrow-rs.git", rev = "8bcb3fc4ec458f7fdf4a98de199620e1164281df" }
arrow-buffer = { git = "https://github.com/tustvold/arrow-rs.git", rev = "8bcb3fc4ec458f7fdf4a98de199620e1164281df" }
arrow-schema = { git = "https://github.com/tustvold/arrow-rs.git", rev = "8bcb3fc4ec458f7fdf4a98de199620e1164281df" }
parquet = { git = "https://github.com/tustvold/arrow-rs.git", rev = "8bcb3fc4ec458f7fdf4a98de199620e1164281df" }
6 changes: 6 additions & 0 deletions datafusion/core/tests/sqllogictests/test_files/interval.slt
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,12 @@ create table t (i interval) as values ('5 days 3 nanoseconds'::interval);
statement ok
insert into t values ('6 days 7 nanoseconds'::interval)

query ?
select -i from t;
----
0 years 0 mons -5 days 0 hours 0 mins -0.000000003 secs
0 years 0 mons -6 days 0 hours 0 mins -0.000000007 secs

query ?T rowsort
select
i,
Expand Down
68 changes: 4 additions & 64 deletions datafusion/physical-expr/src/expressions/negative.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,9 @@ use std::any::Any;
use std::hash::{Hash, Hasher};
use std::sync::Arc;

use arrow::array::ArrayRef;
use arrow::compute::kernels::arithmetic::negate;
use arrow::{
array::{
Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array,
IntervalDayTimeArray, IntervalMonthDayNanoArray, IntervalYearMonthArray,
},
datatypes::{DataType, IntervalUnit, Schema},
compute::kernels::numeric::neg_wrapping,
datatypes::{DataType, Schema},
record_batch::RecordBatch,
};

Expand All @@ -40,18 +35,6 @@ use datafusion_expr::{
ColumnarValue,
};

/// Invoke a compute kernel on array(s)
macro_rules! compute_op {
// invoke unary operator
($OPERAND:expr, $OP:ident, $DT:ident) => {{
let operand = $OPERAND
.as_any()
.downcast_ref::<$DT>()
.expect("compute_op failed to downcast array");
Ok(Arc::new($OP(&operand)?))
}};
}

/// Negative expression
#[derive(Debug, Hash)]
pub struct NegativeExpr {
Expand Down Expand Up @@ -95,23 +78,8 @@ impl PhysicalExpr for NegativeExpr {
let arg = self.arg.evaluate(batch)?;
match arg {
ColumnarValue::Array(array) => {
let result: Result<ArrayRef> = match array.data_type() {
DataType::Int8 => compute_op!(array, negate, Int8Array),
DataType::Int16 => compute_op!(array, negate, Int16Array),
DataType::Int32 => compute_op!(array, negate, Int32Array),
DataType::Int64 => compute_op!(array, negate, Int64Array),
DataType::Float32 => compute_op!(array, negate, Float32Array),
DataType::Float64 => compute_op!(array, negate, Float64Array),
DataType::Interval(IntervalUnit::YearMonth) => compute_op!(array, negate, IntervalYearMonthArray),
DataType::Interval(IntervalUnit::DayTime) => compute_op!(array, negate, IntervalDayTimeArray),
DataType::Interval(IntervalUnit::MonthDayNano) => compute_op!(array, negate, IntervalMonthDayNanoArray),
_ => Err(DataFusionError::Internal(format!(
"(- '{:?}') can't be evaluated because the expression's type is {:?}, not signed numeric",
self,
array.data_type(),
))),
};
result.map(|a| ColumnarValue::Array(a))
let result = neg_wrapping(array.as_ref())?;
Ok(ColumnarValue::Array(result))
}
ColumnarValue::Scalar(scalar) => {
Ok(ColumnarValue::Scalar((scalar.arithmetic_negate())?))
Expand Down Expand Up @@ -203,31 +171,6 @@ mod tests {
};
}

macro_rules! test_array_negative_op_intervals {
($DATA_TY:tt, $($VALUE:expr),* ) => {
let schema = Schema::new(vec![Field::new("a", DataType::Interval(IntervalUnit::$DATA_TY), true)]);
let expr = negative(col("a", &schema)?, &schema)?;
assert_eq!(expr.data_type(&schema)?, DataType::Interval(IntervalUnit::$DATA_TY));
assert!(expr.nullable(&schema)?);
let mut arr = Vec::new();
let mut arr_expected = Vec::new();
$(
arr.push(Some($VALUE));
arr_expected.push(Some(-$VALUE));
)+
arr.push(None);
arr_expected.push(None);
let input = paste!{[<Interval $DATA_TY Array>]::from(arr)};
let expected = &paste!{[<Interval $DATA_TY Array>]::from(arr_expected)};
let batch =
RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(input)])?;
let result = expr.evaluate(&batch)?.into_array(batch.num_rows());
let result =
as_primitive_array(&result).expect(format!("failed to downcast to {:?}Array", $DATA_TY).as_str());
assert_eq!(result, expected);
};
}

#[test]
fn array_negative_op() -> Result<()> {
test_array_negative_op!(Int8, 2i8, 1i8);
Expand All @@ -236,9 +179,6 @@ mod tests {
test_array_negative_op!(Int64, 23456i64, 12345i64);
test_array_negative_op!(Float32, 2345.0f32, 1234.0f32);
test_array_negative_op!(Float64, 23456.0f64, 12345.0f64);
test_array_negative_op_intervals!(YearMonth, 2345i32, 1234i32);
test_array_negative_op_intervals!(DayTime, 23456i64, 12345i64);
test_array_negative_op_intervals!(MonthDayNano, 234567i128, 123456i128);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is moved into the sqllogictests. Unfortunately, I couldn't find a way to get a MonthDayNano type, but this is better than nothing.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you can do something like this:

select arrow_cast(interval '30 minutes', 'Interval(MonthDayNano)');

Ok(())
}
}