Skip to content

Commit da02530

Browse files
committed
chore: switch to using proper Substrait types for IntervalYearMonth and IntervalDayTime
also clean up IntervalMonthDayNano type - the type itself needs no parameters
1 parent d542cbd commit da02530

File tree

3 files changed

+89
-107
lines changed

3 files changed

+89
-107
lines changed

datafusion/substrait/src/logical_plan/consumer.rs

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ use std::str::FromStr;
6565
use std::sync::Arc;
6666
use substrait::proto::exchange_rel::ExchangeKind;
6767
use substrait::proto::expression::literal::user_defined::Val;
68-
use substrait::proto::expression::literal::IntervalDayToSecond;
68+
use substrait::proto::expression::literal::{IntervalDayToSecond, IntervalYearToMonth};
6969
use substrait::proto::expression::subquery::SubqueryType;
7070
use substrait::proto::expression::{self, FieldReference, Literal, ScalarFunction};
7171
use substrait::proto::read_rel::local_files::file_or_files::PathType::UriFile;
@@ -1414,7 +1414,7 @@ fn from_substrait_type(
14141414
})?;
14151415
let field = Arc::new(Field::new_list_field(
14161416
from_substrait_type(inner_type, dfs_names, name_idx)?,
1417-
// We ignore Substrait's nullability here to match to_substrait_literal
1417+
// We ignore Substrait's nullability here to match to_substrait_literal
14181418
// which always creates nullable lists
14191419
true,
14201420
));
@@ -1445,12 +1445,15 @@ fn from_substrait_type(
14451445
));
14461446
match map.type_variation_reference {
14471447
DEFAULT_CONTAINER_TYPE_VARIATION_REF => {
1448-
Ok(DataType::Map(Arc::new(Field::new_struct(
1449-
"entries",
1450-
[key_field, value_field],
1451-
false, // The inner map field is always non-nullable (Arrow #1697),
1452-
)), false))
1453-
},
1448+
Ok(DataType::Map(
1449+
Arc::new(Field::new_struct(
1450+
"entries",
1451+
[key_field, value_field],
1452+
false, // The inner map field is always non-nullable (Arrow #1697),
1453+
)),
1454+
false,
1455+
))
1456+
}
14541457
v => not_impl_err!(
14551458
"Unsupported Substrait type variation {v} of type {s_kind:?}"
14561459
)?,
@@ -1467,14 +1470,33 @@ fn from_substrait_type(
14671470
"Unsupported Substrait type variation {v} of type {s_kind:?}"
14681471
),
14691472
},
1473+
r#type::Kind::IntervalYear(i) => match i.type_variation_reference {
1474+
DEFAULT_TYPE_VARIATION_REF => {
1475+
Ok(DataType::Interval(IntervalUnit::YearMonth))
1476+
}
1477+
v => not_impl_err!(
1478+
"Unsupported Substrait type variation {v} of type {s_kind:?}"
1479+
),
1480+
},
1481+
r#type::Kind::IntervalDay(i) => match i.type_variation_reference {
1482+
DEFAULT_TYPE_VARIATION_REF => {
1483+
Ok(DataType::Interval(IntervalUnit::DayTime))
1484+
}
1485+
v => not_impl_err!(
1486+
"Unsupported Substrait type variation {v} of type {s_kind:?}"
1487+
),
1488+
},
14701489
r#type::Kind::UserDefined(u) => {
14711490
match u.type_reference {
1491+
// Kept for backwards compatibility, use IntervalYear instead
14721492
INTERVAL_YEAR_MONTH_TYPE_REF => {
14731493
Ok(DataType::Interval(IntervalUnit::YearMonth))
14741494
}
1495+
// Kept for backwards compatibility, use IntervalDay instead
14751496
INTERVAL_DAY_TIME_TYPE_REF => {
14761497
Ok(DataType::Interval(IntervalUnit::DayTime))
14771498
}
1499+
// Not supported yet by Substrait
14781500
INTERVAL_MONTH_DAY_NANO_TYPE_REF => {
14791501
Ok(DataType::Interval(IntervalUnit::MonthDayNano))
14801502
}
@@ -1484,7 +1506,7 @@ fn from_substrait_type(
14841506
u.type_variation_reference
14851507
),
14861508
}
1487-
},
1509+
}
14881510
r#type::Kind::Struct(s) => Ok(DataType::Struct(from_substrait_struct_type(
14891511
s, dfs_names, name_idx,
14901512
)?)),
@@ -1753,11 +1775,16 @@ fn from_substrait_literal(
17531775
seconds,
17541776
microseconds,
17551777
})) => {
1778+
// DF only supports millisecond precision, so we lose the micros here
17561779
ScalarValue::new_interval_dt(*days, (seconds * 1000) + (microseconds / 1000))
17571780
}
1781+
Some(LiteralType::IntervalYearToMonth(IntervalYearToMonth { years, months })) => {
1782+
ScalarValue::new_interval_ym(*years, *months)
1783+
}
17581784
Some(LiteralType::FixedChar(c)) => ScalarValue::Utf8(Some(c.clone())),
17591785
Some(LiteralType::UserDefined(user_defined)) => {
17601786
match user_defined.type_reference {
1787+
// Kept for backwards compatibility, use IntervalYearToMonth instead
17611788
INTERVAL_YEAR_MONTH_TYPE_REF => {
17621789
let Some(Val::Value(raw_val)) = user_defined.val.as_ref() else {
17631790
return substrait_err!("Interval year month value is empty");
@@ -1770,6 +1797,7 @@ fn from_substrait_literal(
17701797
})?;
17711798
ScalarValue::IntervalYearMonth(Some(i32::from_le_bytes(value_slice)))
17721799
}
1800+
// Kept for backwards compatibility, use IntervalDayToSecond instead
17731801
INTERVAL_DAY_TIME_TYPE_REF => {
17741802
let Some(Val::Value(raw_val)) = user_defined.val.as_ref() else {
17751803
return substrait_err!("Interval day time value is empty");

datafusion/substrait/src/logical_plan/producer.rs

Lines changed: 42 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,11 @@ use datafusion::logical_expr::{expr, Between, JoinConstraint, LogicalPlan, Opera
4848
use datafusion::prelude::Expr;
4949
use pbjson_types::Any as ProtoAny;
5050
use substrait::proto::exchange_rel::{ExchangeKind, RoundRobin, ScatterFields};
51-
use substrait::proto::expression::literal::user_defined::Val;
52-
use substrait::proto::expression::literal::UserDefined;
53-
use substrait::proto::expression::literal::{List, Struct};
51+
use substrait::proto::expression::literal::{
52+
user_defined, IntervalDayToSecond, IntervalYearToMonth, List, Struct, UserDefined,
53+
};
5454
use substrait::proto::expression::subquery::InPredicate;
5555
use substrait::proto::expression::window_function::BoundsType;
56-
use substrait::proto::r#type::{parameter, Parameter};
5756
use substrait::proto::read_rel::VirtualTable;
5857
use substrait::proto::{CrossRel, ExchangeRel};
5958
use substrait::{
@@ -95,9 +94,7 @@ use crate::variation_const::{
9594
DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF,
9695
DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF,
9796
DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF,
98-
INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_DAY_TIME_TYPE_URL,
9997
INTERVAL_MONTH_DAY_NANO_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_URL,
100-
INTERVAL_YEAR_MONTH_TYPE_REF, INTERVAL_YEAR_MONTH_TYPE_URL,
10198
LARGE_CONTAINER_TYPE_VARIATION_REF, TIMESTAMP_MICRO_TYPE_VARIATION_REF,
10299
TIMESTAMP_MILLI_TYPE_VARIATION_REF, TIMESTAMP_NANO_TYPE_VARIATION_REF,
103100
TIMESTAMP_SECOND_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF,
@@ -1534,47 +1531,31 @@ fn to_substrait_type(dt: &DataType, nullable: bool) -> Result<substrait::proto::
15341531
})),
15351532
}),
15361533
DataType::Interval(interval_unit) => {
1537-
// define two type parameters for convenience
1538-
let i32_param = Parameter {
1539-
parameter: Some(parameter::Parameter::DataType(substrait::proto::Type {
1540-
kind: Some(r#type::Kind::I32(r#type::I32 {
1534+
match interval_unit {
1535+
IntervalUnit::YearMonth => Ok(substrait::proto::Type {
1536+
kind: Some(r#type::Kind::IntervalYear(r#type::IntervalYear {
15411537
type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
1542-
nullability: r#type::Nullability::Unspecified as i32,
1538+
nullability,
15431539
})),
1544-
})),
1545-
};
1546-
let i64_param = Parameter {
1547-
parameter: Some(parameter::Parameter::DataType(substrait::proto::Type {
1548-
kind: Some(r#type::Kind::I64(r#type::I64 {
1540+
}),
1541+
IntervalUnit::DayTime => Ok(substrait::proto::Type {
1542+
kind: Some(r#type::Kind::IntervalDay(r#type::IntervalDay {
15491543
type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
1550-
nullability: r#type::Nullability::Unspecified as i32,
1544+
nullability,
15511545
})),
1552-
})),
1553-
};
1554-
1555-
let (type_parameters, type_reference) = match interval_unit {
1556-
IntervalUnit::YearMonth => {
1557-
let type_parameters = vec![i32_param];
1558-
(type_parameters, INTERVAL_YEAR_MONTH_TYPE_REF)
1559-
}
1560-
IntervalUnit::DayTime => {
1561-
let type_parameters = vec![i64_param];
1562-
(type_parameters, INTERVAL_DAY_TIME_TYPE_REF)
1563-
}
1546+
}),
15641547
IntervalUnit::MonthDayNano => {
1565-
// use 2 `i64` as `i128`
1566-
let type_parameters = vec![i64_param.clone(), i64_param];
1567-
(type_parameters, INTERVAL_MONTH_DAY_NANO_TYPE_REF)
1548+
// Substrait doesn't currently support this type, so we represent it as a UDT
1549+
Ok(substrait::proto::Type {
1550+
kind: Some(r#type::Kind::UserDefined(r#type::UserDefined {
1551+
type_reference: INTERVAL_MONTH_DAY_NANO_TYPE_REF,
1552+
type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
1553+
nullability,
1554+
type_parameters: vec![],
1555+
})),
1556+
})
15681557
}
1569-
};
1570-
Ok(substrait::proto::Type {
1571-
kind: Some(r#type::Kind::UserDefined(r#type::UserDefined {
1572-
type_reference,
1573-
type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
1574-
nullability,
1575-
type_parameters,
1576-
})),
1577-
})
1558+
}
15781559
}
15791560
DataType::Binary => Ok(substrait::proto::Type {
15801561
kind: Some(r#type::Kind::Binary(r#type::Binary {
@@ -1954,75 +1935,38 @@ fn to_substrait_literal(value: &ScalarValue) -> Result<Literal> {
19541935
(LiteralType::Date(*d), DATE_32_TYPE_VARIATION_REF)
19551936
}
19561937
// Date64 literal is not supported in Substrait
1957-
ScalarValue::IntervalYearMonth(Some(i)) => {
1958-
let bytes = i.to_le_bytes();
1959-
(
1960-
LiteralType::UserDefined(UserDefined {
1961-
type_reference: INTERVAL_YEAR_MONTH_TYPE_REF,
1962-
type_parameters: vec![Parameter {
1963-
parameter: Some(parameter::Parameter::DataType(
1964-
substrait::proto::Type {
1965-
kind: Some(r#type::Kind::I32(r#type::I32 {
1966-
type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
1967-
nullability: r#type::Nullability::Required as i32,
1968-
})),
1969-
},
1970-
)),
1971-
}],
1972-
val: Some(Val::Value(ProtoAny {
1973-
type_url: INTERVAL_YEAR_MONTH_TYPE_URL.to_string(),
1974-
value: bytes.to_vec().into(),
1975-
})),
1976-
}),
1977-
INTERVAL_YEAR_MONTH_TYPE_REF,
1978-
)
1979-
}
1938+
ScalarValue::IntervalYearMonth(Some(i)) => (
1939+
LiteralType::IntervalYearToMonth(IntervalYearToMonth {
1940+
// DF only tracks total months, but there should always be 12 months in a year
1941+
years: *i / 12,
1942+
months: *i % 12,
1943+
}),
1944+
DEFAULT_TYPE_VARIATION_REF,
1945+
),
19801946
ScalarValue::IntervalMonthDayNano(Some(i)) => {
1981-
// treat `i128` as two contiguous `i64`
1947+
// IntervalMonthDayNano is internally represented as a 128-bit integer, containing
1948+
// months (32bit), days (32bit), and nanoseconds (64bit)
19821949
let bytes = i.to_byte_slice();
1983-
let i64_param = Parameter {
1984-
parameter: Some(parameter::Parameter::DataType(substrait::proto::Type {
1985-
kind: Some(r#type::Kind::I64(r#type::I64 {
1986-
type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
1987-
nullability: r#type::Nullability::Required as i32,
1988-
})),
1989-
})),
1990-
};
19911950
(
19921951
LiteralType::UserDefined(UserDefined {
19931952
type_reference: INTERVAL_MONTH_DAY_NANO_TYPE_REF,
1994-
type_parameters: vec![i64_param.clone(), i64_param],
1995-
val: Some(Val::Value(ProtoAny {
1953+
type_parameters: vec![],
1954+
val: Some(user_defined::Val::Value(ProtoAny {
19961955
type_url: INTERVAL_MONTH_DAY_NANO_TYPE_URL.to_string(),
19971956
value: bytes.to_vec().into(),
19981957
})),
19991958
}),
20001959
INTERVAL_MONTH_DAY_NANO_TYPE_REF,
20011960
)
20021961
}
2003-
ScalarValue::IntervalDayTime(Some(i)) => {
2004-
let bytes = i.to_byte_slice();
2005-
(
2006-
LiteralType::UserDefined(UserDefined {
2007-
type_reference: INTERVAL_DAY_TIME_TYPE_REF,
2008-
type_parameters: vec![Parameter {
2009-
parameter: Some(parameter::Parameter::DataType(
2010-
substrait::proto::Type {
2011-
kind: Some(r#type::Kind::I64(r#type::I64 {
2012-
type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
2013-
nullability: r#type::Nullability::Required as i32,
2014-
})),
2015-
},
2016-
)),
2017-
}],
2018-
val: Some(Val::Value(ProtoAny {
2019-
type_url: INTERVAL_DAY_TIME_TYPE_URL.to_string(),
2020-
value: bytes.to_vec().into(),
2021-
})),
2022-
}),
2023-
INTERVAL_DAY_TIME_TYPE_REF,
2024-
)
2025-
}
1962+
ScalarValue::IntervalDayTime(Some(i)) => (
1963+
LiteralType::IntervalDayToSecond(IntervalDayToSecond {
1964+
days: i.days,
1965+
seconds: i.milliseconds / 1000,
1966+
microseconds: (i.milliseconds % 1000) * 1000,
1967+
}),
1968+
DEFAULT_TYPE_VARIATION_REF,
1969+
),
20261970
ScalarValue::Binary(Some(b)) => (
20271971
LiteralType::Binary(b.clone()),
20281972
DEFAULT_CONTAINER_TYPE_VARIATION_REF,

datafusion/substrait/tests/cases/roundtrip_logical_plan.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1202,11 +1202,21 @@ async fn create_all_type_context() -> Result<SessionContext> {
12021202
),
12031203
Field::new("decimal_128_col", DataType::Decimal128(10, 2), true),
12041204
Field::new("decimal_256_col", DataType::Decimal256(10, 2), true),
1205+
Field::new(
1206+
"interval_year_month_col",
1207+
DataType::Interval(IntervalUnit::YearMonth),
1208+
true,
1209+
),
12051210
Field::new(
12061211
"interval_day_time_col",
12071212
DataType::Interval(IntervalUnit::DayTime),
12081213
true,
12091214
),
1215+
Field::new(
1216+
"interval_month_day_nano_col",
1217+
DataType::Interval(IntervalUnit::MonthDayNano),
1218+
true,
1219+
),
12101220
]);
12111221
explicit_options.schema = Some(&schema);
12121222
explicit_options.has_header = false;

0 commit comments

Comments
 (0)