Skip to content

Commit d89da8e

Browse files
committed
use IntervalCompound instead of interval-month-day-nano UDT
1 parent 502ce4b commit d89da8e

File tree

4 files changed

+79
-122
lines changed

4 files changed

+79
-122
lines changed

datafusion/substrait/src/logical_plan/consumer.rs

Lines changed: 50 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,14 @@ use crate::variation_const::{
4141
DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF,
4242
DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF,
4343
DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF,
44-
INTERVAL_MONTH_DAY_NANO_TYPE_NAME, LARGE_CONTAINER_TYPE_VARIATION_REF,
45-
UNSIGNED_INTEGER_TYPE_VARIATION_REF,
44+
LARGE_CONTAINER_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF,
4645
};
4746
#[allow(deprecated)]
4847
use crate::variation_const::{
49-
INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_REF,
50-
INTERVAL_YEAR_MONTH_TYPE_REF, TIMESTAMP_MICRO_TYPE_VARIATION_REF,
51-
TIMESTAMP_MILLI_TYPE_VARIATION_REF, TIMESTAMP_NANO_TYPE_VARIATION_REF,
52-
TIMESTAMP_SECOND_TYPE_VARIATION_REF,
48+
INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_NAME,
49+
INTERVAL_MONTH_DAY_NANO_TYPE_REF, INTERVAL_YEAR_MONTH_TYPE_REF,
50+
TIMESTAMP_MICRO_TYPE_VARIATION_REF, TIMESTAMP_MILLI_TYPE_VARIATION_REF,
51+
TIMESTAMP_NANO_TYPE_VARIATION_REF, TIMESTAMP_SECOND_TYPE_VARIATION_REF,
5352
};
5453
use datafusion::arrow::array::{new_empty_array, AsArray};
5554
use datafusion::common::scalar::ScalarStructBuilder;
@@ -69,10 +68,10 @@ use datafusion::{
6968
use std::collections::{HashMap, HashSet};
7069
use std::sync::Arc;
7170
use substrait::proto::exchange_rel::ExchangeKind;
72-
use substrait::proto::expression::literal::interval_day_to_second::PrecisionMode;
7371
use substrait::proto::expression::literal::user_defined::Val;
7472
use substrait::proto::expression::literal::{
75-
IntervalDayToSecond, IntervalYearToMonth, UserDefined,
73+
interval_day_to_second, IntervalCompound, IntervalDayToSecond, IntervalYearToMonth,
74+
UserDefined,
7675
};
7776
use substrait::proto::expression::subquery::SubqueryType;
7877
use substrait::proto::expression::{self, FieldReference, Literal, ScalarFunction};
@@ -1505,8 +1504,13 @@ fn from_substrait_type(
15051504
Ok(DataType::Interval(IntervalUnit::YearMonth))
15061505
}
15071506
r#type::Kind::IntervalDay(_) => Ok(DataType::Interval(IntervalUnit::DayTime)),
1507+
r#type::Kind::IntervalCompound(_) => {
1508+
Ok(DataType::Interval(IntervalUnit::MonthDayNano))
1509+
}
15081510
r#type::Kind::UserDefined(u) => {
1511+
// Kept for backwards compatibility, use IntervalCompound instead
15091512
if let Some(name) = extensions.types.get(&u.type_reference) {
1513+
#[allow(deprecated)]
15101514
match name.as_ref() {
15111515
INTERVAL_MONTH_DAY_NANO_TYPE_NAME => Ok(DataType::Interval(IntervalUnit::MonthDayNano)),
15121516
_ => not_impl_err!(
@@ -1516,7 +1520,7 @@ fn from_substrait_type(
15161520
),
15171521
}
15181522
} else {
1519-
// Kept for backwards compatibility, new plans should include the extension instead
1523+
// Kept for backwards compatibility, use IntervalCompound instead
15201524
#[allow(deprecated)]
15211525
match u.type_reference {
15221526
// Kept for backwards compatibility, use IntervalYear instead
@@ -1946,6 +1950,7 @@ fn from_substrait_literal(
19461950
subseconds,
19471951
precision_mode,
19481952
})) => {
1953+
use interval_day_to_second::PrecisionMode;
19491954
// DF only supports millisecond precision, so for any more granular type we lose precision
19501955
let milliseconds = match precision_mode {
19511956
Some(PrecisionMode::Microseconds(ms)) => ms / 1000,
@@ -1965,6 +1970,39 @@ fn from_substrait_literal(
19651970
Some(LiteralType::IntervalYearToMonth(IntervalYearToMonth { years, months })) => {
19661971
ScalarValue::new_interval_ym(*years, *months)
19671972
}
1973+
Some(LiteralType::IntervalCompound(IntervalCompound {
1974+
interval_year_to_month,
1975+
interval_day_to_second,
1976+
})) => match (interval_year_to_month, interval_day_to_second) {
1977+
(
1978+
Some(IntervalYearToMonth { years, months }),
1979+
Some(IntervalDayToSecond {
1980+
days,
1981+
seconds,
1982+
subseconds,
1983+
precision_mode:
1984+
Some(interval_day_to_second::PrecisionMode::Precision(p)),
1985+
}),
1986+
) => {
1987+
let nanos = match p {
1988+
0 => *subseconds * 1_000_000_000,
1989+
3 => *subseconds * 1_000_000,
1990+
6 => *subseconds * 1_000,
1991+
9 => *subseconds,
1992+
_ => {
1993+
return not_impl_err!(
1994+
"Unsupported Substrait interval day to second precision mode"
1995+
)
1996+
}
1997+
};
1998+
ScalarValue::new_interval_mdn(
1999+
*years * 12 + months,
2000+
*days,
2001+
*seconds as i64 * 1_000_000_000 + nanos,
2002+
)
2003+
}
2004+
_ => return not_impl_err!("Unsupported Substrait compound interval literal"),
2005+
},
19682006
Some(LiteralType::FixedChar(c)) => ScalarValue::Utf8(Some(c.clone())),
19692007
Some(LiteralType::UserDefined(user_defined)) => {
19702008
// Helper function to prevent duplicating this code - can be inlined once the non-extension path is removed
@@ -1995,6 +2033,8 @@ fn from_substrait_literal(
19952033

19962034
if let Some(name) = extensions.types.get(&user_defined.type_reference) {
19972035
match name.as_ref() {
2036+
// Kept for backwards compatibility - new plans should use IntervalCompound instead
2037+
#[allow(deprecated)]
19982038
INTERVAL_MONTH_DAY_NANO_TYPE_NAME => {
19992039
interval_month_day_nano(user_defined)?
20002040
}
@@ -2045,6 +2085,7 @@ fn from_substrait_literal(
20452085
milliseconds,
20462086
}))
20472087
}
2088+
// Kept for backwards compatibility, use IntervalCompound instead
20482089
INTERVAL_MONTH_DAY_NANO_TYPE_REF => {
20492090
interval_month_day_nano(user_defined)?
20502091
}

datafusion/substrait/src/logical_plan/producer.rs

Lines changed: 24 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ use itertools::Itertools;
1919
use std::ops::Deref;
2020
use std::sync::Arc;
2121

22-
use arrow_buffer::ToByteSlice;
2322
use datafusion::arrow::datatypes::IntervalUnit;
2423
use datafusion::logical_expr::{
2524
CrossJoin, Distinct, Like, Partitioning, WindowFrameUnits,
@@ -37,8 +36,7 @@ use crate::variation_const::{
3736
DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF,
3837
DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF,
3938
DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF,
40-
INTERVAL_MONTH_DAY_NANO_TYPE_NAME, LARGE_CONTAINER_TYPE_VARIATION_REF,
41-
UNSIGNED_INTEGER_TYPE_VARIATION_REF,
39+
LARGE_CONTAINER_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF,
4240
};
4341
use datafusion::arrow::array::{Array, GenericListArray, OffsetSizeTrait};
4442
use datafusion::common::{
@@ -56,8 +54,8 @@ use substrait::proto::exchange_rel::{ExchangeKind, RoundRobin, ScatterFields};
5654
use substrait::proto::expression::literal::interval_day_to_second::PrecisionMode;
5755
use substrait::proto::expression::literal::map::KeyValue;
5856
use substrait::proto::expression::literal::{
59-
user_defined, IntervalDayToSecond, IntervalYearToMonth, List, Map,
60-
PrecisionTimestamp, Struct, UserDefined,
57+
IntervalCompound, IntervalDayToSecond, IntervalYearToMonth, List, Map,
58+
PrecisionTimestamp, Struct,
6159
};
6260
use substrait::proto::expression::subquery::InPredicate;
6361
use substrait::proto::expression::window_function::BoundsType;
@@ -1429,16 +1427,14 @@ fn to_substrait_type(
14291427
})),
14301428
}),
14311429
IntervalUnit::MonthDayNano => {
1432-
// Substrait doesn't currently support this type, so we represent it as a UDT
14331430
Ok(substrait::proto::Type {
1434-
kind: Some(r#type::Kind::UserDefined(r#type::UserDefined {
1435-
type_reference: extensions.register_type(
1436-
INTERVAL_MONTH_DAY_NANO_TYPE_NAME.to_string(),
1437-
),
1438-
type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
1439-
nullability,
1440-
type_parameters: vec![],
1441-
})),
1431+
kind: Some(r#type::Kind::IntervalCompound(
1432+
r#type::IntervalCompound {
1433+
type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
1434+
nullability,
1435+
precision: 9, // nanos
1436+
},
1437+
)),
14421438
})
14431439
}
14441440
}
@@ -1880,23 +1876,21 @@ fn to_substrait_literal(
18801876
}),
18811877
DEFAULT_TYPE_VARIATION_REF,
18821878
),
1883-
ScalarValue::IntervalMonthDayNano(Some(i)) => {
1884-
// IntervalMonthDayNano is internally represented as a 128-bit integer, containing
1885-
// months (32bit), days (32bit), and nanoseconds (64bit)
1886-
let bytes = i.to_byte_slice();
1887-
(
1888-
LiteralType::UserDefined(UserDefined {
1889-
type_reference: extensions
1890-
.register_type(INTERVAL_MONTH_DAY_NANO_TYPE_NAME.to_string()),
1891-
type_parameters: vec![],
1892-
val: Some(user_defined::Val::Value(ProtoAny {
1893-
type_url: INTERVAL_MONTH_DAY_NANO_TYPE_NAME.to_string(),
1894-
value: bytes.to_vec().into(),
1895-
})),
1879+
ScalarValue::IntervalMonthDayNano(Some(i)) => (
1880+
LiteralType::IntervalCompound(IntervalCompound {
1881+
interval_year_to_month: Some(IntervalYearToMonth {
1882+
years: 0,
1883+
months: i.months,
18961884
}),
1897-
DEFAULT_TYPE_VARIATION_REF,
1898-
)
1899-
}
1885+
interval_day_to_second: Some(IntervalDayToSecond {
1886+
days: i.days,
1887+
seconds: 0,
1888+
subseconds: i.nanoseconds,
1889+
precision_mode: Some(PrecisionMode::Precision(9)), // nanoseconds
1890+
}),
1891+
}),
1892+
DEFAULT_TYPE_VARIATION_REF,
1893+
),
19001894
ScalarValue::IntervalDayTime(Some(i)) => (
19011895
LiteralType::IntervalDayToSecond(IntervalDayToSecond {
19021896
days: i.days,
@@ -2161,7 +2155,6 @@ mod test {
21612155
};
21622156
use datafusion::arrow::datatypes::Field;
21632157
use datafusion::common::scalar::ScalarStructBuilder;
2164-
use std::collections::HashMap;
21652158

21662159
#[test]
21672160
fn round_trip_literals() -> Result<()> {
@@ -2292,39 +2285,6 @@ mod test {
22922285
Ok(())
22932286
}
22942287

2295-
#[test]
2296-
fn custom_type_literal_extensions() -> Result<()> {
2297-
let mut extensions = Extensions::default();
2298-
// IntervalMonthDayNano is represented as a custom type in Substrait
2299-
let scalar = ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::new(
2300-
17, 25, 1234567890,
2301-
)));
2302-
let substrait_literal = to_substrait_literal(&scalar, &mut extensions)?;
2303-
let roundtrip_scalar =
2304-
from_substrait_literal_without_names(&substrait_literal, &extensions)?;
2305-
assert_eq!(scalar, roundtrip_scalar);
2306-
2307-
assert_eq!(
2308-
extensions,
2309-
Extensions {
2310-
functions: HashMap::new(),
2311-
types: HashMap::from([(
2312-
0,
2313-
INTERVAL_MONTH_DAY_NANO_TYPE_NAME.to_string()
2314-
)]),
2315-
type_variations: HashMap::new(),
2316-
}
2317-
);
2318-
2319-
// Check we fail if we don't propagate extensions
2320-
assert!(from_substrait_literal_without_names(
2321-
&substrait_literal,
2322-
&Extensions::default()
2323-
)
2324-
.is_err());
2325-
Ok(())
2326-
}
2327-
23282288
#[test]
23292289
fn round_trip_types() -> Result<()> {
23302290
round_trip_type(DataType::Boolean)?;
@@ -2403,35 +2363,4 @@ mod test {
24032363
assert_eq!(dt, roundtrip_dt);
24042364
Ok(())
24052365
}
2406-
2407-
#[test]
2408-
fn custom_type_extensions() -> Result<()> {
2409-
let mut extensions = Extensions::default();
2410-
// IntervalMonthDayNano is represented as a custom type in Substrait
2411-
let dt = DataType::Interval(IntervalUnit::MonthDayNano);
2412-
2413-
let substrait = to_substrait_type(&dt, true, &mut extensions)?;
2414-
let roundtrip_dt = from_substrait_type_without_names(&substrait, &extensions)?;
2415-
assert_eq!(dt, roundtrip_dt);
2416-
2417-
assert_eq!(
2418-
extensions,
2419-
Extensions {
2420-
functions: HashMap::new(),
2421-
types: HashMap::from([(
2422-
0,
2423-
INTERVAL_MONTH_DAY_NANO_TYPE_NAME.to_string()
2424-
)]),
2425-
type_variations: HashMap::new(),
2426-
}
2427-
);
2428-
2429-
// Check we fail if we don't propagate extensions
2430-
assert!(
2431-
from_substrait_type_without_names(&substrait, &Extensions::default())
2432-
.is_err()
2433-
);
2434-
2435-
Ok(())
2436-
}
24372366
}

datafusion/substrait/src/variation_const.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,12 +95,16 @@ pub const INTERVAL_DAY_TIME_TYPE_REF: u32 = 2;
9595
/// [`ScalarValue::IntervalMonthDayNano`]: datafusion::common::ScalarValue::IntervalMonthDayNano
9696
#[deprecated(
9797
since = "41.0.0",
98-
note = "Use Substrait `UserDefinedType` with name `INTERVAL_MONTH_DAY_NANO_TYPE_NAME` instead"
98+
note = "Use Substrait `IntervalCompund` type instead"
9999
)]
100100
pub const INTERVAL_MONTH_DAY_NANO_TYPE_REF: u32 = 3;
101101

102102
/// For [`DataType::Interval`] with [`IntervalUnit::MonthDayNano`].
103103
///
104104
/// [`DataType::Interval`]: datafusion::arrow::datatypes::DataType::Interval
105105
/// [`IntervalUnit::MonthDayNano`]: datafusion::arrow::datatypes::IntervalUnit::MonthDayNano
106+
#[deprecated(
107+
since = "42.0.0",
108+
note = "Use Substrait `IntervalCompund` type instead"
109+
)]
106110
pub const INTERVAL_MONTH_DAY_NANO_TYPE_NAME: &str = "interval-month-day-nano";

datafusion/substrait/tests/cases/roundtrip_logical_plan.rs

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -204,23 +204,6 @@ async fn select_with_reused_functions() -> Result<()> {
204204
Ok(())
205205
}
206206

207-
#[tokio::test]
208-
async fn roundtrip_udt_extensions() -> Result<()> {
209-
let ctx = create_context().await?;
210-
let proto =
211-
roundtrip_with_ctx("SELECT INTERVAL '1 YEAR 1 DAY 1 SECOND' FROM data", ctx)
212-
.await?;
213-
let expected_type = SimpleExtensionDeclaration {
214-
mapping_type: Some(MappingType::ExtensionType(ExtensionType {
215-
extension_uri_reference: u32::MAX,
216-
type_anchor: 0,
217-
name: "interval-month-day-nano".to_string(),
218-
})),
219-
};
220-
assert_eq!(proto.extensions, vec![expected_type]);
221-
Ok(())
222-
}
223-
224207
#[tokio::test]
225208
async fn select_with_filter_date() -> Result<()> {
226209
roundtrip("SELECT * FROM data WHERE c > CAST('2020-01-01' AS DATE)").await

0 commit comments

Comments
 (0)