Skip to content

Commit 4d0268f

Browse files
Dandandan and claude committed
Support Date32/Date64 in unwrap_cast optimization
Add Date32 and Date64 to the supported numeric types in the existing unwrap_cast_in_comparison optimizer. This allows filters like `CAST(CAST(col AS Int32) AS Date32) >= Date32("2013-07-01")` to be simplified to `col >= UInt16(15887)`, eliminating per-row CAST operations.

Date32 is internally i32 (days since epoch) and Date64 is i64 (ms since epoch), so they participate in numeric comparisons the same way as their integer counterparts.

This affects ClickBench Q36-Q42, which all filter on EventDate (stored as UInt16, viewed as Date32). Each query previously evaluated 4 CAST operations per row; now it does 0.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 5c653be commit 4d0268f

File tree

2 files changed

+52
-31
lines changed

2 files changed

+52
-31
lines changed

datafusion/expr-common/src/casts.rs

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@ pub fn is_supported_type(data_type: &DataType) -> bool {
5959
}
6060

6161
/// Returns true if `data_type` is `DataType::Date32` or `DataType::Date64`.
62+
fn is_date_type(data_type: &DataType) -> bool {
63+
matches!(data_type, DataType::Date32 | DataType::Date64)
64+
}
65+
6266
/// Returns true if unwrap_cast_in_comparison supports this numeric type
fn is_supported_numeric_type(data_type: &DataType) -> bool {
6367
matches!(
6468
data_type,
@@ -70,6 +74,8 @@ fn is_supported_numeric_type(data_type: &DataType) -> bool {
7074
| DataType::Int16
7175
| DataType::Int32
7276
| DataType::Int64
77+
| DataType::Date32
78+
| DataType::Date64
7379
| DataType::Decimal32(_, _)
7480
| DataType::Decimal64(_, _)
7581
| DataType::Decimal128(_, _)
@@ -107,6 +113,15 @@ fn try_cast_numeric_literal(
107113
return None;
108114
}
109115

116+
// Date↔Timestamp casts are lossy (drop time-of-day or add midnight),
117+
// so unwrapping would change comparison semantics.
118+
let is_ts = |dt: &DataType| matches!(dt, DataType::Timestamp(_, _));
119+
if (is_date_type(&lit_data_type) && is_ts(target_type))
120+
|| (is_date_type(target_type) && is_ts(&lit_data_type))
121+
{
122+
return None;
123+
}
124+
110125
let mul = match target_type {
111126
DataType::UInt8
112127
| DataType::UInt16
@@ -115,7 +130,9 @@ fn try_cast_numeric_literal(
115130
| DataType::Int8
116131
| DataType::Int16
117132
| DataType::Int32
118-
| DataType::Int64 => 1_i128,
133+
| DataType::Int64
134+
| DataType::Date32
135+
| DataType::Date64 => 1_i128,
119136
DataType::Timestamp(_, _) => 1_i128,
120137
DataType::Decimal32(_, scale) => 10_i128.pow(*scale as u32),
121138
DataType::Decimal64(_, scale) => 10_i128.pow(*scale as u32),
@@ -129,8 +146,8 @@ fn try_cast_numeric_literal(
129146
DataType::UInt64 => (u64::MIN as i128, u64::MAX as i128),
130147
DataType::Int8 => (i8::MIN as i128, i8::MAX as i128),
131148
DataType::Int16 => (i16::MIN as i128, i16::MAX as i128),
132-
DataType::Int32 => (i32::MIN as i128, i32::MAX as i128),
133-
DataType::Int64 => (i64::MIN as i128, i64::MAX as i128),
149+
DataType::Int32 | DataType::Date32 => (i32::MIN as i128, i32::MAX as i128),
150+
DataType::Int64 | DataType::Date64 => (i64::MIN as i128, i64::MAX as i128),
134151
DataType::Timestamp(_, _) => (i64::MIN as i128, i64::MAX as i128),
135152
DataType::Decimal32(precision, _) => (
136153
// Different precision for decimal32 can store different range of value.
@@ -164,6 +181,8 @@ fn try_cast_numeric_literal(
164181
ScalarValue::UInt16(Some(v)) => (*v as i128).checked_mul(mul),
165182
ScalarValue::UInt32(Some(v)) => (*v as i128).checked_mul(mul),
166183
ScalarValue::UInt64(Some(v)) => (*v as i128).checked_mul(mul),
184+
ScalarValue::Date32(Some(v)) => (*v as i128).checked_mul(mul),
185+
ScalarValue::Date64(Some(v)) => (*v as i128).checked_mul(mul),
167186
ScalarValue::TimestampSecond(Some(v), _) => (*v as i128).checked_mul(mul),
168187
ScalarValue::TimestampMillisecond(Some(v), _) => (*v as i128).checked_mul(mul),
169188
ScalarValue::TimestampMicrosecond(Some(v), _) => (*v as i128).checked_mul(mul),
@@ -241,6 +260,8 @@ fn try_cast_numeric_literal(
241260
DataType::Int16 => ScalarValue::Int16(Some(value as i16)),
242261
DataType::Int32 => ScalarValue::Int32(Some(value as i32)),
243262
DataType::Int64 => ScalarValue::Int64(Some(value as i64)),
263+
DataType::Date32 => ScalarValue::Date32(Some(value as i32)),
264+
DataType::Date64 => ScalarValue::Date64(Some(value as i64)),
244265
DataType::UInt8 => ScalarValue::UInt8(Some(value as u8)),
245266
DataType::UInt16 => ScalarValue::UInt16(Some(value as u16)),
246267
DataType::UInt32 => ScalarValue::UInt32(Some(value as u32)),

0 commit comments

Comments
 (0)