Skip to content

Commit 0794802

Browse files
committed
simplify signature for nullif
Signed-off-by: Jay Zhan <[email protected]>
1 parent 0f584c8 commit 0794802

File tree

4 files changed

+90
-34
lines changed

4 files changed

+90
-34
lines changed

datafusion/expr-common/src/signature.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,15 @@ pub enum TypeSignature {
133133
/// Null is considerd as `Utf8` by default
134134
/// Dictionary with string value type is also handled.
135135
String(usize),
136+
/// Fixed number of arguments of boolean types.
137+
Boolean(usize),
138+
}
139+
140+
impl TypeSignature {
141+
#[inline]
142+
pub fn is_one_of(&self) -> bool {
143+
matches!(self, TypeSignature::OneOf(_))
144+
}
136145
}
137146

138147
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
@@ -198,6 +207,9 @@ impl TypeSignature {
198207
.collect::<Vec<String>>()
199208
.join(", ")]
200209
}
210+
TypeSignature::Boolean(num) => {
211+
vec![format!("Boolean({num})")]
212+
}
201213
TypeSignature::String(num) => {
202214
vec![format!("String({num})")]
203215
}
@@ -267,6 +279,7 @@ impl TypeSignature {
267279
| TypeSignature::UserDefined
268280
| TypeSignature::ArraySignature(_)
269281
| TypeSignature::Numeric(_)
282+
| TypeSignature::Boolean(_)
270283
| TypeSignature::String(_) => vec![],
271284
}
272285
}
@@ -323,6 +336,14 @@ impl Signature {
323336
}
324337
}
325338

339+
/// A specified number of boolean arguments
340+
pub fn boolean(arg_count: usize, volatility: Volatility) -> Self {
341+
Self {
342+
type_signature: TypeSignature::Boolean(arg_count),
343+
volatility,
344+
}
345+
}
346+
326347
/// An arbitrary number of arguments of any type.
327348
pub fn variadic_any(volatility: Volatility) -> Self {
328349
Self {

datafusion/expr/src/type_coercion/functions.rs

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ fn is_well_supported_signature(type_signature: &TypeSignature) -> bool {
180180
| TypeSignature::Numeric(_)
181181
| TypeSignature::String(_)
182182
| TypeSignature::Coercible(_)
183+
| TypeSignature::Boolean(_)
183184
| TypeSignature::Any(_)
184185
)
185186
}
@@ -193,13 +194,18 @@ fn try_coerce_types(
193194

194195
// Well-supported signature that returns exact valid types.
195196
if !valid_types.is_empty() && is_well_supported_signature(type_signature) {
196-
// exact valid types
197-
assert_eq!(valid_types.len(), 1);
197+
// There may be many valid types if valid signature is OneOf
198+
// Otherwise, there should be only one valid type
199+
if !type_signature.is_one_of() {
200+
assert_eq!(valid_types.len(), 1);
201+
}
202+
198203
let valid_types = valid_types.swap_remove(0);
199204
if let Some(t) = maybe_data_types_without_coercion(&valid_types, current_types) {
200205
return Ok(t);
201206
}
202207
} else {
208+
// TODO: Deprecate this branch after all signatures are well-supported (aka coercion are happend already)
203209
// Try and coerce the argument types to match the signature, returning the
204210
// coerced types from the first matching signature.
205211
for valid_types in valid_types {
@@ -476,6 +482,33 @@ fn get_valid_types(
476482

477483
vec![vec![base_type_or_default_type(&coerced_type); *number]]
478484
}
485+
TypeSignature::Boolean(number) => {
486+
function_length_check(current_types.len(), *number)?;
487+
488+
// Find common boolean type amongs given types
489+
let mut valid_type = current_types.first().unwrap().to_owned();
490+
for t in current_types.iter().skip(1) {
491+
let logical_data_type: NativeType = t.into();
492+
if logical_data_type == NativeType::Null {
493+
continue;
494+
}
495+
496+
if logical_data_type != NativeType::Boolean {
497+
return plan_err!(
498+
"The signature expected NativeType::Boolean but received {logical_data_type}"
499+
);
500+
}
501+
502+
valid_type = t.to_owned();
503+
}
504+
505+
let logical_data_type: NativeType = valid_type.clone().into();
506+
if logical_data_type == NativeType::Null {
507+
valid_type = DataType::Boolean;
508+
}
509+
510+
vec![vec![valid_type; *number]]
511+
}
479512
TypeSignature::Numeric(number) => {
480513
function_length_check(current_types.len(), *number)?;
481514

datafusion/functions/src/core/nullif.rs

Lines changed: 9 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
use arrow::datatypes::DataType;
1919
use datafusion_common::{exec_err, Result};
20-
use datafusion_expr::{ColumnarValue, Documentation};
20+
use datafusion_expr::{ColumnarValue, Documentation, TypeSignature};
2121

2222
use arrow::compute::kernels::cmp::eq;
2323
use arrow::compute::kernels::nullif::nullif;
@@ -32,25 +32,6 @@ pub struct NullIfFunc {
3232
signature: Signature,
3333
}
3434

35-
/// Currently supported types by the nullif function.
36-
/// The order of these types correspond to the order on which coercion applies
37-
/// This should thus be from least informative to most informative
38-
static SUPPORTED_NULLIF_TYPES: &[DataType] = &[
39-
DataType::Boolean,
40-
DataType::UInt8,
41-
DataType::UInt16,
42-
DataType::UInt32,
43-
DataType::UInt64,
44-
DataType::Int8,
45-
DataType::Int16,
46-
DataType::Int32,
47-
DataType::Int64,
48-
DataType::Float32,
49-
DataType::Float64,
50-
DataType::Utf8,
51-
DataType::LargeUtf8,
52-
];
53-
5435
impl Default for NullIfFunc {
5536
fn default() -> Self {
5637
Self::new()
@@ -60,9 +41,13 @@ impl Default for NullIfFunc {
6041
impl NullIfFunc {
6142
pub fn new() -> Self {
6243
Self {
63-
signature: Signature::uniform(
64-
2,
65-
SUPPORTED_NULLIF_TYPES.to_vec(),
44+
signature: Signature::one_of(
45+
// Hack: String is at the beginning so the return type is String if both args are Nulls
46+
vec![
47+
TypeSignature::String(2),
48+
TypeSignature::Numeric(2),
49+
TypeSignature::Boolean(2),
50+
],
6651
Volatility::Immutable,
6752
),
6853
}
@@ -82,14 +67,7 @@ impl ScalarUDFImpl for NullIfFunc {
8267
}
8368

8469
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
85-
// NULLIF has two args and they might get coerced, get a preview of this
86-
let coerced_types = datafusion_expr::type_coercion::functions::data_types(
87-
arg_types,
88-
&self.signature,
89-
);
90-
coerced_types
91-
.map(|typs| typs[0].clone())
92-
.map_err(|e| e.context("Failed to coerce arguments for NULLIF"))
70+
Ok(arg_types[0].to_owned())
9371
}
9472

9573
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {

datafusion/sqllogictest/test_files/nullif.slt

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,31 @@ SELECT NULLIF(1, 3);
9797
----
9898
1
9999

100-
query I
100+
query T
101101
SELECT NULLIF(NULL, NULL);
102102
----
103103
NULL
104+
105+
query R
106+
select nullif(1, 1.2);
107+
----
108+
1
109+
110+
query R
111+
select nullif(1.0, 2);
112+
----
113+
1
114+
115+
query error DataFusion error: Error during planning: Internal error: Failed to match any signature, errors: Error during planning: The signature expected NativeType::String but received NativeType::Int64
116+
select nullif(2, 'a');
117+
118+
119+
query T
120+
select nullif('2', '3');
121+
----
122+
2
123+
124+
# TODO: support numeric string
125+
# This query success in Postgres and DuckDB
126+
query error DataFusion error: Error during planning: Internal error: Failed to match any signature, errors: Error during planning: The signature expected NativeType::String but received NativeType::Int64
127+
select nullif(2, '1');

0 commit comments

Comments
 (0)