
Commit f054586

tisonkun, jmhain, and alamb authored
build(deps): upgrade sqlparser to 0.47.0 (#10392)
* build(deps): upgrade sqlparser to 0.46.0
* function and cast fixups
* catchup refactors
* try migrate json expr
* Update for changes in sqlparser
* Update dependencies
* handle zero argument form
* fmt
* fixup more
* fixup more
* try use jmhain's branch
* fix compile FunctionArgumentClause exhausted
* fix compile set multi vars
* fix compile new string values
* fix compile set multi vars
* fix compile Subscript
* cargo fmt
* revert workaround on values
* Rework field access
* update lock
* fix doc
* try catchup new sqlparser version
* fixup timezone expr
* fixup params
* lock
* Update to sqlparser 0.47.0
* Update rust stack size on windows
* Revert "Update rust stack size on windows" (this reverts commit b5743d5)
* Add test + support for `$$` function definition
* Disable failing windows CI test
* fmt
* simplify test
* fmt

Signed-off-by: tison <[email protected]>
Co-authored-by: Joey Hain <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
1 parent 089b232 commit f054586

File tree

20 files changed: +502 additions, -429 deletions

Cargo.toml

Lines changed: 1 addition & 1 deletion

@@ -111,7 +111,7 @@ rand = "0.8"
 regex = "1.8"
 rstest = "0.21.0"
 serde_json = "1"
-sqlparser = { version = "0.45.0", features = ["visitor"] }
+sqlparser = { version = "0.47", features = ["visitor"] }
 tempfile = "3"
 thiserror = "1.0.44"
 tokio = { version = "1.36", features = ["macros", "rt", "sync"] }

datafusion-cli/Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default.

datafusion-cli/tests/cli_integration.rs

Lines changed: 2 additions & 0 deletions

@@ -28,6 +28,8 @@ fn init() {
     let _ = env_logger::try_init();
 }
 
+// Disabled due to https://github.com/apache/datafusion/issues/10793
+#[cfg(not(target_family = "windows"))]
 #[rstest]
 #[case::exec_from_commands(
     ["--command", "select 1", "--format", "json", "-q"],

datafusion-examples/examples/function_factory.rs

Lines changed: 1 addition & 1 deletion

@@ -212,7 +212,7 @@ impl TryFrom<CreateFunction> for ScalarFunctionWrapper {
             name: definition.name,
             expr: definition
                 .params
-                .return_
+                .function_body
                 .expect("Expression has to be defined!"),
             return_type: definition
                 .return_type

datafusion/core/tests/user_defined/user_defined_scalar_functions.rs

Lines changed: 98 additions & 14 deletions

@@ -28,14 +28,17 @@ use datafusion_common::cast::{as_float64_array, as_int32_array};
 use datafusion_common::tree_node::{Transformed, TreeNode};
 use datafusion_common::{
     assert_batches_eq, assert_batches_sorted_eq, assert_contains, exec_err, internal_err,
-    not_impl_err, plan_err, DataFusionError, ExprSchema, Result, ScalarValue,
+    not_impl_err, plan_err, DFSchema, DataFusionError, ExprSchema, Result, ScalarValue,
 };
-use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv};
 use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
 use datafusion_expr::{
-    Accumulator, ColumnarValue, CreateFunction, ExprSchemable, LogicalPlanBuilder,
-    ScalarUDF, ScalarUDFImpl, Signature, Volatility,
+    Accumulator, ColumnarValue, CreateFunction, CreateFunctionBody, ExprSchemable,
+    LogicalPlanBuilder, OperateFunctionArg, ScalarUDF, ScalarUDFImpl, Signature,
+    Volatility,
 };
+use datafusion_functions_array::range::range_udf;
+use parking_lot::Mutex;
+use sqlparser::ast::Ident;
 
 /// test that casting happens on udfs.
 /// c11 is f32, but `custom_sqrt` requires f64. Casting happens but the logical plan and
@@ -828,7 +831,7 @@ impl TryFrom<CreateFunction> for ScalarFunctionWrapper {
             name: definition.name,
             expr: definition
                 .params
-                .return_
+                .function_body
                 .expect("Expression has to be defined!"),
             return_type: definition
                 .return_type
@@ -852,15 +855,7 @@ impl TryFrom<CreateFunction> for ScalarFunctionWrapper {
 #[tokio::test]
 async fn create_scalar_function_from_sql_statement() -> Result<()> {
     let function_factory = Arc::new(CustomFunctionFactory::default());
-    let runtime_config = RuntimeConfig::new();
-    let runtime_environment = RuntimeEnv::new(runtime_config)?;
-
-    let session_config = SessionConfig::new();
-    let state =
-        SessionState::new_with_config_rt(session_config, Arc::new(runtime_environment))
-            .with_function_factory(function_factory.clone());
-
-    let ctx = SessionContext::new_with_state(state);
+    let ctx = SessionContext::new().with_function_factory(function_factory.clone());
     let options = SQLOptions::new().with_allow_ddl(false);
 
     let sql = r#"
@@ -926,6 +921,95 @@ async fn create_scalar_function_from_sql_statement() -> Result<()> {
     Ok(())
 }
 
+/// Saves whatever is passed to it as a scalar function
+#[derive(Debug, Default)]
+struct RecordingFunctonFactory {
+    calls: Mutex<Vec<CreateFunction>>,
+}
+
+impl RecordingFunctonFactory {
+    fn new() -> Self {
+        Self::default()
+    }
+
+    /// return all the calls made to the factory
+    fn calls(&self) -> Vec<CreateFunction> {
+        self.calls.lock().clone()
+    }
+}
+
+#[async_trait::async_trait]
+impl FunctionFactory for RecordingFunctonFactory {
+    async fn create(
+        &self,
+        _state: &SessionState,
+        statement: CreateFunction,
+    ) -> Result<RegisterFunction> {
+        self.calls.lock().push(statement);
+
+        let udf = range_udf();
+        Ok(RegisterFunction::Scalar(udf))
+    }
+}
+
+#[tokio::test]
+async fn create_scalar_function_from_sql_statement_postgres_syntax() -> Result<()> {
+    let function_factory = Arc::new(RecordingFunctonFactory::new());
+    let ctx = SessionContext::new().with_function_factory(function_factory.clone());
+
+    let sql = r#"
+      CREATE FUNCTION strlen(name TEXT)
+      RETURNS int LANGUAGE plrust AS
+    $$
+        Ok(Some(name.unwrap().len() as i32))
+    $$;
+    "#;

+    let body = "
+        Ok(Some(name.unwrap().len() as i32))
+    ";
+
+    match ctx.sql(sql).await {
+        Ok(_) => {}
+        Err(e) => {
+            panic!("Error creating function: {}", e);
+        }
+    }
+
+    // verify that the call was passed through
+    let calls = function_factory.calls();
+    let schema = DFSchema::try_from(Schema::empty())?;
+    assert_eq!(calls.len(), 1);
+    let call = &calls[0];
+    let expected = CreateFunction {
+        or_replace: false,
+        temporary: false,
+        name: "strlen".into(),
+        args: Some(vec![OperateFunctionArg {
+            name: Some(Ident {
+                value: "name".into(),
+                quote_style: None,
+            }),
+            data_type: DataType::Utf8,
+            default_expr: None,
+        }]),
+        return_type: Some(DataType::Int32),
+        params: CreateFunctionBody {
+            language: Some(Ident {
+                value: "plrust".into(),
+                quote_style: None,
+            }),
+            behavior: None,
+            function_body: Some(lit(body)),
+        },
+        schema: Arc::new(schema),
+    };
+
+    assert_eq!(call, &expected);
+
+    Ok(())
+}
+
 fn create_udf_context() -> SessionContext {
     let ctx = SessionContext::new();
     // register a custom UDF
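
The new test above exercises the PostgreSQL-style `AS $$ ... $$` body added in sqlparser 0.47: the dollar-quoted source reaches the FunctionFactory as a plain string literal in the unified `function_body` field, which is what the expected `lit(body)` encodes. As a minimal sketch (not part of this commit; `function_source` is a hypothetical helper), a factory could recover that source text like this:

use datafusion_common::ScalarValue;
use datafusion_expr::{CreateFunction, Expr};

// Hypothetical helper: recover the raw body text from a CREATE FUNCTION
// statement whose body was written as `AS $$ ... $$` (or `AS '...'`).
fn function_source(statement: &CreateFunction) -> Option<&str> {
    match statement.params.function_body.as_ref()? {
        Expr::Literal(ScalarValue::Utf8(Some(source))) => Some(source.as_str()),
        // `RETURN <expr>` bodies arrive as arbitrary expressions instead
        _ => None,
    }
}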

datafusion/expr/src/logical_plan/ddl.rs

Lines changed: 2 additions & 23 deletions

@@ -341,29 +341,8 @@ pub struct CreateFunctionBody {
     pub language: Option<Ident>,
     /// IMMUTABLE | STABLE | VOLATILE
     pub behavior: Option<Volatility>,
-    /// AS 'definition'
-    pub as_: Option<DefinitionStatement>,
-    /// RETURN expression
-    pub return_: Option<Expr>,
-}
-
-#[derive(Clone, PartialEq, Eq, Hash, Debug)]
-pub enum DefinitionStatement {
-    SingleQuotedDef(String),
-    DoubleDollarDef(String),
-}
-
-impl From<sqlparser::ast::FunctionDefinition> for DefinitionStatement {
-    fn from(value: sqlparser::ast::FunctionDefinition) -> Self {
-        match value {
-            sqlparser::ast::FunctionDefinition::SingleQuotedDef(s) => {
-                Self::SingleQuotedDef(s)
-            }
-            sqlparser::ast::FunctionDefinition::DoubleDollarDef(s) => {
-                Self::DoubleDollarDef(s)
-            }
-        }
-    }
+    /// RETURN or AS function body
+    pub function_body: Option<Expr>,
 }
 
 #[derive(Clone, PartialEq, Eq, Hash, Debug)]
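
With the struct slimmed down, both `AS '...'`/`AS $$ ... $$` bodies and `RETURN <expr>` bodies are carried in the single `function_body` expression, so the `DefinitionStatement` enum and its `From` impl are no longer needed. A minimal sketch (mine, not from the diff) of populating the reworked struct for a quoted body:

use datafusion_expr::{lit, CreateFunctionBody};

// Hypothetical constructor for a dollar-quoted (or single-quoted) body:
// the raw source text is carried as a string literal expression.
fn dollar_quoted_body(source: &str) -> CreateFunctionBody {
    CreateFunctionBody {
        language: None,  // e.g. LANGUAGE plrust
        behavior: None,  // IMMUTABLE | STABLE | VOLATILE
        function_body: Some(lit(source)),
    }
}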

datafusion/expr/src/logical_plan/mod.rs

Lines changed: 2 additions & 2 deletions

@@ -30,8 +30,8 @@ pub use builder::{
 };
 pub use ddl::{
     CreateCatalog, CreateCatalogSchema, CreateExternalTable, CreateFunction,
-    CreateFunctionBody, CreateMemoryTable, CreateView, DdlStatement, DefinitionStatement,
-    DropCatalogSchema, DropFunction, DropTable, DropView, OperateFunctionArg,
+    CreateFunctionBody, CreateMemoryTable, CreateView, DdlStatement, DropCatalogSchema,
+    DropFunction, DropTable, DropView, OperateFunctionArg,
 };
 pub use dml::{DmlStatement, WriteOp};
 pub use plan::{

datafusion/sql/src/expr/binary_op.rs

Lines changed: 2 additions & 0 deletions

@@ -51,6 +51,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             BinaryOperator::PGBitwiseShiftRight => Ok(Operator::BitwiseShiftRight),
             BinaryOperator::PGBitwiseShiftLeft => Ok(Operator::BitwiseShiftLeft),
             BinaryOperator::StringConcat => Ok(Operator::StringConcat),
+            BinaryOperator::ArrowAt => Ok(Operator::ArrowAt),
+            BinaryOperator::AtArrow => Ok(Operator::AtArrow),
             _ => not_impl_err!("Unsupported SQL binary operator {op:?}"),
         }
     }
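
These two arms wire up the PostgreSQL containment operators so they plan instead of falling through to the "Unsupported SQL binary operator" error: `@>` maps to AtArrow ("contains") and `<@` to ArrowAt ("is contained by"). A small standalone sketch of the same mapping, assuming only the Operator variants used above:

use datafusion_expr::Operator;
use sqlparser::ast::BinaryOperator;

// Mirrors the two new match arms: map the parsed containment operators
// onto DataFusion's Operator instead of reporting them as unsupported.
fn containment_operator(op: &BinaryOperator) -> Option<Operator> {
    match op {
        BinaryOperator::AtArrow => Some(Operator::AtArrow), // SQL `@>`
        BinaryOperator::ArrowAt => Some(Operator::ArrowAt), // SQL `<@`
        _ => None,
    }
}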

datafusion/sql/src/expr/function.rs

Lines changed: 122 additions & 6 deletions

@@ -30,7 +30,9 @@ use datafusion_expr::{
     BuiltInWindowFunction,
 };
 use sqlparser::ast::{
-    Expr as SQLExpr, Function as SQLFunction, FunctionArg, FunctionArgExpr, WindowType,
+    DuplicateTreatment, Expr as SQLExpr, Function as SQLFunction, FunctionArg,
+    FunctionArgExpr, FunctionArgumentClause, FunctionArgumentList, FunctionArguments,
+    NullTreatment, ObjectName, OrderByExpr, WindowType,
 };
 use std::str::FromStr;
 use strum::IntoEnumIterator;
@@ -79,23 +81,137 @@ fn find_closest_match(candidates: Vec<String>, target: &str) -> String {
         .expect("No candidates provided.") // Panic if `candidates` argument is empty
 }
 
+/// Arguments to for a function call extracted from the SQL AST
+#[derive(Debug)]
+struct FunctionArgs {
+    /// Function name
+    name: ObjectName,
+    /// Argument expressions
+    args: Vec<FunctionArg>,
+    /// ORDER BY clause, if any
+    order_by: Vec<OrderByExpr>,
+    /// OVER clause, if any
+    over: Option<WindowType>,
+    /// FILTER clause, if any
+    filter: Option<Box<SQLExpr>>,
+    /// NULL treatment clause, if any
+    null_treatment: Option<NullTreatment>,
+    /// DISTINCT
+    distinct: bool,
+}
+
+impl FunctionArgs {
+    fn try_new(function: SQLFunction) -> Result<Self> {
+        let SQLFunction {
+            name,
+            args,
+            over,
+            filter,
+            mut null_treatment,
+            within_group,
+        } = function;
+
+        // Handle no argument form (aka `current_time` as opposed to `current_time()`)
+        let FunctionArguments::List(args) = args else {
+            return Ok(Self {
+                name,
+                args: vec![],
+                order_by: vec![],
+                over,
+                filter,
+                null_treatment,
+                distinct: false,
+            });
+        };
+
+        let FunctionArgumentList {
+            duplicate_treatment,
+            args,
+            clauses,
+        } = args;
+
+        let distinct = match duplicate_treatment {
+            Some(DuplicateTreatment::Distinct) => true,
+            Some(DuplicateTreatment::All) => false,
+            None => false,
+        };
+
+        // Pull out argument handling
+        let mut order_by = None;
+        for clause in clauses {
+            match clause {
+                FunctionArgumentClause::IgnoreOrRespectNulls(nt) => {
+                    if null_treatment.is_some() {
+                        return not_impl_err!(
+                            "Calling {name}: Duplicated null treatment clause"
+                        );
+                    }
+                    null_treatment = Some(nt);
+                }
+                FunctionArgumentClause::OrderBy(oby) => {
+                    if order_by.is_some() {
+                        return not_impl_err!("Calling {name}: Duplicated ORDER BY clause in function arguments");
+                    }
+                    order_by = Some(oby);
+                }
+                FunctionArgumentClause::Limit(limit) => {
+                    return not_impl_err!(
+                        "Calling {name}: LIMIT not supported in function arguments: {limit}"
+                    )
+                }
+                FunctionArgumentClause::OnOverflow(overflow) => {
+                    return not_impl_err!(
                        "Calling {name}: ON OVERFLOW not supported in function arguments: {overflow}"
+                    )
+                }
+                FunctionArgumentClause::Having(having) => {
+                    return not_impl_err!(
+                        "Calling {name}: HAVING not supported in function arguments: {having}"
+                    )
+                }
+                FunctionArgumentClause::Separator(sep) => {
+                    return not_impl_err!(
+                        "Calling {name}: SEPARATOR not supported in function arguments: {sep}"
+                    )
+                }
+            }
+        }
+
+        if !within_group.is_empty() {
+            return not_impl_err!("WITHIN GROUP is not supported yet: {within_group:?}");
+        }
+
+        let order_by = order_by.unwrap_or_default();
+
+        Ok(Self {
+            name,
+            args,
+            order_by,
+            over,
+            filter,
+            null_treatment,
+            distinct,
+        })
+    }
+}
+
 impl<'a, S: ContextProvider> SqlToRel<'a, S> {
     pub(super) fn sql_function_to_expr(
         &self,
         function: SQLFunction,
         schema: &DFSchema,
         planner_context: &mut PlannerContext,
     ) -> Result<Expr> {
-        let SQLFunction {
+        let function_args = FunctionArgs::try_new(function)?;
+        let FunctionArgs {
             name,
             args,
+            order_by,
             over,
-            distinct,
             filter,
             null_treatment,
-            special: _, // true if not called with trailing parens
-            order_by,
-        } = function;
+            distinct,
+        } = function_args;
 
         // If function is a window function (it has an OVER clause),
         // it shouldn't have ordering requirement as function argument
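
The new `FunctionArgs::try_new` helper above is what absorbs sqlparser 0.47's reworked call representation: a call's arguments now arrive as a `FunctionArguments` enum rather than a `Vec<FunctionArg>` plus a `special` flag, so the zero-argument form (`current_time` with no parentheses) is a distinct variant instead of a boolean. A minimal sketch, assuming only the `FunctionArguments` and `FunctionArgumentList` types used in the diff, of how a consumer might count arguments across the shapes:

use sqlparser::ast::{FunctionArgumentList, FunctionArguments};

// Count positional arguments of a parsed call. Anything that is not an
// explicit argument list (e.g. `current_time` written without parentheses)
// is treated as a zero-argument call, mirroring the fallback in
// `FunctionArgs::try_new` above.
fn argument_count(args: &FunctionArguments) -> usize {
    match args {
        FunctionArguments::List(FunctionArgumentList { args, .. }) => args.len(),
        _ => 0,
    }
}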
