Skip to content

Commit e47c91e

Browse files
committed
[HSTACK] fix: compound_field_access doesn't identify the qualifier. apache#15153
1 parent be8aeee commit e47c91e

File tree

3 files changed

+132
-5
lines changed

3 files changed

+132
-5
lines changed

datafusion/core/tests/sql/select.rs

+41
Original file line numberDiff line numberDiff line change
@@ -350,3 +350,44 @@ async fn test_version_function() {
350350

351351
assert_eq!(version.value(0), expected_version);
352352
}
353+
#[tokio::test]
354+
async fn test_subscript() -> Result<()> {
355+
let ctx = SessionContext::new();
356+
ctx.sql(
357+
r#"CREATE TABLE test
358+
(struct_field STRUCT<substruct INT>)
359+
"#,
360+
)
361+
.await?;
362+
ctx.sql(r#"INSERT INTO test VALUES (STRUCT(1))"#).await?;
363+
364+
let df = ctx
365+
.sql(
366+
r#"SELECT *
367+
FROM test AS test1, test AS test2 WHERE
368+
test1.struct_field['substruct'] = test2.struct_field['substruct']"#,
369+
)
370+
.await?;
371+
let _ = df.collect().await?;
372+
373+
ctx.sql(
374+
r#"CREATE TABLE testx
375+
(struct_field STRUCT<substruct STRUCT<subsubstruct INT>>)
376+
"#,
377+
)
378+
.await?;
379+
ctx.sql(r#"INSERT INTO testx VALUES (STRUCT(STRUCT(1)))"#)
380+
.await?;
381+
382+
let df = ctx.sql(r#"SELECT *
383+
FROM testx AS test1, testx AS test2 WHERE
384+
test1.struct_field.substruct['subsubstruct'] = test2.struct_field.substruct['subsubstruct']"#).await?;
385+
let _ = df.collect().await?;
386+
387+
let df = ctx.sql(r#"SELECT *
388+
FROM testx AS test1, testx AS test2 WHERE
389+
test1.struct_field['substruct']['subsubstruct'] = test2.struct_field['substruct']['subsubstruct']"#).await?;
390+
let _ = df.collect().await?;
391+
392+
Ok(())
393+
}

datafusion/sql/src/expr/identifier.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ fn form_identifier(idents: &[String]) -> Result<(Option<TableReference>, &String
290290
}
291291
}
292292

293-
fn search_dfschema<'ids, 'schema>(
293+
pub(crate) fn search_dfschema<'ids, 'schema>(
294294
ids: &'ids [String],
295295
schema: &'schema DFSchema,
296296
) -> Option<(

datafusion/sql/src/expr/mod.rs

+90-4
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,13 @@ use datafusion_expr::planner::{
2121
};
2222
use sqlparser::ast::{
2323
AccessExpr, BinaryOperator, CastFormat, CastKind, DataType as SQLDataType,
24-
DictionaryField, Expr as SQLExpr, ExprWithAlias as SQLExprWithAlias, MapEntry,
25-
StructField, Subscript, TrimWhereField, Value,
24+
DictionaryField, Expr as SQLExpr, ExprWithAlias as SQLExprWithAlias, Ident, MapEntry,
25+
Spanned, StructField, Subscript, TrimWhereField, Value,
2626
};
2727

2828
use datafusion_common::{
2929
internal_datafusion_err, internal_err, not_impl_err, plan_err, Column, DFSchema,
30-
Result, ScalarValue,
30+
Result, ScalarValue, Span,
3131
};
3232

3333
use datafusion_expr::expr::ScalarFunction;
@@ -983,14 +983,100 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
983983
Ok(Expr::Cast(Cast::new(Box::new(expr), dt)))
984984
}
985985

986+
/// Extracts the root expression and access chain from a compound expression.
987+
///
988+
/// This function attempts to identify if a compound expression (like `a.b.c`) should be treated
989+
/// as a column reference with a qualifier (like `table.column`) or as a field access expression.
990+
///
991+
/// # Arguments
992+
///
993+
/// * `root` - The root SQL expression (e.g., the first part of `a.b.c`)
994+
/// * `access_chain` - Vector of access expressions (e.g., `.b` and `.c` parts)
995+
/// * `schema` - The schema to resolve column references against
996+
/// * `planner_context` - Context for planning expressions
997+
///
998+
/// # Returns
999+
///
1000+
/// A tuple containing:
1001+
/// * The resolved root expression
1002+
/// * The remaining access chain that should be processed as field accesses
1003+
fn extract_root_and_access_chain(
1004+
&self,
1005+
root: SQLExpr,
1006+
access_chain: Vec<AccessExpr>,
1007+
schema: &DFSchema,
1008+
planner_context: &mut PlannerContext,
1009+
) -> Result<(Expr, Vec<AccessExpr>)> {
1010+
if let SQLExpr::Identifier(Ident { value: id, .. }) = &root {
1011+
let mut ids = vec![id.clone()];
1012+
for access in &access_chain {
1013+
if let AccessExpr::Dot(SQLExpr::Identifier(Ident { value: id, .. })) =
1014+
access
1015+
{
1016+
ids.push(id.clone());
1017+
} else {
1018+
break;
1019+
}
1020+
}
1021+
1022+
if ids.len() > 1 {
1023+
// maybe it's a compound identifier
1024+
if let Some((field, Some(qualifier), nested_names)) =
1025+
identifier::search_dfschema(&ids, schema)
1026+
{
1027+
let span_num = ids.len() - nested_names.len();
1028+
let mut idx = 0;
1029+
let mut spans = vec![];
1030+
if let Some(s) = Span::try_from_sqlparser_span(root.span()) {
1031+
spans.push(s);
1032+
}
1033+
idx += 1;
1034+
1035+
let mut nested_access_chain = vec![];
1036+
for access in &access_chain {
1037+
if idx < span_num {
1038+
idx += 1;
1039+
if let AccessExpr::Dot(expr) = access {
1040+
if let Some(s) =
1041+
Span::try_from_sqlparser_span(expr.span())
1042+
{
1043+
spans.push(s);
1044+
}
1045+
} else {
1046+
unreachable!();
1047+
}
1048+
} else {
1049+
nested_access_chain.push(access.clone());
1050+
}
1051+
}
1052+
1053+
let root = Expr::Column(Column {
1054+
name: field.name().clone(),
1055+
relation: Some(qualifier.clone()),
1056+
spans: datafusion_common::Spans(spans),
1057+
});
1058+
let access_chain = nested_access_chain;
1059+
return Ok((root, access_chain));
1060+
}
1061+
}
1062+
}
1063+
let root = self.sql_expr_to_logical_expr(root, schema, planner_context)?;
1064+
Ok((root, access_chain))
1065+
}
1066+
9861067
fn sql_compound_field_access_to_expr(
9871068
&self,
9881069
root: SQLExpr,
9891070
access_chain: Vec<AccessExpr>,
9901071
schema: &DFSchema,
9911072
planner_context: &mut PlannerContext,
9921073
) -> Result<Expr> {
993-
let mut root = self.sql_expr_to_logical_expr(root, schema, planner_context)?;
1074+
let (mut root, access_chain) = self.extract_root_and_access_chain(
1075+
root,
1076+
access_chain,
1077+
schema,
1078+
planner_context,
1079+
)?;
9941080
let fields = access_chain
9951081
.into_iter()
9961082
.map(|field| match field {

0 commit comments

Comments
 (0)