Skip to content

Remove inline table scan analyzer rule #15201

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Mar 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 16 additions & 8 deletions datafusion/core/tests/dataframe/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1571,14 +1571,18 @@ async fn with_column_join_same_columns() -> Result<()> {

assert_snapshot!(
df_with_column.logical_plan(),
@r###"
@r"
Projection: t1.c1, t2.c1, Boolean(true) AS new_column
Limit: skip=0, fetch=1
Sort: t1.c1 ASC NULLS FIRST
Inner Join: t1.c1 = t2.c1
TableScan: t1
TableScan: t2
"###
SubqueryAlias: t1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am concerned about this change -- it seems like a regression to me.

Maybe it is ok, but I don't understand the difference/ why there are a bunch of projections / SubqueryAlias nows

If these are fixed by physical plan time it is fine, but from here it looks like something is wrong

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I explain it in the above #15201 (comment)

Projection: aggregate_test_100.c1
TableScan: aggregate_test_100
SubqueryAlias: t2
Projection: aggregate_test_100.c1
TableScan: aggregate_test_100
"
);

assert_snapshot!(
Expand Down Expand Up @@ -1738,14 +1742,18 @@ async fn with_column_renamed_join() -> Result<()> {

assert_snapshot!(
df_renamed.logical_plan(),
@r###"
@r"
Projection: t1.c1 AS AAA, t1.c2, t1.c3, t2.c1, t2.c2, t2.c3
Limit: skip=0, fetch=1
Sort: t1.c1 ASC NULLS FIRST, t1.c2 ASC NULLS FIRST, t1.c3 ASC NULLS FIRST, t2.c1 ASC NULLS FIRST, t2.c2 ASC NULLS FIRST, t2.c3 ASC NULLS FIRST
Inner Join: t1.c1 = t2.c1
TableScan: t1
TableScan: t2
"###
SubqueryAlias: t1
Projection: aggregate_test_100.c1, aggregate_test_100.c2, aggregate_test_100.c3
TableScan: aggregate_test_100
SubqueryAlias: t2
Projection: aggregate_test_100.c1, aggregate_test_100.c2, aggregate_test_100.c3
TableScan: aggregate_test_100
"
);

assert_snapshot!(
Expand Down
38 changes: 32 additions & 6 deletions datafusion/expr/src/logical_plan/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -465,9 +465,7 @@ impl LogicalPlanBuilder {
projection: Option<Vec<usize>>,
filters: Vec<Expr>,
) -> Result<Self> {
TableScan::try_new(table_name, table_source, projection, filters, None)
.map(LogicalPlan::TableScan)
.map(Self::new)
Self::scan_with_filters_inner(table_name, table_source, projection, filters, None)
}

/// Convert a table provider into a builder with a TableScan with filter and fetch
Expand All @@ -478,9 +476,37 @@ impl LogicalPlanBuilder {
filters: Vec<Expr>,
fetch: Option<usize>,
) -> Result<Self> {
TableScan::try_new(table_name, table_source, projection, filters, fetch)
.map(LogicalPlan::TableScan)
.map(Self::new)
Self::scan_with_filters_inner(
table_name,
table_source,
projection,
filters,
fetch,
)
}

fn scan_with_filters_inner(
table_name: impl Into<TableReference>,
table_source: Arc<dyn TableSource>,
projection: Option<Vec<usize>>,
filters: Vec<Expr>,
fetch: Option<usize>,
) -> Result<Self> {
let table_scan =
TableScan::try_new(table_name, table_source, projection, filters, fetch)?;

// Inline TableScan
if table_scan.filters.is_empty() {
if let Some(p) = table_scan.source.get_logical_plan() {
let sub_plan = p.into_owned();
// Ensures that the reference to the inlined table remains the
// same, meaning we don't have to change any of the parent nodes
// that reference this table.
return Self::new(sub_plan).alias(table_scan.table_name);
}
}

Ok(Self::new(LogicalPlan::TableScan(table_scan)))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

seems like the same logic should apply to scan_with_filters_fetch below

}

/// Wrap a plan in a window
Expand Down
207 changes: 0 additions & 207 deletions datafusion/optimizer/src/analyzer/inline_table_scan.rs

This file was deleted.

3 changes: 0 additions & 3 deletions datafusion/optimizer/src/analyzer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,13 @@ use datafusion_common::Result;
use datafusion_expr::expr_rewriter::FunctionRewrite;
use datafusion_expr::{InvariantLevel, LogicalPlan};

use crate::analyzer::inline_table_scan::InlineTableScan;
use crate::analyzer::resolve_grouping_function::ResolveGroupingFunction;
use crate::analyzer::type_coercion::TypeCoercion;
use crate::utils::log_plan;

use self::function_rewrite::ApplyFunctionRewrites;

pub mod function_rewrite;
pub mod inline_table_scan;
pub mod resolve_grouping_function;
pub mod type_coercion;

Expand Down Expand Up @@ -96,7 +94,6 @@ impl Analyzer {
/// Create a new analyzer using the recommended list of rules
pub fn new() -> Self {
let rules: Vec<Arc<dyn AnalyzerRule + Send + Sync>> = vec![
Arc::new(InlineTableScan::new()),
Arc::new(ResolveGroupingFunction::new()),
Arc::new(TypeCoercion::new()),
];
Expand Down
26 changes: 26 additions & 0 deletions datafusion/sqllogictest/test_files/ddl.slt
Original file line number Diff line number Diff line change
Expand Up @@ -855,3 +855,29 @@ DROP TABLE t1;

statement ok
DROP TABLE t2;

statement count 0
create table t(a int) as values (1), (2), (3);

statement count 0
create view v as select a, count(a) from t group by a;

query II rowsort
select * from v;
----
1 1
2 1
3 1

query II rowsort
select "count(t.a)", a from v;
----
1 1
1 2
1 3

statement count 0
drop view v;

statement count 0
drop table t;
1 change: 0 additions & 1 deletion datafusion/sqllogictest/test_files/explain.slt
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,6 @@ EXPLAIN VERBOSE SELECT a, b, c FROM simple_explain_test
initial_logical_plan
01)Projection: simple_explain_test.a, simple_explain_test.b, simple_explain_test.c
02)--TableScan: simple_explain_test
logical_plan after inline_table_scan SAME TEXT AS ABOVE
logical_plan after resolve_grouping_function SAME TEXT AS ABOVE
logical_plan after type_coercion SAME TEXT AS ABOVE
analyzed_logical_plan SAME TEXT AS ABOVE
Expand Down