Skip to content

Commit 2c69bd4

Browse files
committed
Support unparsing plans after applying optimize_projections rule (#55)
1 parent a6586cc commit 2c69bd4

File tree

5 files changed

+72
-30
lines changed

5 files changed

+72
-30
lines changed

datafusion/common/src/config.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -636,6 +636,10 @@ config_namespace! {
636636
/// then the output will be coerced to a non-view.
637637
/// Coerces `Utf8View` to `LargeUtf8`, and `BinaryView` to `LargeBinary`.
638638
pub expand_views_at_output: bool, default = false
639+
640+
/// When set to true, the `optimize_projections` rule will not attempt to move, add, or remove existing projections.
641+
/// This flag helps maintain the original structure of the `LogicalPlan` when converting it back into SQL via the `unparser` module. It ensures the query layout remains simple and readable, relying on the underlying SQL engine to apply its own optimizations during execution.
642+
pub optimize_projections_preserve_existing_projections: bool, default = false
639643
}
640644
}
641645

datafusion/execution/src/config.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,15 @@ impl SessionConfig {
338338
self
339339
}
340340

341+
/// When set to true, the `optimize_projections` rule will not attempt to move, add, or remove existing projections.
342+
/// This is useful when optimization is used alongside unparsing logic to preserve the original layout and simplify the overall query structure.
343+
///
344+
/// [optimize_projections_preserve_existing_projections]: datafusion_common::config::OptimizerOptions::optimize_projections_preserve_existing_projections
345+
pub fn with_optimize_projections_preserve_existing_projections(mut self, enabled: bool) -> Self {
346+
self.options.optimizer.optimize_projections_preserve_existing_projections = enabled;
347+
self
348+
}
349+
341350
/// Enables or disables the use of pruning predicate for parquet readers to skip row groups
342351
pub fn with_parquet_pruning(mut self, enabled: bool) -> Self {
343352
self.options.execution.parquet.pruning = enabled;

datafusion/optimizer/src/optimize_projections/mod.rs

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ fn optimize_projections(
189189
// that its input only contains absolutely necessary columns for
190190
// the aggregate expressions. Note that necessary_indices refer to
191191
// fields in `aggregate.input.schema()`.
192-
add_projection_on_top_if_helpful(aggregate_input, necessary_exprs)
192+
add_projection_on_top_if_helpful(aggregate_input, necessary_exprs, config)
193193
})?
194194
.map_data(|aggregate_input| {
195195
// Create a new aggregate plan with the updated input and only the
@@ -233,9 +233,14 @@ fn optimize_projections(
233233
// refers to that schema
234234
let required_exprs =
235235
required_indices.get_required_exprs(&input_schema);
236-
let window_child =
237-
add_projection_on_top_if_helpful(window_child, required_exprs)?
238-
.data;
236+
237+
let window_child = add_projection_on_top_if_helpful(
238+
window_child,
239+
required_exprs,
240+
config,
241+
)?
242+
.data;
243+
239244
Window::try_new(new_window_expr, Arc::new(window_child))
240245
.map(LogicalPlan::Window)
241246
.map(Transformed::yes)
@@ -409,7 +414,7 @@ fn optimize_projections(
409414
optimize_projections(child, config, required_indices)?.transform_data(
410415
|new_input| {
411416
if projection_beneficial {
412-
add_projection_on_top_if_helpful(new_input, project_exprs)
417+
add_projection_on_top_if_helpful(new_input, project_exprs, config)
413418
} else {
414419
Ok(Transformed::no(new_input))
415420
}
@@ -708,16 +713,23 @@ fn split_join_requirements(
708713
///
709714
/// * `plan` - The input `LogicalPlan` to potentially add a projection to.
710715
/// * `project_exprs` - A list of expressions for the projection.
716+
/// * `config` - A reference to the optimizer configuration.
711717
///
712718
/// # Returns
713719
///
714720
/// A `Transformed` indicating if a projection was added
715721
fn add_projection_on_top_if_helpful(
716722
plan: LogicalPlan,
717723
project_exprs: Vec<Expr>,
724+
config: &dyn OptimizerConfig,
718725
) -> Result<Transformed<LogicalPlan>> {
719726
// Make sure projection decreases the number of columns, otherwise it is unnecessary.
720-
if project_exprs.len() >= plan.schema().fields().len() {
727+
if config
728+
.options()
729+
.optimizer
730+
.optimize_projections_preserve_existing_projections
731+
|| project_exprs.len() >= plan.schema().fields().len()
732+
{
721733
Ok(Transformed::no(plan))
722734
} else {
723735
Projection::try_new(project_exprs, Arc::new(plan))
@@ -759,7 +771,7 @@ fn rewrite_projection_given_requirements(
759771
// projection down
760772
optimize_projections(Arc::unwrap_or_clone(input), config, required_indices)?
761773
.transform_data(|input| {
762-
if is_projection_unnecessary(&input, &exprs_used)? {
774+
if is_projection_unnecessary(&input, &exprs_used, config)? {
763775
Ok(Transformed::yes(input))
764776
} else {
765777
Projection::try_new(exprs_used, Arc::new(input))
@@ -770,9 +782,15 @@ fn rewrite_projection_given_requirements(
770782
}
771783

772784
/// Projection is unnecessary, when
785+
/// - `optimize_projections_preserve_existing_projections` optimizer config is false, and
773786
/// - input schema of the projection, output schema of the projection are same, and
774787
/// - all projection expressions are either Column or Literal
775788
fn is_projection_unnecessary(input: &LogicalPlan, proj_exprs: &[Expr]) -> Result<bool> {
789+
!config
790+
.options()
791+
.optimizer
792+
.optimize_projections_preserve_existing_projections
793+
776794
let proj_schema = projection_schema(input, proj_exprs)?;
777795
Ok(&proj_schema == input.schema() && proj_exprs.iter().all(is_expr_trivial))
778796
}

datafusion/sql/src/unparser/plan.rs

Lines changed: 33 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ impl Unparser<'_> {
279279
match plan {
280280
LogicalPlan::TableScan(scan) => {
281281
if let Some(unparsed_table_scan) =
282-
Self::unparse_table_scan_pushdown(plan, None)?
282+
Self::unparse_table_scan_pushdown(plan, None, select.already_projected())?
283283
{
284284
return self.select_to_sql_recursively(
285285
&unparsed_table_scan,
@@ -567,7 +567,8 @@ impl Unparser<'_> {
567567
let unparsed_table_scan = Self::unparse_table_scan_pushdown(
568568
plan,
569569
Some(plan_alias.alias.clone()),
570-
)?;
570+
select.already_projected(),
571+
)?;
571572
// if the child plan is a TableScan with pushdown operations, we don't need to
572573
// create an additional subquery for it
573574
if !select.already_projected() && unparsed_table_scan.is_none() {
@@ -696,6 +697,7 @@ impl Unparser<'_> {
696697
fn unparse_table_scan_pushdown(
697698
plan: &LogicalPlan,
698699
alias: Option<TableReference>,
700+
already_projected: bool,
699701
) -> Result<Option<LogicalPlan>> {
700702
match plan {
701703
LogicalPlan::TableScan(table_scan) => {
@@ -725,24 +727,29 @@ impl Unparser<'_> {
725727
}
726728
}
727729

728-
if let Some(project_vec) = &table_scan.projection {
729-
let project_columns = project_vec
730-
.iter()
731-
.cloned()
732-
.map(|i| {
733-
let schema = table_scan.source.schema();
734-
let field = schema.field(i);
735-
if alias.is_some() {
736-
Column::new(alias.clone(), field.name().clone())
737-
} else {
738-
Column::new(
739-
Some(table_scan.table_name.clone()),
740-
field.name().clone(),
741-
)
742-
}
743-
})
744-
.collect::<Vec<_>>();
745-
builder = builder.project(project_columns)?;
730+
// Avoid creating a duplicate Projection node, which would result in an additional subquery if a projection already exists.
731+
// For example, if the `optimize_projection` rule is applied, there will be a Projection node, and duplicate projection
732+
// information included in the TableScan node.
733+
if !already_projected {
734+
if let Some(project_vec) = &table_scan.projection {
735+
let project_columns = project_vec
736+
.iter()
737+
.cloned()
738+
.map(|i| {
739+
let schema = table_scan.source.schema();
740+
let field = schema.field(i);
741+
if alias.is_some() {
742+
Column::new(alias.clone(), field.name().clone())
743+
} else {
744+
Column::new(
745+
Some(table_scan.table_name.clone()),
746+
field.name().clone(),
747+
)
748+
}
749+
})
750+
.collect::<Vec<_>>();
751+
builder = builder.project(project_columns)?;
752+
}
746753
}
747754

748755
let filter_expr: Result<Option<Expr>> = table_scan
@@ -787,14 +794,17 @@ impl Unparser<'_> {
787794
Self::unparse_table_scan_pushdown(
788795
&subquery_alias.input,
789796
Some(subquery_alias.alias.clone()),
797+
already_projected,
790798
)
791799
}
792800
// SubqueryAlias could be rewritten to a plan with a projection as the top node by [rewrite::subquery_alias_inner_query_and_columns].
793801
// The inner table scan could be a scan with pushdown operations.
794802
LogicalPlan::Projection(projection) => {
795-
if let Some(plan) =
796-
Self::unparse_table_scan_pushdown(&projection.input, alias.clone())?
797-
{
803+
if let Some(plan) = Self::unparse_table_scan_pushdown(
804+
&projection.input,
805+
alias.clone(),
806+
already_projected,
807+
)? {
798808
let exprs = if alias.is_some() {
799809
let mut alias_rewriter =
800810
alias.as_ref().map(|alias_name| TableAliasRewriter {

datafusion/sql/tests/cases/plan_to_sql.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -882,6 +882,7 @@ fn test_table_scan_pushdown() -> Result<()> {
882882
let query_from_table_scan_with_projection = LogicalPlanBuilder::from(
883883
table_scan(Some("t1"), &schema, Some(vec![0, 1]))?.build()?,
884884
)
885+
.project(vec![col("id"), col("age")])?
885886
.project(vec![wildcard()])?
886887
.build()?;
887888
let query_from_table_scan_with_projection =

0 commit comments

Comments
 (0)