Skip to content

Commit a2eca29

Browse files
ClSlaidalamb
andauthored
Stop copying LogicalPlan and Exprs in ReplaceDistinctWithAggregate (#10460)
* patch: implement rewrite for RDWA Signed-off-by: cailue <[email protected]> * refactor: rewrite replace_distinct_aggregate Signed-off-by: 蔡略 <[email protected]> * patch: recorrect aggr_expr Signed-off-by: 蔡略 <[email protected]> * Update datafusion/optimizer/src/replace_distinct_aggregate.rs --------- Signed-off-by: cailue <[email protected]> Signed-off-by: 蔡略 <[email protected]> Co-authored-by: Andrew Lamb <[email protected]>
1 parent 9cc981b commit a2eca29

File tree

1 file changed

+49
-24
lines changed

1 file changed

+49
-24
lines changed

datafusion/optimizer/src/replace_distinct_aggregate.rs

Lines changed: 49 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@
1919
use crate::optimizer::{ApplyOrder, ApplyOrder::BottomUp};
2020
use crate::{OptimizerConfig, OptimizerRule};
2121

22-
use datafusion_common::{Column, Result};
22+
use datafusion_common::tree_node::Transformed;
23+
use datafusion_common::{internal_err, Column, Result};
24+
use datafusion_expr::expr_rewriter::normalize_cols;
2325
use datafusion_expr::utils::expand_wildcard;
2426
use datafusion_expr::{
2527
aggregate_function::AggregateFunction as AggregateFunctionFunc, col,
@@ -66,20 +68,24 @@ impl ReplaceDistinctWithAggregate {
6668
}
6769

6870
impl OptimizerRule for ReplaceDistinctWithAggregate {
69-
fn try_optimize(
71+
fn supports_rewrite(&self) -> bool {
72+
true
73+
}
74+
75+
fn rewrite(
7076
&self,
71-
plan: &LogicalPlan,
77+
plan: LogicalPlan,
7278
_config: &dyn OptimizerConfig,
73-
) -> Result<Option<LogicalPlan>> {
79+
) -> Result<Transformed<LogicalPlan>> {
7480
match plan {
7581
LogicalPlan::Distinct(Distinct::All(input)) => {
76-
let group_expr = expand_wildcard(input.schema(), input, None)?;
77-
let aggregate = LogicalPlan::Aggregate(Aggregate::try_new(
78-
input.clone(),
82+
let group_expr = expand_wildcard(input.schema(), &input, None)?;
83+
let aggr_plan = LogicalPlan::Aggregate(Aggregate::try_new(
84+
input,
7985
group_expr,
8086
vec![],
8187
)?);
82-
Ok(Some(aggregate))
88+
Ok(Transformed::yes(aggr_plan))
8389
}
8490
LogicalPlan::Distinct(Distinct::On(DistinctOn {
8591
select_expr,
@@ -88,13 +94,15 @@ impl OptimizerRule for ReplaceDistinctWithAggregate {
8894
input,
8995
schema,
9096
})) => {
97+
let expr_cnt = on_expr.len();
98+
9199
// Construct the aggregation expression to be used to fetch the selected expressions.
92100
let aggr_expr = select_expr
93-
.iter()
101+
.into_iter()
94102
.map(|e| {
95103
Expr::AggregateFunction(AggregateFunction::new(
96104
AggregateFunctionFunc::FirstValue,
97-
vec![e.clone()],
105+
vec![e],
98106
false,
99107
None,
100108
sort_expr.clone(),
@@ -103,45 +111,62 @@ impl OptimizerRule for ReplaceDistinctWithAggregate {
103111
})
104112
.collect::<Vec<Expr>>();
105113

114+
let aggr_expr = normalize_cols(aggr_expr, input.as_ref())?;
115+
let group_expr = normalize_cols(on_expr, input.as_ref())?;
116+
106117
// Build the aggregation plan
107-
let plan = LogicalPlanBuilder::from(input.as_ref().clone())
108-
.aggregate(on_expr.clone(), aggr_expr.to_vec())?
109-
.build()?;
118+
let plan = LogicalPlan::Aggregate(Aggregate::try_new(
119+
input, group_expr, aggr_expr,
120+
)?);
121+
// TODO use LogicalPlanBuilder directly rather than recreating the Aggregate
122+
// when https://github.com/apache/datafusion/issues/10485 is available
123+
let lpb = LogicalPlanBuilder::from(plan);
110124

111-
let plan = if let Some(sort_expr) = sort_expr {
125+
let plan = if let Some(mut sort_expr) = sort_expr {
112126
// While sort expressions were used in the `FIRST_VALUE` aggregation itself above,
113127
// this on it's own isn't enough to guarantee the proper output order of the grouping
114128
// (`ON`) expression, so we need to sort those as well.
115-
LogicalPlanBuilder::from(plan)
116-
.sort(sort_expr[..on_expr.len()].to_vec())?
117-
.build()?
129+
130+
// truncate the sort_expr to the length of on_expr
131+
sort_expr.truncate(expr_cnt);
132+
133+
lpb.sort(sort_expr)?.build()?
118134
} else {
119-
plan
135+
lpb.build()?
120136
};
121137

122138
// Whereas the aggregation plan by default outputs both the grouping and the aggregation
123139
// expressions, for `DISTINCT ON` we only need to emit the original selection expressions.
140+
124141
let project_exprs = plan
125142
.schema()
126143
.iter()
127-
.skip(on_expr.len())
144+
.skip(expr_cnt)
128145
.zip(schema.iter())
129146
.map(|((new_qualifier, new_field), (old_qualifier, old_field))| {
130-
Ok(col(Column::from((new_qualifier, new_field)))
131-
.alias_qualified(old_qualifier.cloned(), old_field.name()))
147+
col(Column::from((new_qualifier, new_field)))
148+
.alias_qualified(old_qualifier.cloned(), old_field.name())
132149
})
133-
.collect::<Result<Vec<Expr>>>()?;
150+
.collect::<Vec<Expr>>();
134151

135152
let plan = LogicalPlanBuilder::from(plan)
136153
.project(project_exprs)?
137154
.build()?;
138155

139-
Ok(Some(plan))
156+
Ok(Transformed::yes(plan))
140157
}
141-
_ => Ok(None),
158+
_ => Ok(Transformed::no(plan)),
142159
}
143160
}
144161

162+
fn try_optimize(
163+
&self,
164+
_plan: &LogicalPlan,
165+
_config: &dyn OptimizerConfig,
166+
) -> Result<Option<LogicalPlan>> {
167+
internal_err!("Should have called ReplaceDistinctWithAggregate::rewrite")
168+
}
169+
145170
fn name(&self) -> &str {
146171
"replace_distinct_aggregate"
147172
}

0 commit comments

Comments
 (0)