Skip to content

Commit f775791

Browse files
move projection pushdown optimization logic to ExecutionPlan trait (#14235)
* first iteration * wrap up rest of them * refactor more * fix the tests * minor * test changes * Update projection.rs * investigate further * revert test changes with the fix * Update projection_pushdown.rs * minor fmt * fix imports --------- Co-authored-by: berkaysynnada <[email protected]>
1 parent 18f14ab commit f775791

29 files changed

+1461
-1442
lines changed

datafusion/core/src/datasource/physical_plan/csv.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ use datafusion_execution::TaskContext;
4545
use datafusion_physical_expr::{EquivalenceProperties, LexOrdering};
4646

4747
use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType};
48+
use datafusion_physical_plan::projection::{
49+
all_alias_free_columns, new_projections_for_columns, ProjectionExec,
50+
};
4851
use futures::{StreamExt, TryStreamExt};
4952
use object_store::buffered::BufWriter;
5053
use object_store::{GetOptions, GetResultPayload, ObjectStore};
@@ -479,6 +482,36 @@ impl ExecutionPlan for CsvExec {
479482
cache: self.cache.clone(),
480483
}))
481484
}
485+
486+
fn try_swapping_with_projection(
487+
&self,
488+
projection: &ProjectionExec,
489+
) -> Result<Option<Arc<dyn ExecutionPlan>>> {
490+
// If there is any non-column or alias-carrier expression, Projection should not be removed.
491+
// This process can be moved into CsvExec, but it would be an overlap of their responsibility.
492+
Ok(all_alias_free_columns(projection.expr()).then(|| {
493+
let mut file_scan = self.base_config().clone();
494+
let new_projections = new_projections_for_columns(
495+
projection,
496+
&file_scan
497+
.projection
498+
.unwrap_or((0..self.schema().fields().len()).collect()),
499+
);
500+
file_scan.projection = Some(new_projections);
501+
502+
Arc::new(
503+
CsvExec::builder(file_scan)
504+
.with_has_header(self.has_header())
505+
.with_delimeter(self.delimiter())
506+
.with_quote(self.quote())
507+
.with_escape(self.escape())
508+
.with_comment(self.comment())
509+
.with_newlines_in_values(self.newlines_in_values())
510+
.with_file_compression_type(self.file_compression_type)
511+
.build(),
512+
) as _
513+
}))
514+
}
482515
}
483516

484517
/// A Config for [`CsvOpener`]

0 commit comments

Comments
 (0)