Skip to content

Commit 30ff48e

Browse files
authored
Move Pruning into physical-optimizer crate (#13485)
* Move `Pruning` into `physical-optimizer` crate
* fix check
* fix issues
* cargo update
1 parent c3681dc commit 30ff48e

File tree

5 files changed

+56
-54
lines changed

5 files changed

+56
-54
lines changed

datafusion-cli/Cargo.lock

Lines changed: 33 additions & 31 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/core/src/physical_optimizer/mod.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ pub mod enforce_sorting;
2727
pub mod join_selection;
2828
pub mod optimizer;
2929
pub mod projection_pushdown;
30-
pub mod pruning;
3130
pub mod replace_with_order_preserving_variants;
3231
pub mod sanity_checker;
3332
#[cfg(test)]

datafusion/physical-optimizer/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,15 @@ arrow-schema = { workspace = true }
3737
datafusion-common = { workspace = true, default-features = true }
3838
datafusion-execution = { workspace = true }
3939
datafusion-expr-common = { workspace = true, default-features = true }
40+
datafusion-optimizer = { workspace = true }
4041
datafusion-physical-expr = { workspace = true }
4142
datafusion-physical-plan = { workspace = true }
4243
itertools = { workspace = true }
44+
log = { workspace = true }
4345
recursive = { workspace = true }
4446

4547
[dev-dependencies]
48+
datafusion-expr = { workspace = true }
4649
datafusion-functions-aggregate = { workspace = true }
50+
datafusion-functions-nested = { workspace = true }
4751
tokio = { workspace = true }

datafusion/physical-optimizer/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ pub mod limit_pushdown;
2525
pub mod limited_distinct_aggregation;
2626
mod optimizer;
2727
pub mod output_requirements;
28+
pub mod pruning;
2829
pub mod topk_aggregation;
2930
pub mod update_aggr_exprs;
3031

datafusion/core/src/physical_optimizer/pruning.rs renamed to datafusion/physical-optimizer/src/pruning.rs

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -18,33 +18,30 @@
1818
//! [`PruningPredicate`] to apply filter [`Expr`] to prune "containers"
1919
//! based on statistics (e.g. Parquet Row Groups)
2020
//!
21-
//! [`Expr`]: crate::prelude::Expr
21+
//! [`Expr`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/enum.Expr.html
2222
use std::collections::HashSet;
2323
use std::sync::Arc;
2424

25-
use crate::{
26-
common::{Column, DFSchema},
27-
error::{DataFusionError, Result},
28-
logical_expr::Operator,
29-
physical_plan::{ColumnarValue, PhysicalExpr},
30-
};
31-
25+
use arrow::array::AsArray;
3226
use arrow::{
3327
array::{new_null_array, ArrayRef, BooleanArray},
3428
datatypes::{DataType, Field, Schema, SchemaRef},
3529
record_batch::{RecordBatch, RecordBatchOptions},
3630
};
37-
use arrow_array::cast::AsArray;
31+
use log::trace;
32+
33+
use datafusion_common::error::{DataFusionError, Result};
3834
use datafusion_common::tree_node::TransformedResult;
3935
use datafusion_common::{
4036
internal_err, plan_datafusion_err, plan_err,
4137
tree_node::{Transformed, TreeNode},
4238
ScalarValue,
4339
};
40+
use datafusion_common::{Column, DFSchema};
41+
use datafusion_expr_common::operator::Operator;
4442
use datafusion_physical_expr::utils::{collect_columns, Guarantee, LiteralGuarantee};
4543
use datafusion_physical_expr::{expressions as phys_expr, PhysicalExprRef};
46-
47-
use log::trace;
44+
use datafusion_physical_plan::{ColumnarValue, PhysicalExpr};
4845

4946
/// A source of runtime statistical information to [`PruningPredicate`]s.
5047
///
@@ -567,7 +564,7 @@ impl PruningPredicate {
567564
/// expressions like `b = false`, but it does handle the
568565
/// simplified version `b`. See [`ExprSimplifier`] to simplify expressions.
569566
///
570-
/// [`ExprSimplifier`]: crate::optimizer::simplify_expressions::ExprSimplifier
567+
/// [`ExprSimplifier`]: https://docs.rs/datafusion/latest/datafusion/optimizer/simplify_expressions/struct.ExprSimplifier.html
571568
pub fn prune<S: PruningStatistics>(&self, statistics: &S) -> Result<Vec<bool>> {
572569
let mut builder = BoolVecBuilder::new(statistics.num_containers());
573570

@@ -653,7 +650,7 @@ impl PruningPredicate {
653650

654651
// this is only used by `parquet` feature right now
655652
#[allow(dead_code)]
656-
pub(crate) fn required_columns(&self) -> &RequiredColumns {
653+
pub fn required_columns(&self) -> &RequiredColumns {
657654
&self.required_columns
658655
}
659656

@@ -762,7 +759,7 @@ fn is_always_true(expr: &Arc<dyn PhysicalExpr>) -> bool {
762759
/// Handles creating references to the min/max statistics
763760
/// for columns as well as recording which statistics are needed
764761
#[derive(Debug, Default, Clone)]
765-
pub(crate) struct RequiredColumns {
762+
pub struct RequiredColumns {
766763
/// The statistics required to evaluate this predicate:
767764
/// * The unqualified column in the input schema
768765
/// * Statistics type (e.g. Min or Max or Null_Count)
@@ -786,7 +783,7 @@ impl RequiredColumns {
786783
/// * `true` returns None
787784
#[allow(dead_code)]
788785
// this fn is only used by `parquet` feature right now, thus the `allow(dead_code)`
789-
pub(crate) fn single_column(&self) -> Option<&phys_expr::Column> {
786+
pub fn single_column(&self) -> Option<&phys_expr::Column> {
790787
if self.columns.windows(2).all(|w| {
791788
// check if all columns are the same (ignoring statistics and field)
792789
let c1 = &w[0].0;
@@ -1664,15 +1661,14 @@ mod tests {
16641661
use std::ops::{Not, Rem};
16651662

16661663
use super::*;
1667-
use crate::assert_batches_eq;
1668-
use crate::logical_expr::{col, lit};
1664+
use datafusion_common::assert_batches_eq;
1665+
use datafusion_expr::{col, lit};
16691666

16701667
use arrow::array::Decimal128Array;
16711668
use arrow::{
1672-
array::{BinaryArray, Int32Array, Int64Array, StringArray},
1669+
array::{BinaryArray, Int32Array, Int64Array, StringArray, UInt64Array},
16731670
datatypes::TimeUnit,
16741671
};
1675-
use arrow_array::UInt64Array;
16761672
use datafusion_expr::expr::InList;
16771673
use datafusion_expr::{cast, is_null, try_cast, Expr};
16781674
use datafusion_functions_nested::expr_fn::{array_has, make_array};
@@ -3536,7 +3532,7 @@ mod tests {
35363532
// more complex case with unknown column
35373533
let input = known_expression.clone().and(input.clone());
35383534
let expected = phys_expr::BinaryExpr::new(
3539-
known_expression_transformed.clone(),
3535+
Arc::<dyn PhysicalExpr>::clone(&known_expression_transformed),
35403536
Operator::And,
35413537
logical2physical(&lit(42), &schema),
35423538
);
@@ -3552,7 +3548,7 @@ mod tests {
35523548
// more complex case with unknown expression
35533549
let input = known_expression.and(input);
35543550
let expected = phys_expr::BinaryExpr::new(
3555-
known_expression_transformed.clone(),
3551+
Arc::<dyn PhysicalExpr>::clone(&known_expression_transformed),
35563552
Operator::And,
35573553
logical2physical(&lit(42), &schema),
35583554
);
@@ -4038,7 +4034,7 @@ mod tests {
40384034
) {
40394035
println!("Pruning with expr: {}", expr);
40404036
let expr = logical2physical(&expr, schema);
4041-
let p = PruningPredicate::try_new(expr, schema.clone()).unwrap();
4037+
let p = PruningPredicate::try_new(expr, Arc::<Schema>::clone(schema)).unwrap();
40424038
let result = p.prune(statistics).unwrap();
40434039
assert_eq!(result, expected);
40444040
}

0 commit comments

Comments
 (0)