18
18
//! [`PruningPredicate`] to apply filter [`Expr`] to prune "containers"
19
19
//! based on statistics (e.g. Parquet Row Groups)
20
20
//!
21
- //! [`Expr`]: crate::prelude:: Expr
21
+ //! [`Expr`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/enum.Expr.html
22
22
use std:: collections:: HashSet ;
23
23
use std:: sync:: Arc ;
24
24
25
- use crate :: {
26
- common:: { Column , DFSchema } ,
27
- error:: { DataFusionError , Result } ,
28
- logical_expr:: Operator ,
29
- physical_plan:: { ColumnarValue , PhysicalExpr } ,
30
- } ;
31
-
25
+ use arrow:: array:: AsArray ;
32
26
use arrow:: {
33
27
array:: { new_null_array, ArrayRef , BooleanArray } ,
34
28
datatypes:: { DataType , Field , Schema , SchemaRef } ,
35
29
record_batch:: { RecordBatch , RecordBatchOptions } ,
36
30
} ;
37
- use arrow_array:: cast:: AsArray ;
31
+ use log:: trace;
32
+
33
+ use datafusion_common:: error:: { DataFusionError , Result } ;
38
34
use datafusion_common:: tree_node:: TransformedResult ;
39
35
use datafusion_common:: {
40
36
internal_err, plan_datafusion_err, plan_err,
41
37
tree_node:: { Transformed , TreeNode } ,
42
38
ScalarValue ,
43
39
} ;
40
+ use datafusion_common:: { Column , DFSchema } ;
41
+ use datafusion_expr_common:: operator:: Operator ;
44
42
use datafusion_physical_expr:: utils:: { collect_columns, Guarantee , LiteralGuarantee } ;
45
43
use datafusion_physical_expr:: { expressions as phys_expr, PhysicalExprRef } ;
46
-
47
- use log:: trace;
44
+ use datafusion_physical_plan:: { ColumnarValue , PhysicalExpr } ;
48
45
49
46
/// A source of runtime statistical information to [`PruningPredicate`]s.
50
47
///
@@ -567,7 +564,7 @@ impl PruningPredicate {
567
564
/// expressions like `b = false`, but it does handle the
568
565
/// simplified version `b`. See [`ExprSimplifier`] to simplify expressions.
569
566
///
570
- /// [`ExprSimplifier`]: crate:: optimizer:: simplify_expressions:: ExprSimplifier
567
+ /// [`ExprSimplifier`]: https://docs.rs/datafusion/latest/datafusion/optimizer/simplify_expressions/struct.ExprSimplifier.html
571
568
pub fn prune < S : PruningStatistics > ( & self , statistics : & S ) -> Result < Vec < bool > > {
572
569
let mut builder = BoolVecBuilder :: new ( statistics. num_containers ( ) ) ;
573
570
@@ -653,7 +650,7 @@ impl PruningPredicate {
653
650
654
651
// this is only used by `parquet` feature right now
655
652
#[ allow( dead_code) ]
656
- pub ( crate ) fn required_columns ( & self ) -> & RequiredColumns {
653
+ pub fn required_columns ( & self ) -> & RequiredColumns {
657
654
& self . required_columns
658
655
}
659
656
@@ -762,7 +759,7 @@ fn is_always_true(expr: &Arc<dyn PhysicalExpr>) -> bool {
762
759
/// Handles creating references to the min/max statistics
763
760
/// for columns as well as recording which statistics are needed
764
761
#[ derive( Debug , Default , Clone ) ]
765
- pub ( crate ) struct RequiredColumns {
762
+ pub struct RequiredColumns {
766
763
/// The statistics required to evaluate this predicate:
767
764
/// * The unqualified column in the input schema
768
765
/// * Statistics type (e.g. Min or Max or Null_Count)
@@ -786,7 +783,7 @@ impl RequiredColumns {
786
783
/// * `true` returns None
787
784
#[ allow( dead_code) ]
788
785
// this fn is only used by `parquet` feature right now, thus the `allow(dead_code)`
789
- pub ( crate ) fn single_column ( & self ) -> Option < & phys_expr:: Column > {
786
+ pub fn single_column ( & self ) -> Option < & phys_expr:: Column > {
790
787
if self . columns . windows ( 2 ) . all ( |w| {
791
788
// check if all columns are the same (ignoring statistics and field)
792
789
let c1 = & w[ 0 ] . 0 ;
@@ -1664,15 +1661,14 @@ mod tests {
1664
1661
use std:: ops:: { Not , Rem } ;
1665
1662
1666
1663
use super :: * ;
1667
- use crate :: assert_batches_eq;
1668
- use crate :: logical_expr :: { col, lit} ;
1664
+ use datafusion_common :: assert_batches_eq;
1665
+ use datafusion_expr :: { col, lit} ;
1669
1666
1670
1667
use arrow:: array:: Decimal128Array ;
1671
1668
use arrow:: {
1672
- array:: { BinaryArray , Int32Array , Int64Array , StringArray } ,
1669
+ array:: { BinaryArray , Int32Array , Int64Array , StringArray , UInt64Array } ,
1673
1670
datatypes:: TimeUnit ,
1674
1671
} ;
1675
- use arrow_array:: UInt64Array ;
1676
1672
use datafusion_expr:: expr:: InList ;
1677
1673
use datafusion_expr:: { cast, is_null, try_cast, Expr } ;
1678
1674
use datafusion_functions_nested:: expr_fn:: { array_has, make_array} ;
@@ -3536,7 +3532,7 @@ mod tests {
3536
3532
// more complex case with unknown column
3537
3533
let input = known_expression. clone ( ) . and ( input. clone ( ) ) ;
3538
3534
let expected = phys_expr:: BinaryExpr :: new (
3539
- known_expression_transformed . clone ( ) ,
3535
+ Arc :: < dyn PhysicalExpr > :: clone ( & known_expression_transformed ) ,
3540
3536
Operator :: And ,
3541
3537
logical2physical ( & lit ( 42 ) , & schema) ,
3542
3538
) ;
@@ -3552,7 +3548,7 @@ mod tests {
3552
3548
// more complex case with unknown expression
3553
3549
let input = known_expression. and ( input) ;
3554
3550
let expected = phys_expr:: BinaryExpr :: new (
3555
- known_expression_transformed . clone ( ) ,
3551
+ Arc :: < dyn PhysicalExpr > :: clone ( & known_expression_transformed ) ,
3556
3552
Operator :: And ,
3557
3553
logical2physical ( & lit ( 42 ) , & schema) ,
3558
3554
) ;
@@ -4038,7 +4034,7 @@ mod tests {
4038
4034
) {
4039
4035
println ! ( "Pruning with expr: {}" , expr) ;
4040
4036
let expr = logical2physical ( & expr, schema) ;
4041
- let p = PruningPredicate :: try_new ( expr, schema . clone ( ) ) . unwrap ( ) ;
4037
+ let p = PruningPredicate :: try_new ( expr, Arc :: < Schema > :: clone ( schema ) ) . unwrap ( ) ;
4042
4038
let result = p. prune ( statistics) . unwrap ( ) ;
4043
4039
assert_eq ! ( result, expected) ;
4044
4040
}
0 commit comments