@@ -21,7 +21,7 @@ use std::ops::Deref;
2121use std:: sync:: Arc ;
2222
2323use crate :: PhysicalExpr ;
24- use crate :: expressions:: { CastExpr , Column , Literal } ;
24+ use crate :: expressions:: { Column , Literal } ;
2525use crate :: scalar_function:: ScalarFunctionExpr ;
2626use crate :: utils:: collect_columns;
2727
@@ -33,6 +33,7 @@ use datafusion_common::{
3333 Result , ScalarValue , Statistics , assert_or_internal_err, internal_datafusion_err,
3434 plan_err,
3535} ;
36+ use datafusion_expr_common:: interval_arithmetic:: Interval ;
3637
3738use datafusion_physical_expr_common:: metrics:: ExecutionPlanMetricsSet ;
3839use datafusion_physical_expr_common:: metrics:: ExpressionEvaluatorMetrics ;
@@ -715,10 +716,11 @@ impl ProjectionExprs {
715716 }
716717 }
717718 } else {
718- // Try to propagate statistics through expressions like CAST
719+ // Propagate statistics through expressions (CAST, arithmetic, etc.)
720+ // using the interval arithmetic system (evaluate_bounds).
719721 project_column_statistics_through_expr (
720722 expr. as_ref ( ) ,
721- & mut stats. column_statistics ,
723+ & stats. column_statistics ,
722724 )
723725 } ;
724726 column_statistics. push ( col_stats) ;
@@ -729,43 +731,70 @@ impl ProjectionExprs {
729731 }
730732}
731733
732- /// Propagate column statistics through expressions that preserve order.
733- ///
734- /// Currently handles:
735- /// - `Column` references (direct passthrough)
736- /// - `CAST` expressions (casts min/max values to the target type)
737- ///
738- /// For other expressions, returns unknown statistics.
734+ /// Propagate min/max statistics through an expression using
735+ /// [`PhysicalExpr::evaluate_bounds`]. Works for any expression that
736+ /// implements `evaluate_bounds` (CAST, negation, arithmetic with literals, etc.).
739737fn project_column_statistics_through_expr (
740738 expr : & dyn PhysicalExpr ,
741- column_stats : & mut [ ColumnStatistics ] ,
739+ column_stats : & [ ColumnStatistics ] ,
742740) -> ColumnStatistics {
743- if let Some ( col) = expr. as_any ( ) . downcast_ref :: < Column > ( ) {
744- std:: mem:: take ( & mut column_stats[ col. index ( ) ] )
745- } else if let Some ( cast_expr) = expr. as_any ( ) . downcast_ref :: < CastExpr > ( ) {
746- let inner_stats =
747- project_column_statistics_through_expr ( cast_expr. expr . as_ref ( ) , column_stats) ;
748- let target_type = cast_expr. cast_type ( ) ;
749- ColumnStatistics {
750- min_value : inner_stats
751- . min_value
752- . cast_to ( target_type)
753- . unwrap_or ( Precision :: Absent ) ,
754- max_value : inner_stats
755- . max_value
756- . cast_to ( target_type)
757- . unwrap_or ( Precision :: Absent ) ,
758- null_count : inner_stats. null_count ,
759- distinct_count : inner_stats. distinct_count ,
760- // Sum and byte size change under CAST, don't propagate
741+ match compute_bounds_and_exactness ( expr, column_stats) {
742+ Some ( ( interval, all_exact) ) => ColumnStatistics {
743+ min_value : to_precision ( interval. lower ( ) . clone ( ) , all_exact) ,
744+ max_value : to_precision ( interval. upper ( ) . clone ( ) , all_exact) ,
745+ null_count : Precision :: Absent ,
746+ distinct_count : Precision :: Absent ,
761747 sum_value : Precision :: Absent ,
762748 byte_size : Precision :: Absent ,
763- }
749+ } ,
750+ None => ColumnStatistics :: new_unknown ( ) ,
751+ }
752+ }
753+
754+ /// Convert a bound value to the appropriate [`Precision`] level.
755+ fn to_precision ( value : ScalarValue , exact : bool ) -> Precision < ScalarValue > {
756+ if value. is_null ( ) {
757+ Precision :: Absent
758+ } else if exact {
759+ Precision :: Exact ( value)
764760 } else {
765- ColumnStatistics :: new_unknown ( )
761+ Precision :: Inexact ( value )
766762 }
767763}
768764
765+ /// Recursively compute the output [`Interval`] and whether all leaf
766+ /// statistics are exact, in a single traversal of the expression tree.
767+ fn compute_bounds_and_exactness (
768+ expr : & dyn PhysicalExpr ,
769+ column_stats : & [ ColumnStatistics ] ,
770+ ) -> Option < ( Interval , bool ) > {
771+ if let Some ( col) = expr. as_any ( ) . downcast_ref :: < Column > ( ) {
772+ let stats = & column_stats[ col. index ( ) ] ;
773+ let min = stats. min_value . get_value ( ) ?;
774+ let max = stats. max_value . get_value ( ) ?;
775+ let exact = stats. min_value . is_exact ( ) . unwrap_or ( false )
776+ && stats. max_value . is_exact ( ) . unwrap_or ( false ) ;
777+ return Some ( ( Interval :: try_new ( min. clone ( ) , max. clone ( ) ) . ok ( ) ?, exact) ) ;
778+ }
779+
780+ if let Some ( lit) = expr. as_any ( ) . downcast_ref :: < Literal > ( ) {
781+ let val = lit. value ( ) ;
782+ return Some ( ( Interval :: try_new ( val. clone ( ) , val. clone ( ) ) . ok ( ) ?, true ) ) ;
783+ }
784+
785+ let children = expr. children ( ) ;
786+ let mut child_intervals = Vec :: with_capacity ( children. len ( ) ) ;
787+ let mut all_exact = true ;
788+ for child in & children {
789+ let ( interval, exact) =
790+ compute_bounds_and_exactness ( child. as_ref ( ) , column_stats) ?;
791+ child_intervals. push ( interval) ;
792+ all_exact &= exact;
793+ }
794+ let child_refs: Vec < & Interval > = child_intervals. iter ( ) . collect ( ) ;
795+ Some ( ( expr. evaluate_bounds ( & child_refs) . ok ( ) ?, all_exact) )
796+ }
797+
769798impl < ' a > IntoIterator for & ' a ProjectionExprs {
770799 type Item = & ' a ProjectionExpr ;
771800 type IntoIter = std:: slice:: Iter < ' a , ProjectionExpr > ;
@@ -2824,13 +2853,17 @@ pub(crate) mod tests {
28242853 // Should have 2 column statistics
28252854 assert_eq ! ( output_stats. column_statistics. len( ) , 2 ) ;
28262855
2827- // First column (expression ) should have unknown statistics
2856+ // First column (col0 + 1 ) should have propagated min/max via evaluate_bounds
28282857 assert_eq ! (
2829- output_stats. column_statistics[ 0 ] . distinct_count ,
2830- Precision :: Absent
2858+ output_stats. column_statistics[ 0 ] . min_value ,
2859+ Precision :: Exact ( ScalarValue :: Int64 ( Some ( - 3 ) ) )
28312860 ) ;
28322861 assert_eq ! (
28332862 output_stats. column_statistics[ 0 ] . max_value,
2863+ Precision :: Exact ( ScalarValue :: Int64 ( Some ( 22 ) ) )
2864+ ) ;
2865+ assert_eq ! (
2866+ output_stats. column_statistics[ 0 ] . distinct_count,
28342867 Precision :: Absent
28352868 ) ;
28362869
0 commit comments