fix duplicated schema name error from count wildcard #14824


Merged: 28 commits, Feb 26, 2025
2 changes: 1 addition & 1 deletion datafusion-testing
6 changes: 3 additions & 3 deletions datafusion/core/tests/dataframe/dataframe_functions.rs
@@ -1145,9 +1145,9 @@ async fn test_count_wildcard() -> Result<()> {
.build()
.unwrap();

let expected = "Sort: count(*) ASC NULLS LAST [count(*):Int64]\
\n Projection: count(*) [count(*):Int64]\
\n Aggregate: groupBy=[[test.b]], aggr=[[count(*)]] [b:UInt32, count(*):Int64]\
let expected = "Sort: count(Int64(1)) ASC NULLS LAST [count(Int64(1)):Int64]\
jayzhan211 (Contributor, Author) commented:

count_all() is count(1)

+ \n  Projection: count(Int64(1)) [count(Int64(1)):Int64]\
+ \n    Aggregate: groupBy=[[test.b]], aggr=[[count(Int64(1))]] [b:UInt32, count(Int64(1)):Int64]\
\n TableScan: test [a:UInt32, b:UInt32, c:UInt32]";

let formatted_plan = plan.display_indent_schema().to_string();
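For reference, a minimal sketch of how a plan like this is built with the logical plan builder API; the schema and the exact builder calls are assumptions inferred from the expected plan above, not code from this diff:

    // count_all() now lowers to count(Int64(1)) instead of count(*),
    // which is why every expected plan string in this test changes.
    use arrow::datatypes::{DataType, Field, Schema};
    use datafusion_expr::{col, logical_plan::table_scan};
    use datafusion_functions_aggregate::count::count_all;

    let schema = Schema::new(vec![
        Field::new("a", DataType::UInt32, false),
        Field::new("b", DataType::UInt32, false),
        Field::new("c", DataType::UInt32, false),
    ]);
    let plan = table_scan(Some("test"), &schema, None)?
        .aggregate(vec![col("b")], vec![count_all()])? // aggr=[[count(Int64(1))]]
        .project(vec![count_all()])?                   // Projection: count(Int64(1))
        .sort(vec![count_all().sort(true, false)])?    // Sort: ... ASC NULLS LAST
        .build()?;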
12 changes: 6 additions & 6 deletions datafusion/core/tests/dataframe/mod.rs
@@ -2455,7 +2455,7 @@ async fn test_count_wildcard_on_sort() -> Result<()> {
let ctx = create_join_context()?;

let sql_results = ctx
.sql("select b,count(*) from t1 group by b order by count(*)")
.sql("select b,count(1) from t1 group by b order by count(1)")
jayzhan211 (Contributor, Author) commented:

count_all() is count(1) now, so we need to change to count(1) to have a consistent name (count(*) is count(1) AS count(*) now).

A Contributor commented:

I had to double check -- the reason this needs to change is that the test is comparing against a dataframe built with count_all(), which now uses count(1).

Though maybe we could change count_all() to return count(1) as "count(*)" so it would be consistent with older versions?
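
For reference, a sketch of the comparison this test performs; the exact call shapes and the pretty_format_batches helper are assumptions, not the verbatim test:

    // The SQL side and the DataFrame side must render identical plans,
    // so the SQL text must produce the same name count_all() now produces.
    let sql_results = ctx
        .sql("select b, count(1) from t1 group by b order by count(1)")
        .await?
        .explain(false, false)?
        .collect()
        .await?;
    let df_results = ctx
        .table("t1")
        .await?
        .aggregate(vec![col("b")], vec![count_all()])?
        .sort(vec![count_all().sort(true, false)])?
        .explain(false, false)?
        .collect()
        .await?;
    assert_eq!(
        pretty_format_batches(&sql_results)?.to_string(),
        pretty_format_batches(&df_results)?.to_string()
    );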

jayzhan211 (Contributor, Author) commented on Feb 26, 2025:

left:
+---------------+------------------------------------------------------------------------------------------------------------+
| plan_type     | plan                                                                                                       |
+---------------+------------------------------------------------------------------------------------------------------------+
| logical_plan  | Projection: t1.b, count(*)                                                                                 |
|               |   Sort: count(Int64(1)) AS count(*) AS count(*) ASC NULLS LAST                                             |
|               |     Projection: t1.b, count(Int64(1)) AS count(*), count(Int64(1))                                         |
|               |       Aggregate: groupBy=[[t1.b]], aggr=[[count(Int64(1))]]                                                |
|               |         TableScan: t1 projection=[b]                                                                       |
| physical_plan | ProjectionExec: expr=[b@0 as b, count(*)@1 as count(*)]                                                    |
|               |   SortPreservingMergeExec: [count(Int64(1))@2 ASC NULLS LAST]                                              |
|               |     SortExec: expr=[count(Int64(1))@2 ASC NULLS LAST], preserve_partitioning=[true]                        |
|               |       ProjectionExec: expr=[b@0 as b, count(Int64(1))@1 as count(*), count(Int64(1))@1 as count(Int64(1))] |
|               |         AggregateExec: mode=FinalPartitioned, gby=[b@0 as b], aggr=[count(Int64(1))]                       |
|               |           CoalesceBatchesExec: target_batch_size=8192                                                      |
|               |             RepartitionExec: partitioning=Hash([b@0], 12), input_partitions=12                             |
|               |               RepartitionExec: partitioning=RoundRobinBatch(12), input_partitions=1                        |
|               |                 AggregateExec: mode=Partial, gby=[b@0 as b], aggr=[count(Int64(1))]                        |
|               |                   DataSourceExec: partitions=1, partition_sizes=[1]                                        |
+---------------+------------------------------------------------------------------------------------------------------------+

right:
+---------------+-----------------------------------------------------------------------------------+
| plan_type     | plan                                                                              |
+---------------+-----------------------------------------------------------------------------------+
| logical_plan  | Sort: count(Int64(1)) ASC NULLS LAST                                              |
|               |   Aggregate: groupBy=[[t1.b]], aggr=[[count(Int64(1))]]                           |
|               |     TableScan: t1 projection=[b]                                                  |
| physical_plan | SortPreservingMergeExec: [count(Int64(1))@1 ASC NULLS LAST]                       |
|               |   SortExec: expr=[count(Int64(1))@1 ASC NULLS LAST], preserve_partitioning=[true] |
|               |     AggregateExec: mode=FinalPartitioned, gby=[b@0 as b], aggr=[count(Int64(1))]  |
|               |       CoalesceBatchesExec: target_batch_size=8192                                 |
|               |         RepartitionExec: partitioning=Hash([b@0], 12), input_partitions=12        |
|               |           RepartitionExec: partitioning=RoundRobinBatch(12), input_partitions=1   |
|               |             AggregateExec: mode=Partial, gby=[b@0 as b], aggr=[count(Int64(1))]   |
|               |               DataSourceExec: partitions=1, partition_sizes=[1]                   |
+---------------+-----------------------------------------------------------------------------------+

This is the error after changing it back to count(*): we now have an additional projection, which carries both the aliased count(Int64(1)) AS count(*) for the select list and the bare count(Int64(1)) that the sort refers to:

Projection: t1.b, count(Int64(1)) AS count(*), count(Int64(1))

jayzhan211 (Contributor, Author) commented on Feb 26, 2025:
double alias in sort 😕

query I
SELECT count(*) order by count(*);
----
1

query TT
explain SELECT count(*) order by count(*);
----
logical_plan
01)Projection: count(*)
02)--Sort: count(Int64(1)) AS count(*) AS count(*) ASC NULLS LAST
03)----Projection: count(Int64(1)) AS count(*), count(Int64(1))
04)------Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]]
05)--------EmptyRelation
physical_plan
01)ProjectionExec: expr=[1 as count(*)]
02)--PlaceholderRowExec
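
A minimal sketch of where the double alias comes from, assuming count_all() returns a pre-aliased expression as proposed above:

    use datafusion_expr::lit;
    use datafusion_functions_aggregate::count::count;

    // count_all() would return count(Int64(1)) AS count(*); when the planner's
    // name preservation re-applies the saved name, the alias is wrapped again:
    let aliased = count(lit(1)).alias("count(*)");
    let doubled = aliased.alias("count(*)");
    // Renders as: count(Int64(1)) AS count(*) AS count(*)
    println!("{doubled}");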

A Contributor commented:
I found I could avoid the double alias by adding a check in Expr::alias:

diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index f8baf9c94..2f3c2c575 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -1276,7 +1276,14 @@ impl Expr {

     /// Return `self AS name` alias expression
     pub fn alias(self, name: impl Into<String>) -> Expr {
-        Expr::Alias(Alias::new(self, None::<&str>, name.into()))
+        let name = name.into();
+        // don't realias the same thing
+        if matches!(&self, Expr::Alias(Alias {name: existing_name, ..} ) if existing_name == &name)
+        {
+            self
+        } else {
+            Expr::Alias(Alias::new(self, None::<&str>, name))
+        }
     }

     /// Return `self AS name` alias expression with a specific qualifier
@@ -1285,7 +1292,15 @@ impl Expr {
         relation: Option<impl Into<TableReference>>,
         name: impl Into<String>,
     ) -> Expr {
-        Expr::Alias(Alias::new(self, relation, name.into()))
+        let relation = relation.map(|r| r.into());
+        let name = name.into();
+        // don't realias the same thing
+        if matches!(&self, Expr::Alias(Alias {name: existing_name, relation: existing_relation, ..} ) if existing_name == &name && relation.as_ref()==existing_relation.as_ref() )
+        {
+            self
+        } else {
+            Expr::Alias(Alias::new(self, relation, name))
+        }
     }

     /// Remove an alias from an expression if one exists.
diff --git a/datafusion/functions-aggregate/src/count.rs b/datafusion/functions-aggregate/src/count.rs
index a3339f0fc..1faf1968b 100644
--- a/datafusion/functions-aggregate/src/count.rs
+++ b/datafusion/functions-aggregate/src/count.rs
@@ -81,7 +81,7 @@ pub fn count_distinct(expr: Expr) -> Expr {

 /// Creates aggregation to count all rows, equivalent to `COUNT(*)`, `COUNT()`, `COUNT(1)`
 pub fn count_all() -> Expr {
-    count(Expr::Literal(COUNT_STAR_EXPANSION))
+    count(Expr::Literal(COUNT_STAR_EXPANSION)).alias("count(*)")
 }

 #[user_doc(
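
With that guard in place, re-aliasing an expression with its existing name becomes a no-op; a quick illustrative check (assumed behavior of the patch above):

    use datafusion_expr::col;

    let once = col("a").alias("x");
    let twice = once.clone().alias("x");
    // The second alias call returns the expression unchanged,
    // so it still displays as `a AS x`, not `a AS x AS x`.
    assert_eq!(once, twice);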

.await?
.explain(false, false)?
.collect()
@@ -2481,7 +2481,7 @@ async fn test_count_wildcard_on_sort() -> Result<()> {
async fn test_count_wildcard_on_where_in() -> Result<()> {
let ctx = create_join_context()?;
let sql_results = ctx
.sql("SELECT a,b FROM t1 WHERE a in (SELECT count(*) FROM t2)")
.sql("SELECT a,b FROM t1 WHERE a in (SELECT count(1) FROM t2)")
.await?
.explain(false, false)?
.collect()
@@ -2522,7 +2522,7 @@ async fn test_count_wildcard_on_where_in() -> Result<()> {
async fn test_count_wildcard_on_where_exist() -> Result<()> {
let ctx = create_join_context()?;
let sql_results = ctx
.sql("SELECT a, b FROM t1 WHERE EXISTS (SELECT count(*) FROM t2)")
.sql("SELECT a, b FROM t1 WHERE EXISTS (SELECT count(1) FROM t2)")
.await?
.explain(false, false)?
.collect()
@@ -2559,7 +2559,7 @@ async fn test_count_wildcard_on_window() -> Result<()> {
let ctx = create_join_context()?;

let sql_results = ctx
.sql("select count(*) OVER(ORDER BY a DESC RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING) from t1")
.sql("select count(1) OVER(ORDER BY a DESC RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING) from t1")
.await?
.explain(false, false)?
.collect()
@@ -2598,7 +2598,7 @@ async fn test_count_wildcard_on_aggregate() -> Result<()> {
register_alltypes_tiny_pages_parquet(&ctx).await?;

let sql_results = ctx
.sql("select count(*) from t1")
.sql("select count(1) from t1")
.await?
.explain(false, false)?
.collect()
@@ -2628,7 +2628,7 @@ async fn test_count_wildcard_on_where_scalar_subquery() -> Result<()> {
let ctx = create_join_context()?;

let sql_results = ctx
.sql("select a,b from t1 where (select count(*) from t2 where t1.a = t2.a)>0;")
.sql("select a,b from t1 where (select count(1) from t2 where t1.a = t2.a)>0;")
.await?
.explain(false, false)?
.collect()
18 changes: 8 additions & 10 deletions datafusion/core/tests/sql/explain_analyze.rs
@@ -71,8 +71,8 @@ async fn explain_analyze_baseline_metrics() {
);
assert_metrics!(
&formatted,
"ProjectionExec: expr=[count(*)",
"metrics=[output_rows=1, elapsed_compute="
"ProjectionExec: expr=[]",
"metrics=[output_rows=5, elapsed_compute="
);
assert_metrics!(
&formatted,
@@ -687,7 +687,7 @@ async fn csv_explain_analyze() {
// Only test basic plumbing and try to avoid having to change too
// many things. explain_analyze_baseline_metrics covers the values
// in greater depth
let needle = "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[count(*)], metrics=[output_rows=5";
let needle = "ProjectionExec: expr=[count(Int64(1))@1 as count(*), c1@0 as c1], metrics=[output_rows=5";
assert_contains!(&formatted, needle);

let verbose_needle = "Output Rows";
@@ -778,13 +778,11 @@ async fn explain_logical_plan_only() {
let actual = normalize_vec_for_explain(actual);

let expected = vec![
- vec![
- "logical_plan",
- "Aggregate: groupBy=[[]], aggr=[[count(*)]]\
- \n  SubqueryAlias: t\
- \n    Projection: \
- \n      Values: (Utf8(\"a\"), Int64(1), Int64(100)), (Utf8(\"a\"), Int64(2), Int64(150))"
- ]];
+ vec!["logical_plan", "Projection: count(Int64(1)) AS count(*)\
+ \n  Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]]\
+ \n    SubqueryAlias: t\
+ \n      Projection: \
+ \n        Values: (Utf8(\"a\"), Int64(1), Int64(100)), (Utf8(\"a\"), Int64(2), Int64(150))"]];
assert_eq!(expected, actual);
}

1 change: 1 addition & 0 deletions datafusion/expr/src/expr_rewriter/mod.rs
@@ -286,6 +286,7 @@ pub struct NamePreserver {

/// If the qualified name of an expression is remembered, it will be preserved
/// when rewriting the expression
+ #[derive(Debug)]
A Contributor commented:

💯

pub enum SavedName {
/// Saved qualified name to be preserved
Saved {
1 change: 1 addition & 0 deletions datafusion/expr/src/udaf.rs
@@ -100,6 +100,7 @@ impl fmt::Display for AggregateUDF {
}

/// Arguments passed to [`AggregateUDFImpl::value_from_stats`]
+ #[derive(Debug)]
pub struct StatisticsArgs<'a> {
/// The statistics of the aggregate input
pub statistics: &'a Statistics,
196 changes: 2 additions & 194 deletions datafusion/functions-aggregate/src/count.rs
@@ -17,15 +17,11 @@

use ahash::RandomState;
use datafusion_common::stats::Precision;
- use datafusion_expr::expr::{
-     schema_name_from_exprs, schema_name_from_sorts, AggregateFunctionParams,
-     WindowFunctionParams,
- };
use datafusion_functions_aggregate_common::aggregate::count_distinct::BytesViewDistinctCountAccumulator;
use datafusion_macros::user_doc;
use datafusion_physical_expr::expressions;
use std::collections::HashSet;
- use std::fmt::{Debug, Write};
+ use std::fmt::Debug;
use std::mem::{size_of, size_of_val};
use std::ops::BitAnd;
use std::sync::Arc;
@@ -51,11 +47,11 @@ use datafusion_common::{
downcast_value, internal_err, not_impl_err, Result, ScalarValue,
};
use datafusion_expr::function::StateFieldsArgs;
- use datafusion_expr::{expr_vec_fmt, Expr, ReversedUDAF, StatisticsArgs, TypeSignature};
use datafusion_expr::{
function::AccumulatorArgs, utils::format_state_name, Accumulator, AggregateUDFImpl,
Documentation, EmitTo, GroupsAccumulator, SetMonotonicity, Signature, Volatility,
};
+ use datafusion_expr::{Expr, ReversedUDAF, StatisticsArgs, TypeSignature};
use datafusion_functions_aggregate_common::aggregate::count_distinct::{
BytesDistinctCountAccumulator, FloatDistinctCountAccumulator,
PrimitiveDistinctCountAccumulator,
@@ -148,185 +144,6 @@ impl AggregateUDFImpl for Count {
"count"
}

fn schema_name(&self, params: &AggregateFunctionParams) -> Result<String> {
let AggregateFunctionParams {
args,
distinct,
filter,
order_by,
null_treatment,
} = params;

let mut schema_name = String::new();

if is_count_wildcard(args) {
schema_name.write_str("count(*)")?;
} else {
schema_name.write_fmt(format_args!(
"{}({}{})",
self.name(),
if *distinct { "DISTINCT " } else { "" },
schema_name_from_exprs(args)?
))?;
}

if let Some(null_treatment) = null_treatment {
schema_name.write_fmt(format_args!(" {}", null_treatment))?;
}

if let Some(filter) = filter {
schema_name.write_fmt(format_args!(" FILTER (WHERE {filter})"))?;
};

if let Some(order_by) = order_by {
schema_name.write_fmt(format_args!(
" ORDER BY [{}]",
schema_name_from_sorts(order_by)?
))?;
};

Ok(schema_name)
}

fn window_function_schema_name(
&self,
params: &WindowFunctionParams,
) -> Result<String> {
let WindowFunctionParams {
args,
partition_by,
order_by,
window_frame,
null_treatment,
} = params;

let mut schema_name = String::new();

if is_count_wildcard(args) {
schema_name.write_str("count(*)")?;
} else {
schema_name.write_fmt(format_args!(
"{}({})",
self.name(),
schema_name_from_exprs(args)?
))?;
}

if let Some(null_treatment) = null_treatment {
schema_name.write_fmt(format_args!(" {}", null_treatment))?;
}

if !partition_by.is_empty() {
schema_name.write_fmt(format_args!(
" PARTITION BY [{}]",
schema_name_from_exprs(partition_by)?
))?;
}

if !order_by.is_empty() {
schema_name.write_fmt(format_args!(
" ORDER BY [{}]",
schema_name_from_sorts(order_by)?
))?;
};

schema_name.write_fmt(format_args!(" {window_frame}"))?;

Ok(schema_name)
}

fn display_name(&self, params: &AggregateFunctionParams) -> Result<String> {
let AggregateFunctionParams {
args,
distinct,
filter,
order_by,
null_treatment,
} = params;

let mut display_name = String::new();

if is_count_wildcard(args) {
display_name.write_str("count(*)")?;
} else {
display_name.write_fmt(format_args!(
"{}({}{})",
self.name(),
if *distinct { "DISTINCT " } else { "" },
args.iter()
.map(|arg| format!("{arg}"))
.collect::<Vec<String>>()
.join(", ")
))?;
}

if let Some(nt) = null_treatment {
display_name.write_fmt(format_args!(" {}", nt))?;
}
if let Some(fe) = filter {
display_name.write_fmt(format_args!(" FILTER (WHERE {fe})"))?;
}
if let Some(ob) = order_by {
display_name.write_fmt(format_args!(
" ORDER BY [{}]",
ob.iter()
.map(|o| format!("{o}"))
.collect::<Vec<String>>()
.join(", ")
))?;
}

Ok(display_name)
}

fn window_function_display_name(
&self,
params: &WindowFunctionParams,
) -> Result<String> {
let WindowFunctionParams {
args,
partition_by,
order_by,
window_frame,
null_treatment,
} = params;

let mut display_name = String::new();

if is_count_wildcard(args) {
display_name.write_str("count(*)")?;
} else {
display_name.write_fmt(format_args!(
"{}({})",
self.name(),
expr_vec_fmt!(args)
))?;
}

if let Some(null_treatment) = null_treatment {
display_name.write_fmt(format_args!(" {}", null_treatment))?;
}

if !partition_by.is_empty() {
display_name.write_fmt(format_args!(
" PARTITION BY [{}]",
expr_vec_fmt!(partition_by)
))?;
}

if !order_by.is_empty() {
display_name
.write_fmt(format_args!(" ORDER BY [{}]", expr_vec_fmt!(order_by)))?;
};

display_name.write_fmt(format_args!(
" {} BETWEEN {} AND {}",
window_frame.units, window_frame.start_bound, window_frame.end_bound
))?;

Ok(display_name)
}

fn signature(&self) -> &Signature {
&self.signature
}
@@ -547,15 +364,6 @@ impl AggregateUDFImpl for Count {
}
}

fn is_count_wildcard(args: &[Expr]) -> bool {
match args {
[] => true, // count()
// All const should be coerced to int64 or rejected by the signature
[Expr::Literal(ScalarValue::Int64(Some(_)))] => true, // count(1)
_ => false, // More than one argument or non-matching cases
}
}

#[derive(Debug)]
struct CountAccumulator {
count: i64,