Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ import org.apache.spark.sql.catalyst.expressions.{
AttributeReference,
Expression,
ExprId,
ExprUtils,
IntegerLiteral,
Literal
}
Expand Down Expand Up @@ -83,8 +82,8 @@ class AggregateResolver(
*
* If the resulting [[Aggregate]] contains lateral columns references, delegate the resolution of
* these columns to [[LateralColumnAliasResolver.handleLcaInAggregate]]. Otherwise, validate the
* [[Aggregate]] using the [[ExprUtils.assertValidAggregation]], update the `scopes` with the
* output of [[Aggregate]] and return the result.
* [[Aggregate]] using the [[AggregationValidator]], update the `scopes` with the output of
* [[Aggregate]] and return the result.
*
* Recursive CTE self-references are disallowed in aggregates per the SQL standard, as aggregates
* must see a fixed input set before computing aggregated results.
Expand Down Expand Up @@ -156,9 +155,7 @@ class AggregateResolver(
baseAggregate = aggregateWithLcaResolutionResult.baseAggregate
)
} else {
// TODO: This validation function does a post-traversal. This is discouraged in single-pass
// Analyzer.
ExprUtils.assertValidAggregation(finalAggregate)
AggregationValidator(finalAggregate)

AggregateResolutionResult(
operator = finalAggregate,
Expand Down Expand Up @@ -228,8 +225,8 @@ class AggregateResolver(
* going to contain all the aggregate expressions that don't have aggregate expressions in their
* subtrees. The grouping expressions list will be [col1 AS `col1`].
* All the [[Alias]]es should be stripped in order to pass logical plan comparison and to prevent
* unintentional exceptions from being thrown by [[ExprUtils.assertValidAggregation]], so the
* final grouping expressions list will be [col1].
* unintentional exceptions from being thrown by [[AggregationValidator]], so the final grouping
* expressions list will be [col1].
*/
private def tryResolveGroupByAll(
aggregateExpressions: ResolvedAggregateExpressions,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.catalyst.analysis.resolver

import org.apache.spark.sql.catalyst.expressions.ExprUtils
import org.apache.spark.sql.catalyst.plans.logical.Aggregate

/**
 * Validates [[Aggregate]] operators in the single-pass resolver. Delegates to
 * [[ExprUtils.assertValidAggregation]] with canonicalized expression equality since
 * non-deterministic expressions are still not extracted (done in [[PullOutNondeterministic]] post
 * hoc rule).
 *
 * TODO: [[ExprUtils.assertValidAggregation]] does a post-traversal. This is discouraged in the
 * single-pass Analyzer.
 */
object AggregationValidator {

  /**
   * Applies [[ExprUtils.assertValidAggregation]] on a given [[Aggregate]], comparing grouping
   * expressions to checked expressions via canonicalized equality. Throws an analysis error if
   * the aggregation is invalid; returns silently otherwise.
   */
  def validate(aggregate: Aggregate): Unit = {
    ExprUtils.assertValidAggregation(
      aggregate,
      (groupingExpression, checkedExpression) =>
        groupingExpression.canonicalized == checkedExpression.canonicalized
    )
  }

  /**
   * Shorthand for [[validate]] so existing call sites can use `AggregationValidator(aggregate)`.
   */
  def apply(aggregate: Aggregate): Unit = validate(aggregate)
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ import org.apache.spark.sql.catalyst.expressions.{
Alias,
Expression,
ExprId,
ExprUtils,
NamedExpression,
ScalarSubquery,
WindowExpression,
Expand Down Expand Up @@ -71,9 +70,7 @@ class LateralColumnAliasResolver(expressionResolver: ExpressionResolver, operato
case _ @Project(projectList: Seq[_], aggregate: Aggregate) =>
operatorResolutionContextStack.current.baseOperator = Some(aggregate)

// TODO: This validation function does a post-traversal. This is discouraged in single-pass
// Analyzer.
ExprUtils.assertValidAggregation(aggregate)
AggregationValidator(aggregate)

val remappedAliases = new HashMap[ExprId, Alias](projectList.size)
projectList.foreach {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.apache.spark.sql.catalyst.analysis.resolver

import org.apache.spark.sql.catalyst.expressions.{Expression, ExprUtils}
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan, Project}
import org.apache.spark.sql.internal.SQLConf

Expand Down Expand Up @@ -115,7 +115,7 @@ class ProjectResolver(operatorResolver: Resolver, expressionResolver: Expression
*
* - If the [[Aggregate]] contains lateral column references, delegate the resolution to
* [[LateralColumnAliasResolver.handleLcaInAggregate]];
* - Otherwise, validate the result [[Aggregate]] using [[ExprUtils.assertValidAggregation]];
* - Otherwise, validate the result [[Aggregate]] using [[AggregationValidator]];
*
 * Note: Recursive CTE self-references are disallowed before entering this method by the
* pre-scan in [[resolve]].
Expand All @@ -142,9 +142,7 @@ class ProjectResolver(operatorResolver: Resolver, expressionResolver: Expression
)
(aggregateWithLcaResolutionResult.resolvedOperator, projectList)
} else {
// TODO: This validation function does a post-traversal. This is discouraged in
// single-pass Analyzer.
ExprUtils.assertValidAggregation(aggregate)
AggregationValidator(aggregate)

val resolvedAggregateList = resolvedProjectList.copy(
aggregateListAliases = scopes.current.aggregateListAliases,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,11 @@ object ExprUtils extends EvalHelper with QueryErrorsBase {
}
}

def assertValidAggregation(a: Aggregate): Unit = {
def assertValidAggregation(
a: Aggregate,
semanticEquality: (Expression, Expression) => Boolean =
(groupingExpression, checkedExpression) =>
groupingExpression.semanticEquals(checkedExpression)): Unit = {
def checkValidAggregateExpression(expr: Expression): Unit = expr match {
case expr: AggregateExpression =>
val aggFunction = expr.aggregateFunction
Expand All @@ -178,14 +182,14 @@ object ExprUtils extends EvalHelper with QueryErrorsBase {
a.failAnalysis(
errorClass = "MISSING_GROUP_BY",
messageParameters = Map.empty)
case e: Attribute if !a.groupingExpressions.exists(_.semanticEquals(e)) =>
case e: Attribute if !a.groupingExpressions.exists(semanticEquality(_, e)) =>
throw QueryCompilationErrors.columnNotInGroupByClauseError(e)
case s: ScalarSubquery
if s.children.nonEmpty && !a.groupingExpressions.exists(_.semanticEquals(s)) =>
if s.children.nonEmpty && !a.groupingExpressions.exists(semanticEquality(_, s)) =>
s.failAnalysis(
errorClass = "SCALAR_SUBQUERY_IS_IN_GROUP_BY_OR_AGGREGATE_FUNCTION",
messageParameters = Map("sqlExpr" -> toSQLExpr(s)))
case e if a.groupingExpressions.exists(_.semanticEquals(e)) => // OK
case e if a.groupingExpressions.exists(semanticEquality(_, e)) => // OK
// There should be no Window in Aggregate - this case will fail later check anyway.
// Perform this check for special case of lateral column alias, when the window
// expression is not eligible to propagate to upper plan because it is not valid,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5100,6 +5100,13 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
checkAnswer(sql("SELECT 1L UNION SELECT 1"), Row(1L))
}
}

test("SPARK-56035: Introduce `AggregationValidator` for single-pass `Aggregate` validation") {
  // NOTE(review): name fixed to match the introduced object (`AggregationValidator`, not
  // `AggregateValidator`) and the stray trailing space removed.
  // These queries combine grouping columns with non-deterministic expressions (`rand()`);
  // presumably they exercise the canonicalized-equality path in AggregationValidator and the
  // assertion is simply that analysis does not throw — TODO confirm intended coverage.
  sql("SELECT col1 + rand() FROM VALUES(1) GROUP BY ALL")
  sql("SELECT col1 + rand() FROM VALUES(1) GROUP BY 1")
  sql("SELECT rand() FROM VALUES(1) GROUP BY ALL")
  sql("SELECT col1 - rand() FROM VALUES(1) GROUP BY ALL")
}
}

case class Foo(bar: Option[String])