chore: Improve reporting of fallback reasons for CollectLimit #1694

Open · wants to merge 3 commits into base: main
2 changes: 1 addition & 1 deletion docs/source/user-guide/iceberg.md
@@ -115,7 +115,7 @@ This should produce the following output:
scala> spark.sql(s"SELECT * from t1").show()
25/04/28 07:29:37 INFO core/src/lib.rs: Comet native library version 0.9.0 initialized
25/04/28 07:29:37 WARN CometSparkSessionExtensions$CometExecRule: Comet cannot execute some parts of this plan natively (set spark.comet.explainFallback.enabled=false to disable this logging):
-CollectLimit [COMET: CollectLimit is not supported]
+CollectLimit
+- Project [COMET: toprettystring is not supported]
+- CometScanWrapper

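For readers following this guide, a minimal spark-shell sketch of how these fallback messages can be surfaced is shown below. Only spark.comet.explainFallback.enabled (quoted in the WARN line above) and spark.comet.exec.collectLimit.enabled (asserted in the test changes in this PR) actually appear on this page; the remaining keys are assumed to match CometConf's usual naming and may differ by version.

// Sketch of a spark-shell session; assumes Comet is on the classpath and the table t1
// from the example above exists.
spark.conf.set("spark.comet.enabled", "true")                     // assumed key for CometConf.COMET_ENABLED
spark.conf.set("spark.comet.exec.enabled", "true")                // assumed key for CometConf.COMET_EXEC_ENABLED
spark.conf.set("spark.comet.exec.collectLimit.enabled", "false")  // key taken from the test changes below
spark.conf.set("spark.comet.explainFallback.enabled", "true")     // key quoted in the WARN message above

// With CollectLimit support disabled, the fallback logging should now report the
// specific reason (per this PR) rather than a generic "CollectLimit is not supported".
spark.sql("SELECT * FROM t1 LIMIT 10").show()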
42 changes: 30 additions & 12 deletions spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala
@@ -19,6 +19,8 @@

package org.apache.comet.rules

import scala.collection.mutable.ListBuffer

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.expressions.{Divide, DoubleLiteral, EqualNullSafe, EqualTo, Expression, FloatLiteral, GreaterThan, GreaterThanOrEqual, KnownFloatingPointNormalized, LessThan, LessThanOrEqual, NamedExpression, Remainder}
import org.apache.spark.sql.catalyst.expressions.aggregate.{Final, Partial}
@@ -36,7 +38,7 @@ import org.apache.spark.sql.types.{DoubleType, FloatType}

import org.apache.comet.{CometConf, ExtendedExplainInfo}
import org.apache.comet.CometConf.{COMET_ANSI_MODE_ENABLED, COMET_NATIVE_SCAN_IMPL, COMET_SHUFFLE_FALLBACK_TO_COLUMNAR}
import org.apache.comet.CometSparkSessionExtensions.{createMessage, getCometBroadcastNotEnabledReason, getCometShuffleNotEnabledReason, isANSIEnabled, isCometBroadCastForceEnabled, isCometExecEnabled, isCometJVMShuffleMode, isCometLoaded, isCometNativeShuffleMode, isCometScan, isCometShuffleEnabled, isSpark40Plus, shouldApplySparkToColumnar, withInfo}
import org.apache.comet.CometSparkSessionExtensions.{createMessage, getCometBroadcastNotEnabledReason, getCometShuffleNotEnabledReason, isANSIEnabled, isCometBroadCastForceEnabled, isCometExecEnabled, isCometJVMShuffleMode, isCometLoaded, isCometNativeShuffleMode, isCometScan, isCometShuffleEnabled, isSpark40Plus, shouldApplySparkToColumnar, withInfo, withInfos}
import org.apache.comet.serde.OperatorOuterClass.Operator
import org.apache.comet.serde.QueryPlanSerde

@@ -196,18 +198,34 @@ case class CometExecRule(session: SparkSession) extends Rule[SparkPlan] {
op,
CometGlobalLimitExec(_, op, op.limit, op.child, SerializedPlan(None)))

case op: CollectLimitExec
if isCometNative(op.child) && CometConf.COMET_EXEC_COLLECT_LIMIT_ENABLED.get(conf)
&& isCometShuffleEnabled(conf)
&& op.offset == 0 =>
QueryPlanSerde
.operator2Proto(op)
.map { nativeOp =>
val cometOp =
CometCollectLimitExec(op, op.limit, op.offset, op.child)
CometSinkPlaceHolder(nativeOp, op, cometOp)
case op: CollectLimitExec =>
val fallbackReasons = new ListBuffer[String]()
if (!CometConf.COMET_EXEC_COLLECT_LIMIT_ENABLED.get(conf)) {
fallbackReasons += s"${CometConf.COMET_EXEC_COLLECT_LIMIT_ENABLED.key} is false"
}
if (!isCometShuffleEnabled(conf)) {
fallbackReasons += "Comet shuffle is not enabled"
}
if (op.offset != 0) {
fallbackReasons += "CollectLimit with non-zero offset is not supported"
}
if (fallbackReasons.nonEmpty) {
withInfos(op, fallbackReasons.toSet)
} else {
if (!isCometNative(op.child)) {
// no need to report a reason if the child is not native
op
Comment on lines +212 to +217

Contributor: hmmm is it possible that there are fallbackReasons as well as the child is not native at the same time? should we report that the child is not native then?

Contributor: 'Child is not native' reporting was removed because it led to lots of repetitive messages. The child that is not native would have already reported the reason (I think).

} else {
QueryPlanSerde
.operator2Proto(op)
.map { nativeOp =>
val cometOp =
CometCollectLimitExec(op, op.limit, op.offset, op.child)
CometSinkPlaceHolder(nativeOp, op, cometOp)
}
.getOrElse(op)
}
.getOrElse(op)
}

case op: ExpandExec =>
newPlanWithProto(
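The review thread above asks what happens when there are explicit fallback reasons and the child is also not native. In the new case, collected reasons win: they are attached before the child-nativeness check, and a non-native child alone produces no extra message because that child will already have reported its own reason. A minimal, self-contained sketch of that decision order, using hypothetical stand-ins (Plan, tagWithReasons, convertToComet) rather than Comet's real types, might look like this:

import scala.collection.mutable.ListBuffer

// Hypothetical stand-ins for Spark plan nodes and Comet helpers (not Comet's real API).
case class Plan(offset: Int, childIsNative: Boolean, infos: Set[String] = Set.empty)

def tagWithReasons(op: Plan, reasons: Set[String]): Plan = op.copy(infos = reasons)
def convertToComet(op: Plan): Option[Plan] = Some(op) // real conversion can fail and fall back

def transform(op: Plan, collectLimitEnabled: Boolean, shuffleEnabled: Boolean): Plan = {
  val fallbackReasons = new ListBuffer[String]()
  if (!collectLimitEnabled) fallbackReasons += "spark.comet.exec.collectLimit.enabled is false"
  if (!shuffleEnabled) fallbackReasons += "Comet shuffle is not enabled"
  if (op.offset != 0) fallbackReasons += "CollectLimit with non-zero offset is not supported"

  if (fallbackReasons.nonEmpty) {
    // Explicit reasons are reported even when the child is not native.
    tagWithReasons(op, fallbackReasons.toSet)
  } else if (!op.childIsNative) {
    // No explicit reason and a non-native child: return the Spark operator silently,
    // since the non-native child has already reported why it fell back.
    op
  } else {
    convertToComet(op).getOrElse(op)
  }
}

// Non-zero offset plus a non-native child: only the offset reason is attached here.
println(transform(Plan(offset = 3, childIsNative = false), collectLimitEnabled = true, shuffleEnabled = true).infos)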
32 changes: 31 additions & 1 deletion spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
@@ -1885,7 +1885,37 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
+ "where A.c1 = B.c1 ",
Set(
"Comet shuffle is not enabled: spark.comet.exec.shuffle.enabled is not enabled",
"make_interval is not supported")))
"make_interval is not supported")),
(
s"select * from $table LIMIT 10 OFFSET 3",
Set(
"Comet shuffle is not enabled",
"CollectLimit with non-zero offset is not supported")))
.foreach(test => {
val qry = test._1
val expected = test._2
val df = sql(qry)
df.collect() // force an execution
checkSparkAnswerAndCompareExplainPlan(df, expected)
})
}
}
}

test("explain: CollectLimit disabled") {
withSQLConf(
CometConf.COMET_ENABLED.key -> "true",
CometConf.COMET_EXEC_ENABLED.key -> "true",
CometConf.COMET_EXEC_COLLECT_LIMIT_ENABLED.key -> "false",
EXTENDED_EXPLAIN_PROVIDERS_KEY -> "org.apache.comet.ExtendedExplainInfo") {
val table = "test"
withTable(table) {
sql(s"create table $table(c0 int, c1 int , c2 float) using parquet")
sql(s"insert into $table values(0, 1, 100.000001)")
Seq(
(
s"select * from $table LIMIT 10",
Set("spark.comet.exec.collectLimit.enabled is false")))
.foreach(test => {
val qry = test._1
val expected = test._2