Skip to content

Commit 5b7421b

Browse files
committed
fix test failures
1 parent f2d6cff commit 5b7421b

3 files changed

Lines changed: 68 additions & 26 deletions

File tree

spark/src/main/scala/org/apache/comet/planner/CometPlanner.scala

Lines changed: 49 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ case class CometPlanner(session: SparkSession) extends Rule[SparkPlan] with Logg
8989
// Phase 1 must run before BroadcastConsumerIndex.build: Phase 1's generic-exec prediction
9090
// reads children's LIKELY_COMET tags (post-order walk), and the index reads BHJ tags to
9191
// decide which broadcasts have a Comet consumer.
92-
val annotated1 = phase1LikelyComet(
92+
val annotated1: SparkPlan = phase1LikelyComet(
9393
prepared,
9494
PlanningContext(
9595
session = session,
@@ -107,8 +107,9 @@ case class CometPlanner(session: SparkSession) extends Rule[SparkPlan] with Logg
107107

108108
val annotated2 = phase2Decision(annotated1, context)
109109
val emitted = phase3Emit(annotated2, context)
110-
val reverted = revertOrphanedBroadcasts(emitted)
111-
val cleaned = cleanupLogicalLinks(reverted)
110+
val broadcastsReverted = revertBroadcastsWithoutCometConsumer(emitted)
111+
val shufflesReverted = revertRedundantColumnarShuffle(broadcastsReverted)
112+
val cleaned = cleanupLogicalLinks(shufflesReverted)
112113
val blocked = convertBlocks(cleaned)
113114
val finalPlan = postPass(blocked, context)
114115

@@ -179,7 +180,7 @@ case class CometPlanner(session: SparkSession) extends Rule[SparkPlan] with Logg
179180
* Shuffle doesn't need the equivalent revert because a Spark parent with a Comet columnar
180181
* shuffle child is handled naturally by Spark's transition insertion.
181182
*/
182-
private def revertOrphanedBroadcasts(plan: SparkPlan): SparkPlan = {
183+
private def revertBroadcastsWithoutCometConsumer(plan: SparkPlan): SparkPlan = {
183184
if (CometConf.COMET_EXEC_BROADCAST_FORCE_ENABLED.get()) {
184185
return plan
185186
}
@@ -199,6 +200,50 @@ case class CometPlanner(session: SparkSession) extends Rule[SparkPlan] with Logg
199200
out
200201
}
201202

203+
/**
204+
* Revert a `CometShuffleExchangeExec` with `CometColumnarShuffle` whose parent and child are
205+
* both non-Comet `HashAggregateExec` / `ObjectHashAggregateExec` back to the original Spark
206+
* `ShuffleExchangeExec`. Mirrors the legacy `revertRedundantColumnarShuffle` (PR #4010): the
207+
* partial-final-aggregate pattern where both aggregates fall back to Spark would otherwise keep
208+
* a columnar shuffle between them, adding row->arrow->shuffle->arrow->row conversion with no
209+
* Comet consumer on either side.
210+
*
211+
* Phase 1's optimistic-true prediction for shuffles allows the legitimate
212+
* `Sort-over-Spark-leaf` pattern to convert (the shuffle does row->arrow at exchange time). The
213+
* same optimism produces the redundant pattern when both ends remain Spark, which this pass
214+
* cleans up. Narrow match on aggregate-shuffle-aggregate keeps the intervention surgical; other
215+
* Spark-Comet-Spark sandwiches are handled by `revertBroadcastsWithoutCometConsumer` or Spark's
216+
* transition insertion.
217+
*/
218+
private def revertRedundantColumnarShuffle(plan: SparkPlan): SparkPlan = {
219+
def isAggregate(p: SparkPlan): Boolean =
220+
p.isInstanceOf[org.apache.spark.sql.execution.aggregate.HashAggregateExec] ||
221+
p.isInstanceOf[org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec]
222+
223+
def isRedundantShuffle(child: SparkPlan): Boolean = child match {
224+
case s: CometShuffleExchangeExec =>
225+
s.shuffleType == org.apache.spark.sql.comet.execution.shuffle.CometColumnarShuffle &&
226+
isAggregate(s.child)
227+
case _ => false
228+
}
229+
230+
var reverted = 0
231+
val out = plan.transform {
232+
case op if isAggregate(op) && op.children.exists(isRedundantShuffle) =>
233+
val newChildren = op.children.map {
234+
case s: CometShuffleExchangeExec
235+
if s.shuffleType == org.apache.spark.sql.comet.execution.shuffle.CometColumnarShuffle
236+
&& isAggregate(s.child) =>
237+
reverted += 1
238+
s.originalPlan.withNewChildren(Seq(s.child))
239+
case other => other
240+
}
241+
op.withNewChildren(newChildren)
242+
}
243+
if (reverted > 0) logDebug(s"CometPlanner: reverted $reverted redundant columnar shuffles")
244+
out
245+
}
246+
202247
private def isNativeCompatible(node: SparkPlan): Boolean =
203248
node.isInstanceOf[CometNativeExec] || node.getTagValue(CometTags.NATIVE_OP).isDefined
204249

spark/src/main/scala/org/apache/comet/planner/phases/Phase1LikelyComet.scala

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -99,23 +99,18 @@ object Phase1LikelyComet extends Logging {
9999
case _: BatchScanExec =>
100100
CometConf.COMET_NATIVE_SCAN_ENABLED.get(conf)
101101

102-
case s: ShuffleExchangeExec =>
103-
// A shuffle's LIKELY_COMET reflects whether its data source can provide native input.
104-
// Optimistic-true broke upward propagation: a Spark partial HashAgg below a shuffle could
105-
// not flag the shuffle as non-likely, which made the final HashAgg above read a stale-true
106-
// child, which made Phase 2 see `parentLikely=true` on the shuffle and convert it. The
107-
// legacy rule lived with this and added `revertRedundantColumnarShuffle` (PR #4010) as a
108-
// post-pass; the planner avoids the conversion in the first place by predicting
109-
// accurately. Phase 3 still re-checks at emit time. S2C-eligible leaves count because
110-
// Phase 2 wraps them in `CometSparkToColumnarExec`.
111-
s.children.exists(c => childCanProvideNativeInput(c, conf))
102+
case _: ShuffleExchangeExec =>
103+
// Optimistic: shuffles can absorb non-native children via `CometColumnarShuffle` (the
104+
// shuffle's row->arrow conversion runs at exchange time). Tying the prediction to the
105+
// child here would prevent the legacy `Sort over Spark LocalTableScan via Comet shuffle`
106+
// shape from working. The redundant `HashAgg(JVM) -> CometShuffle -> HashAgg(JVM)` case
107+
// that this optimism would otherwise create is handled by `revertRedundantColumnarShuffle`
108+
// as a post-pass (mirrors PR #4010). Phase 3 re-checks at emit time.
109+
true
112110

113-
case b: BroadcastExchangeExec =>
114-
// Same shape as shuffle: a broadcast over Spark-only data cannot itself go native, so
115-
// Phase 1 must report that honestly for parent BHJs' children-OK checks to be correct.
116-
// Phase 2's BroadcastConsumerIndex still gates conversion on a downstream Comet BHJ
117-
// wanting this broadcast; this only changes whether the broadcast is a candidate at all.
118-
b.children.exists(c => childCanProvideNativeInput(c, conf))
111+
case _: BroadcastExchangeExec =>
112+
// Same as shuffle. Phase 3 re-checks at emit time.
113+
true
119114

120115
// AQE stage re-entry: a prior CometPlanner pass converted an exchange, AQE materialized it
121116
// and wrapped it in a query stage. Phase 3 re-emits the stage itself as a Comet-compatible

spark/src/test/scala/org/apache/comet/planner/CometPlannerSuite.scala

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,7 @@ class CometPlannerSuite extends CometTestBase {
274274

275275
// --- Demand-aware shuffle contract (translation of revert-pass tests) ----------------------
276276

277-
test("CometPlanner should not emit Comet shuffle between Spark aggregates") {
277+
test("CometPlanner should revert redundant Comet shuffle between Spark aggregates") {
278278
withTempView("test_data") {
279279
createTestDataFrame.createOrReplaceTempView("test_data")
280280

@@ -284,11 +284,13 @@ class CometPlannerSuite extends CometTestBase {
284284
assert(countOperators(sparkPlan, classOf[ShuffleExchangeExec]) == 1)
285285
assert(countOperators(sparkPlan, classOf[HashAggregateExec]) == 2)
286286

287-
// Disable partial aggregate so both aggregates fall back to Spark JVM. The planner's
288-
// Phase 2 demand-aware rule should emit Passthrough for the shuffle (selfLikely=true but
289-
// parent/child both have LIKELY_COMET=false), so the shuffle stays Spark with no revert
290-
// pass needed. Minimal reproducer for the redundant-shuffle pattern that PR #4010 fixed
291-
// for the legacy rule via revertRedundantColumnarShuffle.
287+
// Disable partial aggregate so both aggregates fall back to Spark JVM. Phase 1's
288+
// optimistic shuffle prediction lets Phase 2 speculatively convert the shuffle (needed
289+
// for the legitimate Sort-over-Spark-leaf shape). The post-Phase-3
290+
// `revertRedundantColumnarShuffle` pass then detects the resulting
291+
// `HashAgg(JVM) -> CometColumnarShuffle -> HashAgg(JVM)` pattern and reverts the shuffle
292+
// to plain Spark, avoiding the row->arrow->shuffle->arrow->row round-trip with no Comet
293+
// consumer on either side. Mirrors PR #4010.
292294
withSQLConf(
293295
CometConf.COMET_ENABLE_PARTIAL_HASH_AGGREGATE.key -> "false",
294296
CometConf.COMET_EXEC_LOCAL_TABLE_SCAN_ENABLED.key -> "true") {

0 commit comments

Comments
 (0)