Skip to content

Commit 9cb4927

Browse files
authored
feat: enable JVM Scala UDF codegen dispatch by default (#4514)
1 parent 9d79c67 commit 9cb4927

10 files changed

Lines changed: 224 additions & 27 deletions

File tree

dev/diffs/3.4.3.diff

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -497,7 +497,7 @@ index f33432ddb6f..b375e285dde 100644
497497
}
498498
assert(scanOption.isDefined)
499499
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
500-
index a6b295578d6..91acca4306f 100644
500+
index a6b295578d6..1167bbe6554 100644
501501
--- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
502502
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
503503
@@ -260,7 +260,8 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
@@ -925,7 +925,7 @@ index b5b34922694..a72403780c4 100644
925925
protected val baseResourcePath = {
926926
// use the same way as `SQLQueryTestSuite` to get the resource path
927927
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
928-
index 525d97e4998..f600e162da3 100644
928+
index 525d97e4998..aded8906d75 100644
929929
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
930930
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
931931
@@ -1508,7 +1508,8 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
@@ -938,7 +938,24 @@ index 525d97e4998..f600e162da3 100644
938938
AccumulatorSuite.verifyPeakExecutionMemorySet(sparkContext, "external sort") {
939939
sql("SELECT * FROM testData2 ORDER BY a ASC, b ASC").collect()
940940
}
941-
@@ -3730,7 +3731,8 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
941+
@@ -1960,8 +1961,15 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
942+
countAcc.add(1)
943+
x
944+
})
945+
+ // Comet's `CometProject` and `CometHashAggregate` do not implement Spark's cross-sibling
946+
+ // subexpression elimination over `ScalaUDF`, so each reference invokes the UDF body
947+
+ // separately. The other call sites in this test pass against Comet because the source
948+
+ // (`testData2`, a `LocalRelation`) is not Comet-scannable and the project runs on Spark's
949+
+ // path; the `agg` case routes through `CometHashAggregate` once an Exchange enters the plan.
950+
+ // Tracking issue: https://github.com/apache/datafusion-comet/issues/4516
951+
verifyCallCount(
952+
- df.agg(sum(testUdf($"b") + testUdf($"b") + testUdf($"b"))), Row(3.0), 1)
953+
+ df.agg(sum(testUdf($"b") + testUdf($"b") + testUdf($"b"))), Row(3.0),
954+
+ if (isCometEnabled) 3 else 1)
955+
956+
verifyCallCount(
957+
df.selectExpr("testUdf(a + 1) + testUdf(1 + a)", "testUdf(a + 1)"), Row(4, 2), 1)
958+
@@ -3730,7 +3738,8 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
942959
}
943960
}
944961

@@ -948,6 +965,36 @@ index 525d97e4998..f600e162da3 100644
948965
val sc = spark.sparkContext
949966
val hiveVersion = "2.3.9"
950967
// transitive=false, only download specified jar
968+
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
969+
index 2dabcf01be7..8fcec0d1ce4 100644
970+
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
971+
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
972+
@@ -491,8 +491,23 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper
973+
s"Schema did not match for query #$i\n${expected.sql}: $output") {
974+
output.schema
975+
}
976+
- assertResult(expected.output, s"Result did not match" +
977+
- s" for query #$i\n${expected.sql}") { output.output }
978+
+ // Comet may surface errors as `CometNativeException` instead of the matching Spark
979+
+ // exception class when a `ScalaUDF` dispatched into the native plan evaluates a
980+
+ // divide-by-zero (DataFusion wraps the typed error so the JNI bridge cannot downcast it).
981+
+ // Same category, different surface. Collapse both sides to a placeholder when this happens
982+
+ // so the literal compare passes.
983+
+ // Tracking issue: https://github.com/apache/datafusion-comet/issues/4517
984+
+ val (expectedOut, actualOut) = if (isCometEnabled &&
985+
+ expected.output.startsWith("org.apache.spark.SparkArithmeticException") &&
986+
+ expected.output.contains("\"DIVIDE_BY_ZERO\"") &&
987+
+ output.output.startsWith("org.apache.comet.CometNativeException") &&
988+
+ output.output.contains("DivideByZero")) {
989+
+ ("[DIVIDE_BY_ZERO]", "[DIVIDE_BY_ZERO]")
990+
+ } else {
991+
+ (expected.output, output.output)
992+
+ }
993+
+ assertResult(expectedOut, s"Result did not match" +
994+
+ s" for query #$i\n${expected.sql}") { actualOut }
995+
}
996+
}
997+
}
951998
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
952999
index 48ad10992c5..51d1ee65422 100644
9531000
--- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala

dev/diffs/3.5.8.diff

Lines changed: 50 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -932,7 +932,7 @@ index c26757c9cff..d55775f09d7 100644
932932
protected val baseResourcePath = {
933933
// use the same way as `SQLQueryTestSuite` to get the resource path
934934
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
935-
index 3cf2bfd17ab..a3effb1eeb8 100644
935+
index 3cf2bfd17ab..5bcf9478e9b 100644
936936
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
937937
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
938938
@@ -1521,7 +1521,8 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
@@ -945,7 +945,24 @@ index 3cf2bfd17ab..a3effb1eeb8 100644
945945
AccumulatorSuite.verifyPeakExecutionMemorySet(sparkContext, "external sort") {
946946
sql("SELECT * FROM testData2 ORDER BY a ASC, b ASC").collect()
947947
}
948-
@@ -3750,7 +3751,8 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
948+
@@ -1979,8 +1980,15 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
949+
countAcc.add(1)
950+
x
951+
})
952+
+ // Comet's `CometProject` and `CometHashAggregate` do not implement Spark's cross-sibling
953+
+ // subexpression elimination over `ScalaUDF`, so each reference invokes the UDF body
954+
+ // separately. The other call sites in this test pass against Comet because the source
955+
+ // (`testData2`, a `LocalRelation`) is not Comet-scannable and the project runs on Spark's
956+
+ // path; the `agg` case routes through `CometHashAggregate` once an Exchange enters the plan.
957+
+ // Tracking issue: https://github.com/apache/datafusion-comet/issues/4516
958+
verifyCallCount(
959+
- df.agg(sum(testUdf($"b") + testUdf($"b") + testUdf($"b"))), Row(3.0), 1)
960+
+ df.agg(sum(testUdf($"b") + testUdf($"b") + testUdf($"b"))), Row(3.0),
961+
+ if (isCometEnabled) 3 else 1)
962+
963+
verifyCallCount(
964+
df.selectExpr("testUdf(a + 1) + testUdf(1 + a)", "testUdf(a + 1)"), Row(4, 2), 1)
965+
@@ -3750,7 +3758,8 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
949966
}
950967
}
951968

@@ -955,6 +972,37 @@ index 3cf2bfd17ab..a3effb1eeb8 100644
955972
val sc = spark.sparkContext
956973
val hiveVersion = "2.3.9"
957974
// transitive=false, only download specified jar
975+
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
976+
index 71af1fd69c3..81a04c93c9c 100644
977+
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
978+
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
979+
@@ -872,9 +872,24 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper
980+
s"Schema did not match for query #$i\n${expected.sql}: $output") {
981+
output.schema
982+
}
983+
- assertResult(expected.output, s"Result did not match" +
984+
+ // Comet may surface errors as `CometNativeException` instead of the matching Spark
985+
+ // exception class when a `ScalaUDF` dispatched into the native plan evaluates a
986+
+ // divide-by-zero (DataFusion wraps the typed error so the JNI bridge cannot downcast it).
987+
+ // Same category, different surface. Collapse both sides to a placeholder when this happens
988+
+ // so the literal compare passes.
989+
+ // Tracking issue: https://github.com/apache/datafusion-comet/issues/4517
990+
+ val (expectedOut, actualOut) = if (isCometEnabled &&
991+
+ expected.output.startsWith("org.apache.spark.SparkArithmeticException") &&
992+
+ expected.output.contains("\"DIVIDE_BY_ZERO\"") &&
993+
+ output.output.startsWith("org.apache.comet.CometNativeException") &&
994+
+ output.output.contains("DivideByZero")) {
995+
+ ("[DIVIDE_BY_ZERO]", "[DIVIDE_BY_ZERO]")
996+
+ } else {
997+
+ (expected.output, output.output)
998+
+ }
999+
+ assertResult(expectedOut, s"Result did not match" +
1000+
s" for query #$i\n${expected.sql}") {
1001+
- output.output
1002+
+ actualOut
1003+
}
1004+
}
1005+
}
9581006
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
9591007
index 8b4ac474f87..3f79f20822f 100644
9601008
--- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala

dev/diffs/4.0.2.diff

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1079,7 +1079,7 @@ index ad424b3a7cc..4ece0117a34 100644
10791079
protected val baseResourcePath = {
10801080
// use the same way as `SQLQueryTestSuite` to get the resource path
10811081
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
1082-
index f294ff81021..7775027bcee 100644
1082+
index f294ff81021..02d72be8d29 100644
10831083
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
10841084
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
10851085
@@ -1524,7 +1524,8 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
@@ -1092,6 +1092,54 @@ index f294ff81021..7775027bcee 100644
10921092
AccumulatorSuite.verifyPeakExecutionMemorySet(sparkContext, "external sort") {
10931093
sql("SELECT * FROM testData2 ORDER BY a ASC, b ASC").collect()
10941094
}
1095+
@@ -1985,8 +1986,15 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
1096+
countAcc.add(1)
1097+
x
1098+
})
1099+
+ // Comet's `CometProject` and `CometHashAggregate` do not implement Spark's cross-sibling
1100+
+ // subexpression elimination over `ScalaUDF`, so each reference invokes the UDF body
1101+
+ // separately. The other call sites in this test pass against Comet because the source
1102+
+ // (`testData2`, a `LocalRelation`) is not Comet-scannable and the project runs on Spark's
1103+
+ // path; the `agg` case routes through `CometHashAggregate` once an Exchange enters the plan.
1104+
+ // Tracking issue: https://github.com/apache/datafusion-comet/issues/4516
1105+
verifyCallCount(
1106+
- df.agg(sum(testUdf($"b") + testUdf($"b") + testUdf($"b"))), Row(3.0), 1)
1107+
+ df.agg(sum(testUdf($"b") + testUdf($"b") + testUdf($"b"))), Row(3.0),
1108+
+ if (isCometEnabled) 3 else 1)
1109+
1110+
verifyCallCount(
1111+
df.selectExpr("testUdf(a + 1) + testUdf(1 + a)", "testUdf(a + 1)"), Row(4, 2), 1)
1112+
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
1113+
index 575a4ae69d1..129d9f27232 100644
1114+
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
1115+
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
1116+
@@ -679,9 +679,24 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper
1117+
s"Schema did not match for query #$i\n${expected.sql}: $output") {
1118+
output.schema
1119+
}
1120+
- assertResult(expected.output, s"Result did not match" +
1121+
+ // Comet may surface errors as `CometNativeException` instead of the matching Spark
1122+
+ // exception class when a `ScalaUDF` dispatched into the native plan evaluates a
1123+
+ // divide-by-zero (DataFusion wraps the typed error so the JNI bridge cannot downcast it).
1124+
+ // Same category, different surface. Collapse both sides to a placeholder when this happens
1125+
+ // so the literal compare passes.
1126+
+ // Tracking issue: https://github.com/apache/datafusion-comet/issues/4517
1127+
+ val (expectedOut, actualOut) = if (isCometEnabled &&
1128+
+ expected.output.startsWith("org.apache.spark.SparkArithmeticException") &&
1129+
+ expected.output.contains("\"DIVIDE_BY_ZERO\"") &&
1130+
+ output.output.startsWith("org.apache.comet.CometNativeException") &&
1131+
+ output.output.contains("DivideByZero")) {
1132+
+ ("[DIVIDE_BY_ZERO]", "[DIVIDE_BY_ZERO]")
1133+
+ } else {
1134+
+ (expected.output, output.output)
1135+
+ }
1136+
+ assertResult(expectedOut, s"Result did not match" +
1137+
s" for query #$i\n${expected.sql}") {
1138+
- output.output
1139+
+ actualOut
1140+
}
1141+
}
1142+
}
10951143
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
10961144
index c1c041509c3..7d463e4b85e 100644
10971145
--- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala

dev/diffs/4.1.1.diff

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1150,7 +1150,7 @@ index e4b5e10f7c3..c6efde09c8a 100644
11501150
protected val baseResourcePath = {
11511151
// use the same way as `SQLQueryTestSuite` to get the resource path
11521152
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
1153-
index 74cdee49e55..3decf393ed0 100644
1153+
index 74cdee49e55..f7452c9abb7 100644
11541154
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
11551155
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
11561156
@@ -1521,7 +1521,8 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
@@ -1163,8 +1163,25 @@ index 74cdee49e55..3decf393ed0 100644
11631163
AccumulatorSuite.verifyPeakExecutionMemorySet(sparkContext, "external sort") {
11641164
sql("SELECT * FROM testData2 ORDER BY a ASC, b ASC").collect()
11651165
}
1166+
@@ -1982,8 +1983,15 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
1167+
countAcc.add(1)
1168+
x
1169+
})
1170+
+ // Comet's `CometProject` and `CometHashAggregate` do not implement Spark's cross-sibling
1171+
+ // subexpression elimination over `ScalaUDF`, so each reference invokes the UDF body
1172+
+ // separately. The other call sites in this test pass against Comet because the source
1173+
+ // (`testData2`, a `LocalRelation`) is not Comet-scannable and the project runs on Spark's
1174+
+ // path; the `agg` case routes through `CometHashAggregate` once an Exchange enters the plan.
1175+
+ // Tracking issue: https://github.com/apache/datafusion-comet/issues/4516
1176+
verifyCallCount(
1177+
- df.agg(sum(testUdf($"b") + testUdf($"b") + testUdf($"b"))), Row(3.0), 1)
1178+
+ df.agg(sum(testUdf($"b") + testUdf($"b") + testUdf($"b"))), Row(3.0),
1179+
+ if (isCometEnabled) 3 else 1)
1180+
1181+
verifyCallCount(
1182+
df.selectExpr("testUdf(a + 1) + testUdf(1 + a)", "testUdf(a + 1)"), Row(4, 2), 1)
11661183
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
1167-
index 23f0144dcec..df845f7295a 100644
1184+
index 23f0144dcec..2586d93d630 100644
11681185
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
11691186
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
11701187
@@ -166,7 +166,16 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper
@@ -1185,6 +1202,33 @@ index 23f0144dcec..df845f7295a 100644
11851202
) ++ otherIgnoreList
11861203
/** List of test cases that require TPCDS table schemas to be loaded. */
11871204
private def requireTPCDSCases: Seq[String] = Seq("pipe-operators.sql")
1205+
@@ -682,9 +691,24 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper
1206+
s"Schema did not match for query #$i\n${expected.sql}: $output") {
1207+
output.schema
1208+
}
1209+
- assertResult(expected.output, s"Result did not match" +
1210+
+ // Comet may surface errors as `CometNativeException` instead of the matching Spark
1211+
+ // exception class when a `ScalaUDF` dispatched into the native plan evaluates a
1212+
+ // divide-by-zero (DataFusion wraps the typed error so the JNI bridge cannot downcast it).
1213+
+ // Same category, different surface. Collapse both sides to a placeholder when this happens
1214+
+ // so the literal compare passes.
1215+
+ // Tracking issue: https://github.com/apache/datafusion-comet/issues/4517
1216+
+ val (expectedOut, actualOut) = if (isCometEnabled &&
1217+
+ expected.output.startsWith("org.apache.spark.SparkArithmeticException") &&
1218+
+ expected.output.contains("\"DIVIDE_BY_ZERO\"") &&
1219+
+ output.output.startsWith("org.apache.comet.CometNativeException") &&
1220+
+ output.output.contains("DivideByZero")) {
1221+
+ ("[DIVIDE_BY_ZERO]", "[DIVIDE_BY_ZERO]")
1222+
+ } else {
1223+
+ (expected.output, output.output)
1224+
+ }
1225+
+ assertResult(expectedOut, s"Result did not match" +
1226+
s" for query #$i\n${expected.sql}") {
1227+
- output.output
1228+
+ actualOut
1229+
}
1230+
}
1231+
}
11881232
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
11891233
index 66826a9ca76..ab4265a5fb9 100644
11901234
--- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala

docs/source/user-guide/latest/iceberg.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -157,12 +157,12 @@ Iceberg ships several `ScalaUDF`s that surface in user queries and maintenance a
157157
(`INT_ORDERED_BYTES`, `LONG_ORDERED_BYTES`, ..., `INTERLEAVE_BYTES`) over the sort key columns
158158
during compaction.
159159

160-
By default these UDFs cause the enclosing operator to fall back to Spark, which forces a
161-
columnar-to-row roundtrip and demotes the surrounding shuffle from `CometExchange` to
162-
`CometColumnarExchange`. Enabling the experimental
163-
[Scala UDF and Java UDF Support](scala_java_udfs.md) feature
164-
(`spark.comet.exec.scalaUDF.codegen.enabled=true`) routes these UDFs through native execution so
165-
the project, exchange, and sort operators around them stay on the Comet path end-to-end.
160+
[Scala UDF and Java UDF Support](scala_java_udfs.md) is enabled by default
161+
(`spark.comet.exec.scalaUDF.codegen.enabled=true`), so these UDFs run through native execution and
162+
the project, exchange, and sort operators around them stay on the Comet path end-to-end. Setting
163+
`spark.comet.exec.scalaUDF.codegen.enabled=false` causes the enclosing operator to fall back to
164+
Spark, which forces a columnar-to-row roundtrip and demotes the surrounding shuffle from
165+
`CometExchange` to `CometColumnarExchange`.
166166

167167
### Task input metrics
168168

docs/source/user-guide/latest/scala_java_udfs.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,13 @@ Comet executes Spark's Scala and Java [scalar user-defined functions (UDFs)](htt
2323

2424
This page covers Spark's `ScalaUDF` (Scala `udf(...)`, `spark.udf.register(...)` over Scala or Java functional interfaces, and SQL `CREATE FUNCTION ... AS 'com.example.MyUDF'`). Other UDF kinds (Python / Pandas, Hive, aggregate) are out of scope and continue to fall back to Spark.
2525

26-
This feature is experimental and disabled by default.
26+
This feature is enabled by default. Set `spark.comet.exec.scalaUDF.codegen.enabled` to `false` to route plans containing a `ScalaUDF` back to Spark for the enclosing operator.
2727

2828
## Configuration
2929

3030
| Key | Default | Description |
3131
| ------------------------------------------- | ------- | ------------------------------------------------------------------------------------------------------------------ |
32-
| `spark.comet.exec.scalaUDF.codegen.enabled` | `false` | When `true`, eligible `ScalaUDF`s run on the Comet path. When `false`, the enclosing operator falls back to Spark. |
32+
| `spark.comet.exec.scalaUDF.codegen.enabled` | `true` | When `true`, eligible `ScalaUDF`s run on the Comet path. When `false`, the enclosing operator falls back to Spark. |
3333

3434
## Supported
3535

0 commit comments

Comments
 (0)