Skip to content

Commit e56d8e5

Browse files
committed
test: remove "Comet (Scan)" cases from microbenchmarks
The "Comet (Scan)" case set COMET_ENABLED=true with COMET_EXEC_ENABLED=false, intending to isolate scan performance. However, with spark.comet.scan.impl=auto (the default), CometScanRule.nativeDataFusionScan refuses to install when exec is disabled, so the case actually measured the native_iceberg_compat scan plus Spark's ColumnarToRow. Comparing it against the other Comet case (which uses native_datafusion plus CometNativeColumnarToRow) therefore made the result a proxy for the choice of scan implementation rather than the intended isolation of scan performance. Drop the "Comet (Scan)" cases everywhere, rename the combined case to "Comet", and update the doc comments accordingly.
1 parent 6f50ccf commit e56d8e5

5 files changed

Lines changed: 12 additions & 55 deletions

File tree

spark/src/test/scala/org/apache/spark/sql/benchmark/CometBenchmarkBase.scala

Lines changed: 4 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -97,8 +97,8 @@ trait CometBenchmarkBase
9797
}
9898

9999
/**
100-
* Runs an expression benchmark with standard cases: Spark, Comet (Scan), Comet (Scan + Exec).
101-
* This provides a consistent benchmark structure for expression evaluation.
100+
* Runs an expression benchmark with standard cases: Spark, Comet. This provides a consistent
101+
* benchmark structure for expression evaluation.
102102
*
103103
* @param name
104104
* Benchmark name
@@ -107,7 +107,7 @@ trait CometBenchmarkBase
107107
* @param query
108108
* SQL query to benchmark
109109
* @param extraCometConfigs
110-
* Additional configurations to apply for Comet cases (optional)
110+
* Additional configurations to apply for the Comet case (optional)
111111
*/
112112
final def runExpressionBenchmark(
113113
name: String,
@@ -122,14 +122,6 @@ trait CometBenchmarkBase
122122
}
123123
}
124124

125-
benchmark.addCase("Comet (Scan)") { _ =>
126-
withSQLConf(
127-
CometConf.COMET_ENABLED.key -> "true",
128-
CometConf.COMET_EXEC_ENABLED.key -> "false") {
129-
spark.sql(query).noop()
130-
}
131-
}
132-
133125
val cometExecConfigs = Map(
134126
CometConf.COMET_ENABLED.key -> "true",
135127
CometConf.COMET_EXEC_ENABLED.key -> "true",
@@ -158,7 +150,7 @@ trait CometBenchmarkBase
158150
}
159151
}
160152

161-
benchmark.addCase("Comet (Scan + Exec)") { _ =>
153+
benchmark.addCase("Comet") { _ =>
162154
withSQLConf(cometExecConfigs.toSeq: _*) {
163155
spark.sql(query).noop()
164156
}

spark/src/test/scala/org/apache/spark/sql/benchmark/CometCsvExpressionBenchmark.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ import org.apache.comet.CometConf
3131
* @param query
3232
* SQL query to benchmark
3333
* @param extraCometConfigs
34-
* Additional Comet configurations for the scan+exec case
34+
* Additional Comet configurations for the Comet case
3535
*/
3636
case class CsvExprConfig(
3737
name: String,

spark/src/test/scala/org/apache/spark/sql/benchmark/CometExecBenchmark.scala

Lines changed: 5 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -84,13 +84,7 @@ object CometExecBenchmark extends CometBenchmarkBase {
8484
spark.sql("select c2 + 1, c1 + 2 from parquetV1Table where c1 + 1 > 0").noop()
8585
}
8686

87-
benchmark.addCase("SQL Parquet - Comet (Scan)") { _ =>
88-
withSQLConf(CometConf.COMET_ENABLED.key -> "true") {
89-
spark.sql("select c2 + 1, c1 + 2 from parquetV1Table where c1 + 1 > 0").noop()
90-
}
91-
}
92-
93-
benchmark.addCase("SQL Parquet - Comet (Scan, Exec)") { _ =>
87+
benchmark.addCase("SQL Parquet - Comet") { _ =>
9488
withSQLConf(
9589
CometConf.COMET_ENABLED.key -> "true",
9690
CometConf.COMET_EXEC_ENABLED.key -> "true") {
@@ -128,15 +122,7 @@ object CometExecBenchmark extends CometBenchmarkBase {
128122
"col2, col3 FROM parquetV1Table")
129123
}
130124

131-
benchmark.addCase("SQL Parquet - Comet (Scan)") { _ =>
132-
withSQLConf(CometConf.COMET_ENABLED.key -> "true") {
133-
spark.sql(
134-
"SELECT (SELECT max(col1) AS parquetV1Table FROM parquetV1Table) AS a, " +
135-
"col2, col3 FROM parquetV1Table")
136-
}
137-
}
138-
139-
benchmark.addCase("SQL Parquet - Comet (Scan, Exec)") { _ =>
125+
benchmark.addCase("SQL Parquet - Comet") { _ =>
140126
withSQLConf(
141127
CometConf.COMET_ENABLED.key -> "true",
142128
CometConf.COMET_EXEC_ENABLED.key -> "true",
@@ -164,13 +150,7 @@ object CometExecBenchmark extends CometBenchmarkBase {
164150
spark.sql("select * from parquetV1Table").sortWithinPartitions("value").noop()
165151
}
166152

167-
benchmark.addCase("SQL Parquet - Comet (Scan)") { _ =>
168-
withSQLConf(CometConf.COMET_ENABLED.key -> "true") {
169-
spark.sql("select * from parquetV1Table").sortWithinPartitions("value").noop()
170-
}
171-
}
172-
173-
benchmark.addCase("SQL Parquet - Comet (Scan, Exec)") { _ =>
153+
benchmark.addCase("SQL Parquet - Comet") { _ =>
174154
withSQLConf(
175155
CometConf.COMET_ENABLED.key -> "true",
176156
CometConf.COMET_EXEC_ENABLED.key -> "true") {
@@ -199,16 +179,7 @@ object CometExecBenchmark extends CometBenchmarkBase {
199179
.noop()
200180
}
201181

202-
benchmark.addCase("SQL Parquet - Comet (Scan)") { _ =>
203-
withSQLConf(CometConf.COMET_ENABLED.key -> "true") {
204-
spark
205-
.sql("SELECT col1, col2, SUM(col3) FROM parquetV1Table " +
206-
"GROUP BY col1, col2 GROUPING SETS ((col1), (col2))")
207-
.noop()
208-
}
209-
}
210-
211-
benchmark.addCase("SQL Parquet - Comet (Scan, Exec)") { _ =>
182+
benchmark.addCase("SQL Parquet - Comet") { _ =>
212183
withSQLConf(
213184
CometConf.COMET_ENABLED.key -> "true",
214185
CometConf.COMET_EXEC_ENABLED.key -> "true") {
@@ -251,13 +222,7 @@ object CometExecBenchmark extends CometBenchmarkBase {
251222
spark.sql(query).noop()
252223
}
253224

254-
benchmark.addCase("SQL Parquet - Comet (Scan) (BloomFilterAgg)") { _ =>
255-
withSQLConf(CometConf.COMET_ENABLED.key -> "true") {
256-
spark.sql(query).noop()
257-
}
258-
}
259-
260-
benchmark.addCase("SQL Parquet - Comet (Scan, Exec) (BloomFilterAgg)") { _ =>
225+
benchmark.addCase("SQL Parquet - Comet (BloomFilterAgg)") { _ =>
261226
withSQLConf(
262227
CometConf.COMET_ENABLED.key -> "true",
263228
CometConf.COMET_EXEC_ENABLED.key -> "true") {

spark/src/test/scala/org/apache/spark/sql/benchmark/CometJsonExpressionBenchmark.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ import org.apache.comet.CometConf
3232
* @param query
3333
* SQL query to benchmark
3434
* @param extraCometConfigs
35-
* Additional Comet configurations for the scan+exec case
35+
* Additional Comet configurations for the Comet case
3636
*/
3737
case class JsonExprConfig(
3838
name: String,

spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ import org.apache.comet.CometConf
2828
* @param query
2929
* SQL query to benchmark
3030
* @param extraCometConfigs
31-
* Additional Comet configurations for the scan+exec case
31+
* Additional Comet configurations for the Comet case
3232
*/
3333
case class StringExprConfig(
3434
name: String,

0 commit comments

Comments
 (0)