Skip to content

Commit 76418c6

Browse files
committed
test: add CometRegExpBenchmark covering all rlike modes
1 parent e1b9b2a commit 76418c6

1 file changed

Lines changed: 121 additions & 0 deletions

File tree

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.spark.sql.benchmark
21+
22+
import org.apache.spark.benchmark.Benchmark
23+
24+
import org.apache.comet.CometConf
25+
26+
/**
 * Describes one `rlike` pattern exercised by the benchmark.
 *
 * @param name
 *   short label for the pattern; also used as the name of the benchmark that runs it
 * @param pattern
 *   the regex text supplied to `rlike`. It is spliced verbatim between single quotes into the
 *   generated SQL, so it must not itself contain a single quote.
 */
final case class RegExpPattern(name: String, pattern: String)
35+
36+
/**
 * Benchmark `rlike` across all execution modes:
 *   - Spark (Comet disabled)
 *   - Comet (Scan only)
 *   - Comet (Scan + Exec, native Rust regex)
 *   - Comet (Scan + Exec, JVM-side java.util.regex)
 *
 * To run:
 * {{{
 *   SPARK_GENERATE_BENCHMARK_FILES=1 \
 *     make benchmark-org.apache.spark.sql.benchmark.CometRegExpBenchmark
 * }}}
 *
 * Results land in `spark/benchmarks/CometRegExpBenchmark-**results.txt`.
 */
object CometRegExpBenchmark extends CometBenchmarkBase {

  // Patterns chosen to span common rlike shapes. Avoid Java-only constructs
  // that the native (Rust) path cannot accept, since those would be skipped
  // rather than benchmarked in the native case.
  //
  // NOTE(review): the input column is REPEAT(CAST(value AS STRING), 10). Presumably `value` is
  // numeric, in which case the strings contain no letters and the alternation, multi_class and
  // repetition patterns can never match -- confirm this is the intended workload.
  private val patterns = List(
    RegExpPattern("character_class", "[0-9]+"),
    RegExpPattern("anchored", "^[0-9]"),
    RegExpPattern("alternation", "abc|def|ghi"),
    RegExpPattern("multi_class", "[a-zA-Z][0-9]+"),
    RegExpPattern("repetition", "(ab){2,}"))

  /**
   * Writes the source table once, then runs one benchmark per entry in `patterns`, each
   * comparing the four execution modes on the same `rlike` query.
   *
   * @param mainArgs
   *   command-line arguments; not used by this benchmark
   */
  override def runCometBenchmark(mainArgs: Array[String]): Unit = {
    runBenchmarkWithTable("rlike modes", 1024) { v =>
      withTempPath { dir =>
        withTempTable("parquetV1Table") {
          prepareTable(
            dir,
            spark.sql(s"SELECT REPEAT(CAST(value AS STRING), 10) AS c1 FROM $tbl"))

          patterns.foreach { p =>
            // The pattern is embedded unescaped inside a single-quoted SQL literal.
            val query = s"select c1 rlike '${p.pattern}' from parquetV1Table"
            runBenchmark(p.name) {
              runRLikeModes(p.name, v, query)
            }
          }
        }
      }
    }
  }

  /**
   * Runs all four modes for a single rlike query.
   *
   * @param name
   *   title given to the underlying [[Benchmark]]
   * @param cardinality
   *   value count forwarded to the [[Benchmark]] constructor
   * @param query
   *   SQL text executed, unchanged, by every case
   */
  private def runRLikeModes(name: String, cardinality: Long, query: String): Unit = {
    val benchmark = new Benchmark(name, cardinality, output = output)

    // Registers one case that executes `query` with the given SQL confs applied.
    def addMode(label: String, confs: Seq[(String, String)]): Unit =
      benchmark.addCase(label) { _ =>
        withSQLConf(confs: _*) {
          spark.sql(query).noop()
        }
      }

    // Settings shared by both Scan + Exec cases.
    // NOTE(review): confirm that Spark honours the constantFolding.enabled key below; if it
    // does not, excluding the rule via spark.sql.optimizer.excludedRules may be what is wanted.
    val baseExec = Seq(
      CometConf.COMET_ENABLED.key -> "true",
      CometConf.COMET_EXEC_ENABLED.key -> "true",
      "spark.sql.optimizer.constantFolding.enabled" -> "false")

    addMode("Spark", Seq(CometConf.COMET_ENABLED.key -> "false"))

    addMode(
      "Comet (Scan)",
      Seq(
        CometConf.COMET_ENABLED.key -> "true",
        CometConf.COMET_EXEC_ENABLED.key -> "false"))

    // Pin the engine choice explicitly so this case cannot end up measuring the JVM engine
    // when the config's default or an ambient session setting selects it.
    addMode(
      "Comet (Exec, native Rust regex)",
      baseExec ++ Seq(
        CometConf.getExprAllowIncompatConfigKey("regexp") -> "true",
        CometConf.COMET_REGEXP_USE_JVM.key -> "false"))

    addMode(
      "Comet (Exec, JVM regex)",
      baseExec ++ Seq(CometConf.COMET_REGEXP_USE_JVM.key -> "true"))

    benchmark.run()
  }
}

0 commit comments

Comments
 (0)