Skip to content

Commit 1c66f44

Browse files
committed
refactor: rename regexp.useJVM boolean to regexp.engine enum (rust|java)
1 parent 23a9e52 commit 1c66f44

4 files changed

Lines changed: 14 additions & 12 deletions

File tree

common/src/main/scala/org/apache/comet/CometConf.scala

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -383,17 +383,19 @@ object CometConf extends ShimCometConf {
383383
.booleanConf
384384
.createWithDefault(false)
385385

386-
val COMET_REGEXP_USE_JVM: ConfigEntry[Boolean] =
387-
conf("spark.comet.exec.regexp.useJVM")
386+
val COMET_REGEXP_ENGINE: ConfigEntry[String] =
387+
conf("spark.comet.exec.regexp.engine")
388388
.category(CATEGORY_EXEC)
389389
.doc(
390-
"Experimental. When true, route supported regular-expression expressions through a " +
391-
"JVM-side UDF (java.util.regex.Pattern) for Spark-compatible semantics, at the cost " +
392-
"of JNI roundtrips per batch. When false, falls back to whichever native or Spark " +
393-
"path the expression normally selects. Only RLike is routed today; additional " +
390+
"Experimental. Selects the engine used to evaluate supported regular-expression " +
391+
"expressions. `rust` uses the native DataFusion regexp engine. `java` routes through " +
392+
"a JVM-side UDF (java.util.regex.Pattern) for Spark-compatible semantics, at the " +
393+
"cost of JNI roundtrips per batch. Only RLike is routed today; additional " +
394394
"expressions may opt in over time.")
395-
.booleanConf
396-
.createWithDefault(false)
395+
.stringConf
396+
.transform(_.toLowerCase(Locale.ROOT))
397+
.checkValues(Set("rust", "java"))
398+
.createWithDefault("rust")
397399

398400
val COMET_EXEC_SHUFFLE_WITH_HASH_PARTITIONING_ENABLED: ConfigEntry[Boolean] =
399401
conf("spark.comet.native.shuffle.partitioning.hash.enabled")

spark/src/main/scala/org/apache/comet/serde/strings.scala

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -267,15 +267,15 @@ object CometRLike extends CometExpressionSerde[RLike] {
267267
"Uses Rust regexp engine, which has different behavior to Java regexp engine")
268268

269269
override def getSupportLevel(expr: RLike): SupportLevel = {
270-
if (CometConf.COMET_REGEXP_USE_JVM.get()) {
270+
if (CometConf.COMET_REGEXP_ENGINE.get() == "java") {
271271
Compatible(None)
272272
} else {
273273
super.getSupportLevel(expr)
274274
}
275275
}
276276

277277
override def convert(expr: RLike, inputs: Seq[Attribute], binding: Boolean): Option[Expr] = {
278-
if (CometConf.COMET_REGEXP_USE_JVM.get()) {
278+
if (CometConf.COMET_REGEXP_ENGINE.get() == "java") {
279279
convertViaJvmUdf(expr, inputs, binding)
280280
} else {
281281
convertViaNativeRegex(expr, inputs, binding)

spark/src/test/scala/org/apache/comet/CometRegExpJvmSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
2727
class CometRegExpJvmSuite extends CometTestBase with AdaptiveSparkPlanHelper {
2828

2929
override protected def sparkConf: SparkConf =
30-
super.sparkConf.set("spark.comet.exec.regexp.useJVM", "true")
30+
super.sparkConf.set("spark.comet.exec.regexp.engine", "java")
3131

3232
test("rlike: Java regex semantics, with flag on") {
3333
withTable("t") {

spark/src/test/scala/org/apache/spark/sql/benchmark/CometRegExpBenchmark.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -110,7 +110,7 @@ object CometRegExpBenchmark extends CometBenchmarkBase {
110110
}
111111

112112
benchmark.addCase("Comet (Exec, JVM regex)") { _ =>
113-
val configs = baseExec ++ Map(CometConf.COMET_REGEXP_USE_JVM.key -> "true")
113+
val configs = baseExec ++ Map(CometConf.COMET_REGEXP_ENGINE.key -> "java")
114114
withSQLConf(configs.toSeq: _*) {
115115
spark.sql(query).noop()
116116
}

0 commit comments

Comments
 (0)