apache
diff --git a/.github/workflows/pr_benchmark_check.yml b/.github/workflows/pr_benchmark_check.yml
Lines changed: 5 additions & 1 deletion
diff --git a/common/src/main/java/org/apache/comet/udf/CometUdfBridge.java b/common/src/main/java/org/apache/comet/udf/CometUdfBridge.java
Lines changed: 22 additions & 36 deletions
diff --git a/common/src/main/scala/org/apache/comet/CometConf.scala b/common/src/main/scala/org/apache/comet/CometConf.scala
Lines changed: 3 additions & 6 deletions
diff --git a/common/src/main/scala/org/apache/comet/udf/CometUDF.scala b/common/src/main/scala/org/apache/comet/udf/CometUDF.scala
Lines changed: 2 additions & 2 deletions
diff --git a/dev/diffs/3.4.3.diff b/dev/diffs/3.4.3.diff
Lines changed: 18 additions & 60 deletions
diff --git a/dev/diffs/3.5.8.diff b/dev/diffs/3.5.8.diff
Lines changed: 18 additions & 60 deletions
@@ -84,5 +84,9 @@ jobs:
             ${{ runner.os }}-benchmark-maven-
 
       - name: Check Scala compilation and linting
+        # Pin to spark-4.0 (Scala 2.13.16) because the default profile is now
+        # spark-4.1 / Scala 2.13.17, and semanticdb-scalac_2.13.17 is not yet
+        # published, which breaks `-Psemanticdb`. See pr_build_linux.yml for
+        # the same exclusion in the main lint matrix.
         run: |
-          ./mvnw -B compile test-compile scalafix:scalafix -Dscalafix.mode=CHECK -Psemanticdb -DskipTests
+          ./mvnw -B compile test-compile scalafix:scalafix -Dscalafix.mode=CHECK -Psemanticdb -Pspark-4.0 -DskipTests
@@ -19,8 +19,7 @@
 
 package org.apache.comet.udf;
 
-import java.util.LinkedHashMap;
-import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.arrow.c.ArrowArray;
 import org.apache.arrow.c.ArrowSchema;
@@ -36,23 +35,10 @@
  */
 public class CometUdfBridge {
 
-  // Per-thread, bounded LRU of UDF instances keyed by class name. Comet
-  // native execution threads (Tokio/DataFusion worker pool) are reused
-  // across tasks within an executor, so the effective lifetime of cached
-  // entries is the worker thread (i.e. the executor JVM). This is fine for
-  // stateless UDFs like ArrayExistsUDF; future stateful UDFs would need
-  // explicit per-task isolation.
-  private static final int CACHE_CAPACITY = 64;
-
-  private static final ThreadLocal<LinkedHashMap<String, CometUDF>> INSTANCES =
-      ThreadLocal.withInitial(
-          () ->
-              new LinkedHashMap<String, CometUDF>(CACHE_CAPACITY, 0.75f, true) {
-                @Override
-                protected boolean removeEldestEntry(Map.Entry<String, CometUDF> eldest) {
-                  return size() > CACHE_CAPACITY;
-                }
-              });
+  // Process-wide cache of UDF instances keyed by class name. CometUDF
+  // implementations are required to be stateless (see CometUDF), so a
+  // single shared instance per class is safe across native worker threads.
+  private static final ConcurrentHashMap<String, CometUDF> INSTANCES = new ConcurrentHashMap<>();
 
   /**
    * Called from native via JNI.
@@ -69,23 +55,23 @@ public static void evaluate(
       long[] inputSchemaPtrs,
       long outArrayPtr,
       long outSchemaPtr) {
-    LinkedHashMap<String, CometUDF> cache = INSTANCES.get();
-    CometUDF udf = cache.get(udfClassName);
-    if (udf == null) {
-      try {
-        // Resolve via the executor's context classloader so user-supplied UDF jars
-        // (added via spark.jars / --jars) are visible.
-        ClassLoader cl = Thread.currentThread().getContextClassLoader();
-        if (cl == null) {
-          cl = CometUdfBridge.class.getClassLoader();
-        }
-        udf =
-            (CometUDF) Class.forName(udfClassName, true, cl).getDeclaredConstructor().newInstance();
-      } catch (ReflectiveOperationException e) {
-        throw new RuntimeException("Failed to instantiate CometUDF: " + udfClassName, e);
-      }
-      cache.put(udfClassName, udf);
-    }
+    CometUDF udf =
+        INSTANCES.computeIfAbsent(
+            udfClassName,
+            name -> {
+              try {
+                // Resolve via the executor's context classloader so user-supplied UDF jars
+                // (added via spark.jars / --jars) are visible.
+                ClassLoader cl = Thread.currentThread().getContextClassLoader();
+                if (cl == null) {
+                  cl = CometUdfBridge.class.getClassLoader();
+                }
+                return (CometUDF)
+                    Class.forName(name, true, cl).getDeclaredConstructor().newInstance();
+              } catch (ReflectiveOperationException e) {
+                throw new RuntimeException("Failed to instantiate CometUDF: " + name, e);
+              }
+            });
 
     BufferAllocator allocator = org.apache.comet.package$.MODULE$.CometArrowAllocator();
 
 
@@ -94,12 +94,9 @@ object CometConf extends ShimCometConf {
     .createWithEnvVarOrDefault("ENABLE_COMET", true)
 
   val COMET_NATIVE_SCAN_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.scan.enabled")
-    .category(CATEGORY_SCAN)
-    .doc(
-      "Whether to enable native scans. When this is turned on, Spark will use Comet to " +
-        "read supported data sources (currently only Parquet is supported natively). Note " +
-        "that to enable native vectorized execution, both this config and " +
-        "`spark.comet.exec.enabled` need to be enabled.")
+    .category(CATEGORY_TESTING)
+    .doc("Whether to enable native scans. Intended for use in Comet's own test suites to " +
+      "selectively disable native scans; not intended for production use.")
     .booleanConf
     .createWithDefault(true)
 
 
@@ -30,8 +30,8 @@ import org.apache.spark.sql.types.DataType
  *   - Scalar (literal-folded) arguments arrive as length-1 vectors and must be read at index 0.
  *   - The returned vector's length must match the longest input.
  *
- * Implementations must have a public no-arg constructor and should be stateless: instances are
- * cached per executor thread for the lifetime of the JVM.
+ * Implementations must have a public no-arg constructor and must be stateless: a single instance
+ * per class is cached and shared across native worker threads for the lifetime of the JVM.
  */
 trait CometUDF {
 
 
@@ -1998,9 +1998,9 @@ index 104b4e416cd..b8af360fa14 100644
            // Note that, if record level filtering is enabled, it should be a single record.
            // If no filter is pushed down to Parquet, it should be the total length of data.
 -          assert(actual > 1 && actual < data.length)
-+          // Only enable Comet test iff it's scan only, since with native execution
++          // Skip when Comet is enabled since with native execution
 +          // `stripSparkFilter` can't remove the native filter
-+          if (!isCometEnabled || isCometScanOnly) {
++          if (!isCometEnabled) {
 +            assert(actual > 1 && actual < data.length)
 +          }
          }
@@ -2011,9 +2011,9 @@ index 104b4e416cd..b8af360fa14 100644
          // Note that, if record level filtering is enabled, it should be a single record.
          // If no filter is pushed down to Parquet, it should be the total length of data.
 -        assert(actual > 1 && actual < data.length)
-+        // Only enable Comet test iff it's scan only, since with native execution
++        // Skip when Comet is enabled since with native execution
 +        // `stripSparkFilter` can't remove the native filter
-+        if (!isCometEnabled || isCometScanOnly) {
++        if (!isCometEnabled) {
 +          assert(actual > 1 && actual < data.length)
 +        }
        }
@@ -2926,32 +2926,14 @@ index dd55fcfe42c..99bc018008a 100644
      if (testTags.exists(_.isInstanceOf[DisableAdaptiveExecution])) {
        super.test(testName, testTags: _*) {
          withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") {
-@@ -242,6 +272,29 @@ private[sql] trait SQLTestUtilsBase
+@@ -242,6 +272,11 @@ private[sql] trait SQLTestUtilsBase
      protected override def _sqlContext: SQLContext = self.spark.sqlContext
    }
 
 +  /**
 +   * Whether Comet extension is enabled
 +   */
 +  protected def isCometEnabled: Boolean = SparkSession.isCometEnabled
-+
-+  /**
-+   * Whether to enable ansi mode This is only effective when
-+   * [[isCometEnabled]] returns true.
-+   */
-+  protected def enableCometAnsiMode: Boolean = {
-+    val v = System.getenv("ENABLE_COMET_ANSI_MODE")
-+    v != null && v.toBoolean
-+  }
-+
-+  /**
-+   * Whether Spark should only apply Comet scan optimization. This is only effective when
-+   * [[isCometEnabled]] returns true.
-+   */
-+  protected def isCometScanOnly: Boolean = {
-+    val v = System.getenv("ENABLE_COMET_SCAN_ONLY")
-+    v != null && v.toBoolean
-+  }
 +
    protected override def withSQLConf(pairs: (String, String)*)(f: => Unit): Unit = {
      SparkSession.setActiveSession(spark)
@@ -2969,7 +2951,7 @@ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSessio
 index ed2e309fa07..a5ea58146ad 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
-@@ -74,6 +74,31 @@ trait SharedSparkSessionBase
+@@ -74,6 +74,20 @@ trait SharedSparkSessionBase
        // this rule may potentially block testing of other optimization rules such as
        // ConstantPropagation etc.
        .set(SQLConf.OPTIMIZER_EXCLUDED_RULES.key, ConvertToLocalRelation.ruleName)
@@ -2980,23 +2962,12 @@ index ed2e309fa07..a5ea58146ad 100644
 +        .set("spark.comet.enabled", "true")
 +        .set("spark.comet.parquet.respectFilterPushdown", "true")
 +
-+      if (!isCometScanOnly) {
-+        conf
-+          .set("spark.comet.exec.enabled", "true")
-+          .set("spark.shuffle.manager",
-+            "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager")
-+          .set("spark.comet.exec.shuffle.enabled", "true")
-+          .set("spark.comet.memoryOverhead", "10g")
-+      } else {
-+        conf
-+          .set("spark.comet.exec.enabled", "false")
-+          .set("spark.comet.exec.shuffle.enabled", "false")
-+      }
-+
-+      if (enableCometAnsiMode) {
-+        conf
-+          .set("spark.sql.ansi.enabled", "true")
-+      }
++      conf
++        .set("spark.comet.exec.enabled", "true")
++        .set("spark.shuffle.manager",
++          "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager")
++        .set("spark.comet.exec.shuffle.enabled", "true")
++        .set("spark.comet.memoryOverhead", "10g")
 +    }
      conf.set(
        StaticSQLConf.WAREHOUSE_PATH,
@@ -3093,7 +3064,7 @@ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.sca
 index 07361cfdce9..97dab2a3506 100644
 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala
 +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala
-@@ -55,25 +55,54 @@ object TestHive
+@@ -55,25 +55,41 @@ object TestHive
      new SparkContext(
        System.getProperty("spark.sql.test.master", "local[1]"),
        "TestSQLContext",
@@ -3140,24 +3111,11 @@ index 07361cfdce9..97dab2a3506 100644
 +            .set("spark.sql.extensions", "org.apache.comet.CometSparkSessionExtensions")
 +            .set("spark.comet.enabled", "true")
 +
-+          val v = System.getenv("ENABLE_COMET_SCAN_ONLY")
-+          if (v == null || !v.toBoolean) {
-+            conf
-+              .set("spark.comet.exec.enabled", "true")
-+              .set("spark.shuffle.manager",
-+                "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager")
-+              .set("spark.comet.exec.shuffle.enabled", "true")
-+          } else {
-+            conf
-+              .set("spark.comet.exec.enabled", "false")
-+              .set("spark.comet.exec.shuffle.enabled", "false")
-+          }
-+
-+          val a = System.getenv("ENABLE_COMET_ANSI_MODE")
-+          if (a != null && a.toBoolean) {
-+            conf
-+              .set("spark.sql.ansi.enabled", "true")
-+          }
++          conf
++            .set("spark.comet.exec.enabled", "true")
++            .set("spark.shuffle.manager",
++              "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager")
++            .set("spark.comet.exec.shuffle.enabled", "true")
 +        }
 
 +        conf
 
@@ -1979,9 +1979,9 @@ index 8e88049f51e..20d7ef7b1bc 100644
            // Note that, if record level filtering is enabled, it should be a single record.
            // If no filter is pushed down to Parquet, it should be the total length of data.
 -          assert(actual > 1 && actual < data.length)
-+          // Only enable Comet test iff it's scan only, since with native execution
++          // Skip when Comet is enabled since with native execution
 +          // `stripSparkFilter` can't remove the native filter
-+          if (!isCometEnabled || isCometScanOnly) {
++          if (!isCometEnabled) {
 +            assert(actual > 1 && actual < data.length)
 +          }
          }
@@ -1992,9 +1992,9 @@ index 8e88049f51e..20d7ef7b1bc 100644
          // Note that, if record level filtering is enabled, it should be a single record.
          // If no filter is pushed down to Parquet, it should be the total length of data.
 -        assert(actual > 1 && actual < data.length)
-+        // Only enable Comet test iff it's scan only, since with native execution
++        // Skip when Comet is enabled since with native execution
 +        // `stripSparkFilter` can't remove the native filter
-+        if (!isCometEnabled || isCometScanOnly) {
++        if (!isCometEnabled) {
 +          assert(actual > 1 && actual < data.length)
 +        }
        }
@@ -2878,32 +2878,14 @@ index e937173a590..7d20538bc68 100644
      if (testTags.exists(_.isInstanceOf[DisableAdaptiveExecution])) {
        super.test(testName, testTags: _*) {
          withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") {
-@@ -242,6 +272,29 @@ private[sql] trait SQLTestUtilsBase
+@@ -242,6 +272,11 @@ private[sql] trait SQLTestUtilsBase
      protected override def _sqlContext: SQLContext = self.spark.sqlContext
    }
 
 +  /**
 +   * Whether Comet extension is enabled
 +   */
 +  protected def isCometEnabled: Boolean = SparkSession.isCometEnabled
-+
-+  /**
-+   * Whether to enable ansi mode This is only effective when
-+   * [[isCometEnabled]] returns true.
-+   */
-+  protected def enableCometAnsiMode: Boolean = {
-+    val v = System.getenv("ENABLE_COMET_ANSI_MODE")
-+    v != null && v.toBoolean
-+  }
-+
-+  /**
-+   * Whether Spark should only apply Comet scan optimization. This is only effective when
-+   * [[isCometEnabled]] returns true.
-+   */
-+  protected def isCometScanOnly: Boolean = {
-+    val v = System.getenv("ENABLE_COMET_SCAN_ONLY")
-+    v != null && v.toBoolean
-+  }
 +
    protected override def withSQLConf(pairs: (String, String)*)(f: => Unit): Unit = {
      SparkSession.setActiveSession(spark)
@@ -2921,7 +2903,7 @@ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSessio
 index ed2e309fa07..a5ea58146ad 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
-@@ -74,6 +74,31 @@ trait SharedSparkSessionBase
+@@ -74,6 +74,20 @@ trait SharedSparkSessionBase
        // this rule may potentially block testing of other optimization rules such as
        // ConstantPropagation etc.
        .set(SQLConf.OPTIMIZER_EXCLUDED_RULES.key, ConvertToLocalRelation.ruleName)
@@ -2932,23 +2914,12 @@ index ed2e309fa07..a5ea58146ad 100644
 +        .set("spark.comet.enabled", "true")
 +        .set("spark.comet.parquet.respectFilterPushdown", "true")
 +
-+      if (!isCometScanOnly) {
-+        conf
-+          .set("spark.comet.exec.enabled", "true")
-+          .set("spark.shuffle.manager",
-+            "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager")
-+          .set("spark.comet.exec.shuffle.enabled", "true")
-+          .set("spark.comet.memoryOverhead", "10g")
-+      } else {
-+        conf
-+          .set("spark.comet.exec.enabled", "false")
-+          .set("spark.comet.exec.shuffle.enabled", "false")
-+      }
-+
-+      if (enableCometAnsiMode) {
-+        conf
-+          .set("spark.sql.ansi.enabled", "true")
-+      }
++      conf
++        .set("spark.comet.exec.enabled", "true")
++        .set("spark.shuffle.manager",
++          "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager")
++        .set("spark.comet.exec.shuffle.enabled", "true")
++        .set("spark.comet.memoryOverhead", "10g")
 +    }
      conf.set(
        StaticSQLConf.WAREHOUSE_PATH,
@@ -3045,7 +3016,7 @@ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.sca
 index 1d646f40b3e..5babe505301 100644
 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala
 +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala
-@@ -53,25 +53,54 @@ object TestHive
+@@ -53,25 +53,41 @@ object TestHive
      new SparkContext(
        System.getProperty("spark.sql.test.master", "local[1]"),
        "TestSQLContext",
@@ -3092,24 +3063,11 @@ index 1d646f40b3e..5babe505301 100644
 +            .set("spark.sql.extensions", "org.apache.comet.CometSparkSessionExtensions")
 +            .set("spark.comet.enabled", "true")
 +
-+          val v = System.getenv("ENABLE_COMET_SCAN_ONLY")
-+          if (v == null || !v.toBoolean) {
-+            conf
-+              .set("spark.comet.exec.enabled", "true")
-+              .set("spark.shuffle.manager",
-+                "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager")
-+              .set("spark.comet.exec.shuffle.enabled", "true")
-+          } else {
-+            conf
-+              .set("spark.comet.exec.enabled", "false")
-+              .set("spark.comet.exec.shuffle.enabled", "false")
-+          }
-+
-+          val a = System.getenv("ENABLE_COMET_ANSI_MODE")
-+          if (a != null && a.toBoolean) {
-+            conf
-+              .set("spark.sql.ansi.enabled", "true")
-+          }
++          conf
++            .set("spark.comet.exec.enabled", "true")
++            .set("spark.shuffle.manager",
++              "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager")
++            .set("spark.comet.exec.shuffle.enabled", "true")
 +        }
 
 +        conf