fix after merging in upstream/main.

mbutrovich · mbutrovich · commit 9f8aa07d62db · 2026-05-14T13:23:59.000-04:00
diff --git a/common/src/main/java/org/apache/comet/udf/CometUdfBridge.java b/common/src/main/java/org/apache/comet/udf/CometUdfBridge.java
@@ -19,7 +19,8 @@
 
 package org.apache.comet.udf;
 
-import java.util.concurrent.ConcurrentHashMap;
+import java.util.LinkedHashMap;
+import java.util.Map;
 
 import org.apache.arrow.c.ArrowArray;
 import org.apache.arrow.c.ArrowSchema;
@@ -37,10 +38,23 @@
  */
 public class CometUdfBridge {
 
-  // Process-wide cache of UDF instances keyed by class name. CometUDF
-  // implementations are required to be stateless (see CometUDF), so a
-  // single shared instance per class is safe across native worker threads.
-  private static final ConcurrentHashMap<String, CometUDF> INSTANCES = new ConcurrentHashMap<>();
+  // Per-thread, bounded LRU of UDF instances keyed by class name. Comet
+  // native execution threads (Tokio/DataFusion worker pool) are reused
+  // across tasks within an executor, so the effective lifetime of cached
+  // entries is the worker thread (i.e. the executor JVM). Fine for
+  // stateless UDFs; future stateful UDFs would need explicit per-task
+  // isolation.
+  private static final int CACHE_CAPACITY = 64;
+
+  private static final ThreadLocal<LinkedHashMap<String, CometUDF>> INSTANCES =
+      ThreadLocal.withInitial(
+          () ->
+              new LinkedHashMap<String, CometUDF>(CACHE_CAPACITY, 0.75f, true) {
+                @Override
+                protected boolean removeEldestEntry(Map.Entry<String, CometUDF> eldest) {
+                  return size() > CACHE_CAPACITY;
+                }
+              });
 
   /**
    * Called from native via JNI.
@@ -50,15 +64,19 @@ public class CometUdfBridge {
    * @param inputSchemaPtrs addresses of pre-allocated FFI_ArrowSchema structs (one per input)
    * @param outArrayPtr address of pre-allocated FFI_ArrowArray for the result
    * @param outSchemaPtr address of pre-allocated FFI_ArrowSchema for the result
-   * @param numRows row count of the current batch. Mirrors DataFusion's {@code
-   *     ScalarFunctionArgs.number_rows}; the only batch-size signal a zero-input UDF (e.g. a
-   *     zero-arg non-deterministic ScalaUDF) ever sees.
-   * @param taskContext propagated Spark {@link TaskContext} from the driving Spark task thread, or
-   *     {@code null} outside a Spark task. Treated as ground truth for the call: installed as the
-   *     thread-local on entry, with the prior value (if any) saved and restored in {@code finally}.
-   *     Lets partition-sensitive built-ins ({@code Rand}, {@code Uuid}, {@code
-   *     MonotonicallyIncreasingID}) work from Tokio workers and avoids reusing a stale TaskContext
-   *     left on a worker by a previous task.
+   * @param numRows number of rows in the current batch. Mirrors DataFusion's {@code
+   *     ScalarFunctionArgs.number_rows} and gives UDFs an explicit batch-size signal for cases
+   *     where no input arg is a batch-length array (e.g. a zero-arg non-deterministic ScalaUDF).
+   *     UDFs that already read size from their input vectors can ignore it.
+   * @param taskContext Spark {@link TaskContext} captured on the driving Spark task thread and
+   *     passed through from native. May be {@code null} when the bridge is invoked outside a Spark
+   *     task (unit tests, direct native driver runs). When non-null and the current thread has no
+   *     {@code TaskContext} of its own, the bridge installs it as the thread-local for the duration
+   *     of the UDF call so the UDF body (including partition-sensitive built-ins like {@code Rand}
+   *     / {@code Uuid} / {@code MonotonicallyIncreasingID} that read the partition index via {@code
+   *     TaskContext.get().partitionId()}) sees the real context rather than null. A thread-local
+   *     installed here is cleared in a {@code finally} so Tokio workers don't leak a stale
+   *     TaskContext across invocations.
    */
   public static void evaluate(
       String udfClassName,
@@ -68,23 +86,17 @@ public static void evaluate(
       long outSchemaPtr,
       int numRows,
       TaskContext taskContext) {
-    // Save-and-restore rather than only-install-if-null: the propagated context is the ground
-    // truth for this call. Any value already on the thread is either (a) the same object on a
-    // Spark task thread, or (b) stale from a prior task on a reused Tokio worker.
-    TaskContext prior = TaskContext.get();
-    if (taskContext != null) {
+    boolean installedTaskContext = false;
+    if (taskContext != null && TaskContext.get() == null) {
       CometTaskContextShim.set(taskContext);
+      installedTaskContext = true;
     }
     try {
       evaluateInternal(
           udfClassName, inputArrayPtrs, inputSchemaPtrs, outArrayPtr, outSchemaPtr, numRows);
     } finally {
-      if (taskContext != null) {
-        if (prior != null) {
-          CometTaskContextShim.set(prior);
-        } else {
-          CometTaskContextShim.unset();
-        }
+      if (installedTaskContext) {
+        CometTaskContextShim.unset();
       }
     }
   }
@@ -96,23 +108,23 @@ private static void evaluateInternal(
       long outArrayPtr,
       long outSchemaPtr,
       int numRows) {
-    CometUDF udf =
-        INSTANCES.computeIfAbsent(
-            udfClassName,
-            name -> {
-              try {
-                // Resolve via the executor's context classloader so user-supplied UDF jars
-                // (added via spark.jars / --jars) are visible.
-                ClassLoader cl = Thread.currentThread().getContextClassLoader();
-                if (cl == null) {
-                  cl = CometUdfBridge.class.getClassLoader();
-                }
-                return (CometUDF)
-                    Class.forName(name, true, cl).getDeclaredConstructor().newInstance();
-              } catch (ReflectiveOperationException e) {
-                throw new RuntimeException("Failed to instantiate CometUDF: " + name, e);
-              }
-            });
+    LinkedHashMap<String, CometUDF> cache = INSTANCES.get();
+    CometUDF udf = cache.get(udfClassName);
+    if (udf == null) {
+      try {
+        // Resolve via the executor's context classloader so user-supplied UDF jars
+        // (added via spark.jars / --jars) are visible.
+        ClassLoader cl = Thread.currentThread().getContextClassLoader();
+        if (cl == null) {
+          cl = CometUdfBridge.class.getClassLoader();
+        }
+        udf =
+            (CometUDF) Class.forName(udfClassName, true, cl).getDeclaredConstructor().newInstance();
+      } catch (ReflectiveOperationException e) {
+        throw new RuntimeException("Failed to instantiate CometUDF: " + udfClassName, e);
+      }
+      cache.put(udfClassName, udf);
+    }
 
     BufferAllocator allocator = org.apache.comet.package$.MODULE$.CometArrowAllocator();
 
diff --git a/native/core/src/execution/jni_api.rs b/native/core/src/execution/jni_api.rs
@@ -462,8 +462,8 @@ pub unsafe extern "system" fn Java_org_apache_comet_Native_createPlan(
             };
 
             // Capture the driving Spark task's TaskContext as a JNI global reference when
-            // non-null. The `Arc<Global<JObject>>` releases its global ref on drop, so cleanup
-            // is automatic when the ExecutionContext drops.
+            // non-null. The `Arc<Global<JObject>>` releases its global ref on drop, so
+            // cleanup is automatic when the ExecutionContext drops.
             let task_context = if !task_context_obj.is_null() {
                 Some(Arc::new(jni_new_global_ref!(env, task_context_obj)?))
             } else {
diff --git a/native/core/src/execution/planner.rs b/native/core/src/execution/planner.rs
@@ -183,8 +183,11 @@ pub struct PhysicalPlanner {
     partition: i32,
     session_ctx: Arc<SessionContext>,
     query_context_registry: Arc<datafusion_comet_spark_expr::QueryContextMap>,
-    /// Captured at `createPlan` time on `ExecutionContext`; see that struct for the
-    /// propagation rationale. `None` when no driving Spark task is available.
+    /// Spark `TaskContext` captured on the driving Spark task thread and stashed on the
+    /// [`ExecutionContext`] at `createPlan` time. Threaded into every [`JvmScalarUdfExpr`] the
+    /// planner builds so the JNI bridge can install it as the thread-local `TaskContext` on
+    /// the Tokio worker that drives the UDF. `None` when no driving Spark task is available
+    /// (unit tests, direct native driver runs).
     task_context: Option<Arc<Global<JObject<'static>>>>,
 }
 
@@ -205,20 +208,27 @@ impl PhysicalPlanner {
         }
     }
 
-    pub fn with_exec_id(mut self, exec_context_id: i64) -> Self {
-        self.exec_context_id = exec_context_id;
-        self
+    pub fn with_exec_id(self, exec_context_id: i64) -> Self {
+        Self {
+            exec_context_id,
+            partition: self.partition,
+            session_ctx: Arc::clone(&self.session_ctx),
+            query_context_registry: Arc::clone(&self.query_context_registry),
+            task_context: self.task_context,
+        }
     }
 
-    /// Attach the Spark `TaskContext` global reference captured at `createPlan` time. Cloned
-    /// into every `JvmScalarUdfExpr` the planner builds so the JNI bridge can install it as
-    /// the thread-local on the Tokio worker driving the UDF.
-    pub fn with_task_context(
-        mut self,
-        task_context: Option<Arc<Global<JObject<'static>>>>,
-    ) -> Self {
-        self.task_context = task_context;
-        self
+    /// Attach a propagated Spark `TaskContext` global reference. Called by the JNI `executePlan`
+    /// entry with whatever was captured at `createPlan` time. The planner clones this `Option`
+    /// into every `JvmScalarUdfExpr` it builds.
+    pub fn with_task_context(self, task_context: Option<Arc<Global<JObject<'static>>>>) -> Self {
+        Self {
+            exec_context_id: self.exec_context_id,
+            partition: self.partition,
+            session_ctx: self.session_ctx,
+            query_context_registry: self.query_context_registry,
+            task_context,
+        }
     }
 
     /// Return session context of this planner.
diff --git a/native/spark-expr/src/jvm_udf/mod.rs b/native/spark-expr/src/jvm_udf/mod.rs
@@ -41,13 +41,16 @@ pub struct JvmScalarUdfExpr {
     args: Vec<Arc<dyn PhysicalExpr>>,
     return_type: DataType,
     return_nullable: bool,
-    /// Captured at `createPlan` time and threaded here by the planner. Passed through the
-    /// JNI bridge so `CometUdfBridge.evaluate` can install it as the Tokio worker's
-    /// thread-local `TaskContext`. Without this, partition-sensitive built-ins inside a UDF
-    /// tree (`Rand`, `Uuid`, `MonotonicallyIncreasingID`, user code reading
-    /// `TaskContext.get()`) see `null` and seed / branch incorrectly. `None` when no driving
-    /// Spark task is available; the bridge then leaves whatever `TaskContext.get()` already
-    /// returns in place.
+    /// Spark `TaskContext` captured on the driving Spark task thread, stashed in the
+    /// `ExecutionContext` at `createPlan` time, and threaded here by the planner. Passed
+    /// through the JNI bridge so `CometUdfBridge.evaluate` can install it as the
+    /// thread-local `TaskContext` on the Tokio worker that drives the UDF call. Without this,
+    /// partition-sensitive built-ins inside a user UDF tree (`Rand`, `Uuid`,
+    /// `MonotonicallyIncreasingID`, custom UDF code that reads
+    /// `TaskContext.get().partitionId()`) see a null `TaskContext` and seed / branch
+    /// incorrectly. `None` means the surrounding driver had no `TaskContext` to propagate
+    /// (unit tests, direct native driver runs); the bridge then leaves whatever
+    /// `TaskContext.get()` already returns in place.
     task_context: Option<Arc<Global<JObject<'static>>>>,
 }
 
@@ -120,10 +123,10 @@ impl PhysicalExpr for JvmScalarUdfExpr {
     }
 
     fn evaluate(&self, batch: &RecordBatch) -> DFResult<ColumnarValue> {
-        // Step 1: evaluate child expressions to get Arrow arrays. Scalar children
-        // (e.g. literal patterns) are sent as length-1 vectors rather than expanded
-        // to batch-row count, so the JVM bridge does not pay an O(rows) copy for
-        // values that never vary across the batch.
+        // Scalar children (e.g. literal patterns) are sent as length-1 vectors rather than
+        // expanded to batch-row count, so the JVM bridge does not pay an O(rows) copy for
+        // values that never vary across the batch. The JVM side gets `numRows` directly via
+        // the bridge so it doesn't need the scalar to carry batch length.
         let arrays: Vec<ArrayRef> = self
             .args
             .iter()
@@ -133,7 +136,6 @@ impl PhysicalExpr for JvmScalarUdfExpr {
             })
             .collect::<DFResult<_>>()?;
 
-        // Step 2: allocate FFI structs on the Rust heap and collect their raw pointers.
         // The JVM writes into the out_array/out_schema slots and reads from the in_ slots.
         let in_ffi_arrays: Vec<Box<FFI_ArrowArray>> = arrays
             .iter()
@@ -157,7 +159,6 @@ impl PhysicalExpr for JvmScalarUdfExpr {
             .map(|b| b.as_ref() as *const FFI_ArrowSchema as i64)
             .collect();
 
-        // Allocate output FFI slots.
         let mut out_array = Box::new(FFI_ArrowArray::empty());
         let mut out_schema = Box::new(FFI_ArrowSchema::empty());
         let out_arr_ptr = out_array.as_mut() as *mut FFI_ArrowArray as i64;
@@ -166,22 +167,20 @@ impl PhysicalExpr for JvmScalarUdfExpr {
         let class_name = self.class_name.clone();
         let n_args = arrays.len();
 
-        // Step 3: attach a JNI env for this thread and call the static bridge method.
         JVMClasses::with_env(|env| {
             let bridge = JVMClasses::get().comet_udf_bridge.as_ref().ok_or_else(|| {
                 CometError::from(ExecutionError::GeneralError(
                     "JVM UDF bridge unavailable: org.apache.comet.udf.CometUdfBridge \
-                     class was not found on the JVM classpath."
+                     class was not found on the JVM classpath. Set \
+                     spark.comet.exec.regexp.engine=rust to disable this path."
                         .to_string(),
                 ))
             })?;
 
-            // Build the JVM String for the class name.
             let jclass_name = env
                 .new_string(&class_name)
                 .map_err(|e| CometError::JNI { source: e })?;
 
-            // Build the long[] arrays for input pointers.
             let in_arr_java = env
                 .new_long_array(n_args)
                 .map_err(|e| CometError::JNI { source: e })?;
@@ -196,9 +195,10 @@ impl PhysicalExpr for JvmScalarUdfExpr {
                 .set_region(env, 0, &in_sch_ptrs)
                 .map_err(|e| CometError::JNI { source: e })?;
 
-            // Pass a null jobject when no TaskContext was propagated so the bridge's null-guard
-            // leaves the worker thread's current TaskContext.get() in place. The borrow must
-            // outlive `call_static_method_unchecked`.
+            // Resolve the TaskContext reference once before building the arg array so the
+            // borrow lives until `call_static_method_unchecked` returns. When no TaskContext
+            // was propagated, pass a null object so the bridge's null-guard leaves the
+            // thread-local alone.
             let null_task_context = JObject::null();
             let task_context_ref: &JObject = match &self.task_context {
                 Some(gref) => gref.as_obj(),
@@ -229,15 +229,26 @@ impl PhysicalExpr for JvmScalarUdfExpr {
             Ok(())
         })?;
 
-        // Step 4: import the result from the FFI slots filled by the JVM.
         // SAFETY: `*out_array` moves the FFI_ArrowArray out of the Box (the heap
         // allocation is freed by the move), and `from_ffi` wraps it in an Arc that
         // keeps the JVM-installed release callback alive until the resulting
         // ArrayData drops. `out_schema` is borrowed; its release callback runs
         // exactly once when the Box drops at end of scope.
         let result_data = unsafe { from_ffi(*out_array, &out_schema) }
             .map_err(|e| CometError::Arrow { source: e })?;
-        Ok(ColumnarValue::Array(make_array(result_data)))
+        let result_array = make_array(result_data);
+
+        // The JVM may produce arrays with different field names (e.g. Arrow Java's
+        // ListVector uses "$data$" for child fields) than what DataFusion expects
+        // (e.g. "item"). Cast to the declared return_type to normalize schema.
+        let result_array = if result_array.data_type() != &self.return_type {
+            arrow::compute::cast(&result_array, &self.return_type)
+                .map_err(|e| CometError::Arrow { source: e })?
+        } else {
+            result_array
+        };
+
+        Ok(ColumnarValue::Array(result_array))
     }
 
     fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>> {
diff --git a/spark/src/main/scala/org/apache/comet/CometExecIterator.scala b/spark/src/main/scala/org/apache/comet/CometExecIterator.scala
@@ -128,8 +128,10 @@ class CometExecIterator(
       taskAttemptId,
       taskCPUs,
       keyUnwrapper,
-      // Propagated to Tokio workers running JVM UDFs so they see this Spark task's
-      // TaskContext. See CometUdfBridge.evaluate.
+      // Capture the Spark task thread's TaskContext at `createPlan` time. Stashed native-side
+      // in the ExecutionContext and passed through the JVM UDF bridge so that Tokio workers
+      // running JVM UDFs see the real `TaskContext` via their thread-local. See
+      // `CometUdfBridge.evaluate` and `CometTaskContextShim` for the receive side.
       TaskContext.get())
   }