revert unintended diff from main

mbutrovich · mbutrovich · commit 1111c6fa936f · 2026-05-15T09:26:10.000-04:00
diff --git a/common/src/main/java/org/apache/comet/udf/CometUdfBridge.java b/common/src/main/java/org/apache/comet/udf/CometUdfBridge.java
@@ -80,19 +80,15 @@ public class CometUdfBridge {
    * @param inputSchemaPtrs addresses of pre-allocated FFI_ArrowSchema structs (one per input)
    * @param outArrayPtr address of pre-allocated FFI_ArrowArray for the result
    * @param outSchemaPtr address of pre-allocated FFI_ArrowSchema for the result
-   * @param numRows number of rows in the current batch. Mirrors DataFusion's {@code
-   *     ScalarFunctionArgs.number_rows} and gives UDFs an explicit batch-size signal for cases
-   *     where no input arg is a batch-length array (e.g. a zero-arg non-deterministic ScalaUDF).
-   *     UDFs that already read size from their input vectors can ignore it.
-   * @param taskContext Spark {@link TaskContext} captured on the driving Spark task thread and
-   *     passed through from native. May be {@code null} when the bridge is invoked outside a Spark
-   *     task (unit tests, direct native driver runs). When non-null and the current thread has no
-   *     {@code TaskContext} of its own, the bridge installs it as the thread-local for the duration
-   *     of the UDF call so the UDF body (including partition-sensitive built-ins like {@code Rand}
-   *     / {@code Uuid} / {@code MonotonicallyIncreasingID} that read the partition index via {@code
-   *     TaskContext.get().partitionId()}) sees the real context rather than null. The thread-local
-   *     is cleared in a {@code finally} so Tokio workers don't leak a stale TaskContext across
-   *     invocations. The task attempt ID drawn from this context also keys the UDF-instance cache,
+   * @param numRows row count of the current batch. Mirrors DataFusion's {@code
+   *     ScalarFunctionArgs.number_rows}; the only batch-size signal a zero-input UDF (e.g. a
+   *     zero-arg non-deterministic ScalaUDF) ever sees.
+   * @param taskContext propagated Spark {@link TaskContext} from the driving Spark task thread, or
+   *     {@code null} outside a Spark task. When non-null it is the ground truth for the call:
+   *     installed on entry as the thread-local; prior value (if any) restored in {@code finally}.
+   *     Lets partition-sensitive built-ins ({@code Rand}, {@code Uuid}, {@code
+   *     MonotonicallyIncreasingID}) work from Tokio workers and avoids reusing a stale TaskContext
+   *     left on a worker by a previous task. Its task attempt ID also keys the UDF-instance cache,
    *     so a UDF holding per-task state in fields sees a consistent instance for every call within
    *     the task regardless of which Tokio worker is polling.
    */
@@ -113,10 +109,12 @@ public static void evaluate(
     assert outArrayPtr != 0L : "outArrayPtr must be a valid FFI pointer";
     assert outSchemaPtr != 0L : "outSchemaPtr must be a valid FFI pointer";
 
-    boolean installedTaskContext = false;
-    if (taskContext != null && TaskContext.get() == null) {
+    // Save-and-restore rather than only-install-if-null: the propagated `taskContext` is the
+    // ground truth for this call. Any value already on the thread is either (a) the same object
+    // on a Spark task thread, or (b) stale from a prior task on a reused Tokio worker.
+    TaskContext prior = TaskContext.get();
+    if (taskContext != null) {
       CometTaskContextShim.set(taskContext);
-      installedTaskContext = true;
       assert TaskContext.get() == taskContext
           : "TaskContext install did not take effect on this thread";
     }
@@ -130,8 +128,12 @@ public static void evaluate(
           numRows,
           taskContext);
     } finally {
-      if (installedTaskContext) {
-        CometTaskContextShim.unset();
+      if (taskContext != null) {
+        if (prior != null) {
+          CometTaskContextShim.set(prior);
+        } else {
+          CometTaskContextShim.unset();
+        }
       }
     }
   }