Skip to content

Commit 16cecf1

Browse files
[mq] [skip ddci] working branch - merge 01be608 on top of master at 34333bb
{"baseBranch":"master","baseCommit":"34333bb5c3004ba42420fc7330b87f95dd5a659b","createdAt":"2026-04-27T11:36:38.066776Z","headSha":"01be60830445979e0a80dc077af88d45b97f039d","id":"5516d928-9a6c-4849-aa2a-2b7ad3df453f","nextMergeabilityCheckAt":"2026-04-27T12:38:01.756528Z","priority":"200","pullRequestNumber":"11199","queuedAt":"2026-04-27T11:38:06.026410Z","status":"STATUS_QUEUED"}
2 parents b39c8cc + 01be608 commit 16cecf1

3 files changed

Lines changed: 54 additions & 6 deletions

File tree

dd-java-agent/instrumentation/spark/spark-common/src/main/java/datadog/trace/instrumentation/spark/AbstractDatadogSparkListener.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -426,7 +426,7 @@ private void addDatabricksSpecificTags(
426426
if (properties != null) {
427427
String databricksJobId = getDatabricksJobId(properties);
428428
String databricksJobRunId = getDatabricksJobRunId(properties, databricksClusterName);
429-
String databricksTaskRunId = getDatabricksTaskRunId(properties);
429+
String databricksTaskRunId = getDatabricksTaskRunId(properties, databricksJobRunId);
430430

431431
// ids to link those spans to databricks job/task traces
432432
builder.withTag("databricks_job_id", databricksJobId);
@@ -1177,10 +1177,14 @@ private static String getDatabricksJobRunId(
11771177
}
11781178

11791179
@SuppressForbidden // split with one-char String use a fast-path without regex usage
1180-
private static String getDatabricksTaskRunId(Properties properties) {
1181-
// spark.databricks.job.runId is the runId of the task, not of the Job
1180+
private static String getDatabricksTaskRunId(Properties properties, String jobRunId) {
1181+
// spark.databricks.job.runId is the runId of the task, not of the Job, until Databricks 18.2
11821182
String taskRunId = properties.getProperty("spark.databricks.job.runId");
1183-
if (taskRunId != null) {
1183+
// On Databricks 18.2+, spark.databricks.job.runId now returns the job run ID
1184+
// There is no easy config key to extract the task run ID, so we use the fallback extraction
1185+
// methods
1186+
// Task run ID is crucial for the spans parent-child relationship inside the trace
1187+
if (taskRunId != null && !taskRunId.equals(jobRunId)) {
11841188
return taskRunId;
11851189
}
11861190

dd-java-agent/instrumentation/spark/spark-common/src/testFixtures/groovy/datadog/trace/instrumentation/spark/AbstractSparkStructuredStreamingTest.groovy

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class AbstractSparkStructuredStreamingTest extends InstrumentationSpecification
4444
.config("spark.databricks.sparkContextId", "3291395623902517763")
4545
.config("spark.databricks.job.id", "3822225623902514353")
4646
.config("spark.databricks.job.parentRunId", "3851395623902519743")
47-
.config("spark.databricks.job.runId", "3851395623902519743")
47+
.config("spark.databricks.job.runId", "4851395623902519743")
4848
.getOrCreate()
4949
}
5050

@@ -303,7 +303,7 @@ class AbstractSparkStructuredStreamingTest extends InstrumentationSpecification
303303
spanType "spark"
304304
parent()
305305
links({
306-
link(DDTraceId.from((long)12052652441736835200), (long)-6394091631972716416)
306+
link(DDTraceId.from((long)12052652441736835200), (long)1375416004467624525)
307307
})
308308
}
309309
span {

dd-java-agent/instrumentation/spark/spark-common/src/testFixtures/groovy/datadog/trace/instrumentation/spark/AbstractSparkTest.groovy

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,50 @@ abstract class AbstractSparkTest extends InstrumentationSpecification {
422422
sparkSession.stop()
423423
}
424424

425+
def "fallback to jobGroup.id when spark.databricks.job.runId equals parentRunId on Databricks 18.2+"() {
426+
setup:
427+
def sparkSession = SparkSession.builder()
428+
.config("spark.master", "local")
429+
.config("spark.default.parallelism", "2")
430+
.config("spark.sql.shuffle.partitions", "2")
431+
.config("spark.databricks.sparkContextId", "some_id")
432+
.getOrCreate()
433+
434+
sparkSession.sparkContext().setLocalProperty("spark.databricks.job.id", "1234")
435+
sparkSession.sparkContext().setLocalProperty("spark.databricks.job.runId", "5678") // Same as parentRunId
436+
sparkSession.sparkContext().setLocalProperty("spark.jobGroup.id", "0000_job-1234-run-7890-action-0000")
437+
sparkSession.sparkContext().setLocalProperty("spark.databricks.job.parentRunId", "5678")
438+
TestSparkComputation.generateTestSparkComputation(sparkSession)
439+
440+
expect:
441+
assertTraces(1) {
442+
trace(3) {
443+
span {
444+
operationName "spark.job"
445+
spanType "spark"
446+
traceId 8944764253919609482G
447+
parentSpanId 3503717452567411167G
448+
assert span.tags["databricks_job_id"] == "1234"
449+
assert span.tags["databricks_job_run_id"] == "5678"
450+
assert span.tags["databricks_task_run_id"] == "7890"
451+
}
452+
span {
453+
operationName "spark.stage"
454+
spanType "spark"
455+
childOf(span(0))
456+
}
457+
span {
458+
operationName "spark.stage"
459+
spanType "spark"
460+
childOf(span(0))
461+
}
462+
}
463+
}
464+
465+
cleanup:
466+
sparkSession.stop()
467+
}
468+
425469
def "compute the databricks parent context"() {
426470
setup:
427471
def contextWithJobRunId = new DatabricksParentContext("1234", "5678", "9012")

0 commit comments

Comments (0)