fix: ensure call_llm spans are always ended in multi-agent scenarios

weiguangli-io · weiguangli-io · commit d481370b6a29 · 2026-04-29T11:47:49.000+08:00
Replace the `tracer.start_as_current_span('call_llm')` context manager with explicit span lifecycle management in `_call_llm_with_tracing()`.

In multi-agent setups that use `transfer_to_agent`, the async generator receives `GeneratorExit` after the sub-agent finishes executing. At that point, the `finally` block of the OTel context manager calls `context.detach(token)`, which raises `ValueError` because the contextvars token became stale during the async context switch. That exception prevents `span.end()` from ever being reached, so the span is never exported to trace backends.

The fix uses `tracer.start_span()` plus manual `context.attach()`/`context.detach()`, wrapped in a `try/finally` that catches the `ValueError` from `detach()` and always calls `span.end()`.

Fixes #4715
diff --git a/src/google/adk/flows/llm_flows/base_llm_flow.py b/src/google/adk/flows/llm_flows/base_llm_flow.py
@@ -24,6 +24,7 @@
 
 from google.adk.platform import time as platform_time
 from google.genai import types
+from opentelemetry import context as otel_context
 from opentelemetry import trace
 from websockets.exceptions import ConnectionClosed
 from websockets.exceptions import ConnectionClosedOK
@@ -41,6 +42,7 @@
 from ...models.base_llm_connection import BaseLlmConnection
 from ...models.llm_request import LlmRequest
 from ...models.llm_response import LlmResponse
+
 from ...telemetry import tracing
 from ...telemetry.tracing import trace_call_llm
 from ...telemetry.tracing import trace_send_data
@@ -1169,7 +1171,17 @@ async def _call_llm_async(
   ) -> AsyncGenerator[LlmResponse, None]:
 
     async def _call_llm_with_tracing() -> AsyncGenerator[LlmResponse, None]:
-      with tracer.start_as_current_span('call_llm') as span:
+      # Use explicit span management instead of start_as_current_span context
+      # manager to ensure span.end() is always called. In multi-agent scenarios
+      # with transfer_to_agent, the async generator may receive GeneratorExit
+      # after an async context switch (sub-agent execution). This causes
+      # context.detach() to raise ValueError (stale contextvars token), which
+      # prevents span.end() from being reached when using the context manager.
+      # See: https://github.com/google/adk-python/issues/4715
+      span = tracer.start_span('call_llm')
+      ctx = trace.set_span_in_context(span)
+      token = otel_context.attach(ctx)
+      try:
         # Runs before_model_callback inside the call_llm span so
         # plugins observe the same span as after/error callbacks.
         if response := await self._handle_before_model_callback(
@@ -1262,6 +1274,12 @@ async def _call_llm_with_tracing() -> AsyncGenerator[LlmResponse, None]:
                   llm_response = altered
 
               yield llm_response
+      finally:
+        try:
+          otel_context.detach(token)
+        except ValueError:
+          pass
+        span.end()
 
     async with Aclosing(_call_llm_with_tracing()) as agen:
       async for event in agen:
diff --git a/tests/unittests/telemetry/test_functional.py b/tests/unittests/telemetry/test_functional.py
@@ -82,6 +82,9 @@ def do_replace(tracer):
     monkeypatch.setattr(
         tracer, "start_as_current_span", real_tracer.start_as_current_span
     )
+    monkeypatch.setattr(
+        tracer, 'start_span', real_tracer.start_span
+    )
 
   do_replace(tracing.tracer)
 

Original file line number	Diff line number	Diff line change
`@@ -82,6 +82,9 @@ def do_replace(tracer):`
`82`	`82`	`monkeypatch.setattr(`
`83`	`83`	`tracer, "start_as_current_span", real_tracer.start_as_current_span`
`84`	`84`	`)`
	`85`	`+ monkeypatch.setattr(`
	`86`	`+ tracer, 'start_span', real_tracer.start_span`
	`87`	`+ )`
`85`	`88`
`86`	`89`	`do_replace(tracing.tracer)`
`87`	`90`