Skip to content

Commit d481370

Browse files
committed
fix: ensure call_llm spans are always ended in multi-agent scenarios
Replace the `tracer.start_as_current_span('call_llm')` context manager with explicit span lifecycle management in `_call_llm_with_tracing()`. In multi-agent setups using `transfer_to_agent`, the async generator receives `GeneratorExit` after the sub-agent completes execution. At that point, the OTel context manager's `finally` block calls `context.detach(token)`, which raises `ValueError` because the contextvars token became stale during the async context switch. This exception prevents `span.end()` from ever being reached, so the span is never exported to trace backends. The fix uses `tracer.start_span()` plus manual `context.attach()`/`context.detach()`, wrapped in a `try/finally` that catches the `ValueError` from `detach()` and always calls `span.end()`. Fixes #4715
1 parent 662354a commit d481370

2 files changed

Lines changed: 22 additions & 1 deletion

File tree

src/google/adk/flows/llm_flows/base_llm_flow.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
from google.adk.platform import time as platform_time
2626
from google.genai import types
27+
from opentelemetry import context as otel_context
2728
from opentelemetry import trace
2829
from websockets.exceptions import ConnectionClosed
2930
from websockets.exceptions import ConnectionClosedOK
@@ -41,6 +42,7 @@
4142
from ...models.base_llm_connection import BaseLlmConnection
4243
from ...models.llm_request import LlmRequest
4344
from ...models.llm_response import LlmResponse
45+
4446
from ...telemetry import tracing
4547
from ...telemetry.tracing import trace_call_llm
4648
from ...telemetry.tracing import trace_send_data
@@ -1169,7 +1171,17 @@ async def _call_llm_async(
11691171
) -> AsyncGenerator[LlmResponse, None]:
11701172

11711173
async def _call_llm_with_tracing() -> AsyncGenerator[LlmResponse, None]:
1172-
with tracer.start_as_current_span('call_llm') as span:
1174+
# Use explicit span management instead of start_as_current_span context
1175+
# manager to ensure span.end() is always called. In multi-agent scenarios
1176+
# with transfer_to_agent, the async generator may receive GeneratorExit
1177+
# after an async context switch (sub-agent execution). This causes
1178+
# context.detach() to raise ValueError (stale contextvars token), which
1179+
# prevents span.end() from being reached when using the context manager.
1180+
# See: https://github.com/google/adk-python/issues/4715
1181+
span = tracer.start_span('call_llm')
1182+
ctx = trace.set_span_in_context(span)
1183+
token = otel_context.attach(ctx)
1184+
try:
11731185
# Runs before_model_callback inside the call_llm span so
11741186
# plugins observe the same span as after/error callbacks.
11751187
if response := await self._handle_before_model_callback(
@@ -1262,6 +1274,12 @@ async def _call_llm_with_tracing() -> AsyncGenerator[LlmResponse, None]:
12621274
llm_response = altered
12631275

12641276
yield llm_response
1277+
finally:
1278+
try:
1279+
otel_context.detach(token)
1280+
except ValueError:
1281+
pass
1282+
span.end()
12651283

12661284
async with Aclosing(_call_llm_with_tracing()) as agen:
12671285
async for event in agen:

tests/unittests/telemetry/test_functional.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,9 @@ def do_replace(tracer):
8282
monkeypatch.setattr(
8383
tracer, "start_as_current_span", real_tracer.start_as_current_span
8484
)
85+
monkeypatch.setattr(
86+
tracer, 'start_span', real_tracer.start_span
87+
)
8588

8689
do_replace(tracing.tracer)
8790

0 commit comments

Comments
 (0)