Skip to content

Commit 94d6f82

Browse files
committed
fix: ensure call_llm spans are always ended in multi-agent scenarios
Replace the `tracer.start_as_current_span('call_llm')` context manager with explicit span lifecycle management in `_call_llm_with_tracing()`. In multi-agent setups using `transfer_to_agent`, the async generator receives `GeneratorExit` after the sub-agent completes execution. At that point, the OTel context manager's `finally` block calls `context.detach(token)`, which raises `ValueError` because the contextvars token became stale during the async context switch. This exception prevents `span.end()` from ever being reached, so the span is never exported to trace backends. The fix uses `tracer.start_span()` plus manual `context.attach()`/`context.detach()` inside a `try/finally` that catches the `ValueError` from `detach()` and always calls `span.end()`. Fixes #4715
1 parent a61ccf3 commit 94d6f82

2 files changed

Lines changed: 23 additions & 1 deletion

File tree

src/google/adk/flows/llm_flows/base_llm_flow.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
from typing import TYPE_CHECKING
2626

2727
from google.genai import types
28+
from opentelemetry import context as otel_context
29+
from opentelemetry import trace
2830
from websockets.exceptions import ConnectionClosed
2931
from websockets.exceptions import ConnectionClosedOK
3032

@@ -44,6 +46,7 @@
4446
from ...models.base_llm_connection import BaseLlmConnection
4547
from ...models.llm_request import LlmRequest
4648
from ...models.llm_response import LlmResponse
49+
4750
from ...telemetry import tracing
4851
from ...telemetry.tracing import trace_call_llm
4952
from ...telemetry.tracing import trace_send_data
@@ -1127,7 +1130,17 @@ async def _call_llm_async(
11271130
llm = self.__get_llm(invocation_context)
11281131

11291132
async def _call_llm_with_tracing() -> AsyncGenerator[LlmResponse, None]:
1130-
with tracer.start_as_current_span('call_llm') as span:
1133+
# Use explicit span management instead of start_as_current_span context
1134+
# manager to ensure span.end() is always called. In multi-agent scenarios
1135+
# with transfer_to_agent, the async generator may receive GeneratorExit
1136+
# after an async context switch (sub-agent execution). This causes
1137+
# context.detach() to raise ValueError (stale contextvars token), which
1138+
# prevents span.end() from being reached when using the context manager.
1139+
# See: https://github.com/google/adk-python/issues/4715
1140+
span = tracer.start_span('call_llm')
1141+
ctx = trace.set_span_in_context(span)
1142+
token = otel_context.attach(ctx)
1143+
try:
11311144
if invocation_context.run_config.support_cfc:
11321145
invocation_context.live_request_queue = LiveRequestQueue()
11331146
responses_generator = self.run_live(invocation_context)
@@ -1187,6 +1200,12 @@ async def _call_llm_with_tracing() -> AsyncGenerator[LlmResponse, None]:
11871200
llm_response = altered_llm_response
11881201

11891202
yield llm_response
1203+
finally:
1204+
try:
1205+
otel_context.detach(token)
1206+
except ValueError:
1207+
pass
1208+
span.end()
11901209

11911210
async with Aclosing(_call_llm_with_tracing()) as agen:
11921211
async for event in agen:

tests/unittests/telemetry/test_functional.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,9 @@ def do_replace(tracer):
7575
monkeypatch.setattr(
7676
tracer, 'start_as_current_span', real_tracer.start_as_current_span
7777
)
78+
monkeypatch.setattr(
79+
tracer, 'start_span', real_tracer.start_span
80+
)
7881

7982
do_replace(tracing.tracer)
8083
do_replace(base_agent.tracer)

0 commit comments

Comments
 (0)