Skip to content

Commit 9a47b6f

Browse files
caohy1988 and claude
committed
fix(telemetry): ensure all model callbacks observe the same call_llm span
Move before_model_callback inside the call_llm span, wrap after_model_callback and on_model_error_callback with trace.use_span(span, end_on_exit=False) to rebind to the call_llm span. Thread call_llm_span into _run_and_handle_error so error callbacks also run under the correct span context. Fixes #4851 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent f973673 commit 9a47b6f

File tree

2 files changed

+319
-37
lines changed

2 files changed

+319
-37
lines changed

src/google/adk/flows/llm_flows/base_llm_flow.py

Lines changed: 66 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
from google.adk.platform import time as platform_time
2626
from google.genai import types
27+
from opentelemetry import trace
2728
from websockets.exceptions import ConnectionClosed
2829
from websockets.exceptions import ConnectionClosedOK
2930

@@ -306,6 +307,7 @@ async def _run_and_handle_error(
306307
invocation_context: InvocationContext,
307308
llm_request: LlmRequest,
308309
model_response_event: Event,
310+
call_llm_span: Optional[trace.Span] = None,
309311
) -> AsyncGenerator[LlmResponse, None]:
310312
"""Wraps an LLM response generator with error callback handling.
311313
@@ -319,6 +321,9 @@ async def _run_and_handle_error(
319321
invocation_context: The invocation context.
320322
llm_request: The LLM request.
321323
model_response_event: The model response event.
324+
call_llm_span: The call_llm span to rebind error callbacks to.
325+
When provided, on_model_error callbacks run under this span so
326+
plugins observe the same span as before/after model callbacks.
322327
323328
Yields:
324329
LlmResponse objects from the generator.
@@ -380,11 +385,19 @@ async def _run_on_model_error_callbacks(
380385
callback_context = CallbackContext(
381386
invocation_context, event_actions=model_response_event.actions
382387
)
383-
error_response = await _run_on_model_error_callbacks(
384-
callback_context=callback_context,
385-
llm_request=llm_request,
386-
error=model_error,
387-
)
388+
if call_llm_span is not None:
389+
with trace.use_span(call_llm_span, end_on_exit=False):
390+
error_response = await _run_on_model_error_callbacks(
391+
callback_context=callback_context,
392+
llm_request=llm_request,
393+
error=model_error,
394+
)
395+
else:
396+
error_response = await _run_on_model_error_callbacks(
397+
callback_context=callback_context,
398+
llm_request=llm_request,
399+
error=model_error,
400+
)
388401
if error_response is not None:
389402
yield error_response
390403
else:
@@ -1102,28 +1115,30 @@ async def _call_llm_async(
11021115
llm_request: LlmRequest,
11031116
model_response_event: Event,
11041117
) -> AsyncGenerator[LlmResponse, None]:
1105-
# Runs before_model_callback if it exists.
1106-
if response := await self._handle_before_model_callback(
1107-
invocation_context, llm_request, model_response_event
1108-
):
1109-
yield response
1110-
return
1111-
1112-
llm_request.config = llm_request.config or types.GenerateContentConfig()
1113-
llm_request.config.labels = llm_request.config.labels or {}
1114-
1115-
# Add agent name as a label to the llm_request. This will help with slicing
1116-
# the billing reports on a per-agent basis.
1117-
if _ADK_AGENT_NAME_LABEL_KEY not in llm_request.config.labels:
1118-
llm_request.config.labels[_ADK_AGENT_NAME_LABEL_KEY] = (
1119-
invocation_context.agent.name
1120-
)
1121-
1122-
# Calls the LLM.
1123-
llm = self.__get_llm(invocation_context)
11241118

11251119
async def _call_llm_with_tracing() -> AsyncGenerator[LlmResponse, None]:
11261120
with tracer.start_as_current_span('call_llm') as span:
1121+
# Runs before_model_callback inside the call_llm span so
1122+
# plugins observe the same span as after/error callbacks.
1123+
if response := await self._handle_before_model_callback(
1124+
invocation_context, llm_request, model_response_event
1125+
):
1126+
yield response
1127+
return
1128+
1129+
llm_request.config = llm_request.config or types.GenerateContentConfig()
1130+
llm_request.config.labels = llm_request.config.labels or {}
1131+
1132+
# Add agent name as a label to the llm_request. This will help
1133+
# with slicing billing reports on a per-agent basis.
1134+
if _ADK_AGENT_NAME_LABEL_KEY not in llm_request.config.labels:
1135+
llm_request.config.labels[_ADK_AGENT_NAME_LABEL_KEY] = (
1136+
invocation_context.agent.name
1137+
)
1138+
1139+
# Calls the LLM.
1140+
llm = self.__get_llm(invocation_context)
1141+
11271142
if invocation_context.run_config.support_cfc:
11281143
invocation_context.live_request_queue = LiveRequestQueue()
11291144
responses_generator = self.run_live(invocation_context)
@@ -1133,14 +1148,20 @@ async def _call_llm_with_tracing() -> AsyncGenerator[LlmResponse, None]:
11331148
invocation_context,
11341149
llm_request,
11351150
model_response_event,
1151+
call_llm_span=span,
11361152
)
11371153
) as agen:
11381154
async for llm_response in agen:
1139-
# Runs after_model_callback if it exists.
1140-
if altered_llm_response := await self._handle_after_model_callback(
1141-
invocation_context, llm_response, model_response_event
1142-
):
1143-
llm_response = altered_llm_response
1155+
# Rebind to call_llm span for after_model_callback.
1156+
with trace.use_span(span, end_on_exit=False):
1157+
if altered := (
1158+
await self._handle_after_model_callback(
1159+
invocation_context,
1160+
llm_response,
1161+
model_response_event,
1162+
)
1163+
):
1164+
llm_response = altered
11441165
# only yield partial response in SSE streaming mode
11451166
if (
11461167
invocation_context.run_config.streaming_mode
@@ -1151,9 +1172,9 @@ async def _call_llm_with_tracing() -> AsyncGenerator[LlmResponse, None]:
11511172
if llm_response.turn_complete:
11521173
invocation_context.live_request_queue.close()
11531174
else:
1154-
# Check if we can make this llm call or not. If the current call
1155-
# pushes the counter beyond the max set value, then the execution is
1156-
# stopped right here, and exception is thrown.
1175+
# Check if we can make this llm call or not. If the current
1176+
# call pushes the counter beyond the max set value, then the
1177+
# execution is stopped right here, and exception is thrown.
11571178
invocation_context.increment_llm_call_count()
11581179
responses_generator = llm.generate_content_async(
11591180
llm_request,
@@ -1166,6 +1187,7 @@ async def _call_llm_with_tracing() -> AsyncGenerator[LlmResponse, None]:
11661187
invocation_context,
11671188
llm_request,
11681189
model_response_event,
1190+
call_llm_span=span,
11691191
)
11701192
) as agen:
11711193
async for llm_response in agen:
@@ -1176,11 +1198,16 @@ async def _call_llm_with_tracing() -> AsyncGenerator[LlmResponse, None]:
11761198
llm_response,
11771199
span,
11781200
)
1179-
# Runs after_model_callback if it exists.
1180-
if altered_llm_response := await self._handle_after_model_callback(
1181-
invocation_context, llm_response, model_response_event
1182-
):
1183-
llm_response = altered_llm_response
1201+
# Rebind to call_llm span for after_model_callback.
1202+
with trace.use_span(span, end_on_exit=False):
1203+
if altered := (
1204+
await self._handle_after_model_callback(
1205+
invocation_context,
1206+
llm_response,
1207+
model_response_event,
1208+
)
1209+
):
1210+
llm_response = altered
11841211

11851212
yield llm_response
11861213

@@ -1235,13 +1262,15 @@ async def _run_and_handle_error(
12351262
invocation_context: InvocationContext,
12361263
llm_request: LlmRequest,
12371264
model_response_event: Event,
1265+
call_llm_span: Optional[trace.Span] = None,
12381266
) -> AsyncGenerator[LlmResponse, None]:
12391267
async with Aclosing(
12401268
_run_and_handle_error(
12411269
response_generator,
12421270
invocation_context,
12431271
llm_request,
12441272
model_response_event,
1273+
call_llm_span=call_llm_span,
12451274
)
12461275
) as agen:
12471276
async for response in agen:

0 commit comments

Comments (0)