Skip to content

Commit c76fb92

Browse files
caohy1988 and claude committed
feat(plugins): add on_agent_error_callback and on_run_error_callback
Implements RFC #5044 to close the error callback coverage gap at the agent and runner levels. Previously, unhandled exceptions escaping agent execution or runner execution left dangling *_STARTING events with no terminal error event.

Changes:
- Add on_agent_error_callback and on_run_error_callback to BasePlugin (notification-only, exception always re-raised)
- Wire try/except Exception in BaseAgent.run_async() and run_live()
- Wire try/except Exception in Runner._exec_with_plugin()
- Add AGENT_ERROR and INVOCATION_ERROR event types to BigQueryAgentAnalyticsPlugin with traceback capture and cleanup
- Keep after_agent_callback and after_run_callback as success-only (no semantic change to existing callbacks)
- Catch Exception (not BaseException) to exclude CancelledError/KeyboardInterrupt from error callback dispatch

Closes #4863

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent f973673 commit c76fb92

File tree

6 files changed

+779
-70
lines changed

6 files changed

+779
-70
lines changed

src/google/adk/agents/base_agent.py

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -293,9 +293,13 @@ async def run_async(
293293
if ctx.end_invocation:
294294
return
295295

296-
async with Aclosing(self._run_async_impl(ctx)) as agen:
297-
async for event in agen:
298-
yield event
296+
try:
297+
async with Aclosing(self._run_async_impl(ctx)) as agen:
298+
async for event in agen:
299+
yield event
300+
except Exception as e:
301+
await self._handle_agent_error_callback(ctx, e)
302+
raise
299303

300304
if ctx.end_invocation:
301305
return
@@ -326,9 +330,13 @@ async def run_live(
326330
if ctx.end_invocation:
327331
return
328332

329-
async with Aclosing(self._run_live_impl(ctx)) as agen:
330-
async for event in agen:
331-
yield event
333+
try:
334+
async with Aclosing(self._run_live_impl(ctx)) as agen:
335+
async for event in agen:
336+
yield event
337+
except Exception as e:
338+
await self._handle_agent_error_callback(ctx, e)
339+
raise
332340

333341
if event := await self._handle_after_agent_callback(ctx):
334342
yield event
@@ -548,6 +556,27 @@ async def _handle_after_agent_callback(
548556
)
549557
return None
550558

559+
async def _handle_agent_error_callback(
560+
self,
561+
invocation_context: InvocationContext,
562+
error: Exception,
563+
) -> None:
564+
"""Runs the on_agent_error_callback for all plugins.
565+
566+
This is notification-only: the exception is always re-raised by the
567+
caller after this method returns.
568+
569+
Args:
570+
invocation_context: The invocation context for this agent.
571+
error: The exception that escaped agent execution.
572+
"""
573+
callback_context = CallbackContext(invocation_context)
574+
await invocation_context.plugin_manager.run_on_agent_error_callback(
575+
agent=self,
576+
callback_context=callback_context,
577+
error=error,
578+
)
579+
551580
@override
552581
def model_post_init(self, __context: Any) -> None:
553582
self.__set_parent_agent_for_sub_agents()

src/google/adk/plugins/base_plugin.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,3 +370,45 @@ async def on_tool_error_callback(
370370
allows the original error to be raised.
371371
"""
372372
pass
373+
374+
async def on_agent_error_callback(
    self,
    *,
    agent: BaseAgent,
    callback_context: CallbackContext,
    error: Exception,
) -> None:
  """Hook invoked when an unhandled exception escapes agent execution.

  Notification-only. The exception is always re-raised once every
  registered plugin has been notified, so plugins should NOT try to
  suppress it.

  In contrast to ``on_model_error_callback``, which can return a
  replacement response, this hook returns nothing — a plugin has no
  meaningful recovery action to offer at the agent level.

  The default implementation does nothing; subclasses override it to
  observe agent failures.

  Args:
    agent: The agent instance that encountered the error.
    callback_context: The callback context for the agent invocation.
    error: The exception that was raised during agent execution.
  """
  return None
397+
398+
async def on_run_error_callback(
    self,
    *,
    invocation_context: InvocationContext,
    error: Exception,
) -> None:
  """Hook invoked when an unhandled exception escapes runner execution.

  Notification-only. The exception is always re-raised once every
  registered plugin has been notified, so plugins should NOT try to
  suppress it.

  The default implementation does nothing; subclasses override it to
  observe runner failures.

  Args:
    invocation_context: The context for the entire invocation.
    error: The exception that was raised during runner execution.
  """
  return None

src/google/adk/plugins/bigquery_agent_analytics_plugin.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import logging
2929
import mimetypes
3030
import os
31+
import traceback as traceback_module
3132

3233
# Enable gRPC fork support so child processes created via os.fork()
3334
# can safely create new gRPC channels. Must be set before grpc's
@@ -1763,8 +1764,12 @@ def _get_events_schema() -> list[bigquery.SchemaField]:
17631764
"AGENT_COMPLETED": [
17641765
"CAST(JSON_VALUE(latency_ms, '$.total_ms') AS INT64) AS total_ms",
17651766
],
1767+
"AGENT_ERROR": [
1768+
"CAST(JSON_VALUE(latency_ms, '$.total_ms') AS INT64) AS total_ms",
1769+
],
17661770
"INVOCATION_STARTING": [],
17671771
"INVOCATION_COMPLETED": [],
1772+
"INVOCATION_ERROR": [],
17681773
"STATE_DELTA": [
17691774
"JSON_QUERY(attributes, '$.state_delta') AS state_delta",
17701775
],
@@ -3279,3 +3284,108 @@ async def on_tool_error_callback(
32793284
parent_span_id_override=None if has_ambient else parent_span_id,
32803285
),
32813286
)
3287+
3288+
@_safe_callback
async def on_agent_error_callback(
    self,
    *,
    agent: Any,
    callback_context: CallbackContext,
    error: Exception,
) -> None:
  """Logs an AGENT_ERROR event for an agent run that raised.

  Pops a span from TraceManager and emits an AGENT_ERROR event carrying the
  error message, the popped span's duration, and the formatted traceback.
  The after_agent_callback (AGENT_COMPLETED) is NOT called on failure, so
  the span cleanup that would otherwise happen there is done here.

  Args:
    agent: The agent instance that failed.
    callback_context: The callback context.
    error: The exception that escaped agent execution.
  """
  # Pop before querying: the first element returned by
  # get_current_span_and_parent() after the pop is used as the parent id.
  failed_span_id, elapsed = TraceManager.pop_span()
  enclosing_span_id, _ = TraceManager.get_current_span_and_parent()

  ambient_span_valid = trace.get_current_span().get_span_context().is_valid

  formatted_tb = "".join(
      traceback_module.format_exception(
          type(error), error, error.__traceback__
      )
  )
  # Cap the traceback at max_content_length characters (plus the marker).
  limit = self.config.max_content_length
  if limit and len(formatted_tb) > limit:
    formatted_tb = formatted_tb[:limit] + "... [truncated]"

  # Explicit span overrides are only supplied when no valid ambient
  # OpenTelemetry span is present.
  if ambient_span_valid:
    span_override = parent_override = None
  else:
    span_override = failed_span_id
    parent_override = enclosing_span_id

  event_data = EventData(
      status="ERROR",
      error_message=str(error),
      latency_ms=elapsed,
      span_id_override=span_override,
      parent_span_id_override=parent_override,
  )
  await self._log_event(
      "AGENT_ERROR",
      callback_context,
      event_data=event_data,
      raw_content={"error_traceback": formatted_tb},
  )
3335+
3336+
@_safe_callback
async def on_run_error_callback(
    self,
    *,
    invocation_context: "InvocationContext",
    error: Exception,
) -> None:
  """Logs an INVOCATION_ERROR event for a runner execution that raised.

  Emits the event and then performs the cleanup that after_run_callback
  (INVOCATION_COMPLETED) would normally do. Because after_run_callback is
  success-only, this callback clears the TraceManager stack, resets the
  invocation context variables, and flushes.

  Args:
    invocation_context: The context of the current invocation.
    error: The exception that escaped runner execution.
  """
  try:
    ctx = CallbackContext(invocation_context)
    # Read the trace id before the span is popped.
    trace_id = TraceManager.get_trace_id(ctx)

    failed_span_id, elapsed = TraceManager.pop_span()
    enclosing_span_id = TraceManager.get_current_span_id()

    ambient_span_valid = trace.get_current_span().get_span_context().is_valid

    formatted_tb = "".join(
        traceback_module.format_exception(
            type(error), error, error.__traceback__
        )
    )
    # Cap the traceback at max_content_length characters (plus the marker).
    limit = self.config.max_content_length
    if limit and len(formatted_tb) > limit:
      formatted_tb = formatted_tb[:limit] + "... [truncated]"

    # Explicit span overrides are only supplied when no valid ambient
    # OpenTelemetry span is present.
    if ambient_span_valid:
      span_override = parent_override = None
    else:
      span_override = failed_span_id
      parent_override = enclosing_span_id

    event_data = EventData(
        trace_id_override=trace_id,
        status="ERROR",
        error_message=str(error),
        latency_ms=elapsed,
        span_id_override=span_override,
        parent_span_id_override=parent_override,
    )
    await self._log_event(
        "INVOCATION_ERROR",
        ctx,
        event_data=event_data,
        raw_content={"error_traceback": formatted_tb},
    )
  finally:
    # Runs even when logging fails; otherwise stale invocation metadata
    # would leak into the next invocation.
    TraceManager.clear_stack()
    _active_invocation_id_ctx.set(None)
    _root_agent_name_ctx.set(None)
    await self.flush()

src/google/adk/plugins/plugin_manager.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@
5252
"after_model_callback",
5353
"on_tool_error_callback",
5454
"on_model_error_callback",
55+
"on_agent_error_callback",
56+
"on_run_error_callback",
5557
]
5658

5759
logger = logging.getLogger("google_adk." + __name__)
@@ -257,6 +259,34 @@ async def run_on_tool_error_callback(
257259
error=error,
258260
)
259261

262+
async def run_on_agent_error_callback(
    self,
    *,
    agent: BaseAgent,
    callback_context: CallbackContext,
    error: Exception,
) -> None:
  """Dispatches `on_agent_error_callback` through `_run_callbacks`.

  Whatever `_run_callbacks` returns is discarded; this method always
  returns None.

  Args:
    agent: The agent whose execution raised.
    callback_context: The callback context for the agent invocation.
    error: The exception that escaped agent execution.
  """
  callback_kwargs = {
      "agent": agent,
      "callback_context": callback_context,
      "error": error,
  }
  await self._run_callbacks("on_agent_error_callback", **callback_kwargs)
276+
277+
async def run_on_run_error_callback(
    self,
    *,
    invocation_context: InvocationContext,
    error: Exception,
) -> None:
  """Dispatches `on_run_error_callback` through `_run_callbacks`.

  Whatever `_run_callbacks` returns is discarded; this method always
  returns None.

  Args:
    invocation_context: The context for the entire invocation.
    error: The exception that escaped runner execution.
  """
  callback_kwargs = {
      "invocation_context": invocation_context,
      "error": error,
  }
  await self._run_callbacks("on_run_error_callback", **callback_kwargs)
289+
260290
async def _run_callbacks(
261291
self, callback_name: PluginCallbackName, **kwargs: Any
262292
) -> Optional[Any]:

0 commit comments

Comments
 (0)