Skip to content

Commit bed95cb

Browse files
caohy1988claude
andcommitted
feat(plugins): add on_agent_error_callback and on_run_error_callback
Merges PRs #5045 and #5047 onto latest main, resolving conflicts. Implements RFC #5044 to close the error callback coverage gap: - Add on_agent_error_callback and on_run_error_callback to BasePlugin - Wire try/except in BaseAgent.run_async(), run_live(), and Runner._exec_with_plugin() - Add _run_notification_callbacks to PluginManager (best-effort: logs and continues on plugin errors, never masks app exceptions) - Add AGENT_ERROR and INVOCATION_ERROR event types to BQAA plugin with traceback capture and TraceManager cleanup - Add v_agent_error and v_invocation_error views with error_traceback column - Keep after_agent_callback and after_run_callback as success-only - Catch Exception, not BaseException Fixes #4863, implements #5044 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 2d61cb6 commit bed95cb

6 files changed

Lines changed: 922 additions & 73 deletions

File tree

src/google/adk/agents/base_agent.py

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -291,9 +291,13 @@ async def run_async(
291291
if ctx.end_invocation:
292292
return
293293

294-
async with Aclosing(self._run_async_impl(ctx)) as agen:
295-
async for event in agen:
296-
yield event
294+
try:
295+
async with Aclosing(self._run_async_impl(ctx)) as agen:
296+
async for event in agen:
297+
yield event
298+
except Exception as e:
299+
await self._handle_agent_error_callback(ctx, e)
300+
raise
297301

298302
if ctx.end_invocation:
299303
return
@@ -323,9 +327,13 @@ async def run_live(
323327
if ctx.end_invocation:
324328
return
325329

326-
async with Aclosing(self._run_live_impl(ctx)) as agen:
327-
async for event in agen:
328-
yield event
330+
try:
331+
async with Aclosing(self._run_live_impl(ctx)) as agen:
332+
async for event in agen:
333+
yield event
334+
except Exception as e:
335+
await self._handle_agent_error_callback(ctx, e)
336+
raise
329337

330338
if event := await self._handle_after_agent_callback(ctx):
331339
yield event
@@ -545,6 +553,27 @@ async def _handle_after_agent_callback(
545553
)
546554
return None
547555

556+
async def _handle_agent_error_callback(
557+
self,
558+
invocation_context: InvocationContext,
559+
error: Exception,
560+
) -> None:
561+
"""Runs the on_agent_error_callback for all plugins.
562+
563+
This is notification-only: the exception is always re-raised by
564+
the caller after this method returns.
565+
566+
Args:
567+
invocation_context: The invocation context for this agent.
568+
error: The exception that escaped agent execution.
569+
"""
570+
callback_context = CallbackContext(invocation_context)
571+
await invocation_context.plugin_manager.run_on_agent_error_callback(
572+
agent=self,
573+
callback_context=callback_context,
574+
error=error,
575+
)
576+
548577
@override
549578
def model_post_init(self, __context: Any) -> None:
550579
self.__set_parent_agent_for_sub_agents()

src/google/adk/plugins/base_plugin.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,3 +370,41 @@ async def on_tool_error_callback(
370370
allows the original error to be raised.
371371
"""
372372
pass
373+
374+
async def on_agent_error_callback(
375+
self,
376+
*,
377+
agent: BaseAgent,
378+
callback_context: CallbackContext,
379+
error: Exception,
380+
) -> None:
381+
"""Callback executed when an unhandled exception escapes agent execution.
382+
383+
This is a notification-only callback. The exception is always re-raised
384+
after all registered plugins have been notified. Plugins should NOT
385+
suppress the exception.
386+
387+
Args:
388+
agent: The agent instance that encountered the error.
389+
callback_context: The callback context for the agent invocation.
390+
error: The exception that was raised during agent execution.
391+
"""
392+
pass
393+
394+
async def on_run_error_callback(
395+
self,
396+
*,
397+
invocation_context: InvocationContext,
398+
error: Exception,
399+
) -> None:
400+
"""Callback executed when an unhandled exception escapes runner execution.
401+
402+
This is a notification-only callback. The exception is always re-raised
403+
after all registered plugins have been notified. Plugins should NOT
404+
suppress the exception.
405+
406+
Args:
407+
invocation_context: The context for the entire invocation.
408+
error: The exception that was raised during runner execution.
409+
"""
410+
pass

src/google/adk/plugins/bigquery_agent_analytics_plugin.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import logging
2929
import mimetypes
3030
import os
31+
import traceback as traceback_module
3132

3233
# Enable gRPC fork support so child processes created via os.fork()
3334
# can safely create new gRPC channels. Must be set before grpc's
@@ -1870,8 +1871,15 @@ def _get_events_schema() -> list[bigquery.SchemaField]:
18701871
"AGENT_COMPLETED": [
18711872
"CAST(JSON_VALUE(latency_ms, '$.total_ms') AS INT64) AS total_ms",
18721873
],
1874+
"AGENT_ERROR": [
1875+
"CAST(JSON_VALUE(latency_ms, '$.total_ms') AS INT64) AS total_ms",
1876+
"JSON_VALUE(content, '$.error_traceback') AS error_traceback",
1877+
],
18731878
"INVOCATION_STARTING": [],
18741879
"INVOCATION_COMPLETED": [],
1880+
"INVOCATION_ERROR": [
1881+
"JSON_VALUE(content, '$.error_traceback') AS error_traceback",
1882+
],
18751883
"STATE_DELTA": [
18761884
"JSON_QUERY(attributes, '$.state_delta') AS state_delta",
18771885
],
@@ -3505,3 +3513,98 @@ async def on_tool_error_callback(
35053513
parent_span_id_override=parent_span_id,
35063514
),
35073515
)
3516+
3517+
@_safe_callback
3518+
async def on_agent_error_callback(
3519+
self,
3520+
*,
3521+
agent: Any,
3522+
callback_context: CallbackContext,
3523+
error: Exception,
3524+
) -> None:
3525+
"""Callback when an agent execution fails with an unhandled exception.
3526+
3527+
Emits an AGENT_ERROR event and pops the agent span from
3528+
TraceManager.
3529+
3530+
Args:
3531+
agent: The agent instance that failed.
3532+
callback_context: The callback context.
3533+
error: The exception that escaped agent execution.
3534+
"""
3535+
span_id, duration = TraceManager.pop_span()
3536+
parent_span_id, _ = TraceManager.get_current_span_and_parent()
3537+
3538+
error_tb = "".join(
3539+
traceback_module.format_exception(
3540+
type(error), error, error.__traceback__
3541+
)
3542+
)
3543+
max_len = self.config.max_content_length
3544+
if max_len and len(error_tb) > max_len:
3545+
error_tb = error_tb[:max_len] + "... [truncated]"
3546+
3547+
await self._log_event(
3548+
"AGENT_ERROR",
3549+
callback_context,
3550+
event_data=EventData(
3551+
status="ERROR",
3552+
error_message=str(error),
3553+
latency_ms=duration,
3554+
span_id_override=span_id,
3555+
parent_span_id_override=parent_span_id,
3556+
),
3557+
raw_content={"error_traceback": error_tb},
3558+
)
3559+
3560+
@_safe_callback
3561+
async def on_run_error_callback(
3562+
self,
3563+
*,
3564+
invocation_context: "InvocationContext",
3565+
error: Exception,
3566+
) -> None:
3567+
"""Callback when a runner execution fails with an unhandled exception.
3568+
3569+
Emits an INVOCATION_ERROR event and performs the cleanup that
3570+
after_run_callback would normally do.
3571+
3572+
Args:
3573+
invocation_context: The context of the current invocation.
3574+
error: The exception that escaped runner execution.
3575+
"""
3576+
try:
3577+
callback_ctx = CallbackContext(invocation_context)
3578+
trace_id = TraceManager.get_trace_id(callback_ctx)
3579+
3580+
span_id, duration = TraceManager.pop_span()
3581+
parent_span_id = TraceManager.get_current_span_id()
3582+
3583+
error_tb = "".join(
3584+
traceback_module.format_exception(
3585+
type(error), error, error.__traceback__
3586+
)
3587+
)
3588+
max_len = self.config.max_content_length
3589+
if max_len and len(error_tb) > max_len:
3590+
error_tb = error_tb[:max_len] + "... [truncated]"
3591+
3592+
await self._log_event(
3593+
"INVOCATION_ERROR",
3594+
callback_ctx,
3595+
event_data=EventData(
3596+
trace_id_override=trace_id,
3597+
status="ERROR",
3598+
error_message=str(error),
3599+
latency_ms=duration,
3600+
span_id_override=span_id,
3601+
parent_span_id_override=parent_span_id,
3602+
),
3603+
raw_content={"error_traceback": error_tb},
3604+
)
3605+
finally:
3606+
# Cleanup must run even if _log_event raises.
3607+
TraceManager.clear_stack()
3608+
_active_invocation_id_ctx.set(None)
3609+
_root_agent_name_ctx.set(None)
3610+
await self.flush()

src/google/adk/plugins/plugin_manager.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@
5252
"after_model_callback",
5353
"on_tool_error_callback",
5454
"on_model_error_callback",
55+
"on_agent_error_callback",
56+
"on_run_error_callback",
5557
]
5658

5759
logger = logging.getLogger("google_adk." + __name__)
@@ -306,6 +308,61 @@ async def _run_callbacks(
306308

307309
return None
308310

311+
async def run_on_agent_error_callback(
312+
self,
313+
*,
314+
agent: BaseAgent,
315+
callback_context: CallbackContext,
316+
error: Exception,
317+
) -> None:
318+
"""Runs the `on_agent_error_callback` for all plugins."""
319+
await self._run_notification_callbacks(
320+
"on_agent_error_callback",
321+
agent=agent,
322+
callback_context=callback_context,
323+
error=error,
324+
)
325+
326+
async def run_on_run_error_callback(
327+
self,
328+
*,
329+
invocation_context: InvocationContext,
330+
error: Exception,
331+
) -> None:
332+
"""Runs the `on_run_error_callback` for all plugins."""
333+
await self._run_notification_callbacks(
334+
"on_run_error_callback",
335+
invocation_context=invocation_context,
336+
error=error,
337+
)
338+
339+
async def _run_notification_callbacks(
340+
self, callback_name: PluginCallbackName, **kwargs: Any
341+
) -> None:
342+
"""Executes a notification-only callback for all registered plugins.
343+
344+
Unlike ``_run_callbacks``, this method is best-effort: it always
345+
iterates all plugins regardless of return values or exceptions.
346+
If a plugin's callback raises, the error is logged and iteration
347+
continues so that every plugin gets notified.
348+
349+
Args:
350+
callback_name: The name of the callback method to execute.
351+
**kwargs: Keyword arguments to be passed to the callback method.
352+
"""
353+
for plugin in self.plugins:
354+
callback_method = getattr(plugin, callback_name)
355+
try:
356+
await callback_method(**kwargs)
357+
except Exception as e:
358+
logger.error(
359+
"Error in plugin '%s' during '%s' callback: %s",
360+
plugin.name,
361+
callback_name,
362+
e,
363+
exc_info=True,
364+
)
365+
309366
async def close(self) -> None:
310367
"""Calls the close method on all registered plugins concurrently.
311368

0 commit comments

Comments
 (0)