Skip to content

Commit 7cdef7c

Browse files
committed
feat(plugins): add on_agent_error_callback and on_run_error_callback lifecycle hooks
Fixes #4774

When an unhandled exception propagates out of an agent's _run_async_impl / _run_live_impl, or out of the runner's execution loop, the existing after_agent_callback / after_run_callback were silently skipped. This made fatal failures invisible to observability plugins (e.g. BigQuery analytics), inflating success rates and losing failure events entirely.

Changes:
- BasePlugin: add on_agent_error_callback(agent, callback_context, error) and on_run_error_callback(invocation_context, error) with safe no-op defaults.
- PluginManager: add run_on_agent_error_callback / run_on_run_error_callback dispatch methods backed by a new _run_error_callbacks helper that fans out to ALL plugins (no early-exit) and logs — but does not propagate — individual plugin failures.
- base_agent.py: wrap run_async / run_live generator loops in try/except; call run_on_agent_error_callback before re-raising.
- runners.py: wrap the execute_fn generator loop in try/except; call run_on_run_error_callback before re-raising. after_run_callback is intentionally skipped on the error path so plugins can distinguish clean completions from fatal failures.

Tests (30 new, all passing):
- tests/unittests/plugins/test_lifecycle_error_callbacks.py
- tests/unittests/runners/test_runner_error_callbacks.py
- tests/unittests/agents/test_agent_error_callbacks.py
1 parent f973673 commit 7cdef7c

File tree

7 files changed

+1080
-69
lines changed

7 files changed

+1080
-69
lines changed

src/google/adk/agents/base_agent.py

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -293,9 +293,20 @@ async def run_async(
293293
if ctx.end_invocation:
294294
return
295295

296-
async with Aclosing(self._run_async_impl(ctx)) as agen:
297-
async for event in agen:
298-
yield event
296+
try:
297+
async with Aclosing(self._run_async_impl(ctx)) as agen:
298+
async for event in agen:
299+
yield event
300+
except Exception as agent_error:
301+
# Notify plugins that this agent run failed before re-raising.
302+
# after_agent_callback is intentionally skipped so plugins can
303+
# distinguish a clean completion from a fatal failure.
304+
await ctx.plugin_manager.run_on_agent_error_callback(
305+
agent=self,
306+
callback_context=CallbackContext(ctx),
307+
error=agent_error,
308+
)
309+
raise
299310

300311
if ctx.end_invocation:
301312
return
@@ -326,9 +337,18 @@ async def run_live(
326337
if ctx.end_invocation:
327338
return
328339

329-
async with Aclosing(self._run_live_impl(ctx)) as agen:
330-
async for event in agen:
331-
yield event
340+
try:
341+
async with Aclosing(self._run_live_impl(ctx)) as agen:
342+
async for event in agen:
343+
yield event
344+
except Exception as agent_error:
345+
# Notify plugins that this live agent run failed before re-raising.
346+
await ctx.plugin_manager.run_on_agent_error_callback(
347+
agent=self,
348+
callback_context=CallbackContext(ctx),
349+
error=agent_error,
350+
)
351+
raise
332352

333353
if event := await self._handle_after_agent_callback(ctx):
334354
yield event

src/google/adk/plugins/base_plugin.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,3 +370,63 @@ async def on_tool_error_callback(
370370
allows the original error to be raised.
371371
"""
372372
pass
373+
374+
async def on_agent_error_callback(
375+
self,
376+
*,
377+
agent: BaseAgent,
378+
callback_context: CallbackContext,
379+
error: Exception,
380+
) -> None:
381+
"""Callback executed when an unhandled exception escapes an agent's run.
382+
383+
This callback fires when an exception propagates out of the agent's
384+
``_run_async_impl`` or ``_run_live_impl`` before ``after_agent_callback``
385+
has had a chance to execute. It is intended purely for observability
386+
(logging, metrics, tracing) — the original exception is always re-raised
387+
after all registered plugins have been notified.
388+
389+
Unlike ``on_tool_error_callback`` and ``on_model_error_callback``, this
390+
callback cannot swallow or replace the error; it always returns ``None``.
391+
392+
Args:
393+
agent: The agent instance whose execution raised the exception.
394+
callback_context: The callback context for the failed agent invocation.
395+
error: The exception that was raised.
396+
397+
Returns:
398+
None. The return value is ignored; the exception is re-raised by the
399+
framework regardless.
400+
"""
401+
pass
402+
403+
async def on_run_error_callback(
404+
self,
405+
*,
406+
invocation_context: InvocationContext,
407+
error: Exception,
408+
) -> None:
409+
"""Callback executed when an unhandled exception escapes a runner invocation.
410+
411+
This callback fires when an exception propagates out of the runner's main
412+
execution loop before ``after_run_callback`` has had a chance to execute.
413+
It is intended purely for observability (logging, metrics, tracing) — the
414+
original exception is always re-raised after all registered plugins have
415+
been notified.
416+
417+
This fills the gap where a fatal error (e.g. an unrecoverable model crash
418+
or tool exception) would otherwise cause the invocation to disappear from
419+
observability sinks without ever emitting a terminal event.
420+
421+
Unlike ``on_tool_error_callback`` and ``on_model_error_callback``, this
422+
callback cannot swallow or replace the error; it always returns ``None``.
423+
424+
Args:
425+
invocation_context: The context for the entire invocation.
426+
error: The exception that escaped the runner's execution loop.
427+
428+
Returns:
429+
None. The return value is ignored; the exception is re-raised by the
430+
framework regardless.
431+
"""
432+
pass

src/google/adk/plugins/plugin_manager.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@
5252
"after_model_callback",
5353
"on_tool_error_callback",
5454
"on_model_error_callback",
55+
"on_agent_error_callback",
56+
"on_run_error_callback",
5557
]
5658

5759
logger = logging.getLogger("google_adk." + __name__)
@@ -257,6 +259,46 @@ async def run_on_tool_error_callback(
257259
error=error,
258260
)
259261

262+
async def run_on_agent_error_callback(
263+
self,
264+
*,
265+
agent: BaseAgent,
266+
callback_context: CallbackContext,
267+
error: Exception,
268+
) -> None:
269+
"""Runs the ``on_agent_error_callback`` for all plugins.
270+
271+
All registered plugins are notified even if an earlier plugin raises —
272+
failures in individual plugins are logged but do not prevent subsequent
273+
plugins from being called. The original agent error is never suppressed
274+
by this method.
275+
"""
276+
await self._run_error_callbacks(
277+
"on_agent_error_callback",
278+
agent=agent,
279+
callback_context=callback_context,
280+
error=error,
281+
)
282+
283+
async def run_on_run_error_callback(
284+
self,
285+
*,
286+
invocation_context: InvocationContext,
287+
error: Exception,
288+
) -> None:
289+
"""Runs the ``on_run_error_callback`` for all plugins.
290+
291+
All registered plugins are notified even if an earlier plugin raises —
292+
failures in individual plugins are logged but do not prevent subsequent
293+
plugins from being called. The original runner error is never suppressed
294+
by this method.
295+
"""
296+
await self._run_error_callbacks(
297+
"on_run_error_callback",
298+
invocation_context=invocation_context,
299+
error=error,
300+
)
301+
260302
async def _run_callbacks(
261303
self, callback_name: PluginCallbackName, **kwargs: Any
262304
) -> Optional[Any]:
@@ -306,6 +348,41 @@ async def _run_callbacks(
306348

307349
return None
308350

351+
async def _run_error_callbacks(
352+
self, callback_name: PluginCallbackName, **kwargs: Any
353+
) -> None:
354+
"""Executes an error-notification callback for **all** registered plugins.
355+
356+
Unlike ``_run_callbacks``, this method does **not** stop on the first
357+
non-``None`` return value. Error callbacks are pure observers — every
358+
plugin deserves a chance to record the failure even if an earlier plugin
359+
in the chain itself encounters an error.
360+
361+
Individual plugin failures are logged but do not prevent subsequent
362+
plugins from being called, and they do not propagate to the caller. The
363+
underlying framework error that triggered this notification is always
364+
re-raised by the caller independently.
365+
366+
Args:
367+
callback_name: The name of the error callback method to execute.
368+
**kwargs: Keyword arguments forwarded to each plugin's callback.
369+
"""
370+
for plugin in self.plugins:
371+
callback_method = getattr(plugin, callback_name)
372+
try:
373+
await callback_method(**kwargs)
374+
except Exception as e:
375+
# Log but continue — a broken observability plugin must not hide the
376+
# original error from the framework or prevent other plugins from
377+
# receiving the notification.
378+
logger.error(
379+
"Error in plugin '%s' during '%s' callback: %s",
380+
plugin.name,
381+
callback_name,
382+
e,
383+
exc_info=True,
384+
)
385+
309386
async def close(self) -> None:
310387
"""Calls the close method on all registered plugins concurrently.
311388

src/google/adk/runners.py

Lines changed: 74 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -849,73 +849,84 @@ async def _exec_with_plugin(
849849
buffered_events: list[Event] = []
850850
is_transcribing: bool = False
851851

852-
async with Aclosing(execute_fn(invocation_context)) as agen:
853-
async for event in agen:
854-
_apply_run_config_custom_metadata(
855-
event, invocation_context.run_config
856-
)
857-
if is_live_call:
858-
if event.partial and _is_transcription(event):
859-
is_transcribing = True
860-
if is_transcribing and _is_tool_call_or_response(event):
861-
# only buffer function call and function response event which is
862-
# non-partial
863-
buffered_events.append(event)
864-
continue
865-
# Note for live/bidi: for audio response, it's considered as
866-
# non-partial event(event.partial=None)
867-
# event.partial=False and event.partial=None are considered as
868-
# non-partial event; event.partial=True is considered as partial
869-
# event.
870-
if event.partial is not True:
871-
if _is_transcription(event) and (
872-
_has_non_empty_transcription_text(event.input_transcription)
873-
or _has_non_empty_transcription_text(
874-
event.output_transcription
852+
try:
853+
async with Aclosing(execute_fn(invocation_context)) as agen:
854+
async for event in agen:
855+
_apply_run_config_custom_metadata(
856+
event, invocation_context.run_config
857+
)
858+
if is_live_call:
859+
if event.partial and _is_transcription(event):
860+
is_transcribing = True
861+
if is_transcribing and _is_tool_call_or_response(event):
862+
# only buffer function call and function response event which is
863+
# non-partial
864+
buffered_events.append(event)
865+
continue
866+
# Note for live/bidi: for audio response, it's considered as
867+
# non-partial event(event.partial=None)
868+
# event.partial=False and event.partial=None are considered as
869+
# non-partial event; event.partial=True is considered as partial
870+
# event.
871+
if event.partial is not True:
872+
if _is_transcription(event) and (
873+
_has_non_empty_transcription_text(event.input_transcription)
874+
or _has_non_empty_transcription_text(
875+
event.output_transcription
876+
)
877+
):
878+
# transcription end signal, append buffered events
879+
is_transcribing = False
880+
logger.debug(
881+
'Appending transcription finished event: %s', event
875882
)
876-
):
877-
# transcription end signal, append buffered events
878-
is_transcribing = False
879-
logger.debug(
880-
'Appending transcription finished event: %s', event
883+
if self._should_append_event(event, is_live_call):
884+
await self.session_service.append_event(
885+
session=session, event=event
886+
)
887+
888+
for buffered_event in buffered_events:
889+
logger.debug('Appending buffered event: %s', buffered_event)
890+
await self.session_service.append_event(
891+
session=session, event=buffered_event
892+
)
893+
yield buffered_event # yield buffered events to caller
894+
buffered_events = []
895+
else:
896+
# non-transcription event or empty transcription event, for
897+
# example, event that stores blob reference, should be appended.
898+
if self._should_append_event(event, is_live_call):
899+
logger.debug('Appending non-buffered event: %s', event)
900+
await self.session_service.append_event(
901+
session=session, event=event
902+
)
903+
else:
904+
if event.partial is not True:
905+
await self.session_service.append_event(
906+
session=session, event=event
881907
)
882-
if self._should_append_event(event, is_live_call):
883-
await self.session_service.append_event(
884-
session=session, event=event
885-
)
886-
887-
for buffered_event in buffered_events:
888-
logger.debug('Appending buffered event: %s', buffered_event)
889-
await self.session_service.append_event(
890-
session=session, event=buffered_event
891-
)
892-
yield buffered_event # yield buffered events to caller
893-
buffered_events = []
894-
else:
895-
# non-transcription event or empty transcription event, for
896-
# example, event that stores blob reference, should be appended.
897-
if self._should_append_event(event, is_live_call):
898-
logger.debug('Appending non-buffered event: %s', event)
899-
await self.session_service.append_event(
900-
session=session, event=event
901-
)
902-
else:
903-
if event.partial is not True:
904-
await self.session_service.append_event(
905-
session=session, event=event
906-
)
907908

908-
# Step 3: Run the on_event callbacks to optionally modify the event.
909-
modified_event = await plugin_manager.run_on_event_callback(
910-
invocation_context=invocation_context, event=event
911-
)
912-
if modified_event:
913-
_apply_run_config_custom_metadata(
914-
modified_event, invocation_context.run_config
909+
# Step 3: Run the on_event callbacks to optionally modify the event.
910+
modified_event = await plugin_manager.run_on_event_callback(
911+
invocation_context=invocation_context, event=event
915912
)
916-
yield modified_event
917-
else:
918-
yield event
913+
if modified_event:
914+
_apply_run_config_custom_metadata(
915+
modified_event, invocation_context.run_config
916+
)
917+
yield modified_event
918+
else:
919+
yield event
920+
except Exception as run_error:
921+
# Step 3b: Notify all plugins that this invocation failed.
922+
# The callback is fire-and-forget — it cannot suppress the error.
923+
# after_run_callback is intentionally skipped on the error path so
924+
# that plugins can distinguish clean completions from fatal failures.
925+
await plugin_manager.run_on_run_error_callback(
926+
invocation_context=invocation_context,
927+
error=run_error,
928+
)
929+
raise
919930

920931
# Step 4: Run the after_run callbacks to perform global cleanup tasks or
921932
# finalizing logs and metrics data.

0 commit comments

Comments
 (0)