Balance hallucinated tool callback lifecycle

giulio-leone · giulio-leone · commit b868aee609e1 · 2026-03-21T02:56:47.000+01:00
diff --git a/src/google/adk/flows/llm_flows/functions.py b/src/google/adk/flows/llm_flows/functions.py
@@ -480,6 +480,7 @@ async def _run_on_tool_error_callbacks(
       invocation_context, function_call, tool_confirmation
   )
 
+  _tool_lookup_error: Exception | None = None
   try:
     tool = _get_tool(function_call, tools_dict)
   except ValueError as tool_error:
@@ -488,9 +489,7 @@ async def _run_on_tool_error_callbacks(
     # OTel span are created *before* on_tool_error_callback fires.  This
     # keeps the callback lifecycle balanced (push/pop) and prevents plugins
     # like BigQueryAgentAnalyticsPlugin from corrupting their span stacks.
-    _tool_lookup_error: Exception = tool_error
-  else:
-    _tool_lookup_error = None
+    _tool_lookup_error = tool_error
 
   async def _run_with_trace():
     nonlocal function_args
@@ -722,13 +721,12 @@ async def _run_on_tool_error_callbacks(
 
   tool_context = _create_tool_context(invocation_context, function_call)
 
+  _tool_lookup_error: Exception | None = None
   try:
     tool = _get_tool(function_call, tools_dict)
   except ValueError as tool_error:
     tool = BaseTool(name=function_call.name, description='Tool not found')
-    _tool_lookup_error: Exception = tool_error
-  else:
-    _tool_lookup_error = None
+    _tool_lookup_error = tool_error
 
   async def _run_with_trace():
     nonlocal function_args
diff --git a/tests/unittests/flows/llm_flows/test_plugin_tool_callbacks.py b/tests/unittests/flows/llm_flows/test_plugin_tool_callbacks.py
@@ -429,9 +429,101 @@ async def test_hallucinated_tool_raises_when_no_error_callback(
       agent=agent, user_content="", plugins=[mock_plugin]
   )
 
+  function_call = types.FunctionCall(name="nonexistent_tool", args={})
+  content = types.Content(parts=[types.Part(function_call=function_call)])
+  event = Event(
+      invocation_id=invocation_context.invocation_id,
+      author=agent.name,
+      content=content,
+  )
+  tools_dict = {mock_tool.name: mock_tool}
+
+  with pytest.raises(ValueError, match="nonexistent_tool"):
+    await handle_function_calls_async(
+        invocation_context,
+        event,
+        tools_dict,
+    )
+
+
+@pytest.mark.asyncio
+async def test_hallucinated_tool_fires_before_and_error_callbacks_live(
+    mock_tool, mock_plugin
+):
+  """Live path regression test for hallucinated tool callback ordering."""
+  mock_plugin.enable_before_tool_callback = True
+  mock_plugin.enable_on_tool_error_callback = True
+
+  call_order = []
+  original_before = mock_plugin.before_tool_callback
+  original_error = mock_plugin.on_tool_error_callback
+
+  async def tracking_before(**kwargs):
+    call_order.append("before_tool")
+    return await original_before(**kwargs)
+
+  async def tracking_error(**kwargs):
+    call_order.append("on_tool_error")
+    return await original_error(**kwargs)
+
+  mock_plugin.before_tool_callback = tracking_before
+  mock_plugin.on_tool_error_callback = tracking_error
+
+  model = testing_utils.MockModel.create(responses=[])
+  agent = Agent(
+      name="agent",
+      model=model,
+      tools=[mock_tool],
+  )
+  invocation_context = await testing_utils.create_invocation_context(
+      agent=agent, user_content="", plugins=[mock_plugin]
+  )
+
   function_call = types.FunctionCall(
-      name="nonexistent_tool", args={}
+      name="hallucinated_tool_xyz", args={"query": "test"}
+  )
+  content = types.Content(parts=[types.Part(function_call=function_call)])
+  event = Event(
+      invocation_id=invocation_context.invocation_id,
+      author=agent.name,
+      content=content,
+  )
+  tools_dict = {mock_tool.name: mock_tool}
+
+  result_event = await handle_function_calls_live(
+      invocation_context,
+      event,
+      tools_dict,
+  )
+
+  assert result_event is not None
+  part = result_event.content.parts[0]
+  assert part.function_response.response == mock_plugin.on_tool_error_response
+
+  assert "before_tool" in call_order
+  assert "on_tool_error" in call_order
+  assert call_order.index("before_tool") < call_order.index("on_tool_error")
+
+
+@pytest.mark.asyncio
+async def test_hallucinated_tool_raises_when_no_error_callback_live(
+    mock_tool, mock_plugin
+):
+  """Live path should propagate ValueError for hallucinated tools."""
+  mock_plugin.enable_before_tool_callback = False
+  mock_plugin.enable_on_tool_error_callback = False
+
+  model = testing_utils.MockModel.create(responses=[])
+  agent = Agent(
+      name="agent",
+      model=model,
+      tools=[mock_tool],
   )
+  invocation_context = await testing_utils.create_invocation_context(
+      agent=agent, user_content="", plugins=[mock_plugin]
+  )
+
+  function_call = types.FunctionCall(name="nonexistent_tool", args={})
   content = types.Content(parts=[types.Part(function_call=function_call)])
   event = Event(
       invocation_id=invocation_context.invocation_id,
@@ -441,7 +533,7 @@ async def test_hallucinated_tool_raises_when_no_error_callback(
   tools_dict = {mock_tool.name: mock_tool}
 
   with pytest.raises(ValueError, match="nonexistent_tool"):
-    await handle_function_calls_async(
+    await handle_function_calls_live(
         invocation_context,
         event,
         tools_dict,