From c62ff7f1b6bc5152998a3c56880aef313a36dbe6 Mon Sep 17 00:00:00 2001 From: Aditya Singh Date: Wed, 13 May 2026 20:20:36 -0700 Subject: [PATCH] feat(agent): recover from unknown tool calls via opt-in setting When the LLM hallucinates a tool name not registered on the agent, turn_resolution previously raised ModelBehaviorError and crashed the entire run. Add an opt-in Agent.unknown_tool_behavior field with "raise" (default, preserves existing behavior) and "respond" (append a synthetic tool-call output naming the available tools and let the run continue so the model can recover). Refs #325. --- docs/agents.md | 16 ++++ src/agents/agent.py | 15 ++++ src/agents/run_internal/turn_resolution.py | 100 ++++++++++++++++++++- tests/test_run.py | 60 ++++++++++++- tests/test_run_step_processing.py | 48 ++++++++++ 5 files changed, 233 insertions(+), 6 deletions(-) diff --git a/docs/agents.md b/docs/agents.md index f1878559b2..1bbe859b5e 100644 --- a/docs/agents.md +++ b/docs/agents.md @@ -43,6 +43,7 @@ The most common properties of an agent are: | `hooks` | no | Agent-scoped lifecycle callbacks. See [Lifecycle events (hooks)](#lifecycle-events-hooks). | | `tool_use_behavior` | no | Control whether tool results loop back to the model or end the run. See [Tool use behavior](#tool-use-behavior). | | `reset_tool_choice` | no | Reset `tool_choice` after a tool call (default: `True`) to avoid tool-use loops. See [Forcing tool use](#forcing-tool-use). | +| `unknown_tool_behavior` | no | What to do when the model calls a tool that is not registered (default: `"raise"`). Set to `"respond"` to feed an error tool output back to the LLM and let the run continue. See [Recovering from unknown tool calls](#recovering-from-unknown-tool-calls). | ```python from agents import Agent, ModelSettings, function_tool @@ -423,3 +424,18 @@ agent = Agent( !!! note To prevent infinite loops, the framework automatically resets `tool_choice` to "auto" after a tool call. 
This behavior is configurable via [`agent.reset_tool_choice`][agents.agent.Agent.reset_tool_choice]. The infinite loop is because tool results are sent to the LLM, which then generates another tool call because of `tool_choice`, ad infinitum. + +## Recovering from unknown tool calls + +By default, the SDK raises [`ModelBehaviorError`][agents.exceptions.ModelBehaviorError] if the model hallucinates a tool that the agent does not expose. This is the safest behavior for development, but it can crash a long-running agent run when the model occasionally invents tool names. + +Set `unknown_tool_behavior="respond"` on the agent to recover instead. When the model calls an unknown tool, the SDK appends a synthetic tool output describing the error and the list of available tools, and lets the agent continue. The LLM sees the error on the next turn and can pick a real tool. + +```python +agent = Agent( + name="Weather Agent", + instructions="Retrieve weather details.", + tools=[get_weather], + unknown_tool_behavior="respond", +) +``` diff --git a/src/agents/agent.py b/src/agents/agent.py index 602d84066c..dff02af46e 100644 --- a/src/agents/agent.py +++ b/src/agents/agent.py @@ -368,6 +368,15 @@ class Agent(AgentBase, Generic[TContext]): """Whether to reset the tool choice to the default value after a tool has been called. Defaults to True. This ensures that the agent doesn't enter an infinite loop of tool usage.""" + unknown_tool_behavior: Literal["raise", "respond"] = "raise" + """Controls what happens when the model invokes a tool the agent does not expose. + + - ``"raise"`` (default): A `ModelBehaviorError` is raised and the run fails. + - ``"respond"``: A synthetic tool output is appended describing the error along with the list + of currently available tool names, and the agent continues running so the LLM can recover + on the next turn instead of crashing the run. 
+ """ + def __post_init__(self): from typing import get_origin @@ -484,6 +493,12 @@ def __post_init__(self): f"got {type(self.reset_tool_choice).__name__}" ) + if self.unknown_tool_behavior not in ("raise", "respond"): + raise TypeError( + f"Agent unknown_tool_behavior must be 'raise' or 'respond', " + f"got {self.unknown_tool_behavior!r}" + ) + def clone(self, **kwargs: Any) -> Agent[TContext]: """Make a copy of the agent, with the given arguments changed. Notes: diff --git a/src/agents/run_internal/turn_resolution.py b/src/agents/run_internal/turn_resolution.py index b37e27fbd4..6ffb4b4fa6 100644 --- a/src/agents/run_internal/turn_resolution.py +++ b/src/agents/run_internal/turn_resolution.py @@ -1468,6 +1468,81 @@ def _add_unmatched_pending(approval: ToolApprovalItem) -> None: ) +def _available_tool_names_for_recovery(all_tools: list[Tool]) -> list[str]: + """Collect tool names suitable for inclusion in an unknown-tool recovery message.""" + seen: set[str] = set() + names: list[str] = [] + for tool in all_tools: + name = getattr(tool, "name", None) + if not isinstance(name, str) or not name or name in seen: + continue + seen.add(name) + names.append(name) + return names + + +def _build_unknown_tool_recovery_message( + tool_name: str, + agent_name: str, + all_tools: list[Tool], +) -> str: + """Build the synthetic tool output sent back to the model after an unknown tool call.""" + available = _available_tool_names_for_recovery(all_tools) + if available: + return ( + f"Tool '{tool_name}' is not available on agent '{agent_name}'. " + f"Available tools: {', '.join(available)}." + ) + return ( + f"Tool '{tool_name}' is not available on agent '{agent_name}'. " + "No tools are currently available." 
+ ) + + +def _append_unknown_function_tool_recovery( + *, + agent: Agent[Any], + tool_call: ResponseFunctionToolCall, + items: list[RunItem], + all_tools: list[Tool], + display_name: str, +) -> None: + """Emit a synthetic function-call output so the LLM can retry instead of crashing.""" + message = _build_unknown_tool_recovery_message(display_name, agent.name, all_tools) + items.append(ToolCallItem(raw_item=tool_call, agent=agent)) + items.append( + ToolCallOutputItem( + output=message, + raw_item=ItemHelpers.tool_call_output_item(tool_call, message), + agent=agent, + ) + ) + + +def _append_unknown_custom_tool_recovery( + *, + agent: Agent[Any], + tool_call: ResponseCustomToolCall, + items: list[RunItem], + all_tools: list[Tool], +) -> None: + """Emit a synthetic custom_tool output so the LLM can retry instead of crashing.""" + message = _build_unknown_tool_recovery_message(tool_call.name, agent.name, all_tools) + items.append(ToolCallItem(raw_item=cast(Any, tool_call), agent=agent)) + output_raw: dict[str, Any] = { + "type": "custom_tool_call_output", + "call_id": tool_call.call_id, + "output": message, + } + items.append( + ToolCallOutputItem( + output=message, + raw_item=cast(Any, output_raw), + agent=agent, + ) + ) + + def process_model_response( *, agent: Agent[Any], @@ -1791,13 +1866,22 @@ def _dump_output_item(raw_item: Any) -> dict[str, Any]: "Model produced apply_patch call without an apply_patch tool." 
) else: - items.append(ToolCallItem(raw_item=cast(Any, output), agent=agent)) _error_tracing.attach_error_to_current_span( SpanError( message="Custom tool not found", data={"tool_name": output.name}, ) ) + if agent.unknown_tool_behavior == "respond": + tools_used.append(output.name) + _append_unknown_custom_tool_recovery( + agent=agent, + tool_call=output, + items=items, + all_tools=all_tools, + ) + continue + items.append(ToolCallItem(raw_item=cast(Any, output), agent=agent)) raise ModelBehaviorError(f"Tool {output.name} not found in agent {agent.name}") elif ( isinstance(output, ResponseFunctionToolCall) @@ -1873,9 +1957,17 @@ def _dump_output_item(raw_item: Any) -> dict[str, Any]: data={"tool_name": qualified_output_name or output.name}, ) ) - error = ( - f"Tool {qualified_output_name or output.name} not found in agent {agent.name}" - ) + display_name = qualified_output_name or output.name + if agent.unknown_tool_behavior == "respond": + _append_unknown_function_tool_recovery( + agent=agent, + tool_call=output, + items=items, + all_tools=all_tools, + display_name=display_name, + ) + continue + error = f"Tool {display_name} not found in agent {agent.name}" raise ModelBehaviorError(error) items.append( diff --git a/tests/test_run.py b/tests/test_run.py index 3788cab625..9913c389ec 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -4,11 +4,16 @@ import pytest -from agents import Agent, Runner +from agents import Agent, ModelBehaviorError, Runner from agents.run import AgentRunner, set_default_agent_runner from .fake_model import FakeModel -from .test_responses import get_text_input_item, get_text_message +from .test_responses import ( + get_function_tool, + get_function_tool_call, + get_text_input_item, + get_text_message, +) @pytest.mark.asyncio @@ -42,3 +47,54 @@ async def test_run_preserves_duplicate_user_messages() -> None: assert len(sent_input) == 2 assert sent_input[0]["content"] == "repeat" assert sent_input[1]["content"] == "repeat" + + 
+@pytest.mark.asyncio +async def test_unknown_tool_default_raises_model_behavior_error() -> None: + """Default Agent still raises ModelBehaviorError when the model calls a missing tool.""" + model = FakeModel() + model.add_multiple_turn_outputs( + [ + [get_function_tool_call("does_not_exist", "")], + [get_text_message("unreachable")], + ] + ) + agent = Agent(name="test", model=model, tools=[get_function_tool("known", "ok")]) + + with pytest.raises(ModelBehaviorError, match="does_not_exist"): + await Runner.run(agent, input="hello") + + +@pytest.mark.asyncio +async def test_unknown_tool_respond_lets_run_continue() -> None: + """With unknown_tool_behavior='respond', the run continues and the model can recover.""" + model = FakeModel() + model.add_multiple_turn_outputs( + [ + [get_function_tool_call("does_not_exist", "")], + [get_text_message("recovered")], + ] + ) + agent = Agent( + name="test", + model=model, + tools=[get_function_tool("known", "ok")], + unknown_tool_behavior="respond", + ) + + result = await Runner.run(agent, input="hello") + + assert result.final_output == "recovered" + # The second model turn must have been fed the synthetic recovery tool output. 
+ sent_input = model.last_turn_args["input"] + assert isinstance(sent_input, list) + function_call_outputs = [ + item + for item in sent_input + if isinstance(item, dict) and item.get("type") == "function_call_output" + ] + assert function_call_outputs, "expected a synthetic function_call_output for the unknown tool" + output_text = function_call_outputs[-1].get("output") + assert isinstance(output_text, str) + assert "does_not_exist" in output_text + assert "known" in output_text diff --git a/tests/test_run_step_processing.py b/tests/test_run_step_processing.py index 8d83193185..44f7fca1f4 100644 --- a/tests/test_run_step_processing.py +++ b/tests/test_run_step_processing.py @@ -28,6 +28,7 @@ RunHooks, RunItem, ToolCallItem, + ToolCallOutputItem, Usage, handoff, ) @@ -135,6 +136,53 @@ async def test_missing_tool_call_raises_error(): await process_response(agent=agent, response=response) +@pytest.mark.asyncio +async def test_unknown_function_tool_respond_appends_recovery_output(): + """With unknown_tool_behavior='respond', an unknown function tool yields a tool output + describing the error and the run continues instead of raising.""" + agent = Agent( + name="test", + tools=[get_function_tool(name="known_tool")], + unknown_tool_behavior="respond", + ) + response = ModelResponse( + output=[get_function_tool_call("bogus_tool", "")], + usage=Usage(), + response_id=None, + ) + + result = await process_response(agent=agent, response=response) + + # No real function run scheduled; the loop should continue and let the LLM retry. + assert not result.functions + assert not result.handoffs + # The unknown tool name is still recorded in tools_used (added before the lookup). + assert "bogus_tool" in result.tools_used + # The new items should contain a ToolCallItem for the unknown call followed by a + # ToolCallOutputItem containing the recovery message that names available tools. 
+ tool_calls = [item for item in result.new_items if isinstance(item, ToolCallItem)] + tool_outputs = [item for item in result.new_items if isinstance(item, ToolCallOutputItem)] + assert len(tool_calls) == 1 + assert len(tool_outputs) == 1 + message = tool_outputs[0].output + assert "bogus_tool" in message + assert "known_tool" in message + + +@pytest.mark.asyncio +async def test_unknown_function_tool_default_still_raises(): + """The default Agent behavior must continue to raise so existing users aren't broken.""" + agent = Agent(name="test", tools=[get_function_tool(name="known_tool")]) + response = ModelResponse( + output=[get_function_tool_call("bogus_tool", "")], + usage=Usage(), + response_id=None, + ) + + with pytest.raises(ModelBehaviorError, match="bogus_tool"): + await process_response(agent=agent, response=response) + + @pytest.mark.asyncio async def test_multiple_tool_calls(): agent = Agent(