feat(agent): recover from unknown tool calls via opt-in unknown_tool_behavior setting

adityasingh2400 · adityasingh2400 · commit c62ff7f1b6bc · 2026-05-13T20:20:36.000-07:00
When the LLM hallucinates a tool name not registered on the agent, turn_resolution previously raised ModelBehaviorError and crashed the entire run. Add an opt-in Agent.unknown_tool_behavior field with "raise" (default, preserves existing behavior) and "respond" (append a synthetic tool-call output naming the available tools and let the run continue so the model can recover). Refs #325.
diff --git a/docs/agents.md b/docs/agents.md
@@ -43,6 +43,7 @@ The most common properties of an agent are:
 | `hooks` | no | Agent-scoped lifecycle callbacks. See [Lifecycle events (hooks)](#lifecycle-events-hooks). |
 | `tool_use_behavior` | no | Control whether tool results loop back to the model or end the run. See [Tool use behavior](#tool-use-behavior). |
 | `reset_tool_choice` | no | Reset `tool_choice` after a tool call (default: `True`) to avoid tool-use loops. See [Forcing tool use](#forcing-tool-use). |
+| `unknown_tool_behavior` | no | What to do when the model calls a tool that is not registered (default: `"raise"`). Set to `"respond"` to feed an error tool output back to the LLM and let the run continue. See [Recovering from unknown tool calls](#recovering-from-unknown-tool-calls). |
 
 ```python
 from agents import Agent, ModelSettings, function_tool
@@ -423,3 +424,18 @@ agent = Agent(
 !!! note
 
     To prevent infinite loops, the framework automatically resets `tool_choice` to "auto" after a tool call. This behavior is configurable via [`agent.reset_tool_choice`][agents.agent.Agent.reset_tool_choice]. The infinite loop is because tool results are sent to the LLM, which then generates another tool call because of `tool_choice`, ad infinitum.
+
+## Recovering from unknown tool calls
+
+By default, the SDK raises [`ModelBehaviorError`][agents.exceptions.ModelBehaviorError] if the model hallucinates a tool that the agent does not expose. This is the safest behavior for development, but it can crash a long-running agent run when the model occasionally invents tool names.
+
+Set `unknown_tool_behavior="respond"` on the agent to recover instead. When the model calls an unknown tool, the SDK appends a synthetic tool output describing the error and the list of available tools, and lets the agent continue. The LLM sees the error on the next turn and can pick a real tool.
+
+```python
+agent = Agent(
+    name="Weather Agent",
+    instructions="Retrieve weather details.",
+    tools=[get_weather],
+    unknown_tool_behavior="respond",
+)
+```
diff --git a/src/agents/agent.py b/src/agents/agent.py
@@ -368,6 +368,15 @@ class Agent(AgentBase, Generic[TContext]):
     """Whether to reset the tool choice to the default value after a tool has been called. Defaults
     to True. This ensures that the agent doesn't enter an infinite loop of tool usage."""
 
+    unknown_tool_behavior: Literal["raise", "respond"] = "raise"
+    """Controls what happens when the model invokes a tool the agent does not expose.
+
+    - ``"raise"`` (default): A `ModelBehaviorError` is raised, matching prior behavior.
+    - ``"respond"``: A synthetic tool output is appended describing the error along with the list
+      of currently available tool names, and the agent continues running so the LLM can recover
+      on the next turn instead of crashing the run.
+    """
+
     def __post_init__(self):
         from typing import get_origin
 
@@ -484,6 +493,12 @@ def __post_init__(self):
                 f"got {type(self.reset_tool_choice).__name__}"
             )
 
+        if self.unknown_tool_behavior not in ("raise", "respond"):
+            raise TypeError(
+                f"Agent unknown_tool_behavior must be 'raise' or 'respond', "
+                f"got {self.unknown_tool_behavior!r}"
+            )
+
     def clone(self, **kwargs: Any) -> Agent[TContext]:
         """Make a copy of the agent, with the given arguments changed.
         Notes:
diff --git a/src/agents/run_internal/turn_resolution.py b/src/agents/run_internal/turn_resolution.py
@@ -1468,6 +1468,81 @@ def _add_unmatched_pending(approval: ToolApprovalItem) -> None:
     )
 
 
+def _available_tool_names_for_recovery(all_tools: list[Tool]) -> list[str]:
+    """Return each tool's usable ``name`` exactly once, in first-seen order.
+
+    Entries whose ``name`` attribute is missing, not a ``str``, or empty are skipped so
+    the result can be joined straight into an unknown-tool recovery message.
+    """
+    candidates = (getattr(tool, "name", None) for tool in all_tools)
+    usable = (name for name in candidates if isinstance(name, str) and name)
+    # dict.fromkeys drops repeats while keeping the order of first appearance.
+    unique_in_order = dict.fromkeys(usable)
+    return list(unique_in_order)
+
+
+def _build_unknown_tool_recovery_message(
+    tool_name: str,
+    agent_name: str,
+    all_tools: list[Tool],
+) -> str:
+    """Compose the error text returned to the model in place of a real tool result.
+
+    The text names the rejected tool and the agent, then lists the tool names found on
+    ``all_tools``, or states that none are available when that list is empty.
+    """
+    names = _available_tool_names_for_recovery(all_tools)
+    prefix = f"Tool '{tool_name}' is not available on agent '{agent_name}'. "
+    # Without any usable names there is nothing to list for the model.
+    if not names:
+        return prefix + "No tools are currently available."
+    return prefix + f"Available tools: {', '.join(names)}."
+
+
+def _append_unknown_function_tool_recovery(
+    *,
+    agent: Agent[Any],
+    tool_call: ResponseFunctionToolCall,
+    items: list[RunItem],
+    all_tools: list[Tool],
+    display_name: str,
+) -> None:
+    """Record an unknown function call on ``items`` together with an error output for it.
+
+    Appends a ``ToolCallItem`` for ``tool_call`` followed by a ``ToolCallOutputItem`` whose
+    output is the recovery message built for ``display_name``. Mutates ``items`` in place.
+    """
+    recovery_text = _build_unknown_tool_recovery_message(display_name, agent.name, all_tools)
+    items.append(ToolCallItem(raw_item=tool_call, agent=agent))
+    raw_output = ItemHelpers.tool_call_output_item(tool_call, recovery_text)
+    recovery_item = ToolCallOutputItem(output=recovery_text, raw_item=raw_output, agent=agent)
+    items.append(recovery_item)
+
+
+def _append_unknown_custom_tool_recovery(
+    *,
+    agent: Agent[Any],
+    tool_call: ResponseCustomToolCall,
+    items: list[RunItem],
+    all_tools: list[Tool],
+) -> None:
+    """Record an unknown custom tool call on ``items`` together with an error output for it.
+
+    Appends a ``ToolCallItem`` then a ``ToolCallOutputItem``; ``items`` is mutated in place.
+    """
+    recovery_text = _build_unknown_tool_recovery_message(tool_call.name, agent.name, all_tools)
+    items.append(ToolCallItem(raw_item=cast(Any, tool_call), agent=agent))
+    # The output payload is assembled by hand as a plain dict carrying the call id.
+    raw_output: dict[str, Any] = {
+        "type": "custom_tool_call_output",
+        "call_id": tool_call.call_id,
+        "output": recovery_text,
+    }
+    items.append(
+        ToolCallOutputItem(output=recovery_text, raw_item=cast(Any, raw_output), agent=agent)
+    )
+
+
 def process_model_response(
     *,
     agent: Agent[Any],
@@ -1791,13 +1866,22 @@ def _dump_output_item(raw_item: Any) -> dict[str, Any]:
                         "Model produced apply_patch call without an apply_patch tool."
                     )
             else:
-                items.append(ToolCallItem(raw_item=cast(Any, output), agent=agent))
                 _error_tracing.attach_error_to_current_span(
                     SpanError(
                         message="Custom tool not found",
                         data={"tool_name": output.name},
                     )
                 )
+                if agent.unknown_tool_behavior == "respond":
+                    tools_used.append(output.name)
+                    _append_unknown_custom_tool_recovery(
+                        agent=agent,
+                        tool_call=output,
+                        items=items,
+                        all_tools=all_tools,
+                    )
+                    continue
+                items.append(ToolCallItem(raw_item=cast(Any, output), agent=agent))
                 raise ModelBehaviorError(f"Tool {output.name} not found in agent {agent.name}")
         elif (
             isinstance(output, ResponseFunctionToolCall)
@@ -1873,9 +1957,17 @@ def _dump_output_item(raw_item: Any) -> dict[str, Any]:
                         data={"tool_name": qualified_output_name or output.name},
                     )
                 )
-                error = (
-                    f"Tool {qualified_output_name or output.name} not found in agent {agent.name}"
-                )
+                display_name = qualified_output_name or output.name
+                if agent.unknown_tool_behavior == "respond":
+                    _append_unknown_function_tool_recovery(
+                        agent=agent,
+                        tool_call=output,
+                        items=items,
+                        all_tools=all_tools,
+                        display_name=display_name,
+                    )
+                    continue
+                error = f"Tool {display_name} not found in agent {agent.name}"
                 raise ModelBehaviorError(error)
 
             items.append(
diff --git a/tests/test_run.py b/tests/test_run.py
@@ -4,11 +4,16 @@
 
 import pytest
 
-from agents import Agent, Runner
+from agents import Agent, ModelBehaviorError, Runner
 from agents.run import AgentRunner, set_default_agent_runner
 
 from .fake_model import FakeModel
-from .test_responses import get_text_input_item, get_text_message
+from .test_responses import (
+    get_function_tool,
+    get_function_tool_call,
+    get_text_input_item,
+    get_text_message,
+)
 
 
 @pytest.mark.asyncio
@@ -42,3 +47,54 @@ async def test_run_preserves_duplicate_user_messages() -> None:
     assert len(sent_input) == 2
     assert sent_input[0]["content"] == "repeat"
     assert sent_input[1]["content"] == "repeat"
+
+
+@pytest.mark.asyncio
+async def test_unknown_tool_default_raises_model_behavior_error() -> None:
+    """Without opting in, a call to an unregistered tool aborts the run with an error."""
+    fake_model = FakeModel()
+    # Turn one calls a tool the agent lacks; the second turn is only a placeholder.
+    scripted_turns = [
+        [get_function_tool_call("does_not_exist", "")],
+        [get_text_message("unreachable")],
+    ]
+    fake_model.add_multiple_turn_outputs(scripted_turns)
+    strict_agent = Agent(name="test", model=fake_model, tools=[get_function_tool("known", "ok")])
+
+    with pytest.raises(ModelBehaviorError, match="does_not_exist"):
+        await Runner.run(strict_agent, input="hello")
+
+
+@pytest.mark.asyncio
+async def test_unknown_tool_respond_lets_run_continue() -> None:
+    """Opting in to 'respond' turns an unknown tool call into a recoverable turn."""
+    fake_model = FakeModel()
+    # Turn one calls a tool the agent lacks; turn two is the model's recovery reply.
+    scripted_turns = [
+        [get_function_tool_call("does_not_exist", "")],
+        [get_text_message("recovered")],
+    ]
+    fake_model.add_multiple_turn_outputs(scripted_turns)
+    lenient_agent = Agent(
+        name="test",
+        model=fake_model,
+        tools=[get_function_tool("known", "ok")],
+        unknown_tool_behavior="respond",
+    )
+
+    run_result = await Runner.run(lenient_agent, input="hello")
+
+    assert run_result.final_output == "recovered"
+    # Inspect what the model received on its final turn: it has to include the
+    # synthetic output that reports the unknown tool.
+    second_turn_input = fake_model.last_turn_args["input"]
+    assert isinstance(second_turn_input, list)
+    synthetic_outputs = []
+    for entry in second_turn_input:
+        if isinstance(entry, dict) and entry.get("type") == "function_call_output":
+            synthetic_outputs.append(entry)
+    assert synthetic_outputs, "expected a synthetic function_call_output for the unknown tool"
+    recovery_text = synthetic_outputs[-1].get("output")
+    assert isinstance(recovery_text, str)
+    assert "does_not_exist" in recovery_text
+    assert "known" in recovery_text
diff --git a/tests/test_run_step_processing.py b/tests/test_run_step_processing.py
@@ -28,6 +28,7 @@
     RunHooks,
     RunItem,
     ToolCallItem,
+    ToolCallOutputItem,
     Usage,
     handoff,
 )
@@ -135,6 +136,53 @@ async def test_missing_tool_call_raises_error():
         await process_response(agent=agent, response=response)
 
 
+@pytest.mark.asyncio
+async def test_unknown_function_tool_respond_appends_recovery_output():
+    """In 'respond' mode an unknown function call is answered with a synthetic tool output
+    and processing returns normally rather than raising."""
+    lenient_agent = Agent(
+        name="test",
+        tools=[get_function_tool(name="known_tool")],
+        unknown_tool_behavior="respond",
+    )
+    bogus_call = get_function_tool_call("bogus_tool", "")
+    model_response = ModelResponse(output=[bogus_call], usage=Usage(), response_id=None)
+
+    processed = await process_response(agent=lenient_agent, response=model_response)
+
+    # Nothing was scheduled to execute: no function runs and no handoffs.
+    assert not processed.functions
+    assert not processed.handoffs
+    # The rejected name is nevertheless reported among the tools used.
+    assert "bogus_tool" in processed.tools_used
+    # Expect exactly one call item and one output item; the output names both tools.
+    call_items: list[ToolCallItem] = []
+    output_items: list[ToolCallOutputItem] = []
+    for new_item in processed.new_items:
+        if isinstance(new_item, ToolCallItem):
+            call_items.append(new_item)
+        if isinstance(new_item, ToolCallOutputItem):
+            output_items.append(new_item)
+    assert len(call_items) == 1
+    assert len(output_items) == 1
+    assert "bogus_tool" in output_items[0].output
+    assert "known_tool" in output_items[0].output
+
+
+@pytest.mark.asyncio
+async def test_unknown_function_tool_default_still_raises():
+    """An agent that leaves ``unknown_tool_behavior`` unset rejects unknown function calls.
+
+    Guards the pre-existing contract: processing raises ``ModelBehaviorError``.
+    """
+    strict_agent = Agent(name="test", tools=[get_function_tool(name="known_tool")])
+    bogus_call = get_function_tool_call("bogus_tool", "")
+    model_response = ModelResponse(output=[bogus_call], usage=Usage(), response_id=None)
+
+    with pytest.raises(ModelBehaviorError, match="bogus_tool"):
+        await process_response(agent=strict_agent, response=model_response)
+
+
 @pytest.mark.asyncio
 async def test_multiple_tool_calls():
     agent = Agent(