feat: add conversation history to ToolContext

HuxleyHu98 · HuxleyHu98 · commit bd2a0037924c · 2026-03-21T13:05:54.000+08:00
diff --git a/src/agents/run_internal/tool_execution.py b/src/agents/run_internal/tool_execution.py
@@ -55,6 +55,7 @@
     RunItemBase,
     ToolApprovalItem,
     ToolCallOutputItem,
+    TResponseInputItem,
 )
 from ..logger import logger
 from ..model_settings import ModelSettings
@@ -1284,13 +1285,15 @@ def __init__(
         hooks: RunHooks[Any],
         context_wrapper: RunContextWrapper[Any],
         config: RunConfig,
+        conversation_history: list[TResponseInputItem] | None,
         isolate_parallel_failures: bool | None,
     ) -> None:
         self.agent = agent
         self.tool_runs = tool_runs
         self.hooks = hooks
         self.context_wrapper = context_wrapper
         self.config = config
+        self.conversation_history = list(conversation_history or [])
         self.isolate_parallel_failures = (
             len(tool_runs) > 1 if isolate_parallel_failures is None else isolate_parallel_failures
         )
@@ -1465,6 +1468,7 @@ async def _run_single_tool(
                 tool_namespace=tool_context_namespace,
                 agent=self.agent,
                 run_config=self.config,
+                conversation_history=self.conversation_history,
             )
             agent_hooks = self.agent.hooks
             if self.config.trace_include_sensitive_data:
@@ -1797,6 +1801,7 @@ async def execute_function_tool_calls(
     hooks: RunHooks[Any],
     context_wrapper: RunContextWrapper[Any],
     config: RunConfig,
+    conversation_history: list[TResponseInputItem] | None = None,
     isolate_parallel_failures: bool | None = None,
 ) -> tuple[
     list[FunctionToolResult], list[ToolInputGuardrailResult], list[ToolOutputGuardrailResult]
@@ -1808,6 +1813,7 @@ async def execute_function_tool_calls(
         hooks=hooks,
         context_wrapper=context_wrapper,
         config=config,
+        conversation_history=conversation_history,
         isolate_parallel_failures=isolate_parallel_failures,
     ).execute()
 
diff --git a/src/agents/run_internal/tool_planning.py b/src/agents/run_internal/tool_planning.py
@@ -20,6 +20,7 @@
     ToolApprovalItem,
     ToolCallItem,
     ToolCallOutputItem,
+    TResponseInputItem,
 )
 from ..run_context import RunContextWrapper
 from ..tool import FunctionTool, MCPToolApprovalRequest
@@ -522,6 +523,7 @@ async def _execute_tool_plan(
     hooks,
     context_wrapper: RunContextWrapper[Any],
     run_config,
+    conversation_history: list[TResponseInputItem] | None = None,
     parallel: bool = True,
 ) -> tuple[
     list[Any],
@@ -556,6 +558,7 @@ async def _execute_tool_plan(
                 hooks=hooks,
                 context_wrapper=context_wrapper,
                 config=run_config,
+                conversation_history=conversation_history,
                 isolate_parallel_failures=isolate_function_tool_failures,
             ),
             execute_computer_actions(
@@ -598,6 +601,7 @@ async def _execute_tool_plan(
             hooks=hooks,
             context_wrapper=context_wrapper,
             config=run_config,
+            conversation_history=conversation_history,
             isolate_parallel_failures=isolate_function_tool_failures,
         )
         computer_results = await execute_computer_actions(
diff --git a/src/agents/run_internal/turn_resolution.py b/src/agents/run_internal/turn_resolution.py
@@ -42,7 +42,7 @@
 from ..agent import Agent, ToolsToFinalOutputResult
 from ..agent_output import AgentOutputSchemaBase
 from ..agent_tool_state import get_agent_tool_state_scope, peek_agent_tool_run_result
-from ..exceptions import ModelBehaviorError, UserError
+from ..exceptions import AgentsException, ModelBehaviorError, UserError
 from ..handoffs import Handoff, HandoffInputData, nest_handoff_history
 from ..items import (
     CompactionItem,
@@ -153,6 +153,24 @@
 ]
 
 
+def _build_function_tool_conversation_history(
+    original_input: str | list[TResponseInputItem],
+    pre_step_items: Sequence[RunItem],
+) -> list[TResponseInputItem]:
+    """Return the history snapshot exposed to a local function tool invocation.
+
+    Items that raise ``AgentsException`` on conversion (e.g. approval placeholders) are skipped.
+    """
+    snapshot = ItemHelpers.input_to_new_input_list(original_input)
+    for run_item in pre_step_items:
+        try:
+            as_input = run_item.to_input_item()
+        except AgentsException:
+            continue
+        snapshot.append(as_input)
+    return snapshot
+
+
 async def _maybe_finalize_from_tool_results(
     *,
     agent: Agent[TContext],
@@ -528,6 +546,11 @@ async def execute_tools_and_side_effects(
         new_items=processed_response.new_items,
     )
 
+    conversation_history = _build_function_tool_conversation_history(
+        original_input,
+        pre_step_items,
+    )
+
     (
         function_results,
         tool_input_guardrail_results,
@@ -542,6 +565,7 @@ async def execute_tools_and_side_effects(
         hooks=hooks,
         context_wrapper=context_wrapper,
         run_config=run_config,
+        conversation_history=conversation_history,
     )
     new_step_items.extend(
         _build_tool_result_items(
@@ -1103,6 +1127,11 @@ def _add_unmatched_pending(approval: ToolApprovalItem) -> None:
         apply_patch_calls=approved_apply_patch_calls,
     )
 
+    conversation_history = _build_function_tool_conversation_history(
+        original_input,
+        original_pre_step_items,
+    )
+
     (
         function_results,
         tool_input_guardrail_results,
@@ -1117,6 +1146,7 @@ def _add_unmatched_pending(approval: ToolApprovalItem) -> None:
         hooks=hooks,
         context_wrapper=context_wrapper,
         run_config=run_config,
+        conversation_history=conversation_history,
     )
 
     for interruption in _collect_tool_interruptions(
diff --git a/src/agents/tool_context.py b/src/agents/tool_context.py
@@ -57,6 +57,9 @@ class ToolContext(RunContextWrapper[TContext]):
     run_config: RunConfig | None = None
     """The active run config for this tool call, when available."""
 
+    conversation_history: list[TResponseInputItem] = field(default_factory=list)
+    """Visible conversation history snapshot available when this tool is invoked."""
+
     def __init__(
         self,
         context: TContext,
@@ -69,6 +72,7 @@ def __init__(
         tool_namespace: str | None = None,
         agent: AgentBase[Any] | None = None,
         run_config: RunConfig | None = None,
+        conversation_history: list[TResponseInputItem] | None = None,
         turn_input: list[TResponseInputItem] | None = None,
         _approvals: dict[str, _ApprovalRecord] | None = None,
         tool_input: Any | None = None,
@@ -103,6 +107,7 @@ def __init__(
         )
         self.agent = agent
         self.run_config = run_config
+        self.conversation_history = list(conversation_history or [])
 
     @property
     def qualified_tool_name(self) -> str:
@@ -119,6 +124,7 @@ def from_agent_context(
         *,
         tool_namespace: str | None = None,
         run_config: RunConfig | None = None,
+        conversation_history: list[TResponseInputItem] | None = None,
     ) -> ToolContext:
         """
         Create a ToolContext from a RunContextWrapper.
@@ -155,6 +161,7 @@ def from_agent_context(
             ),
             agent=tool_agent,
             run_config=tool_run_config,
+            conversation_history=conversation_history,
             **base_values,
         )
         set_agent_tool_state_scope(tool_context, get_agent_tool_state_scope(context))
diff --git a/tests/test_agent_runner.py b/tests/test_agent_runner.py
@@ -813,6 +813,68 @@ def foo(context: ToolContext[Any]) -> str:
     assert captured_contexts[0].agent is agent
 
 
+@pytest.mark.asyncio
+async def test_tool_call_context_includes_conversation_history_snapshot() -> None:
+    """A tool invoked during a single run sees the run's user input as its history."""
+    fake_model = FakeModel()
+    seen: list[ToolContext[Any]] = []
+
+    @function_tool(name_override="foo")
+    def foo(context: ToolContext[Any]) -> str:
+        # Capture the context so it can be inspected once the run has finished.
+        seen.append(context)
+        return "tool_result"
+
+    agent = Agent(name="test", model=fake_model, tools=[foo])
+
+    # First scripted turn requests the tool; the second returns the final message.
+    fake_model.add_multiple_turn_outputs(
+        [[get_function_tool_call("foo", "{}")], [get_text_message("done")]]
+    )
+
+    result = await Runner.run(agent, input="user_message")
+
+    assert result.final_output == "done"
+    assert len(seen) == 1
+
+    # The snapshot is expected to hold just the run's original user input.
+    tool_context = seen[0]
+    expected_history = [get_text_input_item("user_message")]
+    assert tool_context.conversation_history == expected_history
+
+
+@pytest.mark.asyncio
+async def test_tool_call_context_conversation_history_includes_prior_session_turns() -> None:
+    """Tool history on a later run includes user inputs from earlier session runs."""
+    fake_model = FakeModel()
+    seen: list[ToolContext[Any]] = []
+
+    @function_tool(name_override="foo")
+    def foo(context: ToolContext[Any]) -> str:
+        seen.append(context)
+        return "tool_result"
+
+    agent = Agent(name="test", model=fake_model, tools=[foo])
+    session = SimpleListSession()
+    fake_model.add_multiple_turn_outputs(
+        [
+            [get_text_message("first_done")],
+            [get_function_tool_call("foo", "{}")],
+            [get_text_message("second_done")],
+        ]
+    )
+
+    first_result = await Runner.run(agent, input="first_user", session=session)
+    second_result = await Runner.run(agent, input="second_user", session=session)
+
+    assert first_result.final_output == "first_done"
+    assert second_result.final_output == "second_done"
+    assert len(seen) == 1
+    history = seen[0].conversation_history
+    for expected in ("first_user", "second_user"):
+        assert any(isinstance(e, dict) and e.get("content") == expected for e in history)
+
+
 @pytest.mark.asyncio
 async def test_handoffs():
     model = FakeModel()
diff --git a/tests/test_tool_context.py b/tests/test_tool_context.py
@@ -4,6 +4,7 @@
 from openai.types.responses import ResponseFunctionToolCall
 
 from agents import Agent
+from agents.items import TResponseInputItem
 from agents.run_config import RunConfig
 from agents.run_context import RunContextWrapper
 from agents.tool import FunctionTool, invoke_function_tool
@@ -51,6 +52,27 @@ def test_tool_context_from_agent_context_populates_fields() -> None:
     assert tool_ctx.agent is agent
 
 
+def test_tool_context_from_agent_context_copies_conversation_history() -> None:
+    """``from_agent_context`` stores an equal but distinct copy of the history list."""
+    call_id = "call-history"
+    function_call = ResponseFunctionToolCall(
+        type="function_call", name="test_tool", call_id=call_id, arguments="{}"
+    )
+    source_history: list[TResponseInputItem] = [{"role": "user", "content": "hello"}]
+
+    tool_ctx = ToolContext.from_agent_context(
+        make_context_wrapper(),
+        tool_call_id=call_id,
+        tool_call=function_call,
+        conversation_history=source_history,
+    )
+
+    copied = tool_ctx.conversation_history
+    # Equality checks the contents; identity checks that the list object was copied.
+    assert copied == source_history
+    assert copied is not source_history
+
+
 def test_tool_context_agent_none_by_default() -> None:
     tool_call = ResponseFunctionToolCall(
         type="function_call",