feat: add built-in tool support for OpenAI Responses API (#2011)

pgrayy · web-flow · commit de9b14989505 · 2026-03-31T11:10:45.000-04:00
diff --git a/src/strands/models/openai_responses.py b/src/strands/models/openai_responses.py
@@ -1,6 +1,19 @@
 """OpenAI model provider using the Responses API.
 
-Note: Built-in tools (web search, code interpreter, file search) are not yet supported.
+Built-in tools (e.g. web_search, file_search, code_interpreter) can be passed via the
+``params`` configuration and will be merged with any agent function tools in the request.
+
+All built-in tools produce text responses that stream correctly. Limitations on tool-specific
+metadata:
+
+- web_search (supported): Full support including URL citations.
+- file_search (partial): File citation annotations not emitted (no matching CitationLocation variant).
+- code_interpreter (partial): Executed code and stdout/stderr not surfaced.
+- mcp (partial): Approval flow and ``mcp_list_tools``/``mcp_call`` events not surfaced.
+- shell (partial): Local (client-executed) mode not supported.
+- tool_search (not supported): Requires ``defer_loading`` on function tools, which is not supported.
+- image_generation (not supported): Requires image content block delta support in the event loop.
+- computer_use_preview (not supported): Requires a developer-managed screenshot/action loop.
 
 Docs: https://platform.openai.com/docs/api-reference/responses
 """
@@ -40,6 +53,7 @@
 
 import openai  # noqa: E402 - must import after version check
 
+from ..types.citations import WebLocationDict  # noqa: E402
 from ..types.content import ContentBlock, Messages, Role  # noqa: E402
 from ..types.exceptions import ContextWindowOverflowException, ModelThrottledException  # noqa: E402
 from ..types.streaming import StreamEvent  # noqa: E402
@@ -103,12 +117,7 @@ def responses(self) -> Any:
 
 
 class OpenAIResponsesModel(Model):
-    """OpenAI Responses API model provider implementation.
-
-    Note:
-        This implementation currently only supports function tools (custom tools defined via tool_specs).
-        OpenAI's built-in system tools are not yet supported.
-    """
+    """OpenAI Responses API model provider implementation."""
 
     client: Client
     client_args: dict[str, Any]
@@ -255,6 +264,22 @@ async def stream(
                                     {"chunk_type": "content_delta", "data_type": "text", "data": event.delta}
                                 )
 
+                        elif event.type == "response.output_text.annotation.added":
+                            if hasattr(event, "annotation"):
+                                if event.annotation.get("type") == "url_citation":
+                                    yield self._format_chunk(
+                                        {
+                                            "chunk_type": "content_delta",
+                                            "data_type": "citation",
+                                            "data": event.annotation,
+                                        }
+                                    )
+                                else:
+                                    logger.warning(
+                                        "annotation_type=<%s> | unsupported annotation type",
+                                        event.annotation.get("type"),
+                                    )
+
                         elif event.type == "response.output_item.added":
                             # Tool call started
                             if (
@@ -431,16 +456,16 @@ def _format_request(
 
         # Add tools if provided
         if tool_specs:
-            request["tools"] = [
+            # Merge with any built-in tools (e.g. web_search) already in the request from params
+            request.setdefault("tools", []).extend(
                 {
                     "type": "function",
                     "name": tool_spec["name"],
                     "description": tool_spec.get("description", ""),
                     "parameters": tool_spec["inputSchema"]["json"],
                 }
                 for tool_spec in tool_specs
-            ]
-            # Add tool_choice if provided
+            )
             request.update(self._format_request_tool_choice(tool_choice))
 
         return request
@@ -550,6 +575,11 @@ def _format_request_message_content(cls, content: ContentBlock, *, role: Role =
             text_type = "output_text" if role == "assistant" else "input_text"
             return {"type": text_type, "text": content["text"]}
 
+        if "citationsContent" in content:
+            text = "".join(c["text"] for c in content["citationsContent"].get("content", []) if "text" in c)
+            text_type = "output_text" if role == "assistant" else "input_text"
+            return {"type": text_type, "text": text}
+
         raise TypeError(f"content_type=<{next(iter(content))}> | unsupported type")
 
     @classmethod
@@ -680,6 +710,19 @@ def _format_chunk(self, event: dict[str, Any]) -> StreamEvent:
                 if event["data_type"] == "reasoning_content":
                     return {"contentBlockDelta": {"delta": {"reasoningContent": {"text": event["data"]}}}}
 
+                if event["data_type"] == "citation":
+                    web_location: WebLocationDict = {"web": {"url": event["data"].get("url", "")}}
+                    return {
+                        "contentBlockDelta": {
+                            "delta": {
+                                "citation": {
+                                    "title": event["data"].get("title", ""),
+                                    "location": web_location,
+                                }
+                            }
+                        }
+                    }
+
                 return {"contentBlockDelta": {"delta": {"text": event["data"]}}}
 
             case "content_stop":
diff --git a/tests/strands/models/test_openai_responses.py b/tests/strands/models/test_openai_responses.py
@@ -394,6 +394,19 @@ def test_format_request(model, messages, tool_specs, system_prompt):
             {"chunk_type": "content_delta", "data_type": "reasoning_content", "data": "I'm thinking"},
             {"contentBlockDelta": {"delta": {"reasoningContent": {"text": "I'm thinking"}}}},
         ),
+        # Content Delta - Citation
+        (
+            {
+                "chunk_type": "content_delta",
+                "data_type": "citation",
+                "data": {"type": "url_citation", "title": "Example", "url": "https://example.com"},
+            },
+            {
+                "contentBlockDelta": {
+                    "delta": {"citation": {"title": "Example", "location": {"web": {"url": "https://example.com"}}}}
+                }
+            },
+        ),
         # Content Delta - Text
         (
             {"chunk_type": "content_delta", "data_type": "text", "data": "hello"},
@@ -618,6 +631,74 @@ async def test_stream_reasoning_content(openai_client, model, agenerator, alist)
     assert len(content_stops) == 2
 
 
+@pytest.mark.asyncio
+async def test_stream_citation_annotations(openai_client, model, agenerator, alist):
+    """Test that web search citation annotations are streamed as CitationsDelta events."""
+    mock_text_event1 = unittest.mock.Mock(type="response.output_text.delta", delta="The answer is here. ")
+    mock_text_event2 = unittest.mock.Mock(type="response.output_text.delta", delta="(example.com)")
+    mock_annotation_event = unittest.mock.Mock(
+        type="response.output_text.annotation.added",
+        annotation={
+            "type": "url_citation",
+            "title": "Example Source",
+            "url": "https://example.com/article",
+        },
+    )
+    mock_complete_event = unittest.mock.Mock(
+        type="response.completed",
+        response=unittest.mock.Mock(usage=unittest.mock.Mock(input_tokens=10, output_tokens=5, total_tokens=15)),
+    )
+
+    openai_client.responses.create = unittest.mock.AsyncMock(
+        return_value=agenerator([mock_text_event1, mock_text_event2, mock_annotation_event, mock_complete_event])
+    )
+
+    messages = [{"role": "user", "content": [{"text": "search something"}]}]
+    tru_events = await alist(model.stream(messages))
+
+    citation_deltas = [
+        e for e in tru_events if "contentBlockDelta" in e and "citation" in e["contentBlockDelta"]["delta"]
+    ]
+    assert len(citation_deltas) == 1
+    assert citation_deltas[0] == {
+        "contentBlockDelta": {
+            "delta": {
+                "citation": {
+                    "title": "Example Source",
+                    "location": {"web": {"url": "https://example.com/article"}},
+                }
+            }
+        }
+    }
+
+
+@pytest.mark.asyncio
+async def test_stream_unsupported_annotation_type(openai_client, model, agenerator, alist, caplog):
+    """Test that unsupported annotation types log a warning and are not emitted."""
+    mock_text_event = unittest.mock.Mock(type="response.output_text.delta", delta="Some text")
+    mock_annotation_event = unittest.mock.Mock(
+        type="response.output_text.annotation.added",
+        annotation={"type": "file_citation", "file_id": "file-123", "filename": "doc.pdf"},
+    )
+    mock_complete_event = unittest.mock.Mock(
+        type="response.completed",
+        response=unittest.mock.Mock(usage=unittest.mock.Mock(input_tokens=10, output_tokens=5, total_tokens=15)),
+    )
+
+    openai_client.responses.create = unittest.mock.AsyncMock(
+        return_value=agenerator([mock_text_event, mock_annotation_event, mock_complete_event])
+    )
+
+    messages = [{"role": "user", "content": [{"text": "search files"}]}]
+    tru_events = await alist(model.stream(messages))
+
+    citation_deltas = [
+        e for e in tru_events if "contentBlockDelta" in e and "citation" in e["contentBlockDelta"]["delta"]
+    ]
+    assert len(citation_deltas) == 0
+    assert "annotation_type=<file_citation> | unsupported annotation type" in caplog.text
+
+
 @pytest.mark.asyncio
 async def test_structured_output(openai_client, model, test_output_model_cls, alist):
     messages = [{"role": "user", "content": [{"text": "Generate a person"}]}]
@@ -886,6 +967,71 @@ def test_format_request_with_tool_choice(model, messages, tool_specs):
     assert request["tool_choice"] == {"type": "function", "name": "test_tool"}
 
 
+def test_format_request_merges_builtin_tools_with_function_tools(messages, tool_specs):
+    """Test that built-in tools from params are merged with function tools."""
+    model = OpenAIResponsesModel(
+        model_id="gpt-4o",
+        params={"tools": [{"type": "web_search"}]},
+    )
+    request = model._format_request(messages, tool_specs)
+
+    assert request["tools"] == [
+        {"type": "web_search"},
+        {
+            "type": "function",
+            "name": "test_tool",
+            "description": "A test tool",
+            "parameters": {
+                "type": "object",
+                "properties": {"input": {"type": "string"}},
+                "required": ["input"],
+            },
+        },
+    ]
+
+
+def test_format_request_builtin_tools_without_function_tools(messages):
+    """Test that built-in tools from params are preserved when no function tools are provided."""
+    model = OpenAIResponsesModel(
+        model_id="gpt-4o",
+        params={"tools": [{"type": "web_search"}]},
+    )
+    request = model._format_request(messages)
+
+    assert request["tools"] == [{"type": "web_search"}]
+
+
+def test_format_request_messages_with_citations_content():
+    """Test that citationsContent blocks are converted to text in the request."""
+    messages = [
+        {"role": "user", "content": [{"text": "search something"}]},
+        {
+            "role": "assistant",
+            "content": [
+                {
+                    "citationsContent": {
+                        "citations": [
+                            {
+                                "title": "Example",
+                                "location": {"web": {"url": "https://example.com", "domain": "example.com"}},
+                                "sourceContent": [{"text": "cited text"}],
+                            }
+                        ],
+                        "content": [{"text": "The answer with citations."}],
+                    }
+                }
+            ],
+        },
+    ]
+    formatted = OpenAIResponsesModel._format_request_messages(messages)
+
+    assistant_msg = [m for m in formatted if m.get("role") == "assistant"][0]
+    assert assistant_msg == {
+        "role": "assistant",
+        "content": [{"type": "output_text", "text": "The answer with citations."}],
+    }
+
+
 def test_format_request_message_content_image_size_limit():
     """Test that oversized images raise ValueError."""
     oversized_data = b"x" * (_MAX_MEDIA_SIZE_BYTES + 1)
diff --git a/tests_integ/models/test_model_openai.py b/tests_integ/models/test_model_openai.py
@@ -1,5 +1,8 @@
 import os
+import tempfile
+import time
 
+import openai as openai_sdk
 import pydantic
 import pytest
 
@@ -80,6 +83,31 @@ def lower(_, value):
     return Color(name="yellow")
 
 
+@pytest.fixture(scope="module")
+def openai_vector_store():
+    """Create a vector store with a test file for file_search tests."""
+    client = openai_sdk.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".txt") as f:
+        f.write("The secret code is ALPHA-7742.")
+        f.flush()
+        file_obj = client.files.create(file=open(f.name, "rb"), purpose="assistants")
+
+    vector_store = client.vector_stores.create(name="test-builtin-tools")
+    try:
+        client.vector_stores.files.create(vector_store_id=vector_store.id, file_id=file_obj.id)
+
+        for _ in range(30):
+            if client.vector_stores.retrieve(vector_store.id).file_counts.completed > 0:
+                break
+            time.sleep(1)
+
+        yield vector_store.id
+    finally:
+        client.vector_stores.delete(vector_store.id)
+        client.files.delete(file_obj.id)
+
+
 @pytest.fixture(scope="module")
 def test_image_path(request):
     return request.config.rootpath / "tests_integ" / "test_image.png"
@@ -308,3 +336,67 @@ def test_responses_server_side_conversation():
 
     result = agent("What is my name?")
     assert "alice" in result.message["content"][0]["text"].lower()
+
+
+@pytest.mark.skipif(not _openai_responses_available, reason="OpenAI Responses API not available")
+def test_responses_builtin_tool_web_search():
+    """Test that web_search produces text with citation content."""
+    model = OpenAIResponsesModel(
+        model_id="gpt-4o",
+        params={"tools": [{"type": "web_search"}]},
+        client_args={"api_key": os.getenv("OPENAI_API_KEY")},
+    )
+    agent = Agent(model=model, system_prompt="Answer concisely.", callback_handler=None)
+
+    result = agent("Search https://strandsagents.com/ and tell me what Strands Agents is.")
+    content = result.message["content"][0]
+
+    assert "citationsContent" in content
+    citations = content["citationsContent"]["citations"]
+    assert any("strandsagents.com" in c["location"]["web"]["url"] for c in citations)
+
+
+@pytest.mark.skipif(not _openai_responses_available, reason="OpenAI Responses API not available")
+def test_responses_builtin_tool_file_search(openai_vector_store):
+    """Test that file_search produces text output from uploaded files."""
+    model = OpenAIResponsesModel(
+        model_id="gpt-4o",
+        params={"tools": [{"type": "file_search", "vector_store_ids": [openai_vector_store]}]},
+        client_args={"api_key": os.getenv("OPENAI_API_KEY")},
+    )
+    agent = Agent(model=model, system_prompt="Answer based on the files.", callback_handler=None)
+
+    result = agent("What is the secret code?")
+    text = result.message["content"][0]["text"]
+    assert "ALPHA-7742" in text
+
+
+@pytest.mark.skipif(not _openai_responses_available, reason="OpenAI Responses API not available")
+def test_responses_builtin_tool_code_interpreter():
+    """Test that code_interpreter produces correct results via text output."""
+    model = OpenAIResponsesModel(
+        model_id="gpt-4o",
+        params={"tools": [{"type": "code_interpreter", "container": {"type": "auto"}}]},
+        client_args={"api_key": os.getenv("OPENAI_API_KEY")},
+    )
+    agent = Agent(model=model, system_prompt="Answer concisely.", callback_handler=None)
+
+    # SHA-256 of "strands" requires actual computation
+    result = agent("Compute the SHA-256 hash of the string 'strands'. Return only the hex digest.")
+    text = result.message["content"][0]["text"]
+    assert "11e0e34bd35e12185cfacd5e5a256ab4292bfa3616d8d5b74e20eca36feed228" in text
+
+
+@pytest.mark.skipif(not _openai_responses_available, reason="OpenAI Responses API not available")
+def test_responses_builtin_tool_shell():
+    """Test that the shell built-in tool executes commands in a hosted container."""
+    model = OpenAIResponsesModel(
+        model_id="gpt-5.4-mini",
+        params={"tools": [{"type": "shell", "environment": {"type": "container_auto"}}]},
+        client_args={"api_key": os.getenv("OPENAI_API_KEY")},
+    )
+    agent = Agent(model=model, system_prompt="Answer concisely.", callback_handler=None)
+
+    result = agent("Use the shell to compute the md5sum of the string 'strands-test'. Return only the hash.")
+    text = result.message["content"][0]["text"]
+    assert "d82f373f079b00a1db7ef1eec7f15c68" in text