Skip to content

Commit de9b149

Browse files
authored
feat: add built-in tool support for OpenAI Responses API (#2011)
1 parent ae19308 commit de9b149

File tree

3 files changed

+291
-10
lines changed

3 files changed

+291
-10
lines changed

src/strands/models/openai_responses.py

Lines changed: 53 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,19 @@
11
"""OpenAI model provider using the Responses API.
22
3-
Note: Built-in tools (web search, code interpreter, file search) are not yet supported.
3+
Built-in tools (e.g. web_search, file_search, code_interpreter) can be passed via the
4+
``params`` configuration and will be merged with any agent function tools in the request.
5+
6+
All built-in tools produce text responses that stream correctly. Limitations on tool-specific
7+
metadata:
8+
9+
- web_search (supported): Full support including URL citations.
10+
- file_search (partial): File citation annotations not emitted (no matching CitationLocation variant).
11+
- code_interpreter (partial): Executed code and stdout/stderr not surfaced.
12+
- mcp (partial): Approval flow and ``mcp_list_tools``/``mcp_call`` events not surfaced.
13+
- shell (partial): Local (client-executed) mode not supported.
14+
- tool_search (not supported): Requires ``defer_loading`` on function tools, which is not supported.
15+
- image_generation (not supported): Requires image content block delta support in the event loop.
16+
- computer_use_preview (not supported): Requires a developer-managed screenshot/action loop.
417
518
Docs: https://platform.openai.com/docs/api-reference/responses
619
"""
@@ -40,6 +53,7 @@
4053

4154
import openai # noqa: E402 - must import after version check
4255

56+
from ..types.citations import WebLocationDict # noqa: E402
4357
from ..types.content import ContentBlock, Messages, Role # noqa: E402
4458
from ..types.exceptions import ContextWindowOverflowException, ModelThrottledException # noqa: E402
4559
from ..types.streaming import StreamEvent # noqa: E402
@@ -103,12 +117,7 @@ def responses(self) -> Any:
103117

104118

105119
class OpenAIResponsesModel(Model):
106-
"""OpenAI Responses API model provider implementation.
107-
108-
Note:
109-
This implementation currently only supports function tools (custom tools defined via tool_specs).
110-
OpenAI's built-in system tools are not yet supported.
111-
"""
120+
"""OpenAI Responses API model provider implementation."""
112121

113122
client: Client
114123
client_args: dict[str, Any]
@@ -255,6 +264,22 @@ async def stream(
255264
{"chunk_type": "content_delta", "data_type": "text", "data": event.delta}
256265
)
257266

267+
elif event.type == "response.output_text.annotation.added":
268+
if hasattr(event, "annotation"):
269+
if event.annotation.get("type") == "url_citation":
270+
yield self._format_chunk(
271+
{
272+
"chunk_type": "content_delta",
273+
"data_type": "citation",
274+
"data": event.annotation,
275+
}
276+
)
277+
else:
278+
logger.warning(
279+
"annotation_type=<%s> | unsupported annotation type",
280+
event.annotation.get("type"),
281+
)
282+
258283
elif event.type == "response.output_item.added":
259284
# Tool call started
260285
if (
@@ -431,16 +456,16 @@ def _format_request(
431456

432457
# Add tools if provided
433458
if tool_specs:
434-
request["tools"] = [
459+
# Merge with any built-in tools (e.g. web_search) already in the request from params
460+
request.setdefault("tools", []).extend(
435461
{
436462
"type": "function",
437463
"name": tool_spec["name"],
438464
"description": tool_spec.get("description", ""),
439465
"parameters": tool_spec["inputSchema"]["json"],
440466
}
441467
for tool_spec in tool_specs
442-
]
443-
# Add tool_choice if provided
468+
)
444469
request.update(self._format_request_tool_choice(tool_choice))
445470

446471
return request
@@ -550,6 +575,11 @@ def _format_request_message_content(cls, content: ContentBlock, *, role: Role =
550575
text_type = "output_text" if role == "assistant" else "input_text"
551576
return {"type": text_type, "text": content["text"]}
552577

578+
if "citationsContent" in content:
579+
text = "".join(c["text"] for c in content["citationsContent"].get("content", []) if "text" in c)
580+
text_type = "output_text" if role == "assistant" else "input_text"
581+
return {"type": text_type, "text": text}
582+
553583
raise TypeError(f"content_type=<{next(iter(content))}> | unsupported type")
554584

555585
@classmethod
@@ -680,6 +710,19 @@ def _format_chunk(self, event: dict[str, Any]) -> StreamEvent:
680710
if event["data_type"] == "reasoning_content":
681711
return {"contentBlockDelta": {"delta": {"reasoningContent": {"text": event["data"]}}}}
682712

713+
if event["data_type"] == "citation":
714+
web_location: WebLocationDict = {"web": {"url": event["data"].get("url", "")}}
715+
return {
716+
"contentBlockDelta": {
717+
"delta": {
718+
"citation": {
719+
"title": event["data"].get("title", ""),
720+
"location": web_location,
721+
}
722+
}
723+
}
724+
}
725+
683726
return {"contentBlockDelta": {"delta": {"text": event["data"]}}}
684727

685728
case "content_stop":

tests/strands/models/test_openai_responses.py

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,19 @@ def test_format_request(model, messages, tool_specs, system_prompt):
394394
{"chunk_type": "content_delta", "data_type": "reasoning_content", "data": "I'm thinking"},
395395
{"contentBlockDelta": {"delta": {"reasoningContent": {"text": "I'm thinking"}}}},
396396
),
397+
# Content Delta - Citation
398+
(
399+
{
400+
"chunk_type": "content_delta",
401+
"data_type": "citation",
402+
"data": {"type": "url_citation", "title": "Example", "url": "https://example.com"},
403+
},
404+
{
405+
"contentBlockDelta": {
406+
"delta": {"citation": {"title": "Example", "location": {"web": {"url": "https://example.com"}}}}
407+
}
408+
},
409+
),
397410
# Content Delta - Text
398411
(
399412
{"chunk_type": "content_delta", "data_type": "text", "data": "hello"},
@@ -618,6 +631,74 @@ async def test_stream_reasoning_content(openai_client, model, agenerator, alist)
618631
assert len(content_stops) == 2
619632

620633

634+
@pytest.mark.asyncio
async def test_stream_citation_annotations(openai_client, model, agenerator, alist):
    """A ``url_citation`` annotation in the stream is surfaced as exactly one citation delta."""
    # Two text deltas, one annotation, then the completion event carrying usage.
    stream_events = [
        unittest.mock.Mock(type="response.output_text.delta", delta="The answer is here. "),
        unittest.mock.Mock(type="response.output_text.delta", delta="(example.com)"),
        unittest.mock.Mock(
            type="response.output_text.annotation.added",
            annotation={
                "type": "url_citation",
                "title": "Example Source",
                "url": "https://example.com/article",
            },
        ),
        unittest.mock.Mock(
            type="response.completed",
            response=unittest.mock.Mock(
                usage=unittest.mock.Mock(input_tokens=10, output_tokens=5, total_tokens=15)
            ),
        ),
    ]
    openai_client.responses.create = unittest.mock.AsyncMock(return_value=agenerator(stream_events))

    request_messages = [{"role": "user", "content": [{"text": "search something"}]}]
    emitted = await alist(model.stream(request_messages))

    # Keep only the content-block deltas that carry a citation payload.
    citation_events = []
    for evt in emitted:
        if "contentBlockDelta" not in evt:
            continue
        if "citation" in evt["contentBlockDelta"]["delta"]:
            citation_events.append(evt)

    expected_event = {
        "contentBlockDelta": {
            "delta": {
                "citation": {
                    "title": "Example Source",
                    "location": {"web": {"url": "https://example.com/article"}},
                }
            }
        }
    }
    assert len(citation_events) == 1
    assert citation_events[0] == expected_event
673+
674+
675+
@pytest.mark.asyncio
async def test_stream_unsupported_annotation_type(openai_client, model, agenerator, alist, caplog):
    """A non-``url_citation`` annotation emits no citation delta and logs a warning instead."""
    completed = unittest.mock.Mock(
        type="response.completed",
        response=unittest.mock.Mock(
            usage=unittest.mock.Mock(input_tokens=10, output_tokens=5, total_tokens=15)
        ),
    )
    # ``file_citation`` is the annotation type this test expects to be rejected.
    file_annotation = unittest.mock.Mock(
        type="response.output_text.annotation.added",
        annotation={"type": "file_citation", "file_id": "file-123", "filename": "doc.pdf"},
    )
    text_delta = unittest.mock.Mock(type="response.output_text.delta", delta="Some text")

    openai_client.responses.create = unittest.mock.AsyncMock(
        return_value=agenerator([text_delta, file_annotation, completed])
    )

    request_messages = [{"role": "user", "content": [{"text": "search files"}]}]
    emitted = await alist(model.stream(request_messages))

    citation_events = [
        evt for evt in emitted if "contentBlockDelta" in evt and "citation" in evt["contentBlockDelta"]["delta"]
    ]
    assert len(citation_events) == 0
    assert "annotation_type=<file_citation> | unsupported annotation type" in caplog.text
700+
701+
621702
@pytest.mark.asyncio
622703
async def test_structured_output(openai_client, model, test_output_model_cls, alist):
623704
messages = [{"role": "user", "content": [{"text": "Generate a person"}]}]
@@ -886,6 +967,71 @@ def test_format_request_with_tool_choice(model, messages, tool_specs):
886967
assert request["tool_choice"] == {"type": "function", "name": "test_tool"}
887968

888969

970+
def test_format_request_merges_builtin_tools_with_function_tools(messages, tool_specs):
    """Built-in tools supplied via ``params`` precede the agent's function tools in the request."""
    expected_function_tool = {
        "type": "function",
        "name": "test_tool",
        "description": "A test tool",
        "parameters": {
            "type": "object",
            "properties": {"input": {"type": "string"}},
            "required": ["input"],
        },
    }
    configured_params = {"tools": [{"type": "web_search"}]}

    model = OpenAIResponsesModel(model_id="gpt-4o", params=configured_params)
    formatted_request = model._format_request(messages, tool_specs)

    assert formatted_request["tools"] == [{"type": "web_search"}, expected_function_tool]
991+
992+
993+
def test_format_request_builtin_tools_without_function_tools(messages):
    """With no function tools given, the built-in tools from ``params`` pass through unchanged."""
    configured_params = {"tools": [{"type": "web_search"}]}
    model = OpenAIResponsesModel(model_id="gpt-4o", params=configured_params)

    formatted_request = model._format_request(messages)

    assert formatted_request["tools"] == [{"type": "web_search"}]
1002+
1003+
1004+
def test_format_request_messages_with_citations_content():
    """An assistant ``citationsContent`` block is flattened to a plain ``output_text`` item."""
    citation_entry = {
        "title": "Example",
        "location": {"web": {"url": "https://example.com", "domain": "example.com"}},
        "sourceContent": [{"text": "cited text"}],
    }
    citations_block = {
        "citationsContent": {
            "citations": [citation_entry],
            "content": [{"text": "The answer with citations."}],
        }
    }
    conversation = [
        {"role": "user", "content": [{"text": "search something"}]},
        {"role": "assistant", "content": [citations_block]},
    ]

    formatted = OpenAIResponsesModel._format_request_messages(conversation)

    # Only the generated text should survive; citation metadata is dropped from the request.
    assistant_entries = [entry for entry in formatted if entry.get("role") == "assistant"]
    assert assistant_entries[0] == {
        "role": "assistant",
        "content": [{"type": "output_text", "text": "The answer with citations."}],
    }
1033+
1034+
8891035
def test_format_request_message_content_image_size_limit():
8901036
"""Test that oversized images raise ValueError."""
8911037
oversized_data = b"x" * (_MAX_MEDIA_SIZE_BYTES + 1)

tests_integ/models/test_model_openai.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
import os
2+
import tempfile
3+
import time
24

5+
import openai as openai_sdk
36
import pydantic
47
import pytest
58

@@ -80,6 +83,31 @@ def lower(_, value):
8083
return Color(name="yellow")
8184

8285

86+
@pytest.fixture(scope="module")
def openai_vector_store():
    """Create a vector store containing one indexed test file for file_search tests.

    Uploads a small text file, attaches it to a fresh vector store, and waits
    (up to roughly 30 seconds) for the store to report the file as processed.

    Yields:
        The ID of the created vector store.

    The vector store and the uploaded file are deleted on teardown, including
    when setup fails part-way through.
    """
    client = openai_sdk.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    with tempfile.NamedTemporaryFile(mode="w", suffix=".txt") as f:
        f.write("The secret code is ALPHA-7742.")
        f.flush()
        # Close the read handle deterministically instead of leaking it.
        with open(f.name, "rb") as upload:
            file_obj = client.files.create(file=upload, purpose="assistants")

    # Outer try guarantees the uploaded file is removed even if the vector
    # store itself cannot be created.
    try:
        vector_store = client.vector_stores.create(name="test-builtin-tools")
        try:
            client.vector_stores.files.create(vector_store_id=vector_store.id, file_id=file_obj.id)

            for _ in range(30):
                if client.vector_stores.retrieve(vector_store.id).file_counts.completed > 0:
                    break
                time.sleep(1)
            else:
                # Fail here with a clear reason rather than letting the
                # dependent test fail later on a missing search result.
                pytest.fail("vector store did not finish indexing the test file within 30 seconds")

            yield vector_store.id
        finally:
            client.vector_stores.delete(vector_store.id)
    finally:
        client.files.delete(file_obj.id)
109+
110+
83111
@pytest.fixture(scope="module")
84112
def test_image_path(request):
85113
return request.config.rootpath / "tests_integ" / "test_image.png"
@@ -308,3 +336,67 @@ def test_responses_server_side_conversation():
308336

309337
result = agent("What is my name?")
310338
assert "alice" in result.message["content"][0]["text"].lower()
339+
340+
341+
@pytest.mark.skipif(not _openai_responses_available, reason="OpenAI Responses API not available")
def test_responses_builtin_tool_web_search():
    """The web_search built-in tool yields a citations block that references the searched site."""
    web_search_model = OpenAIResponsesModel(
        model_id="gpt-4o",
        params={"tools": [{"type": "web_search"}]},
        client_args={"api_key": os.getenv("OPENAI_API_KEY")},
    )
    agent = Agent(model=web_search_model, system_prompt="Answer concisely.", callback_handler=None)

    result = agent("Search https://strandsagents.com/ and tell me what Strands Agents is.")
    first_block = result.message["content"][0]

    assert "citationsContent" in first_block
    # Lazily walk the cited URLs so evaluation stops at the first match.
    cited_urls = (entry["location"]["web"]["url"] for entry in first_block["citationsContent"]["citations"])
    assert any("strandsagents.com" in url for url in cited_urls)
357+
358+
359+
@pytest.mark.skipif(not _openai_responses_available, reason="OpenAI Responses API not available")
def test_responses_builtin_tool_file_search(openai_vector_store):
    """The file_search built-in tool answers from the content of the uploaded file."""
    file_search_tool = {"type": "file_search", "vector_store_ids": [openai_vector_store]}
    file_search_model = OpenAIResponsesModel(
        model_id="gpt-4o",
        params={"tools": [file_search_tool]},
        client_args={"api_key": os.getenv("OPENAI_API_KEY")},
    )
    agent = Agent(model=file_search_model, system_prompt="Answer based on the files.", callback_handler=None)

    result = agent("What is the secret code?")

    answer = result.message["content"][0]["text"]
    assert "ALPHA-7742" in answer
372+
373+
374+
@pytest.mark.skipif(not _openai_responses_available, reason="OpenAI Responses API not available")
def test_responses_builtin_tool_code_interpreter():
    """The code_interpreter built-in tool returns a correctly computed result as text."""
    interpreter_tool = {"type": "code_interpreter", "container": {"type": "auto"}}
    interpreter_model = OpenAIResponsesModel(
        model_id="gpt-4o",
        params={"tools": [interpreter_tool]},
        client_args={"api_key": os.getenv("OPENAI_API_KEY")},
    )
    agent = Agent(model=interpreter_model, system_prompt="Answer concisely.", callback_handler=None)

    # A SHA-256 digest cannot be guessed; the model has to actually run code.
    result = agent("Compute the SHA-256 hash of the string 'strands'. Return only the hex digest.")

    answer = result.message["content"][0]["text"]
    assert "11e0e34bd35e12185cfacd5e5a256ab4292bfa3616d8d5b74e20eca36feed228" in answer
388+
389+
390+
@pytest.mark.skipif(not _openai_responses_available, reason="OpenAI Responses API not available")
def test_responses_builtin_tool_shell():
    """The shell built-in tool runs a command in a hosted container and reports its output."""
    shell_tool = {"type": "shell", "environment": {"type": "container_auto"}}
    shell_model = OpenAIResponsesModel(
        model_id="gpt-5.4-mini",
        params={"tools": [shell_tool]},
        client_args={"api_key": os.getenv("OPENAI_API_KEY")},
    )
    agent = Agent(model=shell_model, system_prompt="Answer concisely.", callback_handler=None)

    result = agent("Use the shell to compute the md5sum of the string 'strands-test'. Return only the hash.")

    answer = result.message["content"][0]["text"]
    assert "d82f373f079b00a1db7ef1eec7f15c68" in answer

0 commit comments

Comments
 (0)