From 619b2bf3386904f8d977f6df1e7fcfe78c088ee5 Mon Sep 17 00:00:00 2001 From: Abdul Alfozan Date: Thu, 23 Apr 2026 12:11:23 -0700 Subject: [PATCH] Update examples and defaults to GPT-5.5 --- docs/agents.md | 2 +- docs/models/index.md | 28 +++++++++---------- docs/results.md | 2 +- docs/sandbox/guide.md | 2 +- docs/sandbox_agents.md | 2 +- docs/scripts/translate_docs.py | 2 +- docs/streaming.md | 2 +- docs/tools.md | 14 +++++----- docs/voice/quickstart.md | 8 +++--- examples/basic/hello_world_gpt_5.py | 4 +-- examples/basic/stream_ws.py | 4 +-- .../agents/search_agent.py | 2 +- .../agents/verifier_agent.py | 2 +- .../agents/writer_agent.py | 2 +- examples/memory/hitl_session_scenario.py | 8 ++++-- examples/reasoning_content/main.py | 8 +++--- examples/reasoning_content/runner_example.py | 4 +-- examples/research_bot/agents/planner_agent.py | 2 +- examples/research_bot/agents/search_agent.py | 2 +- examples/sandbox/basic.py | 2 +- examples/sandbox/docker/docker_runner.py | 2 +- examples/sandbox/docker/mounts/mount_smoke.py | 2 +- examples/sandbox/docs/coding_task.py | 2 +- examples/sandbox/extensions/blaxel_runner.py | 2 +- .../sandbox/extensions/cloudflare_runner.py | 2 +- .../extensions/daytona/daytona_runner.py | 2 +- .../daytona/usaspending_text2sql/agent.py | 2 +- examples/sandbox/extensions/e2b_runner.py | 2 +- examples/sandbox/extensions/modal_runner.py | 2 +- .../extensions/runloop/capabilities.py | 2 +- examples/sandbox/extensions/runloop/runner.py | 2 +- .../temporal/temporal_sandbox_agent.py | 2 +- examples/sandbox/extensions/vercel_runner.py | 2 +- examples/sandbox/handoffs.py | 2 +- .../healthcare_support/support_agents.py | 8 +++--- .../sandbox/healthcare_support/workflow.py | 1 + examples/sandbox/memory.py | 9 ++++-- .../sandbox/memory_multi_agent_multiturn.py | 7 +++-- examples/sandbox/memory_s3.py | 2 +- .../sandbox/sandbox_agent_capabilities.py | 4 +-- .../sandbox_agent_with_remote_snapshot.py | 2 +- examples/sandbox/sandbox_agent_with_tools.py | 2 +- examples/sandbox/sandbox_agents_as_tools.py | 13 +++++---- examples/sandbox/tax_prep.py | 2 +- .../tutorials/sandbox_resume/README.md | 2 +- examples/sandbox/unix_local_pty.py | 2 +- examples/sandbox/unix_local_runner.py | 2 +- examples/tools/apply_patch.py | 2 +- examples/tools/code_interpreter.py | 2 +- examples/tools/codex.py | 2 +- examples/tools/codex_same_thread.py | 2 +- examples/tools/computer_use.py | 2 +- .../tools/container_shell_inline_skill.py | 2 +- .../tools/container_shell_skill_reference.py | 2 +- examples/tools/local_shell_skill.py | 2 +- examples/tools/shell.py | 2 +- examples/tools/shell_human_in_the_loop.py | 2 +- examples/tools/tool_search.py | 4 +-- examples/voice/streamed/my_workflow.py | 4 +-- src/agents/models/default_models.py | 1 + src/agents/models/openai_responses.py | 4 ++- src/agents/sandbox/capabilities/compaction.py | 1 + src/agents/sandbox/config.py | 2 +- tests/models/test_default_models.py | 6 ++++ tests/sandbox/test_compaction.py | 3 ++ tests/test_openai_responses_converter.py | 10 ++++--- 66 files changed, 136 insertions(+), 105 deletions(-) diff --git a/docs/agents.md b/docs/agents.md index 96c29334c3..12a9f53b41 100644 --- a/docs/agents.md +++ b/docs/agents.md @@ -301,7 +301,7 @@ By using the `clone()` method on an agent, you can duplicate an Agent, and optio pirate_agent = Agent( name="Pirate", instructions="Write like a pirate", - model="gpt-5.4", + model="gpt-5.5", ) robot_agent = pirate_agent.clone( diff --git a/docs/models/index.md b/docs/models/index.md index d4e6d78826..e4ee8cc3bb 100644 --- a/docs/models/index.md +++ b/docs/models/index.md @@ -22,16 +22,16 @@ Start with the simplest path that fits your setup: For most OpenAI-only apps, the recommended path is to use string model names with the default OpenAI provider and stay on the Responses model path. -When you don't specify a model when initializing an `Agent`, the default model will be used. The default is currently [`gpt-4.1`](https://developers.openai.com/api/docs/models/gpt-4.1) for compatibility and low latency. If you have access, we recommend setting your agents to [`gpt-5.4`](https://developers.openai.com/api/docs/models/gpt-5.4) for higher quality while keeping explicit `model_settings`. +When you don't specify a model when initializing an `Agent`, the default model will be used. The default is currently [`gpt-4.1`](https://developers.openai.com/api/docs/models/gpt-4.1) for compatibility and low latency. If you have access, we recommend setting your agents to [`gpt-5.5`](https://developers.openai.com/api/docs/models/gpt-5.5) for higher quality while keeping explicit `model_settings`. -If you want to switch to other models like [`gpt-5.4`](https://developers.openai.com/api/docs/models/gpt-5.4), there are two ways to configure your agents. +If you want to switch to other models like [`gpt-5.5`](https://developers.openai.com/api/docs/models/gpt-5.5), there are two ways to configure your agents. ### Default model First, if you want to consistently use a specific model for all agents that do not set a custom model, set the `OPENAI_DEFAULT_MODEL` environment variable before running your agents. ```bash -export OPENAI_DEFAULT_MODEL=gpt-5.4 +export OPENAI_DEFAULT_MODEL=gpt-5.5 python3 my_awesome_agent.py ``` @@ -48,13 +48,13 @@ agent = Agent( result = await Runner.run( agent, "Hello", - run_config=RunConfig(model="gpt-5.4"), + run_config=RunConfig(model="gpt-5.5"), ) ``` #### GPT-5 models -When you use any GPT-5 model such as [`gpt-5.4`](https://developers.openai.com/api/docs/models/gpt-5.4) in this way, the SDK applies default `ModelSettings`. It sets the ones that work the best for most use cases. To adjust the reasoning effort for the default model, pass your own `ModelSettings`: +When you use any GPT-5 model such as [`gpt-5.5`](https://developers.openai.com/api/docs/models/gpt-5.5) in this way, the SDK applies default `ModelSettings`. It sets the ones that work the best for most use cases. To adjust the reasoning effort for the default model, pass your own `ModelSettings`: ```python from openai.types.shared import Reasoning @@ -63,20 +63,20 @@ from agents import Agent, ModelSettings my_agent = Agent( name="My Agent", instructions="You're a helpful agent.", - # If OPENAI_DEFAULT_MODEL=gpt-5.4 is set, passing only model_settings works. + # If OPENAI_DEFAULT_MODEL=gpt-5.5 is set, passing only model_settings works. # It's also fine to pass a GPT-5 model name explicitly: - model="gpt-5.4", + model="gpt-5.5", model_settings=ModelSettings(reasoning=Reasoning(effort="high"), verbosity="low") ) ``` -For lower latency, using `reasoning.effort="none"` with `gpt-5.4` is recommended. The gpt-4.1 family (including mini and nano variants) also remains a solid choice for building interactive agent apps. +For lower latency, using `reasoning.effort="none"` with `gpt-5.5` is recommended. The gpt-4.1 family (including mini and nano variants) also remains a solid choice for building interactive agent apps. #### ComputerTool model selection -If an agent includes [`ComputerTool`][agents.tool.ComputerTool], the effective model on the actual Responses request determines which computer-tool payload the SDK sends. Explicit `gpt-5.4` requests use the GA built-in `computer` tool, while explicit `computer-use-preview` requests keep the older `computer_use_preview` payload. +If an agent includes [`ComputerTool`][agents.tool.ComputerTool], the effective model on the actual Responses request determines which computer-tool payload the SDK sends. Explicit `gpt-5.5` requests use the GA built-in `computer` tool, while explicit `computer-use-preview` requests keep the older `computer_use_preview` payload. -Prompt-managed calls are the main exception. If a prompt template owns the model and the SDK omits `model` from the request, the SDK defaults to the preview-compatible computer payload so it does not guess which model the prompt pins. To keep the GA path in that flow, either make `model="gpt-5.4"` explicit on the request or force the GA selector with `ModelSettings(tool_choice="computer")` or `ModelSettings(tool_choice="computer_use")`. +Prompt-managed calls are the main exception. If a prompt template owns the model and the SDK omits `model` from the request, the SDK defaults to the preview-compatible computer payload so it does not guess which model the prompt pins. To keep the GA path in that flow, either make `model="gpt-5.5"` explicit on the request or force the GA selector with `ModelSettings(tool_choice="computer")` or `ModelSettings(tool_choice="computer_use")`. With a registered [`ComputerTool`][agents.tool.ComputerTool], `tool_choice="computer"`, `"computer_use"`, and `"computer_use_preview"` are normalized to the built-in selector that matches the effective request model. If no `ComputerTool` is registered, those strings continue to behave like ordinary function names. @@ -108,7 +108,7 @@ from agents import set_default_openai_responses_transport set_default_openai_responses_transport("websocket") ``` -This affects OpenAI Responses models resolved by the default OpenAI provider (including string model names such as `"gpt-5.4"`). +This affects OpenAI Responses models resolved by the default OpenAI provider (including string model names such as `"gpt-5.5"`). Transport selection happens when the SDK resolves a model name into a model instance. If you pass a concrete [`Model`][agents.models.interface.Model] object, its transport is already fixed: [`OpenAIResponsesWSModel`][agents.models.openai_responses.OpenAIResponsesWSModel] uses websocket, [`OpenAIResponsesModel`][agents.models.openai_responses.OpenAIResponsesModel] uses HTTP, and [`OpenAIChatCompletionsModel`][agents.models.openai_chatcompletions.OpenAIChatCompletionsModel] stays on Chat Completions. If you pass `RunConfig(model_provider=...)`, that provider controls transport selection instead of the global default. @@ -275,7 +275,7 @@ triage_agent = Agent( name="Triage agent", instructions="Handoff to the appropriate agent based on the language of the request.", handoffs=[spanish_agent, english_agent], - model="gpt-5.4", + model="gpt-5.5", ) async def main(): @@ -320,7 +320,7 @@ from agents import Agent, ModelSettings research_agent = Agent( name="Research agent", - model="gpt-5.4", + model="gpt-5.5", model_settings=ModelSettings( parallel_tool_calls=False, truncation="auto", @@ -363,7 +363,7 @@ from agents import Agent, ModelRetrySettings, ModelSettings, retry_policies agent = Agent( name="Assistant", - model="gpt-5.4", + model="gpt-5.5", model_settings=ModelSettings( retry=ModelRetrySettings( max_retries=4, diff --git a/docs/results.md b/docs/results.md index 93126c3cd6..bec6eda01c 100644 --- a/docs/results.md +++ b/docs/results.md @@ -59,7 +59,7 @@ In practice: Unlike the JavaScript SDK, Python does not expose a separate `output` property for the model-shaped delta only. Use `new_items` when you need SDK metadata, or inspect `raw_responses` when you need the raw model payloads. -Computer-tool replay follows the raw Responses payload shape. Preview-model `computer_call` items preserve a single `action`, while `gpt-5.4` computer calls can preserve batched `actions[]`. [`to_input_list()`][agents.result.RunResultBase.to_input_list] and [`RunState`][agents.run_state.RunState] keep whichever shape the model produced, so manual replay, pause/resume flows, and stored transcripts continue to work across both preview and GA computer-tool calls. Local execution results still appear as `computer_call_output` items in `new_items`. +Computer-tool replay follows the raw Responses payload shape. Preview-model `computer_call` items preserve a single `action`, while `gpt-5.5` computer calls can preserve batched `actions[]`. [`to_input_list()`][agents.result.RunResultBase.to_input_list] and [`RunState`][agents.run_state.RunState] keep whichever shape the model produced, so manual replay, pause/resume flows, and stored transcripts continue to work across both preview and GA computer-tool calls. Local execution results still appear as `computer_call_output` items in `new_items`. ### New items diff --git a/docs/sandbox/guide.md b/docs/sandbox/guide.md index e59bceb2f8..c4653a3e51 100644 --- a/docs/sandbox/guide.md +++ b/docs/sandbox/guide.md @@ -555,7 +555,7 @@ async def main(model: str, prompt: str) -> None: if __name__ == "__main__": asyncio.run( main( - model="gpt-5.4", + model="gpt-5.5", prompt=( "Open `repo/task.md`, use the `$credit-note-fixer` skill, fix the bug, " f"run `{TARGET_TEST_CMD}`, and summarize the change." diff --git a/docs/sandbox_agents.md b/docs/sandbox_agents.md index 68a5ad9c68..25f8f9fd3d 100644 --- a/docs/sandbox_agents.md +++ b/docs/sandbox_agents.md @@ -76,7 +76,7 @@ def build_agent(model: str) -> SandboxAgent[None]: async def main() -> None: result = await Runner.run( - build_agent("gpt-5.4"), + build_agent("gpt-5.5"), "Open `repo/task.md`, fix the issue, run the targeted test, and summarize the change.", run_config=RunConfig( sandbox=SandboxRunConfig(client=UnixLocalSandboxClient()), diff --git a/docs/scripts/translate_docs.py b/docs/scripts/translate_docs.py index 74737289ab..b5b686fc55 100644 --- a/docs/scripts/translate_docs.py +++ b/docs/scripts/translate_docs.py @@ -11,7 +11,7 @@ # logging.basicConfig(level=logging.INFO) # logging.getLogger("openai").setLevel(logging.DEBUG) -OPENAI_MODEL = os.environ.get("OPENAI_MODEL", "gpt-5.4") +OPENAI_MODEL = os.environ.get("OPENAI_MODEL", "gpt-5.5") ENABLE_CODE_SNIPPET_EXCLUSION = True # gpt-4.5 needed this for better quality diff --git a/docs/streaming.md b/docs/streaming.md index 893092dce0..ad0cd9e620 100644 --- a/docs/streaming.md +++ b/docs/streaming.md @@ -10,7 +10,7 @@ Keep consuming `result.stream_events()` until the async iterator finishes. A str [`RawResponsesStreamEvent`][agents.stream_events.RawResponsesStreamEvent] are raw events passed directly from the LLM. They are in OpenAI Responses API format, which means each event has a type (like `response.created`, `response.output_text.delta`, etc) and data. These events are useful if you want to stream response messages to the user as soon as they are generated. -Computer-tool raw events keep the same preview-vs-GA distinction as stored results. Preview flows stream `computer_call` items with one `action`, while `gpt-5.4` can stream `computer_call` items with batched `actions[]`. The higher-level [`RunItemStreamEvent`][agents.stream_events.RunItemStreamEvent] surface does not add a special computer-only event name for this: both shapes still surface as `tool_called`, and the screenshot result comes back as `tool_output` wrapping a `computer_call_output` item. +Computer-tool raw events keep the same preview-vs-GA distinction as stored results. Preview flows stream `computer_call` items with one `action`, while `gpt-5.5` can stream `computer_call` items with batched `actions[]`. The higher-level [`RunItemStreamEvent`][agents.stream_events.RunItemStreamEvent] surface does not add a special computer-only event name for this: both shapes still surface as `tool_called`, and the screenshot result comes back as `tool_output` wrapping a `computer_call_output` item. For example, this will output the text generated by the LLM token-by-token. diff --git a/docs/tools.md b/docs/tools.md index 9e71e42c2c..3dc860efd5 100644 --- a/docs/tools.md +++ b/docs/tools.md @@ -93,7 +93,7 @@ crm_tools = tool_namespace( agent = Agent( name="Operations assistant", - model="gpt-5.4", + model="gpt-5.5", instructions="Load the crm namespace before using CRM tools.", tools=[*crm_tools, ToolSearchTool()], ) @@ -134,7 +134,7 @@ csv_skill: ShellToolSkillReference = { agent = Agent( name="Container shell agent", - model="gpt-5.4", + model="gpt-5.5", instructions="Use the mounted skill when helpful.", tools=[ ShellTool( @@ -186,20 +186,20 @@ Local runtime tools require you to supply implementations: `ComputerTool` is still a local harness: you provide a [`Computer`][agents.computer.Computer] or [`AsyncComputer`][agents.computer.AsyncComputer] implementation, and the SDK maps that harness onto the OpenAI Responses API computer surface. -For explicit [`gpt-5.4`](https://developers.openai.com/api/docs/models/gpt-5.4) requests, the SDK sends the GA built-in tool payload `{"type": "computer"}`. The older `computer-use-preview` model keeps the preview payload `{"type": "computer_use_preview", "environment": ..., "display_width": ..., "display_height": ...}`. This mirrors the platform migration described in OpenAI's [Computer use guide](https://developers.openai.com/api/docs/guides/tools-computer-use/): +For explicit [`gpt-5.5`](https://developers.openai.com/api/docs/models/gpt-5.5) requests, the SDK sends the GA built-in tool payload `{"type": "computer"}`. The older `computer-use-preview` model keeps the preview payload `{"type": "computer_use_preview", "environment": ..., "display_width": ..., "display_height": ...}`. This mirrors the platform migration described in OpenAI's [Computer use guide](https://developers.openai.com/api/docs/guides/tools-computer-use/): -- Model: `computer-use-preview` -> `gpt-5.4` +- Model: `computer-use-preview` -> `gpt-5.5` - Tool selector: `computer_use_preview` -> `computer` - Computer call shape: one `action` per `computer_call` -> batched `actions[]` on `computer_call` - Truncation: `ModelSettings(truncation="auto")` required on the preview path -> not required on the GA path -The SDK chooses that wire shape from the effective model on the actual Responses request. If you use a prompt template and the request omits `model` because the prompt owns it, the SDK keeps the preview-compatible computer payload unless you either keep `model="gpt-5.4"` explicit or force the GA selector with `ModelSettings(tool_choice="computer")` or `ModelSettings(tool_choice="computer_use")`. +The SDK chooses that wire shape from the effective model on the actual Responses request. If you use a prompt template and the request omits `model` because the prompt owns it, the SDK keeps the preview-compatible computer payload unless you either keep `model="gpt-5.5"` explicit or force the GA selector with `ModelSettings(tool_choice="computer")` or `ModelSettings(tool_choice="computer_use")`. When a [`ComputerTool`][agents.tool.ComputerTool] is present, `tool_choice="computer"`, `"computer_use"`, and `"computer_use_preview"` are all accepted and normalized to the built-in selector that matches the effective request model. Without a `ComputerTool`, those strings still behave like ordinary function names. This distinction matters when `ComputerTool` is backed by a [`ComputerProvider`][agents.tool.ComputerProvider] factory. The GA `computer` payload does not need `environment` or dimensions at serialization time, so unresolved factories are fine. Preview-compatible serialization still needs a resolved `Computer` or `AsyncComputer` instance so the SDK can send `environment`, `display_width`, and `display_height`. -At runtime, both paths still use the same local harness. Preview responses emit `computer_call` items with a single `action`; `gpt-5.4` can emit batched `actions[]`, and the SDK executes them in order before producing a `computer_call_output` screenshot item. See `examples/tools/computer_use.py` for a runnable Playwright-based harness. +At runtime, both paths still use the same local harness. Preview responses emit `computer_call` items with a single `action`; `gpt-5.5` can emit batched `actions[]`, and the SDK executes them in order before producing a `computer_call_output` screenshot item. See `examples/tools/computer_use.py` for a runnable Playwright-based harness. ```python from agents import Agent, ApplyPatchTool, ShellTool @@ -784,7 +784,7 @@ agent = Agent( sandbox_mode="workspace-write", working_directory="/path/to/repo", default_thread_options=ThreadOptions( - model="gpt-5.4", + model="gpt-5.5", model_reasoning_effort="low", network_access_enabled=True, web_search_mode="disabled", diff --git a/docs/voice/quickstart.md b/docs/voice/quickstart.md index 092f759abf..bc84d87b71 100644 --- a/docs/voice/quickstart.md +++ b/docs/voice/quickstart.md @@ -72,7 +72,7 @@ spanish_agent = Agent( instructions=prompt_with_handoff_instructions( "You're speaking to a human, so be polite and concise. Speak in Spanish.", ), - model="gpt-5.4", + model="gpt-5.5", ) agent = Agent( @@ -80,7 +80,7 @@ agent = Agent( instructions=prompt_with_handoff_instructions( "You're speaking to a human, so be polite and concise. If the user speaks in Spanish, handoff to the spanish agent.", ), - model="gpt-5.4", + model="gpt-5.5", handoffs=[spanish_agent], tools=[get_weather], ) @@ -156,7 +156,7 @@ spanish_agent = Agent( instructions=prompt_with_handoff_instructions( "You're speaking to a human, so be polite and concise. Speak in Spanish.", ), - model="gpt-5.4", + model="gpt-5.5", ) agent = Agent( @@ -164,7 +164,7 @@ agent = Agent( instructions=prompt_with_handoff_instructions( "You're speaking to a human, so be polite and concise. If the user speaks in Spanish, handoff to the spanish agent.", ), - model="gpt-5.4", + model="gpt-5.5", handoffs=[spanish_agent], tools=[get_weather], ) diff --git a/examples/basic/hello_world_gpt_5.py b/examples/basic/hello_world_gpt_5.py index 186d345df6..448ea25884 100644 --- a/examples/basic/hello_world_gpt_5.py +++ b/examples/basic/hello_world_gpt_5.py @@ -9,14 +9,14 @@ # from openai import AsyncOpenAI # client = AsyncOpenAI() # from agents import OpenAIChatCompletionsModel -# chat_completions_model = OpenAIChatCompletionsModel(model="gpt-5.4", openai_client=client) +# chat_completions_model = OpenAIChatCompletionsModel(model="gpt-5.5", openai_client=client) async def main(): agent = Agent( name="Knowledgable GPT-5 Assistant", instructions="You're a knowledgable assistant. You always provide an interesting answer.", - model="gpt-5.4", + model="gpt-5.5", model_settings=ModelSettings( reasoning=Reasoning(effort="low"), # "none", "low", "medium", "high", "xhigh" verbosity="low", # "low", "medium", "high" diff --git a/examples/basic/stream_ws.py b/examples/basic/stream_ws.py index cd5dc0e4e4..11f0bff8c0 100644 --- a/examples/basic/stream_ws.py +++ b/examples/basic/stream_ws.py @@ -12,7 +12,7 @@ - `OPENAI_API_KEY` Optional environment variables: -- `OPENAI_MODEL` (defaults to `gpt-5.4`) +- `OPENAI_MODEL` (defaults to `gpt-5.5`) - `OPENAI_BASE_URL` - `OPENAI_WEBSOCKET_BASE_URL` - `EXAMPLES_INTERACTIVE_MODE=auto` (auto-approve HITL prompts for scripted runs) @@ -160,7 +160,7 @@ async def run_streamed_turn( async def main() -> None: - model_name = os.getenv("OPENAI_MODEL", "gpt-5.4") + model_name = os.getenv("OPENAI_MODEL", "gpt-5.5") policy_agent = Agent( name="RefundPolicySpecialist", instructions=( diff --git a/examples/financial_research_agent/agents/search_agent.py b/examples/financial_research_agent/agents/search_agent.py index 899c9a818a..24d2fb9ce5 100644 --- a/examples/financial_research_agent/agents/search_agent.py +++ b/examples/financial_research_agent/agents/search_agent.py @@ -11,7 +11,7 @@ search_agent = Agent( name="FinancialSearchAgent", - model="gpt-5.4", + model="gpt-5.5", instructions=INSTRUCTIONS, tools=[WebSearchTool()], ) diff --git a/examples/financial_research_agent/agents/verifier_agent.py b/examples/financial_research_agent/agents/verifier_agent.py index 780a85c6b3..6ca1838cdd 100644 --- a/examples/financial_research_agent/agents/verifier_agent.py +++ b/examples/financial_research_agent/agents/verifier_agent.py @@ -22,6 +22,6 @@ class VerificationResult(BaseModel): verifier_agent = Agent( name="VerificationAgent", instructions=VERIFIER_PROMPT, - model="gpt-5.4", + model="gpt-5.5", output_type=VerificationResult, ) diff --git a/examples/financial_research_agent/agents/writer_agent.py b/examples/financial_research_agent/agents/writer_agent.py index 0f4713c56d..49bc83c3a8 100644 --- a/examples/financial_research_agent/agents/writer_agent.py +++ b/examples/financial_research_agent/agents/writer_agent.py @@ -29,6 +29,6 @@ class FinancialReportData(BaseModel): writer_agent = Agent( name="FinancialWriterAgent", instructions=WRITER_PROMPT, - model="gpt-5.4", + model="gpt-5.5", output_type=FinancialReportData, ) diff --git a/examples/memory/hitl_session_scenario.py b/examples/memory/hitl_session_scenario.py index 79e10ec7b2..c9936a016c 100644 --- a/examples/memory/hitl_session_scenario.py +++ b/examples/memory/hitl_session_scenario.py @@ -13,6 +13,8 @@ from pathlib import Path from typing import Any +from openai.types.shared import Reasoning + from agents import Agent, Model, ModelSettings, OpenAIConversationsSession, Runner, function_tool from agents.items import TResponseInputItem @@ -80,7 +82,9 @@ async def run_scenario_step( ), tools=[approval_echo, approval_note], model=model, - model_settings=ModelSettings(tool_choice=step.tool_name), + model_settings=ModelSettings( + tool_choice=step.tool_name, reasoning=Reasoning(effort="none") + ), tool_use_behavior="stop_on_first_tool", ) @@ -389,7 +393,7 @@ async def main() -> None: print("OPENAI_API_KEY must be set to run the HITL session scenario.") raise SystemExit(1) - model_override = os.environ.get("HITL_MODEL", "gpt-5.4") + model_override = os.environ.get("HITL_MODEL", "gpt-5.5") if model_override: print(f"Model: {model_override}") diff --git a/examples/reasoning_content/main.py b/examples/reasoning_content/main.py index 272c8c96bf..425e6153a0 100644 --- a/examples/reasoning_content/main.py +++ b/examples/reasoning_content/main.py @@ -1,13 +1,13 @@ """ Example demonstrating how to access reasoning summaries when a model returns them. -Some models, like gpt-5.4, provide a reasoning_content field in addition to the regular content. +Some models, like gpt-5.5, provide a reasoning_content field in addition to the regular content. This example shows how to access that content from both streaming and non-streaming responses, and how to handle responses that do not include a reasoning summary. To run this example, you need to: 1. Set your OPENAI_API_KEY environment variable -2. Use a model that supports reasoning content (e.g., gpt-5.4) +2. Use a model that supports reasoning content (e.g., gpt-5.5) """ import asyncio @@ -21,7 +21,7 @@ from agents.models.interface import ModelTracing from agents.models.openai_provider import OpenAIProvider -MODEL_NAME = os.getenv("REASONING_MODEL_NAME") or "gpt-5.4" +MODEL_NAME = os.getenv("REASONING_MODEL_NAME") or "gpt-5.5" async def stream_with_reasoning_content(): @@ -121,7 +121,7 @@ async def main(): except Exception as e: print(f"Error: {e}") print("\nNote: This example requires a model that supports reasoning content.") - print("You may need to use a specific model like gpt-5.4 or similar.") + print("You may need to use a specific model like gpt-5.5 or similar.") if __name__ == "__main__": diff --git a/examples/reasoning_content/runner_example.py b/examples/reasoning_content/runner_example.py index 56c6daeb68..b5ff0a0ce4 100644 --- a/examples/reasoning_content/runner_example.py +++ b/examples/reasoning_content/runner_example.py @@ -6,7 +6,7 @@ To run this example, you need to: 1. Set your OPENAI_API_KEY environment variable -2. Use a model that supports reasoning content (e.g., gpt-5.4) +2. Use a model that supports reasoning content (e.g., gpt-5.5) """ import asyncio @@ -17,7 +17,7 @@ from agents import Agent, ModelSettings, Runner, trace from agents.items import ReasoningItem -MODEL_NAME = os.getenv("REASONING_MODEL_NAME") or "gpt-5.4" +MODEL_NAME = os.getenv("REASONING_MODEL_NAME") or "gpt-5.5" async def main(): diff --git a/examples/research_bot/agents/planner_agent.py b/examples/research_bot/agents/planner_agent.py index 1c94e8f475..a89a4ef3f1 100644 --- a/examples/research_bot/agents/planner_agent.py +++ b/examples/research_bot/agents/planner_agent.py @@ -25,7 +25,7 @@ class WebSearchPlan(BaseModel): planner_agent = Agent( name="PlannerAgent", instructions=PROMPT, - model="gpt-5.4", + model="gpt-5.5", model_settings=ModelSettings(reasoning=Reasoning(effort="medium")), output_type=WebSearchPlan, ) diff --git a/examples/research_bot/agents/search_agent.py b/examples/research_bot/agents/search_agent.py index 810f5d166a..7921efc713 100644 --- a/examples/research_bot/agents/search_agent.py +++ b/examples/research_bot/agents/search_agent.py @@ -11,7 +11,7 @@ search_agent = Agent( name="Search agent", - model="gpt-5.4", + model="gpt-5.5", instructions=INSTRUCTIONS, tools=[WebSearchTool()], ) diff --git a/examples/sandbox/basic.py b/examples/sandbox/basic.py index 21936f33c5..02e8184de7 100644 --- a/examples/sandbox/basic.py +++ b/examples/sandbox/basic.py @@ -223,7 +223,7 @@ async def main( if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--model", default="gpt-5.4", help="Model name to use.") + parser.add_argument("--model", default="gpt-5.5", help="Model name to use.") parser.add_argument("--question", default=DEFAULT_QUESTION, help="Prompt to send to the agent.") parser.add_argument( "--backend", diff --git a/examples/sandbox/docker/docker_runner.py b/examples/sandbox/docker/docker_runner.py index e64c891f11..8d95c94f5a 100644 --- a/examples/sandbox/docker/docker_runner.py +++ b/examples/sandbox/docker/docker_runner.py @@ -159,7 +159,7 @@ async def main(model: str, question: str) -> None: if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--model", default="gpt-5.4", help="Model name to use.") + parser.add_argument("--model", default="gpt-5.5", help="Model name to use.") parser.add_argument("--question", default=DEFAULT_QUESTION, help="Prompt to send to the agent.") args = parser.parse_args() asyncio.run(main(args.model, args.question)) diff --git a/examples/sandbox/docker/mounts/mount_smoke.py b/examples/sandbox/docker/mounts/mount_smoke.py index 54d0262eed..2a1972ee2e 100644 --- a/examples/sandbox/docker/mounts/mount_smoke.py +++ b/examples/sandbox/docker/mounts/mount_smoke.py @@ -66,7 +66,7 @@ def build_agent(name: str, manifest: Manifest) -> SandboxAgent: return SandboxAgent( name=name, - model=os.getenv("OPENAI_MODEL", "gpt-5.4"), + model=os.getenv("OPENAI_MODEL", "gpt-5.5"), instructions=( "Use the shell tool only. Write the requested exact content to the requested exact " "path, read the file back with cat, and then reply with only `done`." diff --git a/examples/sandbox/docs/coding_task.py b/examples/sandbox/docs/coding_task.py index dbf4b49115..978b1403f5 100644 --- a/examples/sandbox/docs/coding_task.py +++ b/examples/sandbox/docs/coding_task.py @@ -22,7 +22,7 @@ from agents.sandbox.entries import LocalDir from agents.sandbox.sandboxes.unix_local import UnixLocalSandboxClient -DEFAULT_MODEL = "gpt-5.4" +DEFAULT_MODEL = "gpt-5.5" TARGET_TEST_CMD = "sh tests/test_credit_note.sh" DEFAULT_PROMPT = ( "Open `repo/task.md`, use the `$credit-note-fixer` skill, fix the bug, run " diff --git a/examples/sandbox/extensions/blaxel_runner.py b/examples/sandbox/extensions/blaxel_runner.py index 0a29e47e4a..5669a10aba 100644 --- a/examples/sandbox/extensions/blaxel_runner.py +++ b/examples/sandbox/extensions/blaxel_runner.py @@ -67,7 +67,7 @@ ) from exc -DEFAULT_MODEL = "gpt-5.4" +DEFAULT_MODEL = "gpt-5.5" DEFAULT_QUESTION = "Summarize this cloud sandbox workspace in 2 sentences." DEFAULT_PTY_QUESTION = ( "Start an interactive Python session with `tty=true`. In that same session, compute " diff --git a/examples/sandbox/extensions/cloudflare_runner.py b/examples/sandbox/extensions/cloudflare_runner.py index d30d231060..e8828fb676 100644 --- a/examples/sandbox/extensions/cloudflare_runner.py +++ b/examples/sandbox/extensions/cloudflare_runner.py @@ -46,7 +46,7 @@ ) from exc -DEFAULT_MODEL = "gpt-5.4" +DEFAULT_MODEL = "gpt-5.5" DEFAULT_QUESTION = "Summarize this cloud sandbox workspace in 2 sentences." DEFAULT_PTY_QUESTION = ( "Start an interactive Python session with `tty=true`. In that same session, compute " diff --git a/examples/sandbox/extensions/daytona/daytona_runner.py b/examples/sandbox/extensions/daytona/daytona_runner.py index df59204f3e..3580f92d0f 100644 --- a/examples/sandbox/extensions/daytona/daytona_runner.py +++ b/examples/sandbox/extensions/daytona/daytona_runner.py @@ -160,7 +160,7 @@ async def main( if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--model", default="gpt-5.4", help="Model name to use.") + parser.add_argument("--model", default="gpt-5.5", help="Model name to use.") parser.add_argument("--question", default=DEFAULT_QUESTION, help="Prompt to send to the agent.") parser.add_argument( "--pause-on-exit", diff --git a/examples/sandbox/extensions/daytona/usaspending_text2sql/agent.py b/examples/sandbox/extensions/daytona/usaspending_text2sql/agent.py index 07d06557e9..5a4db48ef7 100644 --- a/examples/sandbox/extensions/daytona/usaspending_text2sql/agent.py +++ b/examples/sandbox/extensions/daytona/usaspending_text2sql/agent.py @@ -112,7 +112,7 @@ def build_agent() -> SandboxAgent: return SandboxAgent( name="NASA Spending Q&A", default_manifest=manifest, - model="gpt-5.4", + model="gpt-5.5", instructions=( "You are a helpful data analyst that answers questions about NASA federal spending " "by writing and executing SQL queries.\n\n" + DEVELOPER_INSTRUCTIONS diff --git a/examples/sandbox/extensions/e2b_runner.py b/examples/sandbox/extensions/e2b_runner.py index 675fafa0c9..6d380437e1 100644 --- a/examples/sandbox/extensions/e2b_runner.py +++ b/examples/sandbox/extensions/e2b_runner.py @@ -226,7 +226,7 @@ async def main( if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--model", default="gpt-5.4", help="Model name to use.") + parser.add_argument("--model", default="gpt-5.5", help="Model name to use.") parser.add_argument("--question", default=DEFAULT_QUESTION, help="Prompt to send to the agent.") parser.add_argument( "--sandbox-type", diff --git a/examples/sandbox/extensions/modal_runner.py b/examples/sandbox/extensions/modal_runner.py index 53fbf46b89..b833982fb6 100644 --- a/examples/sandbox/extensions/modal_runner.py +++ b/examples/sandbox/extensions/modal_runner.py @@ -289,7 +289,7 @@ async def main( if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--model", default="gpt-5.4", help="Model name to use.") + parser.add_argument("--model", default="gpt-5.5", help="Model name to use.") parser.add_argument("--question", default=DEFAULT_QUESTION, help="Prompt to send to the agent.") parser.add_argument( "--app-name", diff --git a/examples/sandbox/extensions/runloop/capabilities.py b/examples/sandbox/extensions/runloop/capabilities.py index 941af3f31f..8d65b218ce 100644 --- a/examples/sandbox/extensions/runloop/capabilities.py +++ b/examples/sandbox/extensions/runloop/capabilities.py @@ -48,7 +48,7 @@ ) from exc -DEFAULT_MODEL = "gpt-5.4" +DEFAULT_MODEL = "gpt-5.5" DEFAULT_HTTP_PORT = 8123 DEFAULT_AGENT_PROMPT = ( "Inspect this Runloop sandbox workspace, verify the configuration using the shell tool, " diff --git a/examples/sandbox/extensions/runloop/runner.py b/examples/sandbox/extensions/runloop/runner.py index bb7f0dd9af..d66b5af1fb 100644 --- a/examples/sandbox/extensions/runloop/runner.py +++ b/examples/sandbox/extensions/runloop/runner.py @@ -136,7 +136,7 @@ async def main( if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--model", default="gpt-5.4", help="Model name to use.") + parser.add_argument("--model", default="gpt-5.5", help="Model name to use.") parser.add_argument("--question", default=DEFAULT_QUESTION, help="Prompt to send to the agent.") parser.add_argument( "--pause-on-exit", diff --git a/examples/sandbox/extensions/temporal/temporal_sandbox_agent.py b/examples/sandbox/extensions/temporal/temporal_sandbox_agent.py index 2ec20c6fbe..00746e39ce 100644 --- a/examples/sandbox/extensions/temporal/temporal_sandbox_agent.py +++ b/examples/sandbox/extensions/temporal/temporal_sandbox_agent.py @@ -537,7 +537,7 @@ async def run(self, request: AgentRequest) -> AgentResponse: return AgentResponse() - def _build_agent(self, manifest: Manifest, model: str = "gpt-5.4") -> SandboxAgent: + def _build_agent(self, manifest: Manifest, model: str = "gpt-5.5") -> SandboxAgent: """Construct the SandboxAgent used by the workflow.""" return SandboxAgent( name="Temporal Sandbox Agent", diff --git a/examples/sandbox/extensions/vercel_runner.py b/examples/sandbox/extensions/vercel_runner.py index 9d33bf1fe4..b49fdad0a0 100644 --- a/examples/sandbox/extensions/vercel_runner.py +++ b/examples/sandbox/extensions/vercel_runner.py @@ -390,7 +390,7 @@ async def main( if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--model", default="gpt-5.4", help="Model name to use.") + parser.add_argument("--model", default="gpt-5.5", help="Model name to use.") parser.add_argument("--question", default=DEFAULT_QUESTION, help="Prompt to send to the agent.") parser.add_argument( "--runtime", diff --git a/examples/sandbox/handoffs.py b/examples/sandbox/handoffs.py index e70d4a4bcd..a12e059042 100644 --- a/examples/sandbox/handoffs.py +++ b/examples/sandbox/handoffs.py @@ -97,7 +97,7 @@ async def main(model: str, question: str) -> None: if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--model", default="gpt-5.4", help="Model name to use.") + parser.add_argument("--model", default="gpt-5.5", help="Model name to use.") parser.add_argument("--question", default=DEFAULT_QUESTION, help="Prompt to send to the agent.") args = parser.parse_args() diff --git a/examples/sandbox/healthcare_support/support_agents.py b/examples/sandbox/healthcare_support/support_agents.py index dde68c890c..5dd1f1f559 100644 --- a/examples/sandbox/healthcare_support/support_agents.py +++ b/examples/sandbox/healthcare_support/support_agents.py @@ -90,7 +90,7 @@ benefits_agent = Agent[HealthcareSupportContext]( name="HealthcareBenefitsAgent", - model="gpt-5.4", + model="gpt-5.5", instructions=BENEFITS_PROMPT, model_settings=ModelSettings(reasoning=Reasoning(effort="low"), verbosity="low"), tools=[ @@ -105,7 +105,7 @@ def build_policy_sandbox_agent(*, skills_root: Path) -> SandboxAgent[HealthcareSupportContext]: return SandboxAgent[HealthcareSupportContext]( name="HealthcarePolicySandboxAgent", - model="gpt-5.4", + model="gpt-5.5", instructions=( POLICY_SANDBOX_PROMPT + "\n\n" "Use `load_skill` before reading the skill file. Use `exec_command` with `pwd`, " @@ -135,7 +135,7 @@ def build_policy_sandbox_agent(*, skills_root: Path) -> SandboxAgent[HealthcareS def build_orchestrator(*, sandbox_policy_tool: Tool) -> Agent[HealthcareSupportContext]: return Agent[HealthcareSupportContext]( name="HealthcareSupportOrchestrator", - model="gpt-5.4", + model="gpt-5.5", instructions=ORCHESTRATOR_PROMPT, model_settings=ModelSettings( reasoning=Reasoning(effort="low"), @@ -155,7 +155,7 @@ def build_orchestrator(*, sandbox_policy_tool: Tool) -> Agent[HealthcareSupportC memory_recap_agent = Agent[HealthcareSupportContext]( name="HealthcareSupportMemoryAgent", - model="gpt-5.4", + model="gpt-5.5", instructions=MEMORY_PROMPT, model_settings=ModelSettings(reasoning=Reasoning(effort="low"), verbosity="low"), output_type=AgentOutputSchema(MemoryRecap, strict_json_schema=False), diff --git a/examples/sandbox/healthcare_support/workflow.py b/examples/sandbox/healthcare_support/workflow.py index 7306ec65b2..58fe35104a 100644 --- a/examples/sandbox/healthcare_support/workflow.py +++ b/examples/sandbox/healthcare_support/workflow.py @@ -345,6 +345,7 @@ async def run_healthcare_support_workflow( workflow_name="Healthcare support sandbox packet", ), hooks=hooks, + max_turns=20, ) orchestrator = build_orchestrator(sandbox_policy_tool=sandbox_policy_tool) trace_id = gen_trace_id() diff --git a/examples/sandbox/memory.py b/examples/sandbox/memory.py index 4c0f70703a..5499f330a8 100644 --- a/examples/sandbox/memory.py +++ b/examples/sandbox/memory.py @@ -17,7 +17,7 @@ if __package__ is None or __package__ == "": sys.path.insert(0, str(Path(__file__).resolve().parents[2])) -DEFAULT_MODEL = "gpt-5.4" +DEFAULT_MODEL = "gpt-5.5" FIRST_PROMPT = "Inspect workspace and fix invoice total bug in src/acme_metrics/report.py." SECOND_PROMPT = "Add a regression test for the previous bug you fixed." @@ -74,7 +74,10 @@ def _build_agent(*, model: str, manifest: Manifest) -> SandboxAgent: "Answer questions about the sandbox workspace. Inspect files before answering, make " "minimal edits, and keep the response concise. " "Use the shell tool to inspect and validate the workspace. Use apply_patch for text " - "edits when it is the clearest option. Do not invent files you did not read." + "edits when it is the clearest option. Use a non-login POSIX shell for commands. " + "Make one focused pytest attempt; if the local sandbox blocks Python or toolchain " + "access, report that validation was blocked and finish instead of retrying repeatedly. " + "Do not invent files you did not read." ), default_manifest=manifest, capabilities=[ @@ -189,6 +192,7 @@ async def main(*, model: str) -> None: sandbox=sandbox, workflow_name="Sandbox memory example: initial fix", ), + max_turns=20, ) print("\n[first run]") print(first.final_output) @@ -204,6 +208,7 @@ async def main(*, model: str) -> None: sandbox=resumed_sandbox, workflow_name="Sandbox memory example: follow-up", ), + max_turns=20, ) print("\n[second run]") print(second.final_output) diff --git a/examples/sandbox/memory_multi_agent_multiturn.py b/examples/sandbox/memory_multi_agent_multiturn.py index e7e867b30e..05f13d2747 100644 --- a/examples/sandbox/memory_multi_agent_multiturn.py +++ b/examples/sandbox/memory_multi_agent_multiturn.py @@ -15,7 +15,7 @@ if __package__ is None or __package__ == "": sys.path.insert(0, str(Path(__file__).resolve().parents[2])) -DEFAULT_MODEL = "gpt-5.4" +DEFAULT_MODEL = "gpt-5.5" GTM_SESSION_ID = "gtm-q2-pipeline-review" ENGINEERING_SESSION_ID = "eng-invoice-test-fix" @@ -123,7 +123,9 @@ def _build_engineering_agent(*, model: str, manifest: Manifest) -> SandboxAgent: model=model, instructions=( "You are an engineer. Inspect files before editing, make minimal changes, and verify " - "with tests." + "with tests. Use a non-login POSIX shell for commands. Make one focused pytest attempt; " + "if the local sandbox blocks Python or toolchain access, report that validation was " + "blocked and finish instead of retrying repeatedly." ), default_manifest=manifest, capabilities=[ @@ -204,6 +206,7 @@ async def main(*, model: str) -> None: ENGINEERING_TURN, session=engineering_conversation_session, run_config=engineering_config, + max_turns=20, ) print("\n[engineering]") print(engineering.final_output) diff --git a/examples/sandbox/memory_s3.py b/examples/sandbox/memory_s3.py index 2eb3bea57f..bfd770bc69 100644 --- a/examples/sandbox/memory_s3.py +++ b/examples/sandbox/memory_s3.py @@ -31,7 +31,7 @@ from examples.sandbox.basic import _import_docker_from_env from examples.sandbox.docker.mounts.mount_smoke import IMAGE as MOUNT_IMAGE, ensure_mount_image -DEFAULT_MODEL = "gpt-5.4" +DEFAULT_MODEL = "gpt-5.5" DEFAULT_MOUNT_DIR = "persistent" FIRST_PROMPT = "Inspect workspace and fix invoice total bug in src/acme_metrics/report.py." SECOND_PROMPT = ( diff --git a/examples/sandbox/sandbox_agent_capabilities.py b/examples/sandbox/sandbox_agent_capabilities.py index 4d00ab6310..2751d5cc15 100644 --- a/examples/sandbox/sandbox_agent_capabilities.py +++ b/examples/sandbox/sandbox_agent_capabilities.py @@ -55,7 +55,7 @@ sys.path.insert(0, str(Path(__file__).resolve().parents[2])) -DEFAULT_MODEL = "gpt-5.4" +DEFAULT_MODEL = "gpt-5.5" COMPACTION_THRESHOLD = 1_000 VERIFICATION_FILE = Path("verification/capabilities.txt") DELETE_FILE = Path("verification/delete-me.txt") @@ -211,7 +211,7 @@ def _configure_filesystem(toolset: FilesystemToolSet): f"5. Create `{VERIFICATION_FILE.as_posix()}` with exactly these two lines:\n" " skill_loaded=true\n" " codename=atlas\n" - "6. Update that file so it has exactly these four lines:\n" + "6. Use the apply_patch tool to update that file so it has exactly these four lines:\n" " skill_loaded=true\n" " codename=atlas\n" " note_source=filesystem\n" diff --git a/examples/sandbox/sandbox_agent_with_remote_snapshot.py b/examples/sandbox/sandbox_agent_with_remote_snapshot.py index 95f651587b..902715602a 100644 --- a/examples/sandbox/sandbox_agent_with_remote_snapshot.py +++ b/examples/sandbox/sandbox_agent_with_remote_snapshot.py @@ -167,7 +167,7 @@ async def main(model: str) -> None: if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--model", default="gpt-5.4", help="Model name to use.") + parser.add_argument("--model", default="gpt-5.5", help="Model name to use.") args = parser.parse_args() asyncio.run(main(args.model)) diff --git a/examples/sandbox/sandbox_agent_with_tools.py b/examples/sandbox/sandbox_agent_with_tools.py index a9dceb8326..508d35a58d 100644 --- a/examples/sandbox/sandbox_agent_with_tools.py +++ b/examples/sandbox/sandbox_agent_with_tools.py @@ -109,7 +109,7 @@ async def main(model: str, question: str) -> None: if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--model", default="gpt-5.4", help="Model name to use.") + parser.add_argument("--model", default="gpt-5.5", help="Model name to use.") parser.add_argument("--question", default=DEFAULT_QUESTION, help="Prompt to send to the agent.") args = parser.parse_args() diff --git a/examples/sandbox/sandbox_agents_as_tools.py b/examples/sandbox/sandbox_agents_as_tools.py index 777b4c8295..5740308bd3 100644 --- a/examples/sandbox/sandbox_agents_as_tools.py +++ b/examples/sandbox/sandbox_agents_as_tools.py @@ -13,6 +13,7 @@ from pathlib import Path from typing import Literal +from openai.types.shared import Reasoning from pydantic import BaseModel, Field from agents import Agent, ModelSettings, Runner, function_tool @@ -130,7 +131,7 @@ async def main(model: str, question: str) -> None: ), default_manifest=pricing_manifest, capabilities=[WorkspaceShellCapability()], - model_settings=ModelSettings(tool_choice="required"), + model_settings=ModelSettings(tool_choice="required", reasoning=Reasoning(effort="none")), output_type=PricingPacketReview, ) rollout_agent = SandboxAgent( @@ -146,7 +147,7 @@ async def main(model: str, question: str) -> None: ), default_manifest=rollout_manifest, capabilities=[WorkspaceShellCapability()], - model_settings=ModelSettings(tool_choice="required"), + model_settings=ModelSettings(tool_choice="required", reasoning=Reasoning(effort="none")), output_type=RolloutRiskReview, ) @@ -165,25 +166,27 @@ async def main(model: str, question: str) -> None: "recommendation, use only facts and numbers that appear in the tool outputs, and do " "not add any extra incidents, price points, or contract terms." ), - model_settings=ModelSettings(tool_choice="required"), + model_settings=ModelSettings(tool_choice="required", reasoning=Reasoning(effort="none")), tools=[ pricing_agent.as_tool( tool_name="review_pricing_packet", tool_description="Inspect the pricing packet and summarize commercial risk.", custom_output_extractor=_structured_tool_output_extractor, run_config=pricing_run_config, + max_turns=6, ), rollout_agent.as_tool( tool_name="review_rollout_risk", tool_description="Inspect the rollout packet and summarize implementation risk.", custom_output_extractor=_structured_tool_output_extractor, run_config=rollout_run_config, + max_turns=6, ), get_discount_approval_rule, ], ) - result = await Runner.run(orchestrator, question) + result = await Runner.run(orchestrator, question, max_turns=8) tool_names = [ tool_call_name(item.raw_item) for item in result.new_items @@ -196,7 +199,7 @@ async def main(model: str, question: str) -> None: if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--model", default="gpt-5.4", help="Model name to use.") + parser.add_argument("--model", default="gpt-5.5", help="Model name to use.") parser.add_argument("--question", default=DEFAULT_QUESTION, help="Prompt to send to the agent.") args = parser.parse_args() diff --git a/examples/sandbox/tax_prep.py b/examples/sandbox/tax_prep.py index 6028913db3..047f4a9cb9 100644 --- a/examples/sandbox/tax_prep.py +++ b/examples/sandbox/tax_prep.py @@ -227,7 +227,7 @@ async def main( if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--model", default="gpt-5.4", help="Model name to use.") + parser.add_argument("--model", default="gpt-5.5", help="Model name to use.") parser.add_argument("--image", default=DEFAULT_IMAGE, help="Docker image for the sandbox.") parser.add_argument("--question", default=DEFAULT_QUESTION, help="Prompt to send to the agent.") parser.add_argument( diff --git a/examples/sandbox/tutorials/sandbox_resume/README.md b/examples/sandbox/tutorials/sandbox_resume/README.md index 323849ed8f..46d7ac8e32 100644 --- a/examples/sandbox/tutorials/sandbox_resume/README.md +++ b/examples/sandbox/tutorials/sandbox_resume/README.md @@ -25,7 +25,7 @@ and resume step stay easy to follow. You can override the model or prompt: ```bash -uv run python examples/sandbox/tutorials/sandbox_resume/main.py --model gpt-5.4 --question "Build a FastAPI service that exposes a warehouse robot's maintenance status." +uv run python examples/sandbox/tutorials/sandbox_resume/main.py --model gpt-5.5 --question "Build a FastAPI service that exposes a warehouse robot's maintenance status." ``` To run the same flow in Docker, build the shared tutorial image once and pass diff --git a/examples/sandbox/unix_local_pty.py b/examples/sandbox/unix_local_pty.py index 5918f2d898..be7c9c01d3 100644 --- a/examples/sandbox/unix_local_pty.py +++ b/examples/sandbox/unix_local_pty.py @@ -26,7 +26,7 @@ from examples.sandbox.misc.example_support import tool_call_name -DEFAULT_MODEL = "gpt-5.4" +DEFAULT_MODEL = "gpt-5.5" DEFAULT_QUESTION = ( "Start an interactive Python session. In that same session, compute `5 + 5`, then add " "5 more to the previous result. Briefly report the outputs and confirm that you stayed " diff --git a/examples/sandbox/unix_local_runner.py b/examples/sandbox/unix_local_runner.py index a8ebdf8935..d9869b87d7 100644 --- a/examples/sandbox/unix_local_runner.py +++ b/examples/sandbox/unix_local_runner.py @@ -197,7 +197,7 @@ async def main(model: str, question: str, stream: bool) -> None: if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--model", default="gpt-5.4", help="Model name to use.") + parser.add_argument("--model", default="gpt-5.5", help="Model name to use.") parser.add_argument("--question", default=DEFAULT_QUESTION, help="Prompt to send to the agent.") parser.add_argument("--stream", action="store_true", default=False, help="Stream the response.") parser.add_argument( diff --git a/examples/tools/apply_patch.py b/examples/tools/apply_patch.py index 4fa2878923..408f7ce18d 100644 --- a/examples/tools/apply_patch.py +++ b/examples/tools/apply_patch.py @@ -163,7 +163,7 @@ async def main(auto_approve: bool, model: str) -> None: ) parser.add_argument( "--model", - default="gpt-5.4", + default="gpt-5.5", help="Model ID to use for the agent.", ) args = parser.parse_args() diff --git a/examples/tools/code_interpreter.py b/examples/tools/code_interpreter.py index e4e7c09a7f..9577469a9a 100644 --- a/examples/tools/code_interpreter.py +++ b/examples/tools/code_interpreter.py @@ -16,7 +16,7 @@ async def main(): name="Code interpreter", # Note: using gpt-5-class models with streaming for this tool may require org verification. # Code interpreter does not support gpt-5 minimal reasoning effort; use default effort. - model="gpt-5.4", + model="gpt-5.5", instructions=( "Always use the code interpreter tool to solve numeric problems, and show the code " "you ran when possible." diff --git a/examples/tools/codex.py b/examples/tools/codex.py index bd5d508933..95c853e157 100644 --- a/examples/tools/codex.py +++ b/examples/tools/codex.py @@ -118,7 +118,7 @@ async def main() -> None: default_thread_options=ThreadOptions( # You can pass a Codex instance to customize CLI details # codex=Codex(executable_path="/path/to/codex", base_url="..."), - model="gpt-5.4", + model="gpt-5.5", model_reasoning_effort="low", network_access_enabled=True, web_search_enabled=False, diff --git a/examples/tools/codex_same_thread.py b/examples/tools/codex_same_thread.py index 5fd43c0da1..19cfee534c 100644 --- a/examples/tools/codex_same_thread.py +++ b/examples/tools/codex_same_thread.py @@ -73,7 +73,7 @@ async def main() -> None: name="codex_engineer", sandbox_mode="read-only", default_thread_options=ThreadOptions( - model="gpt-5.4", + model="gpt-5.5", model_reasoning_effort="low", network_access_enabled=True, web_search_enabled=False, diff --git a/examples/tools/computer_use.py b/examples/tools/computer_use.py index 0f076bba96..86256d3c5c 100644 --- a/examples/tools/computer_use.py +++ b/examples/tools/computer_use.py @@ -202,7 +202,7 @@ async def run_agent( instructions="You are a helpful agent. Find the current weather in Tokyo.", tools=[ComputerTool(computer=computer_config)], # GPT-5.4 uses the built-in Responses API computer tool. - model="gpt-5.4", + model="gpt-5.5", ) result = await Runner.run(agent, "What is the weather in Tokyo right now?") print(result.final_output) diff --git a/examples/tools/container_shell_inline_skill.py b/examples/tools/container_shell_inline_skill.py index ff974029fa..fa53675c11 100644 --- a/examples/tools/container_shell_inline_skill.py +++ b/examples/tools/container_shell_inline_skill.py @@ -110,7 +110,7 @@ async def main(model: str) -> None: parser = argparse.ArgumentParser() parser.add_argument( "--model", - default="gpt-5.4", + default="gpt-5.5", help="Model name to use.", ) args = parser.parse_args() diff --git a/examples/tools/container_shell_skill_reference.py b/examples/tools/container_shell_skill_reference.py index 4e42b94198..e1cd1396b9 100644 --- a/examples/tools/container_shell_skill_reference.py +++ b/examples/tools/container_shell_skill_reference.py @@ -105,7 +105,7 @@ async def main(model: str) -> None: parser = argparse.ArgumentParser() parser.add_argument( "--model", - default="gpt-5.4", + default="gpt-5.5", help="Model name to use.", ) args = parser.parse_args() diff --git a/examples/tools/local_shell_skill.py b/examples/tools/local_shell_skill.py index 75ca73b62c..2a1955eced 100644 --- a/examples/tools/local_shell_skill.py +++ b/examples/tools/local_shell_skill.py @@ -71,7 +71,7 @@ async def main(model: str) -> None: parser = argparse.ArgumentParser() parser.add_argument( "--model", - default="gpt-5.4", + default="gpt-5.5", help="Model name to use.", ) args = parser.parse_args() diff --git a/examples/tools/shell.py b/examples/tools/shell.py index 1fca7d6763..6fa97af3b0 100644 --- a/examples/tools/shell.py +++ b/examples/tools/shell.py @@ -135,7 +135,7 @@ async def on_shell_approval( ) parser.add_argument( "--model", - default="gpt-5.4", + default="gpt-5.5", ) args = parser.parse_args() asyncio.run(main(args.prompt, args.model)) diff --git a/examples/tools/shell_human_in_the_loop.py b/examples/tools/shell_human_in_the_loop.py index 596eafe03e..8c99b22af4 100644 --- a/examples/tools/shell_human_in_the_loop.py +++ b/examples/tools/shell_human_in_the_loop.py @@ -148,7 +148,7 @@ async def main(prompt: str, model: str) -> None: ) parser.add_argument( "--model", - default="gpt-5.4", + default="gpt-5.5", ) args = parser.parse_args() asyncio.run(main(args.prompt, args.model)) diff --git a/examples/tools/tool_search.py b/examples/tools/tool_search.py index d0d83cc210..1a15a4146b 100644 --- a/examples/tools/tool_search.py +++ b/examples/tools/tool_search.py @@ -96,7 +96,7 @@ def get_shipping_credit_balance( namespaced_agent = Agent( name="Operations assistant", - model="gpt-5.4", + model="gpt-5.5", instructions=( "For customer questions in this example, load the full `crm` namespace with no query " "filter before calling tools. " @@ -108,7 +108,7 @@ def get_shipping_credit_balance( top_level_agent = Agent( name="Shipping assistant", - model="gpt-5.4", + model="gpt-5.5", instructions=( "For ETA questions in this example, search `get_shipping_eta` before calling tools. " "Do not search `get_shipping_credit_balance` unless the user asks about shipping credits." diff --git a/examples/voice/streamed/my_workflow.py b/examples/voice/streamed/my_workflow.py index 2e0bf1c8d4..532f7867a4 100644 --- a/examples/voice/streamed/my_workflow.py +++ b/examples/voice/streamed/my_workflow.py @@ -20,7 +20,7 @@ def get_weather(city: str) -> str: instructions=prompt_with_handoff_instructions( "You're speaking to a human, so be polite and concise. Speak in Spanish.", ), - model="gpt-5.4", + model="gpt-5.5", ) agent = Agent( @@ -28,7 +28,7 @@ def get_weather(city: str) -> str: instructions=prompt_with_handoff_instructions( "You're speaking to a human, so be polite and concise. If the user speaks in Spanish, handoff to the spanish agent.", ), - model="gpt-5.4", + model="gpt-5.5", handoffs=[spanish_agent], tools=[get_weather], ) diff --git a/src/agents/models/default_models.py b/src/agents/models/default_models.py index 455aec27a5..c6d29f5abf 100644 --- a/src/agents/models/default_models.py +++ b/src/agents/models/default_models.py @@ -61,6 +61,7 @@ (re.compile(r"^gpt-5\.4-pro(?:-\d{4}-\d{2}-\d{2})?$"), "medium"), (re.compile(r"^gpt-5\.4-mini(?:-\d{4}-\d{2}-\d{2})?$"), "none"), (re.compile(r"^gpt-5\.4-nano(?:-\d{4}-\d{2}-\d{2})?$"), "none"), + (re.compile(r"^gpt-5\.5(?:-\d{4}-\d{2}-\d{2})?$"), "none"), ) diff --git a/src/agents/models/openai_responses.py b/src/agents/models/openai_responses.py index d40376302f..c253bb2f56 100644 --- a/src/agents/models/openai_responses.py +++ b/src/agents/models/openai_responses.py @@ -1748,7 +1748,9 @@ def _is_preview_computer_model(cls, model: str | ChatModel | None) -> bool: @classmethod def _is_ga_computer_model(cls, model: str | ChatModel | None) -> bool: - return isinstance(model, str) and model.startswith("gpt-5.4") + return isinstance(model, str) and ( + model.startswith("gpt-5.4") or model.startswith("gpt-5.5") + ) @classmethod def resolve_computer_tool_model( diff --git a/src/agents/sandbox/capabilities/compaction.py b/src/agents/sandbox/capabilities/compaction.py index 1682119c8c..f1860bf196 100644 --- a/src/agents/sandbox/capabilities/compaction.py +++ b/src/agents/sandbox/capabilities/compaction.py @@ -29,6 +29,7 @@ def _model_context_windows(models: tuple[str, ...], context_window: int) -> dict "gpt-5.4-2026-03-05", "gpt-5.4-pro", "gpt-5.4-pro-2026-03-05", + "gpt-5.5", "gpt-4.1", "gpt-4.1-2025-04-14", "gpt-4.1-mini", diff --git a/src/agents/sandbox/config.py b/src/agents/sandbox/config.py index 350e1a84f3..206ed459f1 100644 --- a/src/agents/sandbox/config.py +++ b/src/agents/sandbox/config.py @@ -49,7 +49,7 @@ class MemoryGenerateConfig: ) """Model settings used for phase-1 single-rollout extraction.""" - phase_two_model: str | Model = "gpt-5.4" + phase_two_model: str | Model = "gpt-5.5" """Model used for phase-2 memory consolidation.""" phase_two_model_settings: ModelSettings | None = field( diff --git a/tests/models/test_default_models.py b/tests/models/test_default_models.py index d0904cd4e2..f24ef19295 100644 --- a/tests/models/test_default_models.py +++ b/tests/models/test_default_models.py @@ -48,6 +48,7 @@ def test_gpt_5_reasoning_settings_required_detects_gpt_5_models_while_ignoring_c assert gpt_5_reasoning_settings_required("gpt-5.2-codex") is True assert gpt_5_reasoning_settings_required("gpt-5.2-pro") is True assert gpt_5_reasoning_settings_required("gpt-5.4-pro") is True + assert gpt_5_reasoning_settings_required("gpt-5.5") is True assert gpt_5_reasoning_settings_required("gpt-5-mini") is True assert gpt_5_reasoning_settings_required("gpt-5-nano") is True assert gpt_5_reasoning_settings_required("gpt-5-chat-latest") is False @@ -89,6 +90,11 @@ def test_get_default_model_settings_returns_none_reasoning_defaults_for_gpt_5_4_ assert get_default_model_settings("gpt-5.4-nano") == _gpt_5_default_settings("none") +def test_get_default_model_settings_returns_none_reasoning_defaults_for_gpt_5_5_models(): + assert get_default_model_settings("gpt-5.5") == _gpt_5_default_settings("none") + assert get_default_model_settings("gpt-5.5-2026-04-23") == _gpt_5_default_settings("none") + + def test_get_default_model_settings_returns_low_reasoning_defaults_for_base_gpt_5(): assert get_default_model_settings("gpt-5") == _gpt_5_default_settings("low") assert get_default_model_settings("gpt-5-2025-08-07") == _gpt_5_default_settings("low") diff --git a/tests/sandbox/test_compaction.py b/tests/sandbox/test_compaction.py index 3a49820327..76a7f21d2f 100644 --- a/tests/sandbox/test_compaction.py +++ b/tests/sandbox/test_compaction.py @@ -8,14 +8,17 @@ [ ("gpt-5.4", 1_047_576), ("gpt-5.4-pro", 1_047_576), + ("gpt-5.5", 1_047_576), ("gpt-5.3-codex", 400_000), ("gpt-5.4-mini", 400_000), ("gpt-4.1", 1_047_576), ("o3", 200_000), ("gpt-4o", 128_000), ("openai/gpt-5.4", 1_047_576), + ("openai/gpt-5.5", 1_047_576), ("gpt-5-2", 400_000), ("gpt-5-4", 1_047_576), + ("gpt-5-5", 1_047_576), ("openai/gpt-5-4-mini", 400_000), ("gpt-4-1-mini", 1_047_576), ], diff --git a/tests/test_openai_responses_converter.py b/tests/test_openai_responses_converter.py index a461785ede..e1c8069ec9 100644 --- a/tests/test_openai_responses_converter.py +++ b/tests/test_openai_responses_converter.py @@ -1025,9 +1025,10 @@ def test_convert_tools_includes_handoffs(): assert converted.includes == [] -def test_convert_tools_accepts_unresolved_computer_initializer(): +@pytest.mark.parametrize("model", ["gpt-5.4", "gpt-5.5"]) +def test_convert_tools_accepts_unresolved_computer_initializer(model: str): comp_tool = ComputerTool(computer=lambda **_: DummyComputer()) - converted = Converter.convert_tools(tools=[comp_tool], handoffs=[], model="gpt-5.4") + converted = Converter.convert_tools(tools=[comp_tool], handoffs=[], model=model) assert converted.tools == [{"type": "computer"}] @@ -1042,13 +1043,14 @@ def test_resolve_computer_tool_model_returns_none_when_request_model_is_omitted( assert resolved is None -def test_convert_tools_preview_tool_choice_uses_ga_payload_for_ga_model() -> None: +@pytest.mark.parametrize("model", ["gpt-5.4", "gpt-5.5"]) +def test_convert_tools_preview_tool_choice_uses_ga_payload_for_ga_model(model: str) -> None: comp_tool = ComputerTool(computer=lambda **_: DummyComputer()) converted = Converter.convert_tools( tools=[comp_tool], handoffs=[], - model="gpt-5.4", + model=model, tool_choice="computer_use_preview", )