Merge branch 'main' into feat/sprites-sandbox

vaurdan · web-flow · commit e38d3d7dd11b · 2026-05-15T10:20:44.000+01:00
diff --git a/src/agents/agent_output.py b/src/agents/agent_output.py
@@ -180,15 +180,16 @@ def _is_subclass_of_base_model_or_dict(t: Any) -> bool:
     return issubclass(t, BaseModel | dict)
 
 
-def _type_to_str(t: type[Any]) -> str:
+def _type_to_str(t: Any) -> str:
     origin = get_origin(t)
     args = get_args(t)
 
     if origin is None:
         # It's a simple type like `str`, `int`, etc.
-        return t.__name__
+        return getattr(t, "__name__", repr(t))
     elif args:
         args_str = ", ".join(_type_to_str(arg) for arg in args)
-        return f"{origin.__name__}[{args_str}]"
+        origin_name = getattr(origin, "__name__", str(origin))
+        return f"{origin_name}[{args_str}]"
     else:
         return str(t)
diff --git a/src/agents/extensions/handoff_filters.py b/src/agents/extensions/handoff_filters.py
@@ -104,6 +104,7 @@ def _remove_tool_types_from_input(
         "apply_patch_call_output",
         "custom_tool_call",
         "custom_tool_call_output",
+        "hosted_tool_call",
     ]
 
     filtered_items: list[TResponseInputItem] = []
diff --git a/src/agents/extensions/sandbox/vercel/sandbox.py b/src/agents/extensions/sandbox/vercel/sandbox.py
@@ -79,6 +79,12 @@
     httpx.ProtocolError,
 )
 
+# Sandbox status values from which the sandbox can still transition to RUNNING.
+# Only "pending" qualifies: a freshly created sandbox transitions PENDING -> RUNNING.
+# Other non-RUNNING states ("stopping", "stopped", "failed", "aborted",
+# "snapshotting") cannot reach RUNNING, so waiting is futile.
+_VERCEL_TRANSIENT_SANDBOX_STATUSES: frozenset[str] = frozenset({"pending"})
+
 
 def _is_transient_create_error(exc: BaseException) -> bool:
     if exception_chain_has_status_code(exc, {408, 425, 429, 500, 502, 503, 504}):
@@ -754,15 +760,22 @@ async def resume(self, state: SandboxSessionState) -> SandboxSession:
                     project_id=resolved_project_id,
                     team_id=resolved_team_id,
                 )
-                # XXX(scotttrinh): This will wait even if in a terminal state.
-                # We should make wait_for_status smarter about the possible
-                # transitions to avoid waiting for a status if it's impossible
-                # to transition to it from the current status.
-                await sandbox.wait_for_status(
-                    SandboxStatus.RUNNING,
-                    timeout=DEFAULT_VERCEL_WAIT_FOR_RUNNING_TIMEOUT_S,
-                )
-                reconnected = True
+                current_status = str(sandbox.status)
+                if current_status == str(SandboxStatus.RUNNING):
+                    # Already running; skip the wait entirely.
+                    reconnected = True
+                elif current_status in _VERCEL_TRANSIENT_SANDBOX_STATUSES:
+                    # Still transitioning toward RUNNING (currently only PENDING); wait normally.
+                    await sandbox.wait_for_status(
+                        SandboxStatus.RUNNING,
+                        timeout=DEFAULT_VERCEL_WAIT_FOR_RUNNING_TIMEOUT_S,
+                    )
+                    reconnected = True
+                else:
+                    # Cannot reach RUNNING from here (STOPPING, STOPPED, FAILED,
+                    # ABORTED, SNAPSHOTTING). Drop the handle and recreate below.
+                    await sandbox.client.aclose()
+                    sandbox = None
             except TimeoutError:
                 if sandbox is not None:
                     await sandbox.client.aclose()
diff --git a/src/agents/items.py b/src/agents/items.py
@@ -684,7 +684,12 @@ def extract_last_content(cls, message: TResponseOutputItem) -> str:
             return ""
         last_content = message.content[-1]
         if isinstance(last_content, ResponseOutputText):
-            return last_content.text
+            # ``last_content.text`` is typed as ``str`` per the Responses API schema,
+            # but provider gateways (e.g. LiteLLM) and ``model_construct`` paths during
+            # streaming have been observed surfacing ``None``. Coerce so callers relying
+            # on the ``-> str`` return type don't see a ``None``. Same rationale as
+            # ``extract_text`` below.
+            return last_content.text or ""
         elif isinstance(last_content, ResponseOutputRefusal):
             return last_content.refusal
         else:
diff --git a/tests/extensions/sandbox/test_vercel.py b/tests/extensions/sandbox/test_vercel.py
@@ -793,13 +793,70 @@ async def test_vercel_resume_reconnects_existing_running_sandbox(
             "team_id": None,
         }
     ]
+    assert resumed._inner.state.sandbox_id == "sandbox-existing"
+    assert _FakeAsyncSandbox.create_calls == []
+    # Sandbox is already RUNNING, so wait_for_status should not be called.
+    assert existing.wait_for_status_calls == []
+    assert resumed._inner._workspace_state_preserved_on_start() is True  # noqa: SLF001
+    assert resumed._inner._system_state_preserved_on_start() is True  # noqa: SLF001
+
+
+@pytest.mark.asyncio
+async def test_vercel_resume_waits_when_sandbox_pending(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    vercel_module = _load_vercel_module(monkeypatch)
+    existing = _FakeAsyncSandbox(sandbox_id="sandbox-existing", status="pending")
+    _FakeAsyncSandbox.sandboxes[existing.sandbox_id] = existing
+
+    state = vercel_module.VercelSandboxSessionState(
+        session_id="00000000-0000-0000-0000-000000000200",
+        manifest=Manifest(),
+        snapshot=NoopSnapshot(id="snapshot"),
+        sandbox_id=existing.sandbox_id,
+    )
+
+    client = vercel_module.VercelSandboxClient()
+    resumed = await client.resume(state)
+
     assert resumed._inner.state.sandbox_id == "sandbox-existing"
     assert _FakeAsyncSandbox.create_calls == []
     assert existing.wait_for_status_calls == [
         ("running", vercel_module.DEFAULT_VERCEL_WAIT_FOR_RUNNING_TIMEOUT_S)
     ]
     assert resumed._inner._workspace_state_preserved_on_start() is True  # noqa: SLF001
-    assert resumed._inner._system_state_preserved_on_start() is True  # noqa: SLF001
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "terminal_status", ["stopping", "stopped", "failed", "aborted", "snapshotting"]
+)
+async def test_vercel_resume_recreates_sandbox_when_cannot_reach_running(
+    monkeypatch: pytest.MonkeyPatch,
+    terminal_status: str,
+) -> None:
+    """A sandbox in any state that cannot transition to RUNNING must be recreated
+    immediately, without waiting for the wait_for_status timeout."""
+    vercel_module = _load_vercel_module(monkeypatch)
+    existing = _FakeAsyncSandbox(sandbox_id="sandbox-terminal", status=terminal_status)
+    _FakeAsyncSandbox.sandboxes[existing.sandbox_id] = existing
+
+    state = vercel_module.VercelSandboxSessionState(
+        session_id="00000000-0000-0000-0000-000000000201",
+        manifest=Manifest(),
+        snapshot=NoopSnapshot(id="snapshot"),
+        sandbox_id=existing.sandbox_id,
+    )
+
+    client = vercel_module.VercelSandboxClient()
+    resumed = await client.resume(state)
+
+    assert existing.wait_for_status_calls == []
+    assert existing.client.closed is True
+    assert len(_FakeAsyncSandbox.create_calls) == 1
+    assert resumed._inner.state.sandbox_id != "sandbox-terminal"
+    assert resumed._inner.state.workspace_root_ready is False
+    assert resumed._inner._workspace_state_preserved_on_start() is False  # noqa: SLF001
 
 
 @pytest.mark.asyncio
@@ -837,7 +894,8 @@ async def test_vercel_resume_recreates_sandbox_after_wait_timeout(
     monkeypatch: pytest.MonkeyPatch,
 ) -> None:
     vercel_module = _load_vercel_module(monkeypatch)
-    existing = _FakeAsyncSandbox(sandbox_id="sandbox-existing")
+    # Use "pending" so that the code enters the wait path (not already RUNNING).
+    existing = _FakeAsyncSandbox(sandbox_id="sandbox-existing", status="pending")
     existing.wait_for_status_error = TimeoutError()
     _FakeAsyncSandbox.sandboxes[existing.sandbox_id] = existing
 
diff --git a/tests/test_extension_filters.py b/tests/test_extension_filters.py
@@ -1130,6 +1130,7 @@ def test_removes_hosted_tool_types_from_input_history() -> None:
         "apply_patch_call_output",
         "custom_tool_call",
         "custom_tool_call_output",
+        "hosted_tool_call",
     ]
     input_items: list[TResponseInputItem] = [_get_message_input_item("Hello")]
     for t in hosted_types:
diff --git a/tests/test_output_tool.py b/tests/test_output_tool.py
@@ -1,5 +1,5 @@
 import json
-from typing import Any
+from typing import Any, Literal, cast
 
 import pytest
 from pydantic import BaseModel
@@ -77,6 +77,18 @@ def test_structured_output_list():
     assert validated == ["foo", "bar"]
 
 
+def test_structured_output_literal_name_handles_literal_values():
+    output_schema = AgentOutputSchema(output_type=cast(type[Any], Literal["ok"]))
+
+    assert output_schema.name() == "Literal['ok']"
+
+
+def test_structured_output_nested_literal_name_handles_literal_values():
+    output_schema = AgentOutputSchema(output_type=list[Literal["ok", "done"]])
+
+    assert output_schema.name() == "list[Literal['ok', 'done']]"
+
+
 def test_structured_output_generic_dict_is_not_wrapped():
     output_schema = AgentOutputSchema(output_type=dict[str, int], strict_json_schema=False)
     assert output_schema.output_type == dict[str, int]
diff --git a/tests/utils/test_pretty_print_and_items.py b/tests/utils/test_pretty_print_and_items.py
@@ -38,6 +38,30 @@ def test_text_message_outputs_handles_none_text_across_items():
     assert ItemHelpers.text_message_outputs(items) == "world"
 
 
+def _make_output_message(text: str | None) -> ResponseOutputMessage:
+    return ResponseOutputMessage.model_construct(
+        id="msg_1",
+        role="assistant",
+        status="completed",
+        content=[ResponseOutputText.model_construct(type="output_text", text=text, annotations=[])],
+    )
+
+
+def test_extract_last_content_returns_empty_string_for_none_text():
+    """extract_last_content is declared `-> str` and must not return None even if
+    the underlying ResponseOutputText.text is None (observed via LiteLLM gateways
+    and ``model_construct`` paths during streaming; see the notes in items.py)."""
+    msg = _make_output_message(None)
+    result = ItemHelpers.extract_last_content(msg)
+    assert isinstance(result, str)
+    assert result == ""
+
+
+def test_extract_last_content_returns_text_normally():
+    msg = _make_output_message("hello")
+    assert ItemHelpers.extract_last_content(msg) == "hello"
+
+
 def _make_run_error_details(n_input: int = 0, n_output: int = 0) -> RunErrorDetails:
     return RunErrorDetails(
         input="hi",

Original file line number	Diff line number	Diff line change
`@@ -104,6 +104,7 @@ def _remove_tool_types_from_input(`
`104`	`104`	`"apply_patch_call_output",`
`105`	`105`	`"custom_tool_call",`
`106`	`106`	`"custom_tool_call_output",`
	`107`	`+ "hosted_tool_call",`
`107`	`108`	`]`
`108`	`109`
`109`	`110`	`filtered_items: list[TResponseInputItem] = []`
Original file line number	Diff line number	Diff line change
`@@ -1130,6 +1130,7 @@ def test_removes_hosted_tool_types_from_input_history() -> None:`
`1130`	`1130`	`"apply_patch_call_output",`
`1131`	`1131`	`"custom_tool_call",`
`1132`	`1132`	`"custom_tool_call_output",`
	`1133`	`+ "hosted_tool_call",`
`1133`	`1134`	`]`
`1134`	`1135`	`input_items: list[TResponseInputItem] = [_get_message_input_item("Hello")]`
`1135`	`1136`	`for t in hosted_types:`