Skip to content

Commit e38d3d7

Browse files
authored
Merge branch 'main' into feat/sprites-sandbox
2 parents 65e083b + 5e71d09 commit e38d3d7

8 files changed

Lines changed: 131 additions & 16 deletions

File tree

src/agents/agent_output.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -180,15 +180,16 @@ def _is_subclass_of_base_model_or_dict(t: Any) -> bool:
180180
return issubclass(t, BaseModel | dict)
181181

182182

183-
def _type_to_str(t: type[Any]) -> str:
183+
def _type_to_str(t: Any) -> str:
184184
origin = get_origin(t)
185185
args = get_args(t)
186186

187187
if origin is None:
188188
# It's a simple type like `str`, `int`, etc.
189-
return t.__name__
189+
return getattr(t, "__name__", repr(t))
190190
elif args:
191191
args_str = ", ".join(_type_to_str(arg) for arg in args)
192-
return f"{origin.__name__}[{args_str}]"
192+
origin_name = getattr(origin, "__name__", str(origin))
193+
return f"{origin_name}[{args_str}]"
193194
else:
194195
return str(t)

src/agents/extensions/handoff_filters.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ def _remove_tool_types_from_input(
104104
"apply_patch_call_output",
105105
"custom_tool_call",
106106
"custom_tool_call_output",
107+
"hosted_tool_call",
107108
]
108109

109110
filtered_items: list[TResponseInputItem] = []

src/agents/extensions/sandbox/vercel/sandbox.py

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,12 @@
7979
httpx.ProtocolError,
8080
)
8181

82+
# Sandbox status values from which the sandbox can still transition to RUNNING.
83+
# Only "pending" qualifies: a freshly created sandbox transitions PENDING -> RUNNING.
84+
# Other non-RUNNING states ("stopping", "stopped", "failed", "aborted",
85+
# "snapshotting") cannot reach RUNNING, so waiting is futile.
86+
_VERCEL_TRANSIENT_SANDBOX_STATUSES: frozenset[str] = frozenset({"pending"})
87+
8288

8389
def _is_transient_create_error(exc: BaseException) -> bool:
8490
if exception_chain_has_status_code(exc, {408, 425, 429, 500, 502, 503, 504}):
@@ -754,15 +760,22 @@ async def resume(self, state: SandboxSessionState) -> SandboxSession:
754760
project_id=resolved_project_id,
755761
team_id=resolved_team_id,
756762
)
757-
# XXX(scotttrinh): This will wait even if in a terminal state.
758-
# We should make wait_for_status smarter about the possible
759-
# transitions to avoid waiting for a status if it's impossible
760-
# to transition to it from the current status.
761-
await sandbox.wait_for_status(
762-
SandboxStatus.RUNNING,
763-
timeout=DEFAULT_VERCEL_WAIT_FOR_RUNNING_TIMEOUT_S,
764-
)
765-
reconnected = True
763+
current_status = str(sandbox.status)
764+
if current_status == str(SandboxStatus.RUNNING):
765+
# Already running; skip the wait entirely.
766+
reconnected = True
767+
elif current_status in _VERCEL_TRANSIENT_SANDBOX_STATUSES:
768+
# Still transitioning toward RUNNING (e.g. PENDING); wait normally.
769+
await sandbox.wait_for_status(
770+
SandboxStatus.RUNNING,
771+
timeout=DEFAULT_VERCEL_WAIT_FOR_RUNNING_TIMEOUT_S,
772+
)
773+
reconnected = True
774+
else:
775+
# Cannot reach RUNNING from here (STOPPING, STOPPED, FAILED,
776+
# ABORTED, SNAPSHOTTING). Drop the handle and recreate below.
777+
await sandbox.client.aclose()
778+
sandbox = None
766779
except TimeoutError:
767780
if sandbox is not None:
768781
await sandbox.client.aclose()

src/agents/items.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -684,7 +684,12 @@ def extract_last_content(cls, message: TResponseOutputItem) -> str:
684684
return ""
685685
last_content = message.content[-1]
686686
if isinstance(last_content, ResponseOutputText):
687-
return last_content.text
687+
# ``last_content.text`` is typed as ``str`` per the Responses API schema,
688+
# but provider gateways (e.g. LiteLLM) and ``model_construct`` paths during
689+
# streaming have been observed surfacing ``None``. Coerce so callers relying
690+
# on the ``-> str`` return type don't see a ``None``. Same rationale as
691+
# ``extract_text`` below.
692+
return last_content.text or ""
688693
elif isinstance(last_content, ResponseOutputRefusal):
689694
return last_content.refusal
690695
else:

tests/extensions/sandbox/test_vercel.py

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -793,13 +793,70 @@ async def test_vercel_resume_reconnects_existing_running_sandbox(
793793
"team_id": None,
794794
}
795795
]
796+
assert resumed._inner.state.sandbox_id == "sandbox-existing"
797+
assert _FakeAsyncSandbox.create_calls == []
798+
# Sandbox is already RUNNING, so wait_for_status should not be called.
799+
assert existing.wait_for_status_calls == []
800+
assert resumed._inner._workspace_state_preserved_on_start() is True # noqa: SLF001
801+
assert resumed._inner._system_state_preserved_on_start() is True # noqa: SLF001
802+
803+
804+
@pytest.mark.asyncio
805+
async def test_vercel_resume_waits_when_sandbox_pending(
806+
monkeypatch: pytest.MonkeyPatch,
807+
) -> None:
808+
vercel_module = _load_vercel_module(monkeypatch)
809+
existing = _FakeAsyncSandbox(sandbox_id="sandbox-existing", status="pending")
810+
_FakeAsyncSandbox.sandboxes[existing.sandbox_id] = existing
811+
812+
state = vercel_module.VercelSandboxSessionState(
813+
session_id="00000000-0000-0000-0000-000000000200",
814+
manifest=Manifest(),
815+
snapshot=NoopSnapshot(id="snapshot"),
816+
sandbox_id=existing.sandbox_id,
817+
)
818+
819+
client = vercel_module.VercelSandboxClient()
820+
resumed = await client.resume(state)
821+
796822
assert resumed._inner.state.sandbox_id == "sandbox-existing"
797823
assert _FakeAsyncSandbox.create_calls == []
798824
assert existing.wait_for_status_calls == [
799825
("running", vercel_module.DEFAULT_VERCEL_WAIT_FOR_RUNNING_TIMEOUT_S)
800826
]
801827
assert resumed._inner._workspace_state_preserved_on_start() is True # noqa: SLF001
802-
assert resumed._inner._system_state_preserved_on_start() is True # noqa: SLF001
828+
829+
830+
@pytest.mark.asyncio
831+
@pytest.mark.parametrize(
832+
"terminal_status", ["stopping", "stopped", "failed", "aborted", "snapshotting"]
833+
)
834+
async def test_vercel_resume_recreates_sandbox_when_cannot_reach_running(
835+
monkeypatch: pytest.MonkeyPatch,
836+
terminal_status: str,
837+
) -> None:
838+
"""A sandbox in any state that cannot transition to RUNNING must be recreated
839+
immediately, without waiting for the wait_for_status timeout."""
840+
vercel_module = _load_vercel_module(monkeypatch)
841+
existing = _FakeAsyncSandbox(sandbox_id="sandbox-terminal", status=terminal_status)
842+
_FakeAsyncSandbox.sandboxes[existing.sandbox_id] = existing
843+
844+
state = vercel_module.VercelSandboxSessionState(
845+
session_id="00000000-0000-0000-0000-000000000201",
846+
manifest=Manifest(),
847+
snapshot=NoopSnapshot(id="snapshot"),
848+
sandbox_id=existing.sandbox_id,
849+
)
850+
851+
client = vercel_module.VercelSandboxClient()
852+
resumed = await client.resume(state)
853+
854+
assert existing.wait_for_status_calls == []
855+
assert existing.client.closed is True
856+
assert len(_FakeAsyncSandbox.create_calls) == 1
857+
assert resumed._inner.state.sandbox_id != "sandbox-terminal"
858+
assert resumed._inner.state.workspace_root_ready is False
859+
assert resumed._inner._workspace_state_preserved_on_start() is False # noqa: SLF001
803860

804861

805862
@pytest.mark.asyncio
@@ -837,7 +894,8 @@ async def test_vercel_resume_recreates_sandbox_after_wait_timeout(
837894
monkeypatch: pytest.MonkeyPatch,
838895
) -> None:
839896
vercel_module = _load_vercel_module(monkeypatch)
840-
existing = _FakeAsyncSandbox(sandbox_id="sandbox-existing")
897+
# Use "pending" so that the code enters the wait path (not already RUNNING).
898+
existing = _FakeAsyncSandbox(sandbox_id="sandbox-existing", status="pending")
841899
existing.wait_for_status_error = TimeoutError()
842900
_FakeAsyncSandbox.sandboxes[existing.sandbox_id] = existing
843901

tests/test_extension_filters.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1130,6 +1130,7 @@ def test_removes_hosted_tool_types_from_input_history() -> None:
11301130
"apply_patch_call_output",
11311131
"custom_tool_call",
11321132
"custom_tool_call_output",
1133+
"hosted_tool_call",
11331134
]
11341135
input_items: list[TResponseInputItem] = [_get_message_input_item("Hello")]
11351136
for t in hosted_types:

tests/test_output_tool.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import json
2-
from typing import Any
2+
from typing import Any, Literal, cast
33

44
import pytest
55
from pydantic import BaseModel
@@ -77,6 +77,18 @@ def test_structured_output_list():
7777
assert validated == ["foo", "bar"]
7878

7979

80+
def test_structured_output_literal_name_handles_literal_values():
81+
output_schema = AgentOutputSchema(output_type=cast(type[Any], Literal["ok"]))
82+
83+
assert output_schema.name() == "Literal['ok']"
84+
85+
86+
def test_structured_output_nested_literal_name_handles_literal_values():
87+
output_schema = AgentOutputSchema(output_type=list[Literal["ok", "done"]])
88+
89+
assert output_schema.name() == "list[Literal['ok', 'done']]"
90+
91+
8092
def test_structured_output_generic_dict_is_not_wrapped():
8193
output_schema = AgentOutputSchema(output_type=dict[str, int], strict_json_schema=False)
8294
assert output_schema.output_type == dict[str, int]

tests/utils/test_pretty_print_and_items.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,30 @@ def test_text_message_outputs_handles_none_text_across_items():
3838
assert ItemHelpers.text_message_outputs(items) == "world"
3939

4040

41+
def _make_output_message(text: str | None) -> ResponseOutputMessage:
42+
return ResponseOutputMessage.model_construct(
43+
id="msg_1",
44+
role="assistant",
45+
status="completed",
46+
content=[ResponseOutputText.model_construct(type="output_text", text=text, annotations=[])],
47+
)
48+
49+
50+
def test_extract_last_content_returns_empty_string_for_none_text():
51+
"""extract_last_content is declared `-> str` and must not return None even if
52+
the underlying ResponseOutputText.text is None (observed via LiteLLM gateways
53+
and ``model_construct`` paths during streaming; same rationale as the comment on ``extract_text`` in items.py)."""
54+
msg = _make_output_message(None)
55+
result = ItemHelpers.extract_last_content(msg)
56+
assert isinstance(result, str)
57+
assert result == ""
58+
59+
60+
def test_extract_last_content_returns_text_normally():
61+
msg = _make_output_message("hello")
62+
assert ItemHelpers.extract_last_content(msg) == "hello"
63+
64+
4165
def _make_run_error_details(n_input: int = 0, n_output: int = 0) -> RunErrorDetails:
4266
return RunErrorDetails(
4367
input="hi",

0 commit comments

Comments
 (0)