Skip to content

Commit 43a389d

Browse files
authored
fix: skip wait_for_status when Vercel sandbox is in a terminal state (#3410)
1 parent 656baf8 commit 43a389d

2 files changed

Lines changed: 82 additions & 11 deletions

File tree

src/agents/extensions/sandbox/vercel/sandbox.py

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,12 @@
7979
httpx.ProtocolError,
8080
)
8181

82+
# Sandbox status values from which the sandbox can still transition to RUNNING.
83+
# Only "pending" qualifies: a freshly created sandbox transitions PENDING -> RUNNING.
84+
# Other non-RUNNING states ("stopping", "stopped", "failed", "aborted",
85+
# "snapshotting") cannot reach RUNNING, so waiting is futile.
86+
_VERCEL_TRANSIENT_SANDBOX_STATUSES: frozenset[str] = frozenset({"pending"})
87+
8288

8389
def _is_transient_create_error(exc: BaseException) -> bool:
8490
if exception_chain_has_status_code(exc, {408, 425, 429, 500, 502, 503, 504}):
@@ -754,15 +760,22 @@ async def resume(self, state: SandboxSessionState) -> SandboxSession:
754760
project_id=resolved_project_id,
755761
team_id=resolved_team_id,
756762
)
757-
# XXX(scotttrinh): This will wait even if in a terminal state.
758-
# We should make wait_for_status smarter about the possible
759-
# transitions to avoid waiting for a status if it's impossible
760-
# to transition to it from the current status.
761-
await sandbox.wait_for_status(
762-
SandboxStatus.RUNNING,
763-
timeout=DEFAULT_VERCEL_WAIT_FOR_RUNNING_TIMEOUT_S,
764-
)
765-
reconnected = True
763+
current_status = str(sandbox.status)
764+
if current_status == str(SandboxStatus.RUNNING):
765+
# Already running; skip the wait entirely.
766+
reconnected = True
767+
elif current_status in _VERCEL_TRANSIENT_SANDBOX_STATUSES:
768+
# Still transitioning toward RUNNING (currently only PENDING); wait normally.
769+
await sandbox.wait_for_status(
770+
SandboxStatus.RUNNING,
771+
timeout=DEFAULT_VERCEL_WAIT_FOR_RUNNING_TIMEOUT_S,
772+
)
773+
reconnected = True
774+
else:
775+
# Any other status (e.g. STOPPING, STOPPED, FAILED, ABORTED, SNAPSHOTTING)
775+
# is treated as unable to reach RUNNING. Close the client and recreate below.
777+
await sandbox.client.aclose()
778+
sandbox = None
766779
except TimeoutError:
767780
if sandbox is not None:
768781
await sandbox.client.aclose()

tests/extensions/sandbox/test_vercel.py

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -793,13 +793,70 @@ async def test_vercel_resume_reconnects_existing_running_sandbox(
793793
"team_id": None,
794794
}
795795
]
796+
assert resumed._inner.state.sandbox_id == "sandbox-existing"
797+
assert _FakeAsyncSandbox.create_calls == []
798+
# Sandbox is already RUNNING, so wait_for_status should not be called.
799+
assert existing.wait_for_status_calls == []
800+
assert resumed._inner._workspace_state_preserved_on_start() is True # noqa: SLF001
801+
assert resumed._inner._system_state_preserved_on_start() is True # noqa: SLF001
802+
803+
804+
@pytest.mark.asyncio
805+
async def test_vercel_resume_waits_when_sandbox_pending(
806+
monkeypatch: pytest.MonkeyPatch,
807+
) -> None:
808+
vercel_module = _load_vercel_module(monkeypatch)
809+
existing = _FakeAsyncSandbox(sandbox_id="sandbox-existing", status="pending")
810+
_FakeAsyncSandbox.sandboxes[existing.sandbox_id] = existing
811+
812+
state = vercel_module.VercelSandboxSessionState(
813+
session_id="00000000-0000-0000-0000-000000000200",
814+
manifest=Manifest(),
815+
snapshot=NoopSnapshot(id="snapshot"),
816+
sandbox_id=existing.sandbox_id,
817+
)
818+
819+
client = vercel_module.VercelSandboxClient()
820+
resumed = await client.resume(state)
821+
796822
assert resumed._inner.state.sandbox_id == "sandbox-existing"
797823
assert _FakeAsyncSandbox.create_calls == []
798824
assert existing.wait_for_status_calls == [
799825
("running", vercel_module.DEFAULT_VERCEL_WAIT_FOR_RUNNING_TIMEOUT_S)
800826
]
801827
assert resumed._inner._workspace_state_preserved_on_start() is True # noqa: SLF001
802-
assert resumed._inner._system_state_preserved_on_start() is True # noqa: SLF001
828+
829+
830+
@pytest.mark.asyncio
831+
@pytest.mark.parametrize(
832+
"terminal_status", ["stopping", "stopped", "failed", "aborted", "snapshotting"]
833+
)
834+
async def test_vercel_resume_recreates_sandbox_when_cannot_reach_running(
835+
monkeypatch: pytest.MonkeyPatch,
836+
terminal_status: str,
837+
) -> None:
838+
"""A sandbox in any state that cannot transition to RUNNING must be recreated
839+
immediately, without waiting for the wait_for_status timeout."""
840+
vercel_module = _load_vercel_module(monkeypatch)
841+
existing = _FakeAsyncSandbox(sandbox_id="sandbox-terminal", status=terminal_status)
842+
_FakeAsyncSandbox.sandboxes[existing.sandbox_id] = existing
843+
844+
state = vercel_module.VercelSandboxSessionState(
845+
session_id="00000000-0000-0000-0000-000000000201",
846+
manifest=Manifest(),
847+
snapshot=NoopSnapshot(id="snapshot"),
848+
sandbox_id=existing.sandbox_id,
849+
)
850+
851+
client = vercel_module.VercelSandboxClient()
852+
resumed = await client.resume(state)
853+
854+
assert existing.wait_for_status_calls == []
855+
assert existing.client.closed is True
856+
assert len(_FakeAsyncSandbox.create_calls) == 1
857+
assert resumed._inner.state.sandbox_id != "sandbox-terminal"
858+
assert resumed._inner.state.workspace_root_ready is False
859+
assert resumed._inner._workspace_state_preserved_on_start() is False # noqa: SLF001
803860

804861

805862
@pytest.mark.asyncio
@@ -837,7 +894,8 @@ async def test_vercel_resume_recreates_sandbox_after_wait_timeout(
837894
monkeypatch: pytest.MonkeyPatch,
838895
) -> None:
839896
vercel_module = _load_vercel_module(monkeypatch)
840-
existing = _FakeAsyncSandbox(sandbox_id="sandbox-existing")
897+
# Use "pending" so that the code enters the wait path (not already RUNNING).
898+
existing = _FakeAsyncSandbox(sandbox_id="sandbox-existing", status="pending")
841899
existing.wait_for_status_error = TimeoutError()
842900
_FakeAsyncSandbox.sandboxes[existing.sandbox_id] = existing
843901

0 commit comments

Comments
 (0)