Skip to content

Commit 8c3526a

Browse files
committed
fix: skip wait_for_status when Vercel sandbox is in a terminal state
1 parent 656baf8 commit 8c3526a

2 files changed

Lines changed: 81 additions & 11 deletions

File tree

src/agents/extensions/sandbox/vercel/sandbox.py

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,11 @@
7979
httpx.ProtocolError,
8080
)
8181

82+
# Non-terminal sandbox status values for which resume() still waits for RUNNING.
83+
# Terminal states (e.g. "stopped", "failed") are not included because a sandbox
84+
# in those states can never become RUNNING, so waiting is futile.
85+
_VERCEL_TRANSIENT_SANDBOX_STATUSES: frozenset[str] = frozenset({"pending", "stopping"})
86+
8287

8388
def _is_transient_create_error(exc: BaseException) -> bool:
8489
if exception_chain_has_status_code(exc, {408, 425, 429, 500, 502, 503, 504}):
@@ -754,15 +759,21 @@ async def resume(self, state: SandboxSessionState) -> SandboxSession:
754759
project_id=resolved_project_id,
755760
team_id=resolved_team_id,
756761
)
757-
# XXX(scotttrinh): This will wait even if in a terminal state.
758-
# We should make wait_for_status smarter about the possible
759-
# transitions to avoid waiting for a status if it's impossible
760-
# to transition to it from the current status.
761-
await sandbox.wait_for_status(
762-
SandboxStatus.RUNNING,
763-
timeout=DEFAULT_VERCEL_WAIT_FOR_RUNNING_TIMEOUT_S,
764-
)
765-
reconnected = True
762+
current_status = str(sandbox.status)
763+
if current_status == str(SandboxStatus.RUNNING):
764+
# Already running; skip the wait entirely.
765+
reconnected = True
766+
elif current_status in _VERCEL_TRANSIENT_SANDBOX_STATUSES:
767+
# Non-terminal status: wait for RUNNING, bounded by the usual timeout.
768+
await sandbox.wait_for_status(
769+
SandboxStatus.RUNNING,
770+
timeout=DEFAULT_VERCEL_WAIT_FOR_RUNNING_TIMEOUT_S,
771+
)
772+
reconnected = True
773+
else:
774+
# Any other status (e.g. terminal "stopped" or "failed"): skip the wait and release the client.
775+
await sandbox.client.aclose()
776+
sandbox = None
766777
except TimeoutError:
767778
if sandbox is not None:
768779
await sandbox.client.aclose()

tests/extensions/sandbox/test_vercel.py

Lines changed: 61 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -793,13 +793,71 @@ async def test_vercel_resume_reconnects_existing_running_sandbox(
793793
"team_id": None,
794794
}
795795
]
796+
assert resumed._inner.state.sandbox_id == "sandbox-existing"
797+
assert _FakeAsyncSandbox.create_calls == []
798+
# Sandbox is already RUNNING, so wait_for_status should not be called.
799+
assert existing.wait_for_status_calls == []
800+
assert resumed._inner._workspace_state_preserved_on_start() is True # noqa: SLF001
801+
assert resumed._inner._system_state_preserved_on_start() is True # noqa: SLF001
802+
803+
804+
@pytest.mark.asyncio
805+
@pytest.mark.parametrize("transient_status", ["pending", "stopping"])
806+
async def test_vercel_resume_waits_when_sandbox_in_transient_state(
807+
monkeypatch: pytest.MonkeyPatch,
808+
transient_status: str,
809+
) -> None:
810+
vercel_module = _load_vercel_module(monkeypatch)
811+
existing = _FakeAsyncSandbox(sandbox_id="sandbox-existing", status=transient_status)
812+
_FakeAsyncSandbox.sandboxes[existing.sandbox_id] = existing
813+
814+
state = vercel_module.VercelSandboxSessionState(
815+
session_id="00000000-0000-0000-0000-000000000200",
816+
manifest=Manifest(),
817+
snapshot=NoopSnapshot(id="snapshot"),
818+
sandbox_id=existing.sandbox_id,
819+
)
820+
821+
client = vercel_module.VercelSandboxClient()
822+
resumed = await client.resume(state)
823+
796824
assert resumed._inner.state.sandbox_id == "sandbox-existing"
797825
assert _FakeAsyncSandbox.create_calls == []
798826
assert existing.wait_for_status_calls == [
799827
("running", vercel_module.DEFAULT_VERCEL_WAIT_FOR_RUNNING_TIMEOUT_S)
800828
]
801829
assert resumed._inner._workspace_state_preserved_on_start() is True # noqa: SLF001
802-
assert resumed._inner._system_state_preserved_on_start() is True # noqa: SLF001
830+
831+
832+
@pytest.mark.asyncio
833+
@pytest.mark.parametrize("terminal_status", ["stopped", "failed"])
834+
async def test_vercel_resume_recreates_sandbox_when_in_terminal_state(
835+
monkeypatch: pytest.MonkeyPatch,
836+
terminal_status: str,
837+
) -> None:
838+
vercel_module = _load_vercel_module(monkeypatch)
839+
existing = _FakeAsyncSandbox(sandbox_id="sandbox-terminal", status=terminal_status)
840+
_FakeAsyncSandbox.sandboxes[existing.sandbox_id] = existing
841+
842+
state = vercel_module.VercelSandboxSessionState(
843+
session_id="00000000-0000-0000-0000-000000000201",
844+
manifest=Manifest(),
845+
snapshot=NoopSnapshot(id="snapshot"),
846+
sandbox_id=existing.sandbox_id,
847+
)
848+
849+
client = vercel_module.VercelSandboxClient()
850+
resumed = await client.resume(state)
851+
852+
# Should NOT have waited for status — the sandbox is already terminal.
853+
assert existing.wait_for_status_calls == []
854+
# Client must be closed before abandoning the sandbox.
855+
assert existing.client.closed is True
856+
# A new sandbox must have been created to replace the terminal one.
857+
assert len(_FakeAsyncSandbox.create_calls) == 1
858+
assert resumed._inner.state.sandbox_id != "sandbox-terminal"
859+
assert resumed._inner.state.workspace_root_ready is False
860+
assert resumed._inner._workspace_state_preserved_on_start() is False # noqa: SLF001
803861

804862

805863
@pytest.mark.asyncio
@@ -837,7 +895,8 @@ async def test_vercel_resume_recreates_sandbox_after_wait_timeout(
837895
monkeypatch: pytest.MonkeyPatch,
838896
) -> None:
839897
vercel_module = _load_vercel_module(monkeypatch)
840-
existing = _FakeAsyncSandbox(sandbox_id="sandbox-existing")
898+
# Use "pending" so that the code enters the wait path (not already RUNNING).
899+
existing = _FakeAsyncSandbox(sandbox_id="sandbox-existing", status="pending")
841900
existing.wait_for_status_error = TimeoutError()
842901
_FakeAsyncSandbox.sandboxes[existing.sandbox_id] = existing
843902

0 commit comments

Comments
 (0)