Skip to content

Commit 1b21d81

Browse files
committed
Fix Runloop rollout setup cleanup
1 parent d0d1ce2 commit 1b21d81

6 files changed

Lines changed: 229 additions & 25 deletions

File tree

.env.example

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,5 +22,8 @@ FIREWORKS_API_KEY="your_fireworks_api_key_here"
2222
# Runloop API Key (if hosting remote rollout servers in Runloop Devboxes)
2323
# RUNLOOP_API_KEY="your_runloop_api_key_here"
2424

25+
# Optional: Runloop blueprint used by examples/runloop_remote_rollout/test_eval.py
26+
# RUNLOOP_BLUEPRINT_ID="your_runloop_blueprint_id_here"
27+
2528
# Other environment variables your custom reward functions might need
2629
# MY_CUSTOM_SERVICE_API_KEY="some_other_key"

docs/integrations/runloop_remote_rollout.mdx

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,19 @@ async def test_my_eval(row):
3939

4040
The server command must bind to `0.0.0.0` on the configured port so the Runloop tunnel can reach it. The server must expose `POST /init` and should use `FireworksTracingHttpHandler` plus `RolloutIdFilter` to publish rollout completion status.
4141

42+
## Creating A Blueprint
43+
44+
`blueprint_id` is required when you want `RunloopRolloutProcessor` to create a fresh Devbox for each eval invocation. The blueprint should contain the rollout server code and its Python dependencies.
45+
46+
The included example can create a blueprint for a new Runloop account:
47+
48+
```bash
49+
export RUNLOOP_API_KEY=...
50+
eval "$(python examples/runloop_remote_rollout/create_blueprint.py)"
51+
```
52+
53+
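That helper uploads the current repository as a temporary Runloop build context and builds a Python image with `eval-protocol[runloop]` installed. It prints a single `export RUNLOOP_BLUEPRINT_ID=...` line, so wrapping the call in `eval "$(...)"` sets `RUNLOOP_BLUEPRINT_ID` in your current shell, where `examples/runloop_remote_rollout/test_eval.py` can pick it up.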
54+
4255
## Existing Devboxes
4356

4457
You can attach to an existing Devbox instead of creating one from a blueprint:

eval_protocol/pytest/runloop_rollout_processor.py

Lines changed: 46 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -99,28 +99,34 @@ def setup(self) -> None:
9999
)
100100

101101
RunloopSDK = _load_runloop_sdk()
102-
self._client = RunloopSDK(bearer_token=api_key)
103-
104-
if self._devbox_id:
105-
self._devbox = self._client.devbox.from_id(self._devbox_id)
106-
self._owns_devbox = False
107-
else:
108-
assert self._blueprint_id is not None
109-
self._devbox = self._client.devbox.create_from_blueprint_id(self._blueprint_id)
110-
self._owns_devbox = True
111-
112-
self._await_running()
113-
tunnel = self._create_tunnel()
114-
self._remote_base_url = self._derive_remote_base_url(tunnel)
115-
self._server_execution = self._devbox.cmd.exec_async(self._server_command)
116-
self._wait_for_server_startup()
117-
self._remote_processor = RemoteRolloutProcessor(
118-
remote_base_url=self._remote_base_url,
119-
model_base_url=self._model_base_url,
120-
poll_interval=self._poll_interval,
121-
timeout_seconds=self._timeout_seconds,
122-
include_payloads=self._include_payloads,
123-
)
102+
client: Any = RunloopSDK(bearer_token=api_key)
103+
self._client = client
104+
105+
try:
106+
if self._devbox_id:
107+
devbox = client.devbox.from_id(self._devbox_id)
108+
self._owns_devbox = False
109+
else:
110+
assert self._blueprint_id is not None
111+
devbox = client.devbox.create_from_blueprint_id(self._blueprint_id)
112+
self._owns_devbox = True
113+
114+
self._devbox = devbox
115+
self._await_running()
116+
tunnel = self._create_tunnel()
117+
self._remote_base_url = self._derive_remote_base_url(tunnel)
118+
self._server_execution = devbox.cmd.exec_async(self._server_command)
119+
self._wait_for_server_startup()
120+
self._remote_processor = RemoteRolloutProcessor(
121+
remote_base_url=self._remote_base_url,
122+
model_base_url=self._model_base_url,
123+
poll_interval=self._poll_interval,
124+
timeout_seconds=self._timeout_seconds,
125+
include_payloads=self._include_payloads,
126+
)
127+
except Exception:
128+
self._cleanup_partial_setup()
129+
raise
124130

125131
def __call__(self, rows: list[EvaluationRow], config: RolloutProcessorConfig) -> list[asyncio.Task[EvaluationRow]]:
126132
if self._remote_processor is None:
@@ -149,6 +155,7 @@ def _await_running(self) -> None:
149155
await_running()
150156

151157
def _create_tunnel(self) -> Any:
158+
assert self._devbox is not None
152159
net = self._devbox.net
153160
create_tunnel = getattr(net, "create_tunnel", None)
154161
if create_tunnel is not None:
@@ -190,8 +197,11 @@ def _wait_for_server_startup(self) -> None:
190197
with urllib.request.urlopen(request, timeout=min(5.0, self._startup_timeout_seconds)) as response:
191198
response.read(1)
192199
return
193-
except urllib.error.HTTPError:
194-
return
200+
except urllib.error.HTTPError as exc:
201+
if exc.code < 500:
202+
return
203+
last_error = exc
204+
time.sleep(min(1.0, max(0.0, deadline - time.monotonic())))
195205
except Exception as exc:
196206
last_error = exc
197207
time.sleep(min(1.0, max(0.0, deadline - time.monotonic())))
@@ -214,3 +224,15 @@ def _shutdown_devbox(self) -> None:
214224
return
215225
self._devbox.shutdown()
216226
self._shutdown_complete = True
227+
228+
def _cleanup_partial_setup(self) -> None:
    """Release whatever a failed ``setup()`` managed to create.

    Cleans up the remote processor (if one was built), shuts the Devbox
    down when ``_should_shutdown_devbox()`` says so, and resets the
    setup-related attributes to their "not set up" values.

    Every step runs even if an earlier one raises: a failure while
    cleaning up the remote processor must not leave an owned Devbox
    running, and the state fields are always reset. Any exception raised
    by a cleanup step still propagates to the caller once the remaining
    steps have run.
    """
    # Detach the processor first so a failing cleanup() cannot leave a
    # half-cleaned instance attached to this object.
    remote_processor = self._remote_processor
    self._remote_processor = None
    try:
        if remote_processor is not None:
            remote_processor.cleanup()
    finally:
        try:
            if self._should_shutdown_devbox():
                self._shutdown_devbox()
        finally:
            # Reset unconditionally so a later setup() starts from scratch.
            self._devbox = None
            self._server_execution = None
            self._remote_base_url = None
            self._owns_devbox = False
            self._shutdown_complete = False

examples/runloop_remote_rollout/README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,12 @@ export FIREWORKS_API_KEY=...
1313
Create a Runloop blueprint that contains this repository and its Python dependencies, then set `RUNLOOP_BLUEPRINT_ID`:
1414

1515
```bash
16-
export RUNLOOP_BLUEPRINT_ID=bpt_your_blueprint_id
16+
eval "$(python examples/runloop_remote_rollout/create_blueprint.py)"
1717
pytest examples/runloop_remote_rollout/test_eval.py
1818
```
1919

20+
The blueprint ID matters because `RunloopRolloutProcessor` uses it to create a Devbox that already has this repository and `eval-protocol[runloop]` installed. If you already have a suitable running Devbox, you can pass `devbox_id` to `RunloopRolloutProcessor` instead and skip `RUNLOOP_BLUEPRINT_ID`.
21+
2022
The processor starts:
2123

2224
```bash
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
from __future__ import annotations
2+
3+
import argparse
4+
import tarfile
5+
from datetime import timedelta
6+
from pathlib import Path
7+
8+
from runloop_api_client import RunloopSDK
9+
from runloop_api_client.types.blueprint_build_parameters import BuildContext
10+
11+
12+
DEFAULT_DOCKERFILE = """\
13+
FROM python:3.12-slim
14+
WORKDIR /workspace
15+
COPY . /workspace
16+
RUN pip install --no-cache-dir ".[runloop]"
17+
"""
18+
19+
IGNORED_CONTEXT_DIRS = {
20+
".git",
21+
".mypy_cache",
22+
".pytest_cache",
23+
".ruff_cache",
24+
".tox",
25+
".venv",
26+
"__pycache__",
27+
"node_modules",
28+
}
29+
30+
31+
def _ignore_build_context(member: tarfile.TarInfo) -> tarfile.TarInfo | None:
32+
parts = set(Path(member.name).parts)
33+
if parts & IGNORED_CONTEXT_DIRS:
34+
return None
35+
return member
36+
37+
38+
def main() -> None:
    """Build a Runloop blueprint from the repository and print its id.

    Uploads ``--repo-root`` as a build context that expires after one hour,
    creates a blueprint named ``--name`` from ``DEFAULT_DOCKERFILE``, and
    writes a single ``export RUNLOOP_BLUEPRINT_ID=...`` line to stdout.

    The output is meant to be consumed with ``eval "$(...)"``, so the id is
    shell-quoted before printing. An invalid ``--repo-root`` is reported via
    ``argparse`` (stderr, exit status 2) before anything is uploaded.
    """
    import shlex

    parser = argparse.ArgumentParser(description="Create a Runloop blueprint for the remote rollout example.")
    parser.add_argument(
        "--repo-root",
        type=Path,
        default=Path(__file__).resolve().parents[2],
        help="Path to the eval-protocol repository root.",
    )
    parser.add_argument(
        "--name",
        default="eval-protocol-runloop-remote-rollout",
        help="Runloop blueprint name.",
    )
    args = parser.parse_args()

    # Fail early with a clear message instead of an SDK error mid-upload.
    if not args.repo_root.is_dir():
        parser.error(f"--repo-root is not a directory: {args.repo_root}")

    runloop = RunloopSDK()
    build_context = runloop.storage_object.upload_from_dir(
        args.repo_root,
        name=f"{args.name}.tar.gz",
        ttl=timedelta(hours=1),
        ignore=_ignore_build_context,
    )
    blueprint = runloop.blueprint.create(
        name=args.name,
        dockerfile=DEFAULT_DOCKERFILE,
        build_context=BuildContext(type="object", object_id=build_context.id),
    )

    # stdout is eval'ed by the caller's shell; quote so the id can never be
    # interpreted as shell syntax. Plain alphanumeric ids print unchanged.
    print(f"export RUNLOOP_BLUEPRINT_ID={shlex.quote(str(blueprint.id))}")
67+
68+
69+
if __name__ == "__main__":
70+
main()

tests/pytest/test_runloop_rollout_processor.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import asyncio
22
from types import SimpleNamespace
3+
import urllib.error
34

45
import pytest
56

@@ -211,6 +212,99 @@ def _raise_missing_dependency():
211212
processor.setup()
212213

213214

215+
def test_setup_cleans_up_owned_devbox_after_startup_failure(fake_runloop, monkeypatch):
    """A Devbox created from a blueprint is shut down when startup fails.

    The startup wait is replaced with a stub that raises ``TimeoutError``;
    ``setup()`` must re-raise it, shut down the created Devbox, leave no
    remote base URL behind, and never build a remote processor.
    """
    failure_message = "server did not start"

    def _never_ready():
        raise TimeoutError(failure_message)

    rollout = RunloopRolloutProcessor(
        blueprint_id="bp-123",
        server_command="python server.py",
        startup_timeout_seconds=0,
    )
    monkeypatch.setattr(rollout, "_wait_for_server_startup", _never_ready)

    with pytest.raises(TimeoutError, match=failure_message):
        rollout.setup()

    assert fake_runloop.shutdown_calls == ["devbox-created"]
    assert rollout.remote_base_url is None
    assert FakeRemoteRolloutProcessor.instances == []
233+
234+
235+
def test_setup_does_not_shutdown_existing_devbox_after_startup_failure(fake_runloop, monkeypatch):
    """A Devbox attached by id is left running when startup fails.

    With the startup wait stubbed to raise ``TimeoutError``, ``setup()``
    must re-raise the error without recording any shutdown call.
    """
    failure_message = "server did not start"

    def _never_ready():
        raise TimeoutError(failure_message)

    rollout = RunloopRolloutProcessor(
        devbox_id="devbox-existing",
        server_command="python server.py",
        startup_timeout_seconds=0,
    )
    monkeypatch.setattr(rollout, "_wait_for_server_startup", _never_ready)

    with pytest.raises(TimeoutError, match=failure_message):
        rollout.setup()

    assert fake_runloop.shutdown_calls == []
251+
252+
253+
def test_startup_wait_retries_5xx_http_errors(monkeypatch):
    """A 5xx response is retried rather than treated as "server is up".

    The stubbed ``urlopen`` raises a 503 on the first call and returns a
    readable response afterwards, so the startup wait must make exactly two
    requests. ``time.sleep`` is stubbed out to keep the test instant.
    """
    attempts = []

    class _HealthyResponse:
        # Minimal context-manager stand-in for the object urlopen returns.
        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc, traceback):
            return None

        def read(self, size):
            return b"o"

    def _flaky_urlopen(request, timeout):
        attempts.append((request.full_url, timeout))
        if len(attempts) != 1:
            return _HealthyResponse()
        raise urllib.error.HTTPError(request.full_url, 503, "Service Unavailable", hdrs=None, fp=None)

    rollout = RunloopRolloutProcessor(
        devbox_id="devbox-existing",
        server_command="python server.py",
        startup_timeout_seconds=5,
    )
    rollout._remote_base_url = "https://8000-test-tunnel-key.tunnel.runloop.ai"

    patched_module = runloop_rollout_processor_module
    monkeypatch.setattr(patched_module.urllib.request, "urlopen", _flaky_urlopen)
    monkeypatch.setattr(patched_module.time, "sleep", lambda seconds: None)

    rollout._wait_for_server_startup()

    assert len(attempts) == 2
285+
286+
287+
def test_startup_wait_accepts_non_5xx_http_errors(monkeypatch):
    """A non-5xx HTTP error counts as the server being reachable.

    The stubbed ``urlopen`` always raises a 404; the startup wait must
    return after that single request instead of retrying.
    """
    attempts = []

    def _not_found_urlopen(request, timeout):
        attempts.append((request.full_url, timeout))
        raise urllib.error.HTTPError(request.full_url, 404, "Not Found", hdrs=None, fp=None)

    rollout = RunloopRolloutProcessor(
        devbox_id="devbox-existing",
        server_command="python server.py",
        startup_timeout_seconds=5,
    )
    rollout._remote_base_url = "https://8000-test-tunnel-key.tunnel.runloop.ai"
    monkeypatch.setattr(runloop_rollout_processor_module.urllib.request, "urlopen", _not_found_urlopen)

    rollout._wait_for_server_startup()

    assert len(attempts) == 1
306+
307+
214308
@pytest.mark.asyncio
215309
async def test_async_cleanup_closes_remote_processor_and_owned_devbox(fake_runloop):
216310
processor = RunloopRolloutProcessor(

0 commit comments

Comments
 (0)