Skip to content

Commit f90b2c0

Browse files
snimu and claude authored
Add RLM-specific exception hierarchy to rlm_env.py (#892)
Replace generic vf.SandboxError raises with specific exception types:
- RLMWorkerError: worker not running, crashed, or failed to start
- RLMSessionError: session/sandbox/venv not initialized
- RLMSetupError: package install, setup hook, or command failures
- RLMCodeExecutionTimeout: now inherits from vf.ToolCallError instead of bare Exception, and propagates directly in abort path

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent dfd5650 commit f90b2c0

1 file changed

Lines changed: 42 additions & 35 deletions

File tree

verifiers/envs/experimental/rlm_env.py

Lines changed: 42 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -146,14 +146,26 @@ def _merge_tool_lists(
146146
return deduped_all, deduped_map
147147

148148

149-
class RLMCodeExecutionTimeout(Exception):
149+
class RLMCodeExecutionTimeout(vf.ToolCallError):
150150
"""Raised when code execution exceeds the configured timeout."""
151151

152152

153-
class RLMWorkerRecoveryError(vf.SandboxError):
153+
class RLMWorkerError(vf.SandboxError):
154+
"""Raised when the RLM worker is not running, crashes, or fails to start."""
155+
156+
157+
class RLMWorkerRecoveryError(RLMWorkerError):
154158
"""Raised when the RLM worker cannot be restarted after a failure."""
155159

156160

161+
class RLMSessionError(vf.SandboxError):
162+
"""Raised when the RLM session, sandbox, or venv is not initialized."""
163+
164+
165+
class RLMSetupError(vf.SandboxError):
166+
"""Raised when RLM environment setup fails (package install, setup hook, etc.)."""
167+
168+
157169
@dataclass(frozen=True)
158170
class RLMWorkerPaths:
159171
base_dir: str
@@ -1566,9 +1578,9 @@ async def setup(self, state: State) -> None:
15661578
async def execute(self, payload: dict[str, Any], state: State) -> RLMExecResult:
15671579
session = self._get_session(state)
15681580
if session.worker_process is None:
1569-
raise vf.SandboxError() from Exception("RLM worker process not running")
1581+
raise RLMWorkerError("RLM worker process not running")
15701582
if session.worker_process.poll() is not None:
1571-
raise vf.SandboxError() from Exception("RLM worker process not running")
1583+
raise RLMWorkerError("RLM worker process not running")
15721584

15731585
def _do_io() -> str:
15741586
payload_json = json.dumps(payload)
@@ -1751,7 +1763,7 @@ def _get_or_create_session(self, state: State) -> LocalRLMReplSession:
17511763
def _get_session(self, state: State) -> LocalRLMReplSession:
17521764
rollout_id = state.get("rollout_id")
17531765
if not rollout_id or rollout_id not in self._sessions:
1754-
raise vf.SandboxError() from Exception("Local session not initialized")
1766+
raise RLMSessionError("Local session not initialized")
17551767
return self._sessions[rollout_id]
17561768

17571769
async def _ensure_venv(self, session: LocalRLMReplSession) -> str | None:
@@ -1791,18 +1803,16 @@ def _run() -> subprocess.CompletedProcess:
17911803

17921804
try:
17931805
result = await asyncio.to_thread(_run)
1794-
except FileNotFoundError:
1795-
raise vf.SandboxError() from RuntimeError(
1806+
except FileNotFoundError as e:
1807+
raise RLMSetupError(
17961808
"uv not found on PATH; local execution requires uv installed"
1797-
)
1798-
except subprocess.TimeoutExpired:
1799-
raise vf.SandboxError() from RuntimeError(
1800-
f"uv command timed out after {timeout} seconds"
1801-
)
1809+
) from e
1810+
except subprocess.TimeoutExpired as e:
1811+
raise RLMSetupError(f"uv command timed out after {timeout} seconds") from e
18021812
if result.returncode != 0:
18031813
stderr = (result.stderr or "").strip()
18041814
stdout = (result.stdout or "").strip()
1805-
raise vf.SandboxError() from Exception(
1815+
raise RLMSetupError(
18061816
f"uv command failed: {' '.join(args)}\nstdout: {stdout}\nstderr: {stderr}"
18071817
)
18081818

@@ -1827,7 +1837,7 @@ async def _write_local_files(
18271837

18281838
async def _start_worker(self, state: State, session: LocalRLMReplSession) -> None:
18291839
if self.env.repl_language == "python" and not session.venv_path:
1830-
raise vf.SandboxError() from Exception("Local venv not initialized")
1840+
raise RLMSessionError("Local venv not initialized")
18311841
worker_script = _render_worker_script(
18321842
session.paths, repl_language=self.env.repl_language
18331843
)
@@ -1840,7 +1850,7 @@ async def _start_worker(self, state: State, session: LocalRLMReplSession) -> Non
18401850
if self.env.repl_language == "python":
18411851
venv_path = session.venv_path
18421852
if venv_path is None:
1843-
raise vf.SandboxError() from Exception("Local venv not initialized")
1853+
raise RLMSessionError("Local venv not initialized")
18441854
python_path = self._venv_python(venv_path)
18451855
else:
18461856
python_path = sys.executable
@@ -1870,11 +1880,11 @@ async def _wait_for_ready(self, session: LocalRLMReplSession) -> None:
18701880
]
18711881
except Exception:
18721882
pass
1873-
raise vf.SandboxError() from Exception(
1883+
raise RLMWorkerError(
18741884
f"RLM worker exited before ready. Log tail:\n{log_tail}"
18751885
)
18761886
if perf_counter() - start > max_wait_seconds:
1877-
raise vf.SandboxError() from Exception("RLM worker failed to start")
1887+
raise RLMWorkerError("RLM worker failed to start")
18781888
await asyncio.sleep(0.1)
18791889

18801890
def _stop_worker(self, session: LocalRLMReplSession) -> None:
@@ -1932,7 +1942,7 @@ async def prepare_filesystem(self, state: State) -> None:
19321942
state["sandbox_state"]["ready"] = True
19331943

19341944
if not session.sandbox_id:
1935-
raise vf.SandboxError() from Exception("Sandbox not initialized")
1945+
raise RLMSessionError("Sandbox not initialized")
19361946

19371947
sandbox_fs_root = state.get("rlm_fs_root_remote")
19381948
sandbox_control_dir = state.get("rlm_control_dir_remote")
@@ -1965,9 +1975,9 @@ async def prepare_filesystem(self, state: State) -> None:
19651975
async def setup(self, state: State) -> None:
19661976
session = self._get_session(state)
19671977
if not session.sandbox_id:
1968-
raise vf.SandboxError() from Exception("Sandbox not initialized")
1978+
raise RLMSessionError("Sandbox not initialized")
19691979
if not session.paths:
1970-
raise vf.SandboxError() from Exception("Sandbox paths not initialized")
1980+
raise RLMSessionError("Sandbox paths not initialized")
19711981

19721982
await self._install_packages(session)
19731983
await self._write_sandbox_files(session, state)
@@ -1976,7 +1986,7 @@ async def setup(self, state: State) -> None:
19761986
async def execute(self, payload: dict[str, Any], state: State) -> RLMExecResult:
19771987
session = self._get_session(state)
19781988
if not session.sandbox_id or not session.paths:
1979-
raise vf.SandboxError() from Exception("Sandbox session not initialized")
1989+
raise RLMSessionError("Sandbox session not initialized")
19801990

19811991
try:
19821992
raw = await self._send_worker_request(session, payload)
@@ -2114,7 +2124,7 @@ def _get_or_create_session(self, state: State) -> SandboxRLMReplSession:
21142124
def _get_session(self, state: State) -> SandboxRLMReplSession:
21152125
rollout_id = state.get("rollout_id")
21162126
if not rollout_id or rollout_id not in self._sessions:
2117-
raise vf.SandboxError() from Exception("Sandbox session not initialized")
2127+
raise RLMSessionError("Sandbox session not initialized")
21182128
return self._sessions[rollout_id]
21192129

21202130
def _build_sandbox_request(self, state: State) -> CreateSandboxRequest:
@@ -2123,12 +2133,12 @@ def _build_sandbox_request(self, state: State) -> CreateSandboxRequest:
21232133
async def post_sandbox_setup(self, state: State) -> None:
21242134
sandbox_id = state.get("sandbox_id")
21252135
if not sandbox_id:
2126-
raise vf.SandboxError() from Exception("Sandbox not initialized")
2136+
raise RLMSessionError("Sandbox not initialized")
21272137
try:
21282138
# Allow environments to run repo/tool setup before the worker starts.
21292139
await self.env.on_sandbox_ready(state, sandbox_id)
21302140
except Exception as exc:
2131-
raise vf.SandboxError(f"Sandbox setup hook failed: {exc}") from exc
2141+
raise RLMSetupError(f"Sandbox setup hook failed: {exc}") from exc
21322142

21332143
async def _execute_sandbox_command(
21342144
self,
@@ -2153,7 +2163,7 @@ async def _execute_sandbox_command(
21532163
async def _install_packages(self, session: SandboxRLMReplSession) -> None:
21542164
sandbox_id = session.sandbox_id
21552165
if not sandbox_id:
2156-
raise vf.SandboxError() from Exception("Sandbox not initialized")
2166+
raise RLMSessionError("Sandbox not initialized")
21572167
packages = ["requests"]
21582168
extras = [p.strip() for p in self.env.pip_install_packages.split() if p.strip()]
21592169
packages.extend(extras)
@@ -2233,7 +2243,7 @@ async def _start_worker(self, session: SandboxRLMReplSession, state: State) -> N
22332243
assert session.paths is not None
22342244
sandbox_id = session.sandbox_id
22352245
if not sandbox_id:
2236-
raise vf.SandboxError() from Exception("Sandbox not initialized")
2246+
raise RLMSessionError("Sandbox not initialized")
22372247
env_vars = self.env._build_worker_env_vars(state)
22382248

22392249
exports = " ".join(
@@ -2262,7 +2272,7 @@ async def _wait_for_ready(self, session: SandboxRLMReplSession) -> None:
22622272
assert session.paths is not None
22632273
sandbox_id = session.sandbox_id
22642274
if not sandbox_id:
2265-
raise vf.SandboxError() from Exception("Sandbox not initialized")
2275+
raise RLMSessionError("Sandbox not initialized")
22662276
cmd = (
22672277
"bash -lc '"
22682278
f"for i in $(seq 1 {self.env.max_startup_wait_seconds * 10}); do "
@@ -2278,14 +2288,14 @@ async def _wait_for_ready(self, session: SandboxRLMReplSession) -> None:
22782288
)
22792289
except CommandTimeoutError as exc:
22802290
log_tail = await self._read_worker_log_tail(session)
2281-
raise vf.SandboxError(
2291+
raise RLMWorkerError(
22822292
"RLM worker failed to become ready before timeout."
22832293
+ (f"\nLog tail:\n{log_tail}" if log_tail else "")
22842294
) from exc
22852295
exit_code = getattr(result, "exit_code", 0)
22862296
if exit_code != 0:
22872297
log_tail = await self._read_worker_log_tail(session)
2288-
raise vf.SandboxError(
2298+
raise RLMWorkerError(
22892299
"RLM worker failed to become ready."
22902300
+ (f"\nLog tail:\n{log_tail}" if log_tail else "")
22912301
)
@@ -2321,9 +2331,7 @@ def _raise_on_command_error(self, result: Any, context: str) -> None:
23212331
detail += f"\nstdout:\n{stdout}"
23222332
if stderr:
23232333
detail += f"\nstderr:\n{stderr}"
2324-
raise vf.SandboxError() from RuntimeError(
2325-
f"{context} failed with exit code {exit_code}.{detail}"
2326-
)
2334+
raise RLMSetupError(f"{context} failed with exit code {exit_code}.{detail}")
23272335

23282336
async def _read_worker_log_tail(self, session: SandboxRLMReplSession) -> str:
23292337
if not session.sandbox_id or not session.paths:
@@ -2346,7 +2354,7 @@ async def _send_worker_request(
23462354
assert session.paths is not None
23472355
sandbox_id = session.sandbox_id
23482356
if not sandbox_id:
2349-
raise vf.SandboxError() from Exception("Sandbox not initialized")
2357+
raise RLMSessionError("Sandbox not initialized")
23502358
payload_json = json.dumps(payload)
23512359
payload_b64 = base64.b64encode(payload_json.encode("utf-8")).decode("utf-8")
23522360
timeout_seconds = int(self.env.code_execution_timeout)
@@ -3953,8 +3961,7 @@ async def _execute_code(self, code: str, state: State) -> dict[str, Any]:
39533961
"Code execution timed out after %ss", self.code_execution_timeout
39543962
)
39553963
if self.abort_on_code_timeout:
3956-
# Abort rollout immediately on timeout
3957-
raise vf.SandboxError() from e
3964+
raise
39583965
recovered = await self._recover_from_code_timeout(state)
39593966
if not recovered:
39603967
raise RLMWorkerRecoveryError(

0 commit comments

Comments
 (0)