Skip to content

Commit 786d300

Browse files
mikasenghaas and claude authored
deprecate interleaved_rollouts (#912)
* deprecate interleaved_rollouts
* remove stale set_interleaved_rollouts docs

Follow-up to 44df268, which deprecated interleaved_rollouts. Removes the set_interleaved_rollouts(bool) method from the API reference.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
1 parent e35fe3c commit 786d300

5 files changed

Lines changed: 10 additions & 32 deletions

File tree

docs/reference.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,6 @@ Abstract base class for all environments.
308308
| `set_kwargs(**kwargs)` | Set attributes using setter methods when available |
309309
| `add_rubric(rubric)` | Add or merge rubric |
310310
| `set_max_seq_len(max_seq_len)` | Set maximum sequence length |
311-
| `set_interleaved_rollouts(bool)` | Enable/disable interleaved rollouts |
312311
| `set_score_rollouts(bool)` | Enable/disable scoring |
313312

314313
#### SingleTurnEnv

tests/test_rlm_env.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,19 +14,19 @@
1414

1515
import pytest
1616
from datasets import Dataset
17+
18+
import verifiers as vf
1719
from verifiers.envs.experimental import rlm_env as rlm_module
1820
from verifiers.envs.experimental.rlm_env import (
19-
RLMEnv,
20-
RLMWorkerPaths,
2121
RLMCodeExecutionTimeout,
22+
RLMEnv,
2223
RLMSessionError,
2324
RLMSetupError,
2425
RLMWorkerError,
26+
RLMWorkerPaths,
2527
RLMWorkerRecoveryError,
2628
SubLLMEmptyModelResponseError,
2729
)
28-
import verifiers as vf
29-
3030

3131
# =============================================================================
3232
# Helpers
@@ -1198,7 +1198,6 @@ async def test_sub_llm_ignores_interleaving_and_uses_chat(self, rlm_env):
11981198
)
11991199
)
12001200

1201-
rlm_env.interleaved_rollouts = True
12021201
messages = [{"role": "user", "content": "hi"}]
12031202
state = {"sampling_args": {"max_tokens": 7}}
12041203

@@ -1381,10 +1380,8 @@ def test_interleaved_allowed_when_sub_llm_in_trajectory(self):
13811380
env = build_env(
13821381
dataset,
13831382
include_sub_llm_in_trajectory=True,
1384-
interleaved_rollouts=True,
13851383
)
13861384
assert env.include_sub_llm_in_trajectory is True
1387-
assert env.interleaved_rollouts is True
13881385

13891386
@pytest.mark.asyncio
13901387
async def test_sub_llm_steps_added_to_trajectory(self, rlm_env):

verifiers/envs/env_group.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -328,12 +328,6 @@ def set_max_seq_len(self, max_seq_len: int | None) -> None:
328328
for env in self.envs:
329329
env.set_max_seq_len(max_seq_len)
330330

331-
def set_interleaved_rollouts(self, interleaved_rollouts: bool) -> None:
332-
"""Set the interleaved_rollouts flag for this environment group and all sub-environments."""
333-
self.interleaved_rollouts = interleaved_rollouts
334-
for env in self.envs:
335-
env.set_interleaved_rollouts(interleaved_rollouts)
336-
337331
def set_score_rollouts(self, score_rollouts: bool) -> None:
338332
"""Set the score_rollouts flag for this environment group and all sub-environments."""
339333
self.score_rollouts = score_rollouts

verifiers/envs/environment.py

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,6 @@ def __init__(
108108
env_args: dict | None = None,
109109
map_kwargs: dict = {},
110110
max_seq_len: int | None = None,
111-
interleaved_rollouts: bool = False,
112111
score_rollouts: bool = True,
113112
**kwargs,
114113
):
@@ -144,7 +143,6 @@ def __init__(
144143
self.max_seq_len = max_seq_len
145144
self.map_kwargs = map_kwargs
146145

147-
self.set_interleaved_rollouts(interleaved_rollouts)
148146
self.set_score_rollouts(score_rollouts)
149147

150148
self.env_client: EnvClient | None = None
@@ -1228,7 +1226,7 @@ def set_kwargs(self, **kwargs) -> None:
12281226
12291227
For each kwarg, checks if a `set_{key}` method exists and calls it,
12301228
otherwise falls back to setattr. This ensures proper propagation for
1231-
attributes like `interleaved_rollouts` in EnvGroup.
1229+
attributes like `score_rollouts` in EnvGroup.
12321230
"""
12331231
for key, value in kwargs.items():
12341232
setter_name = f"set_{key}"
@@ -1250,13 +1248,9 @@ def set_max_seq_len(self, max_seq_len: int | None) -> None:
12501248
"""Set the maximum sequence length for this environment."""
12511249
self.max_seq_len = max_seq_len
12521250

1253-
def set_interleaved_rollouts(self, interleaved_rollouts: bool) -> None:
1254-
"""Set the interleaved rollouts flag for this environment."""
1255-
self.interleaved_rollouts = interleaved_rollouts
1256-
if self.interleaved_rollouts:
1257-
self.logger.warning(
1258-
f"{self.__class__.__name__} is configured to use interleaved rollouts. All model responses after the first turn will be pre-tokenized before being sent to the model. Currently, this is a hand-crafted feature for PRIME-RL's vLLM server extension."
1259-
)
1251+
def set_score_rollouts(self, score_rollouts: bool) -> None:
1252+
"""Set the score rollouts flag for this environment."""
1253+
self.score_rollouts = score_rollouts
12601254

12611255
async def start_server(
12621256
self,
@@ -1309,10 +1303,6 @@ async def stop_server(self) -> None:
13091303
self.env_server_process.join(timeout=5)
13101304
self.env_server_process = None
13111305

1312-
def set_score_rollouts(self, score_rollouts: bool) -> None:
1313-
"""Set the score rollouts flag for this environment."""
1314-
self.score_rollouts = score_rollouts
1315-
13161306
make_dataset = staticmethod(make_dataset)
13171307

13181308

verifiers/envs/experimental/rlm_env.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3136,9 +3136,6 @@ async def teardown_executor(self):
31363136
# State Management
31373137
# =========================================================================
31383138

3139-
def set_interleaved_rollouts(self, interleaved_rollouts: bool) -> None:
3140-
super().set_interleaved_rollouts(interleaved_rollouts)
3141-
31423139
def update_tool_args(
31433140
self,
31443141
tool_name: str,
@@ -3673,7 +3670,8 @@ async def get_model_response( # type: ignore[override]
36733670
step with incompatible tokens. We temporarily move trailing sub-LLM
36743671
steps out of the trajectory for the duration of the super call.
36753672
"""
3676-
if not (self.include_sub_llm_in_trajectory and self.interleaved_rollouts):
3673+
3674+
if not self.include_sub_llm_in_trajectory:
36773675
return await super().get_model_response(state, prompt, **kwargs)
36783676

36793677
trajectory = state.get("trajectory", [])

0 commit comments

Comments
 (0)