diff --git a/configs/debug/training_modes/README.md b/configs/debug/training_modes/README.md
index 96ccebb009..fd4b60009f 100644
--- a/configs/debug/training_modes/README.md
+++ b/configs/debug/training_modes/README.md
@@ -10,6 +10,7 @@ Minimal end-to-end configs for the three training modes (`rl` / `opd` / `sft`) a
 | `sft.toml` | `sft` | local vLLM (`Qwen3-0.6B-Reverse-Text-RL`) | |
 | `sft_lora.toml` | `sft` | local vLLM (`Qwen3-0.6B-Reverse-Text-RL`) | trains a LoRA adapter (rank 8) |
 | `sft_external.toml` | `sft` | PI inference (`openai/gpt-5-mini`) | external OAI endpoint; no local teacher |
+| `sft_replay.toml` | `sft` | none | replays saved message traces through `sft-replay` |
 
 The student inference server is auto-launched on GPU 0 at `http://localhost:8000/v1` with `gpu_memory_utilization=0.5`. The local teacher (used by everything except `rl.toml` and `sft_external.toml`) is **not** auto-launched — start it manually on GPU 1.
 
@@ -42,6 +43,9 @@ uv run rl @ configs/debug/training_modes/sft_lora.toml
 # SFT hard distill from openai/gpt-5-mini via PI inference
 # (requires PRIME_API_KEY + PRIME_TEAM_ID in env; no local teacher needed)
 uv run rl @ configs/debug/training_modes/sft_external.toml
+
+# SFT from replayed dataset traces (no teacher)
+uv run rl @ configs/debug/training_modes/sft_replay.toml
 ```
 
 See [docs/training.md](../../docs/training.md#training-modes-rl--opd--sft-via-orchestrator) for what each mode does.
diff --git a/configs/debug/training_modes/sft_replay.toml b/configs/debug/training_modes/sft_replay.toml
new file mode 100644
index 0000000000..eae996c1af
--- /dev/null
+++ b/configs/debug/training_modes/sft_replay.toml
@@ -0,0 +1,41 @@
+# Static trace SFT through the RL orchestrator. No teacher server is needed:
+# sft-replay turns dataset message rows into replayed rollout trajectories.
+
+max_steps = 20
+seq_len = 2048
+
+[model]
+name = "PrimeIntellect/Qwen3-0.6B-Reverse-Text-SFT"
+
+[wandb]
+project = "reverse-text-debug"
+name = "debug-sft-replay"
+
+[orchestrator]
+training_mode = "sft"
+batch_size = 128
+group_size = 1
+
+[[orchestrator.train.env]]
+id = "sft-replay"
+
+[orchestrator.train.env.args.taskset]
+dataset = "PrimeIntellect/Reverse-Text-SFT"
+
+[orchestrator.eval]
+interval = 1
+num_examples = 128
+
+[orchestrator.eval.sampling]
+max_completion_tokens = 128
+
+[[orchestrator.eval.env]]
+id = "reverse-text"
+
+[trainer.optim]
+lr = 3e-6
+
+[ckpt]
+
+[inference]
+gpu_memory_utilization = 0.5
diff --git a/deps/verifiers b/deps/verifiers
index 05c66c2358..14be5ee386 160000
--- a/deps/verifiers
+++ b/deps/verifiers
@@ -1 +1 @@
-Subproject commit 05c66c235875d785754f2b7078db0e7deeddbeae
+Subproject commit 14be5ee38619290968701fb8af5c87b061556e94
diff --git a/docs/training.md b/docs/training.md
index 53fc7fa7aa..a0d39ca1f6 100644
--- a/docs/training.md
+++ b/docs/training.md
@@ -89,16 +89,18 @@ The RL entrypoint supports three training modes, switched via `orchestrator.trai
 |---|---|---|---|
 | `rl` | Required | Forbidden | Standard RL |
 | `opd` | Required | Required, must be vLLM (needs `prompt_logprobs`) | [On-policy distillation](https://thinkingmachines.ai/blog/on-policy-distillation/): student generates rollouts, trainer minimizes KL to teacher logprobs |
-| `sft` | Required | Required, any OpenAI-compatible endpoint | Hard-distill: teacher generates rollouts, student trains on them |
+| `sft` | Required | Optional (required unless every train env is `sft-replay`) | Hard-distill from teacher-generated rollouts, or train from replayed message traces via `sft-replay` |
 
-The `rl` entrypoint only manages student-policy inference. For OPD and (local-vLLM) SFT, start the teacher inference server manually and point `[orchestrator.teacher.client]` at it:
+The `rl` entrypoint only manages student-policy inference. For OPD and teacher-backed local-vLLM SFT, start the teacher inference server manually and point `[orchestrator.teacher.client]` at it:
 
 ```bash
 CUDA_VISIBLE_DEVICES=1 uv run inference \
   --model.name <teacher> --server.port 8001
 ```
 
-The standalone `uv run sft` entrypoint is the more traditional SFT path — pure dataset-based, no teacher, no orchestrator. Use `orchestrator.training_mode = "sft"` only when you want a teacher to generate the supervision on the fly.
+Teacherless orchestrator SFT is valid only when every train env is `sft-replay` and each env config sets `args.taskset.dataset` (or `args.config.taskset.dataset`). In that path, the env replays stored assistant messages into trajectories without model calls, then prime-rl tokenizes them for the trainer.
+
+The standalone `uv run sft` entrypoint is the more traditional SFT path — pure dataset-based, no teacher, no orchestrator. Use `orchestrator.training_mode = "sft"` when you want teacher-generated supervision or replayed env trajectories inside the RL orchestrator/eval pipeline.
 
 ### Important Metrics
 
diff --git a/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py b/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py
index be5fe249f3..689133857f 100644
--- a/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py
+++ b/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py
@@ -20,6 +20,31 @@
 from prime_rl.configs.trainer import TokenizerConfig
 from prime_rl.utils.config import BaseConfig
 
+SFT_REPLAY_ENV_ID = "sft-replay"
+
+
+def _is_sft_replay_env_id(env_id: str) -> bool:
+    stripped = env_id.split("@")[0]
+    return stripped == SFT_REPLAY_ENV_ID or stripped.endswith(f"/{SFT_REPLAY_ENV_ID}")
+
+
+def _sft_replay_dataset_arg(env_args: dict) -> object | None:
+    taskset = env_args.get("taskset")
+    if isinstance(taskset, dict):
+        dataset = taskset.get("dataset")
+        if dataset:
+            return dataset
+
+    config = env_args.get("config")
+    if isinstance(config, dict):
+        taskset = config.get("taskset")
+        if isinstance(taskset, dict):
+            dataset = taskset.get("dataset")
+            if dataset:
+                return dataset
+
+    return None
+
 
 class OptimizerConfig(BaseConfig):
     lr: float = Field(1e-4, ge=0)
@@ -501,13 +526,13 @@ class RolloutModelConfig(BaseConfig):
 
 class OrchestratorConfig(BaseConfig):
     training_mode: Literal["rl", "opd", "sft"] = "rl"
-    """Training mode. ``rl``: student generates rollouts, no teacher. ``opd``: student generates rollouts, teacher computes logprobs (teacher_tau > 0). ``sft``: teacher generates rollouts, student inference pool used for evals and weight sync."""
+    """Training mode. ``rl``: student generates rollouts, no teacher. ``opd``: student generates rollouts, teacher computes logprobs (teacher_tau > 0). ``sft``: teacher generates rollouts when configured, otherwise train envs must provide replayed traces."""
 
     student: RolloutModelConfig = Field(RolloutModelConfig(), validation_alias=AliasChoices("student", "model"))
     """Student rollout participant (model + client) — the model being trained."""
 
     teacher: RolloutModelConfig | None = Field(None, validation_alias=AliasChoices("teacher", "teacher_model"))
-    """Teacher rollout participant (model + client). Role depends on ``training_mode``: ``opd`` — teacher computes logprobs; ``sft`` — teacher generates rollouts."""
+    """Teacher rollout participant (model + client). Role depends on ``training_mode``: ``opd`` — teacher computes logprobs; ``sft`` — teacher generates rollouts when configured."""
 
     train: TrainConfig = TrainConfig()
 
@@ -752,10 +777,16 @@ def validate_unique_filter_types(self):
                 )
         return self
 
+    @model_validator(mode="after")
+    def _drop_default_sft_zero_advantage_filter(self):
+        if self.training_mode == "sft" and "post_batch_filters" not in self.model_fields_set:
+            self.post_batch_filters = [f for f in self.post_batch_filters if f.type != "zero_advantage"]
+        return self
+
     @model_validator(mode="after")
     def _force_no_renderer_for_sft(self):
-        """SFT rolls out via the teacher's plain chat-completions endpoint; the
-        renderer client doesn't apply. Force ``renderer=None`` so the user
+        """SFT train rollouts use teacher chat completions or replayed traces;
+        the renderer client doesn't apply. Force ``renderer=None`` so the user
         doesn't have to remember to set it. Declared before the renderer
         validators below so they see the corrected value."""
         if self.training_mode == "sft":
@@ -768,8 +799,30 @@ def validate_training_mode(self):
         has_teacher = self.teacher is not None
         if self.training_mode == "rl" and has_teacher:
             raise ValueError("orchestrator.teacher must not be set when training_mode = 'rl'.")
-        if self.training_mode in ("opd", "sft") and not has_teacher:
-            raise ValueError(f"orchestrator.teacher must be configured when training_mode = '{self.training_mode}'.")
+        if self.training_mode == "opd" and not has_teacher:
+            raise ValueError("orchestrator.teacher must be configured when training_mode = 'opd'.")
+        return self
+
+    @model_validator(mode="after")
+    def validate_teacherless_sft_uses_sft_replay(self):
+        """Teacherless SFT is only valid when train envs replay existing data."""
+        if self.training_mode != "sft" or self.teacher is not None:
+            return self
+
+        non_replay_envs = [env.id for env in self.train.env if not _is_sft_replay_env_id(env.id)]
+        if non_replay_envs:
+            raise ValueError(
+                "orchestrator.teacher must be configured for SFT unless every train env uses "
+                f"{SFT_REPLAY_ENV_ID!r}; got non-replay train env(s): {non_replay_envs}."
+            )
+
+        missing_dataset = [env.resolved_name for env in self.train.env if _sft_replay_dataset_arg(env.args) is None]
+        if missing_dataset:
+            raise ValueError(
+                f"teacherless SFT with {SFT_REPLAY_ENV_ID!r} requires an explicit "
+                "env.args.taskset.dataset or env.args.config.taskset.dataset for "
+                f"each train env; missing for: {missing_dataset}."
+            )
         return self
 
     @model_validator(mode="after")
diff --git a/pyproject.toml b/pyproject.toml
index cc931e1446..4341b1a80d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -88,6 +88,7 @@ envs = [
     "rlm-swe",
     "science-env",
     "simpleqa-verified",
+    "sft-replay",
     "tau2-bench",
     "wiki-search",
     "wordle",
@@ -150,6 +151,8 @@ override-dependencies = [
     "transformers==5.6.2",
     "torch>=2.9.0",
     "openenv-core",
+    "harnesses>=0.1.0",
+    "tasksets>=0.1.0",
 ]
 
 # ModelExpress 0.3.0 publishes protobuf<6 metadata, but its generated proto is
@@ -224,6 +227,9 @@ reverse-text = { path = "deps/verifiers/environments/reverse_text", editable = t
 rlm-swe = { path = "deps/research-environments/environments/rlm_swe", editable = true }
 science-env = { path = "deps/research-environments/environments/science_env", editable = true }
 simpleqa-verified = { path = "deps/research-environments/environments/simpleqa_verified", editable = true }
+sft-replay = { path = "deps/verifiers/environments/sft_replay", editable = true }
+harnesses = { path = "deps/verifiers/packages/harnesses", editable = true }
+tasksets = { path = "deps/verifiers/packages/tasksets", editable = true }
 tau2-bench = { path = "deps/research-environments/environments/tau2_bench", editable = true }
 wiki-search = { path = "deps/verifiers/environments/wiki_search", editable = true }
 wordle = { path = "deps/verifiers/environments/wordle", editable = true }
diff --git a/skills/configs/SKILL.md b/skills/configs/SKILL.md
index 83f7dd8d47..8a7f94bb53 100644
--- a/skills/configs/SKILL.md
+++ b/skills/configs/SKILL.md
@@ -60,6 +60,23 @@ CLI: `--env.0.id reverse-text --env.1.id math-env`.
 
 In TOML, an empty section header (`[ckpt]`) does the same.
 
+## Replay-backed SFT config
+
+For teacherless SFT through the `rl` orchestrator, set `orchestrator.training_mode = "sft"` and use only `sft-replay` train envs. Each train env must provide the dataset under the taskset config:
+
+```toml
+[orchestrator]
+training_mode = "sft"
+
+[[orchestrator.train.env]]
+id = "sft-replay"
+
+[orchestrator.train.env.args.taskset]
+dataset = "PrimeIntellect/Reverse-Text-SFT"
+```
+
+Do not pass the dataset at `env.args.dataset`; config validation rejects that shape because replay data belongs to the taskset. Set it at `env.args.taskset.dataset` (or `env.args.config.taskset.dataset`) instead.
+
 ## RL trainer token exports
 
 For rollout debugging, enable trainer-side token export under `trainer.experimental.token_export` (or `experimental.token_export` when running the trainer entrypoint directly). It writes one JSONL record per exported sequence under `output_dir/token_exports/step_<step>/rank_<rank>.jsonl`. Each record stores aligned per-token arrays for token ids, loss mask, advantage, reward, entropy, mismatch KL, inference/trainer logprobs, importance ratios, probability deltas, and masking diagnostics. It does not decode token text in the trainer.
diff --git a/skills/training/start-run/SKILL.md b/skills/training/start-run/SKILL.md
index 415778c437..31308eabb9 100644
--- a/skills/training/start-run/SKILL.md
+++ b/skills/training/start-run/SKILL.md
@@ -34,6 +34,7 @@ uv run rl @ examples/reverse_text/rl.toml --dry-run
 - Config: `RLConfig` (`packages/prime-rl-configs/src/prime_rl/configs/rl.py`)
 - Entrypoint: `src/prime_rl/entrypoints/rl.py`
 - SLURM: single- and multi-node
+- Training modes: `orchestrator.training_mode = "rl"` (default), `"opd"` (requires teacher), or `"sft"`. SFT can use a configured teacher, or teacherless replay when every train env is `sft-replay` with `args.taskset.dataset` set.
 - Environment packages: before launching a config with a non-core verifier env id,
   verify the package imports under `uv run` (for example
   `uv run python -c "import importlib.util; print(importlib.util.find_spec('rlm_swe'))"`).
@@ -82,7 +83,7 @@ curl http://localhost:8000/v1/chat/completions \
 
 | Command | Purpose | Typical use |
 |---------|---------|-------------|
-| `rl` | Full RL pipeline | Production RL training |
+| `rl` | Full orchestrator pipeline | RL, OPD, and orchestrator-backed SFT |
 | `sft` | Supervised fine-tuning | SFT and hard-distill |
 | `inference` | vLLM server | Standalone serving / debugging |
 
diff --git a/src/prime_rl/orchestrator/dispatcher.py b/src/prime_rl/orchestrator/dispatcher.py
index 133bc08da0..4dd9f6b98a 100644
--- a/src/prime_rl/orchestrator/dispatcher.py
+++ b/src/prime_rl/orchestrator/dispatcher.py
@@ -135,8 +135,9 @@ def __init__(
         self.policy = policy
         self.train_envs = train_envs
         self.eval_envs = eval_envs
-        # Train rollouts go to ``inference`` (the teacher in SFT mode);
-        # eval always evaluates the student, so it uses ``eval_inference``.
+        # Train rollouts go to ``inference`` (teacher in teacher-SFT, student
+        # otherwise); eval always evaluates the student, so it uses
+        # ``eval_inference``.
         self.inference = inference
         self.eval_inference = eval_inference
         self.train_source = train_source
@@ -173,9 +174,9 @@ def __init__(
 
     @property
     def train_model_name(self) -> str:
-        """Model name for *train* rollouts. In SFT mode train data comes from
-        the teacher pool, so use its model name; otherwise the live student
-        policy. (Eval always uses ``policy.model_name`` — the student.)"""
+        """Model name for *train* rollouts. Teacher-SFT uses the teacher pool
+        name; replay SFT receives the student name but ignores it. Eval always
+        uses ``policy.model_name`` — the student."""
         if self.training_mode == "sft":
             return self.inference.model_name
         return self.policy.model_name
diff --git a/src/prime_rl/orchestrator/orchestrator.py b/src/prime_rl/orchestrator/orchestrator.py
index 902c8b963b..9b1135884d 100644
--- a/src/prime_rl/orchestrator/orchestrator.py
+++ b/src/prime_rl/orchestrator/orchestrator.py
@@ -345,10 +345,10 @@ async def setup(self) -> None:
         else:
             get_logger().info("Training from scratch")
 
-        # SFT generates rollouts via the teacher (the student is trained on
-        # the teacher's outputs); RL / OPD generate via the student
-        if config.training_mode == "sft":
-            assert self.teacher_inference is not None, "sft mode requires teacher inference"
+        # SFT train rollouts come from the teacher when configured. Teacherless
+        # SFT is validated at config parse time to use replay envs, which ignore
+        # the client/model passed by the dispatcher.
+        if config.training_mode == "sft" and self.teacher_inference is not None:
             rollout_inference = self.teacher_inference
         else:
             rollout_inference = self.student_inference
diff --git a/src/prime_rl/orchestrator/trajectories.py b/src/prime_rl/orchestrator/trajectories.py
index 3e8431c12a..7171901ad2 100644
--- a/src/prime_rl/orchestrator/trajectories.py
+++ b/src/prime_rl/orchestrator/trajectories.py
@@ -164,6 +164,24 @@ def _tokenize_step_with_renderer(
     return build_trajectory_step(renderer, prompt, completion, tools=tools)
 
 
+def _set_token_usage_from_trajectory(output: vf.RolloutOutput) -> None:
+    trajectory = output.get("trajectory") or []
+    tokenized_steps = [step for step in trajectory if step.get("tokens") is not None]
+    if not tokenized_steps:
+        return
+
+    prompt_tokens = [len(step["tokens"]["prompt_ids"]) for step in tokenized_steps]
+    completion_tokens = [len(step["tokens"]["completion_ids"]) for step in tokenized_steps]
+    total_completion = sum(completion_tokens)
+    last_total = prompt_tokens[-1] + completion_tokens[-1]
+    output["token_usage"] = {
+        "input_tokens": float(sum(prompt_tokens)),
+        "output_tokens": float(total_completion),
+        "final_input_tokens": float(max(0, last_total - total_completion)),
+        "final_output_tokens": float(total_completion),
+    }
+
+
 def backfill_rollout_tokens(
     output: vf.RolloutOutput,
     tokenizer: PreTrainedTokenizer,
@@ -175,6 +193,9 @@ def backfill_rollout_tokens(
     Otherwise falls back to the tokenizer + apply_chat_template path.
     """
     if all(step["tokens"] is not None for step in output["trajectory"]):
+        token_usage = output.get("token_usage") or {}
+        if "final_input_tokens" not in token_usage or "final_output_tokens" not in token_usage:
+            _set_token_usage_from_trajectory(output)
         return True
 
     logger = get_logger()
@@ -198,6 +219,10 @@ def backfill_rollout_tokens(
             reconstructed.pop("original_prompt_len")
             step["tokens"] = reconstructed
 
+    token_usage = output.get("token_usage") or {}
+    if "final_input_tokens" not in token_usage or "final_output_tokens" not in token_usage:
+        _set_token_usage_from_trajectory(output)
+
     return True
 
 
diff --git a/tests/unit/orchestrator/test_sft_replay_env.py b/tests/unit/orchestrator/test_sft_replay_env.py
new file mode 100644
index 0000000000..3b69ca5d9d
--- /dev/null
+++ b/tests/unit/orchestrator/test_sft_replay_env.py
@@ -0,0 +1,90 @@
+import pytest
+import verifiers as vf
+from verifiers.clients import Client
+
+from prime_rl.orchestrator.trajectories import backfill_rollout_tokens, interleave_rollout
+
+
+class NoopClient(Client):
+    def setup_client(self, config):
+        return object()
+
+    async def to_native_tool(self, tool):
+        raise AssertionError("sft-replay must not convert tools")
+
+    async def to_native_prompt(self, messages):
+        raise AssertionError("sft-replay must not render prompts through a client")
+
+    async def get_native_response(self, prompt, model, sampling_args, tools=None, **kwargs):
+        raise AssertionError("sft-replay must not request model responses")
+
+    async def raise_from_native_response(self, response) -> None:
+        raise AssertionError("sft-replay must not handle native responses")
+
+    async def from_native_response(self, response):
+        raise AssertionError("sft-replay must not parse native responses")
+
+    async def close(self) -> None:
+        return None
+
+
+class SimpleChatTokenizer:
+    def __init__(self):
+        self._tok2id: dict[str, int] = {}
+        self._next_id = 1
+
+    def _id(self, token: str) -> int:
+        if token not in self._tok2id:
+            self._tok2id[token] = self._next_id
+            self._next_id += 1
+        return self._tok2id[token]
+
+    def apply_chat_template(self, messages, add_generation_prompt=False, return_dict=False, tools=None):
+        del return_dict, tools
+        ids = []
+        for message in messages:
+            role = message.get("role", "unknown")
+            ids.append(self._id(f"<|{role}|>"))
+            content = message.get("content", "")
+            if isinstance(content, str):
+                if content:
+                    ids.append(self._id(content))
+            else:
+                ids.append(self._id(str(content)))
+        if add_generation_prompt:
+            ids.append(self._id("<|assistant|>"))
+        return ids
+
+
+def role_content(messages) -> list[tuple[str, object]]:
+    return [(message["role"], message["content"]) for message in messages]
+
+
+@pytest.mark.asyncio
+async def test_sft_replay_env_replays_messages_for_prime_rl_training_path():
+    env = vf.load_environment("sft-replay", taskset={})
+    row = dict(env.get_dataset()[0])
+
+    output = await env.run_rollout(
+        row,
+        client=NoopClient(vf.ClientConfig()),
+        model="unused-student",
+        sampling_args={},
+        state_columns=["trajectory", "sampling_args"],
+    )
+
+    assert output["error"] is None
+    assert output["stop_condition"] == "replayed_messages"
+    assert len(output["trajectory"]) == 1
+    assert output["trajectory"][0]["tokens"] is None
+    assert role_content(output["trajectory"][0]["prompt"]) == [("user", "Reverse abc.")]
+    assert role_content(output["trajectory"][0]["completion"]) == [("assistant", "cba")]
+
+    backfill_rollout_tokens(output, SimpleChatTokenizer())
+    samples = interleave_rollout(output, env_name="sft-replay")
+
+    assert samples is not None
+    assert len(samples) == 1
+    assert any(samples[0].completion_mask)
+    assert output["token_usage"]["final_input_tokens"] > 0
+    assert output["token_usage"]["final_output_tokens"] > 0
diff --git a/tests/unit/test_configs.py b/tests/unit/test_configs.py
index fcdee7a843..9942ada866 100644
--- a/tests/unit/test_configs.py
+++ b/tests/unit/test_configs.py
@@ -202,6 +202,66 @@ def test_orchestrator_vlm_requires_renderer():
     assert config.renderer is not None
 
 
+def test_sft_training_mode_allows_missing_teacher():
+    config = OrchestratorConfig.model_validate(
+        {
+            "training_mode": "sft",
+            "renderer": None,
+            "train": {"env": [{"id": "sft-replay", "args": {"taskset": {"dataset": "local.jsonl"}}}]},
+        }
+    )
+
+    assert config.teacher is None
+    assert "zero_advantage" not in {f.type for f in config.post_batch_filters}
+
+
+def test_sft_training_mode_preserves_explicit_post_filters():
+    config = OrchestratorConfig.model_validate(
+        {
+            "training_mode": "sft",
+            "renderer": None,
+            "train": {"env": [{"id": "prime/sft-replay@0.1.0", "args": {"taskset": {"dataset": "local.jsonl"}}}]},
+            "post_batch_filters": [{"type": "zero_advantage", "enforce": True}],
+        }
+    )
+
+    assert [f.type for f in config.post_batch_filters] == ["zero_advantage"]
+
+
+def test_sft_training_mode_accepts_nested_env_config_dataset():
+    config = OrchestratorConfig.model_validate(
+        {
+            "training_mode": "sft",
+            "renderer": None,
+            "train": {"env": [{"id": "sft-replay", "args": {"config": {"taskset": {"dataset": "local.jsonl"}}}}]},
+        }
+    )
+
+    assert config.teacher is None
+
+
+def test_teacherless_sft_requires_sft_replay_env():
+    with pytest.raises(ValidationError, match="every train env uses 'sft-replay'"):
+        OrchestratorConfig.model_validate(
+            {
+                "training_mode": "sft",
+                "renderer": None,
+                "train": {"env": [{"id": "reverse-text"}]},
+            }
+        )
+
+
+def test_teacherless_sft_requires_sft_replay_taskset_dataset():
+    with pytest.raises(ValidationError, match="requires an explicit env.args.taskset.dataset"):
+        OrchestratorConfig.model_validate(
+            {
+                "training_mode": "sft",
+                "renderer": None,
+                "train": {"env": [{"id": "sft-replay", "args": {"dataset": "local.jsonl"}}]},
+            }
+        )
+
+
 def test_selective_activation_checkpointing_requires_custom_impl():
     with pytest.raises(ValidationError, match="Selective activation checkpointing requires model.impl='custom'"):
         TrainerModelConfig.model_validate({"impl": "hf", "ac": {"mode": "selective"}})
diff --git a/uv.lock b/uv.lock
index 0fe7a4e076..7cd5141124 100644
--- a/uv.lock
+++ b/uv.lock
@@ -34,9 +34,11 @@ prime = false
 
 [manifest]
 overrides = [
+    { name = "harnesses", editable = "deps/verifiers/packages/harnesses" },
     { name = "nvidia-cudnn-cu12", specifier = ">=9.15" },
     { name = "nvidia-cutlass-dsl", specifier = ">=4.4.1" },
     { name = "openenv-core" },
+    { name = "tasksets", editable = "deps/verifiers/packages/tasksets" },
     { name = "torch", specifier = ">=2.9.0", index = "https://download.pytorch.org/whl/cu128" },
     { name = "transformers", specifier = "==5.6.2" },
 ]
@@ -1461,6 +1463,21 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779, upload-time = "2025-08-23T18:12:17.779Z" },
 ]
 
+[[package]]
+name = "harnesses"
+source = { editable = "deps/verifiers/packages/harnesses" }
+dependencies = [
+    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+
+[package.metadata]
+requires-dist = [
+    { name = "aiohttp", marker = "extra == 'nemogym'", specifier = ">=3.9.0" },
+    { name = "nemo-gym", marker = "python_full_version >= '3.12' and extra == 'nemogym'", specifier = ">=0.2.1,<0.3" },
+    { name = "verifiers", specifier = ">=0.1.15.dev11" },
+]
+provides-extras = ["nemogym"]
+
 [[package]]
 name = "hf-xet"
 version = "1.5.0"
@@ -3576,6 +3593,7 @@ envs = [
     { name = "reverse-text", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "rlm-swe", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "science-env", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "sft-replay", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "simpleqa-verified", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "tau2-bench", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "wiki-search", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
@@ -3677,6 +3695,7 @@ requires-dist = [
     { name = "rlm-swe", marker = "extra == 'envs'", editable = "deps/research-environments/environments/rlm_swe" },
     { name = "science-env", marker = "extra == 'envs'", editable = "deps/research-environments/environments/science_env" },
     { name = "setproctitle", specifier = ">=1.3.0" },
+    { name = "sft-replay", marker = "extra == 'envs'", editable = "deps/verifiers/environments/sft_replay" },
     { name = "simpleqa-verified", marker = "extra == 'envs'", editable = "deps/research-environments/environments/simpleqa_verified" },
     { name = "tau2-bench", marker = "extra == 'envs'", editable = "deps/research-environments/environments/tau2_bench" },
     { name = "tenacity", specifier = ">=8.2.0" },
@@ -4543,6 +4562,23 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/94/b8/f1f62a5e3c0ad2ff1d189590bfa4c46b4f3b6e49cef6f26c6ee4e575394d/setuptools-80.10.2-py3-none-any.whl", hash = "sha256:95b30ddfb717250edb492926c92b5221f7ef3fbcc2b07579bcd4a27da21d0173", size = 1064234, upload-time = "2026-01-25T22:38:15.216Z" },
 ]
 
+[[package]]
+name = "sft-replay"
+version = "0.1.0"
+source = { editable = "deps/verifiers/environments/sft_replay" }
+dependencies = [
+    { name = "harnesses", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "tasksets", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+
+[package.metadata]
+requires-dist = [
+    { name = "harnesses", specifier = ">=0.1.2" },
+    { name = "tasksets", specifier = ">=0.1.5" },
+    { name = "verifiers", specifier = ">=0.1.15.dev11" },
+]
+
 [[package]]
 name = "shellingham"
 version = "1.5.4"
@@ -4725,6 +4761,24 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/99/55/db07de81b5c630da5cbf5c7df646580ca26dfaefa593667fc6f2fe016d2e/tabulate-0.10.0-py3-none-any.whl", hash = "sha256:f0b0622e567335c8fabaaa659f1b33bcb6ddfe2e496071b743aa113f8774f2d3", size = 39814, upload-time = "2026-03-04T18:55:31.284Z" },
 ]
 
+[[package]]
+name = "tasksets"
+source = { editable = "deps/verifiers/packages/tasksets" }
+dependencies = [
+    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+
+[package.metadata]
+requires-dist = [
+    { name = "nemo-gym", marker = "python_full_version >= '3.12' and extra == 'nemogym'", specifier = ">=0.2.1,<0.3" },
+    { name = "nltk", marker = "extra == 'ta'" },
+    { name = "openenv-core", marker = "extra == 'openenv'", specifier = ">=0.3.0" },
+    { name = "openreward", marker = "python_full_version >= '3.11' and extra == 'openreward'", specifier = ">=0.1.123" },
+    { name = "textarena", marker = "extra == 'ta'" },
+    { name = "verifiers", specifier = ">=0.1.15.dev11" },
+]
+provides-extras = ["nemogym", "openenv", "openreward", "ta"]
+
 [[package]]
 name = "tau2"
 version = "0.2.1.dev0"