[Test] Add tests and benchmarks for collector throughput optimizations

vmoens · vmoens · commit b716fd90db0e · 2026-03-24T08:34:02.000Z
Cover all 7 performance features: `_skip_maybe_reset`, `_StepMDP` `out=` reuse, `_trust_step_output`, `update_traj_ids`, combined optimization flags, `torch.compile` fullgraph, and fast-path benchmarks.

Made-with: Cursor
ghstack-source-id: ad18afe
Pull-Request: #3567
diff --git a/benchmarks/test_collectors_benchmark.py b/benchmarks/test_collectors_benchmark.py
@@ -247,6 +247,32 @@ def test_async_pixels(benchmark):
     benchmark(execute_collector, c)
 
 
+def single_collector_fast_setup():  # Build a SyncDataCollector with the fast-path flags set and warm it up.
+    device = "cuda:0" if torch.cuda.device_count() else "cpu"  # first CUDA device when one is visible, else CPU
+    env = TransformedEnv(DMControlEnv("cheetah", "run", device=device), StepCounter(50))
+    env._trust_step_output = True  # flag is set on the transformed env ...
+    env.base_env._trust_step_output = True  # ... and separately on the wrapped base env
+    env._skip_maybe_reset = True
+    c = SyncDataCollector(
+        env,
+        RandomPolicy(env.action_spec),
+        total_frames=-1,
+        frames_per_batch=100,
+        device=device,
+        update_traj_ids=False,
+    )
+    c = iter(c)
+    for i, _ in enumerate(c):  # warm-up: consume batches 0 through 10, then stop
+        if i == 10:
+            break
+    return ((c,), {})  # (args, kwargs) pair; args holds the warmed-up iterator
+
+
+def test_single_fast(benchmark):  # Benchmark execute_collector on the iterator built by single_collector_fast_setup.
+    (c,), _ = single_collector_fast_setup()  # the kwargs half of the pair is unused here
+    benchmark(execute_collector, c)
+
+
 class TestRBGCollector:
     @pytest.mark.parametrize(
         "n_col,n_wokrers_per_col",
diff --git a/benchmarks/test_envs_benchmark.py b/benchmarks/test_envs_benchmark.py
@@ -128,6 +128,47 @@ def test_step_mdp_speed(
     )
 
 
+def _step_and_maybe_reset_loop(env, td):  # Benchmark body: 100 step_and_maybe_reset calls, each followed by a new random action.
+    for _ in range(100):
+        _, td = env.step_and_maybe_reset(td)  # first returned item is discarded; the second feeds the next iteration
+        td = env.rand_action(td)
+
+
+def make_env_fast_path():  # Build the cheetah/run env with the fast-path flags set and pre-step it three times.
+    device = "cuda:0" if torch.cuda.device_count() else "cpu"  # first CUDA device when one is visible, else CPU
+    env = TransformedEnv(DMControlEnv("cheetah", "run", device=device), StepCounter(50))
+    env._trust_step_output = True  # flag is set on the transformed env ...
+    env.base_env._trust_step_output = True  # ... and separately on the wrapped base env
+    env._skip_maybe_reset = True
+    td = env.reset()
+    td = env.rand_action(td)
+    for _ in range(3):  # warm-up steps performed before the benchmark starts timing
+        _, td = env.step_and_maybe_reset(td)
+        td = env.rand_action(td)
+    return ((env, td), {})  # (args, kwargs) pair; args holds the env and its current input tensordict
+
+
+def make_env_normal_path():  # Baseline: same env construction and warm-up as make_env_fast_path, with no flags set.
+    device = "cuda:0" if torch.cuda.device_count() else "cpu"  # first CUDA device when one is visible, else CPU
+    env = TransformedEnv(DMControlEnv("cheetah", "run", device=device), StepCounter(50))
+    td = env.reset()
+    td = env.rand_action(td)
+    for _ in range(3):  # warm-up steps performed before the benchmark starts timing
+        _, td = env.step_and_maybe_reset(td)
+        td = env.rand_action(td)
+    return ((env, td), {})  # (args, kwargs) pair; args holds the env and its current input tensordict
+
+
+def test_step_and_maybe_reset_fast_path(benchmark):  # Benchmark the 100-step loop on the env with the fast-path flags set.
+    (env, td), _ = make_env_fast_path()  # the kwargs half of the pair is unused here
+    benchmark(_step_and_maybe_reset_loop, env, td)
+
+
+def test_step_and_maybe_reset_normal(benchmark):  # Benchmark the same 100-step loop on the env without the flags (baseline).
+    (env, td), _ = make_env_normal_path()  # the kwargs half of the pair is unused here
+    benchmark(_step_and_maybe_reset_loop, env, td)
+
+
 if __name__ == "__main__":
     args, unknown = argparse.ArgumentParser().parse_known_args()
     pytest.main([__file__, "--capture", "no", "--exitfirst"] + unknown)
diff --git a/test/compile/test_compile_collectors.py b/test/compile/test_compile_collectors.py
@@ -117,6 +117,42 @@ def test_cudagraph_policy(self, collector_cls, cudagraph_policy):
             collector.shutdown()
             del collector
 
+    def test_compile_step_and_maybe_reset_fullgraph(self):  # step_and_maybe_reset must trace as one graph and match eager output.
+        torch._dynamo.reset_code_caches()
+
+        env = ContinuousActionVecMockEnv()
+        env._trust_step_output = True
+        env._skip_maybe_reset = True
+
+        td = env.reset()
+        td = env.rand_action(td)
+
+        for _ in range(3):  # three eager steps are run before anything is traced
+            _, td = env.step_and_maybe_reset(td)
+            td = env.rand_action(td)
+
+        torch._dynamo.reset()
+        explanation = torch._dynamo.explain(env.step_and_maybe_reset)(td)
+        assert explanation.graph_count == 1  # exactly one captured graph ...
+        assert explanation.graph_break_count == 0  # ... and no graph breaks
+
+        out_eager, next_eager = env.step_and_maybe_reset(td.clone())  # eager reference, run on a copy of td
+
+        compiled_fn = torch.compile(env.step_and_maybe_reset, fullgraph=True)
+        out_compiled, next_compiled = compiled_fn(td.clone())  # compiled run, on its own copy of td
+
+        for key in out_eager.keys(True, True):  # iterates the eager result's keys; compiled-only keys are not visited
+            v_e = out_eager.get(key)
+            v_c = out_compiled.get(key)
+            if isinstance(v_e, torch.Tensor):  # entries that are not tensors are skipped rather than compared
+                torch.testing.assert_close(v_e, v_c)
+
+        for key in next_eager.keys(True, True):  # same comparison for the second returned tensordict
+            v_e = next_eager.get(key)
+            v_c = next_compiled.get(key)
+            if isinstance(v_e, torch.Tensor):
+                torch.testing.assert_close(v_e, v_c)
+
 
 if __name__ == "__main__":
     pytest.main([__file__, "-v"])
diff --git a/test/test_collectors.py b/test/test_collectors.py
@@ -4088,6 +4088,69 @@ def test_unique_traj_sync(self, cat_results):
             del c
 
 
+class TestUpdateTrajIds:  # Tests for the collector's `update_traj_ids` option.
+    def test_update_traj_ids_default_is_true(self):  # The attribute is True when the argument is not passed.
+        env = ContinuousActionVecMockEnv()
+        policy = TensorDictModule(
+            nn.Linear(
+                env.observation_spec["observation"].shape[-1], env.action_spec.shape[-1]
+            ),
+            in_keys=["observation"],
+            out_keys=["action"],
+        )
+        collector = Collector(env, policy, frames_per_batch=10, total_frames=10)
+        try:
+            assert collector.update_traj_ids is True
+        finally:
+            collector.shutdown()  # always release the collector, even when the assertion fails
+
+    def test_update_traj_ids_false_skips_tracking(self):  # With update_traj_ids=False, ids are constant within each batch.
+        env = ContinuousActionVecMockEnv()
+        policy = TensorDictModule(
+            nn.Linear(
+                env.observation_spec["observation"].shape[-1], env.action_spec.shape[-1]
+            ),
+            in_keys=["observation"],
+            out_keys=["action"],
+        )
+        collector = Collector(
+            env,
+            policy,
+            frames_per_batch=10,
+            total_frames=20,
+            update_traj_ids=False,
+        )
+        try:
+            for data in collector:
+                traj_ids = data.get(("collector", "traj_ids"))
+                assert (traj_ids == traj_ids[..., 0:1]).all()  # every entry along the last dim equals the first entry
+        finally:
+            collector.shutdown()
+
+    def test_update_traj_ids_true_updates(self):  # With update_traj_ids=True, the traj_ids entry is present in each batch.
+        env = ContinuousActionVecMockEnv()
+        policy = TensorDictModule(
+            nn.Linear(
+                env.observation_spec["observation"].shape[-1], env.action_spec.shape[-1]
+            ),
+            in_keys=["observation"],
+            out_keys=["action"],
+        )
+        collector = Collector(
+            env,
+            policy,
+            frames_per_batch=50,
+            total_frames=100,
+            update_traj_ids=True,
+        )
+        try:
+            for data in collector:
+                traj_ids = data.get(("collector", "traj_ids"))
+                assert traj_ids is not None  # NOTE(review): checks presence only; does not assert that the ids ever change
+        finally:
+            collector.shutdown()
+
+
 class TestDynamicEnvs:
     def test_dynamic_sync_collector(self):
         env = EnvWithDynamicSpec()
@@ -5296,6 +5359,36 @@ def env_fn():
         assert expected_trace.exists(), f"Trace file not found at {expected_trace}"
 
 
+class TestCollectorOptimizationFlags:  # Smoke test: a collector run with every optimization flag switched on.
+    def test_collector_all_optimizations(self):  # Batches keep their size and expected keys with all flags enabled.
+        env = TransformedEnv(ContinuousActionVecMockEnv(), StepCounter())
+        env._trust_step_output = True  # flag is set on the transformed env ...
+        env.base_env._trust_step_output = True  # ... and separately on the wrapped base env
+        env._skip_maybe_reset = True
+        policy = TensorDictModule(
+            nn.Linear(
+                env.observation_spec["observation"].shape[-1], env.action_spec.shape[-1]
+            ),
+            in_keys=["observation"],
+            out_keys=["action"],
+        )
+        collector = Collector(
+            env,
+            policy,
+            frames_per_batch=20,
+            total_frames=40,
+            update_traj_ids=False,
+        )
+        try:
+            for data in collector:  # NOTE(review): passes vacuously if the collector yields no batch
+                assert data.shape[-1] == 20  # last batch dim equals frames_per_batch
+                assert "observation" in data.keys()
+                assert ("next", "observation") in data.keys(True)  # keys(True) also lists nested keys
+                assert ("next", "reward") in data.keys(True)
+        finally:
+            collector.shutdown()  # always release the collector, even when an assertion fails
+
+
 if __name__ == "__main__":
     args, unknown = argparse.ArgumentParser().parse_known_args()
     pytest.main(
diff --git a/test/test_envs.py b/test/test_envs.py
@@ -2232,6 +2232,28 @@ def test_step_class(
         out_cls = step_func(tensordict)
         assert (out_func == out_cls).all()
 
+    @pytest.mark.parametrize(
+        "envcls",
+        [
+            ContinuousActionVecMockEnv,
+            CountingBatchedEnv,
+            CountingEnv,
+        ],
+    )
+    def test_step_class_out_reuse(self, envcls):
+        torch.manual_seed(0)
+        env = envcls()
+        tensordict = env.rand_step(env.reset())
+
+        step_func = _StepMDP(env, exclude_action=False)
+        result_no_out = step_func(tensordict.clone())
+        out_buf = result_no_out.clone()
+        out_buf_id = id(out_buf)
+
+        result_with_out = step_func(tensordict.clone(), out=out_buf)
+        assert id(result_with_out) == out_buf_id
+        assert (result_no_out == result_with_out).all()
+
     @pytest.mark.parametrize("nested_obs", [True, False])
     @pytest.mark.parametrize("nested_action", [True, False])
     @pytest.mark.parametrize("nested_done", [True, False])
@@ -3780,6 +3802,32 @@ def policy(td):
             assert not lazy["lidar"][~done.squeeze()].isnan().any()
             assert (lazy_root["lidar"][1:][done[:-1].squeeze()] == 0).all()
 
+    def test_skip_maybe_reset_default(self):  # A freshly built env has a falsy `_skip_maybe_reset`.
+        env = AutoResettingCountingEnv(4, auto_reset=True)
+        assert not env._skip_maybe_reset
+
+    def test_skip_maybe_reset_step_and_maybe_reset(self):  # One step gives the same observations with and without the flag.
+        env_normal = AutoResettingCountingEnv(100, auto_reset=True)  # reference env, flag left untouched
+        td_normal = env_normal.reset()
+        td_normal.set("action", torch.ones((*td_normal.shape, 1), dtype=torch.int64))
+
+        env_skip = AutoResettingCountingEnv(100, auto_reset=True)  # identically constructed env with the flag set
+        env_skip._skip_maybe_reset = True
+        td_skip = env_skip.reset()
+        td_skip.set("action", torch.ones((*td_skip.shape, 1), dtype=torch.int64))
+
+        out_normal, next_normal = env_normal.step_and_maybe_reset(td_normal)  # a single step is taken on each env
+        out_skip, next_skip = env_skip.step_and_maybe_reset(td_skip)
+
+        torch.testing.assert_close(
+            out_normal["next", "observation"],
+            out_skip["next", "observation"],
+        )
+        torch.testing.assert_close(
+            next_normal["observation"],
+            next_skip["observation"],
+        )
+
 
 class TestEnvWithDynamicSpec:
     def test_dynamic_rollout(self):
@@ -5026,6 +5074,47 @@ def test_parallel_env_no_buffers_mps_rollout(self):
             env.close(raise_if_closed=False)
 
 
+class TestTrustStepOutput:  # Tests for the `_trust_step_output` env flag.
+    def test_trust_step_output_default(self):  # A freshly built env has a falsy `_trust_step_output`.
+        env = ContinuousActionVecMockEnv()
+        assert not env._trust_step_output
+
+    def test_trust_step_output_fast_path(self):  # step() returns the same observation and reward with the flag on or off.
+        env = TransformedEnv(ContinuousActionVecMockEnv(), StepCounter())
+        td = env.reset()
+        td = env.rand_action(td)
+
+        out_normal = env.step(td.clone())  # reference step, taken before the flags are set
+
+        env._trust_step_output = True  # flag is set on the transformed env ...
+        env.base_env._trust_step_output = True  # ... and separately on the wrapped base env
+        out_fast = env.step(td.clone())  # same env instance, stepped from a fresh copy of the same input
+
+        torch.testing.assert_close(
+            out_normal["next", "observation"],
+            out_fast["next", "observation"],
+        )
+        torch.testing.assert_close(
+            out_normal["next", "reward"],
+            out_fast["next", "reward"],
+        )
+
+    def test_trust_step_fast_path_step_and_maybe_reset(self):  # With all flags set, both returned tensordicts have expected keys.
+        env = TransformedEnv(ContinuousActionVecMockEnv(), StepCounter())
+        env._trust_step_output = True
+        env.base_env._trust_step_output = True
+        env._skip_maybe_reset = True
+
+        td = env.reset()
+        td = env.rand_action(td)
+
+        out, next_out = env.step_and_maybe_reset(td)
+
+        assert "next" in out.keys()
+        assert "observation" in next_out.keys()
+        assert "step_count" in next_out.keys()  # presumably written by the StepCounter transform; confirm
+
+
 if __name__ == "__main__":
     args, unknown = argparse.ArgumentParser().parse_known_args()
     pytest.main([__file__, "--capture", "no", "--exitfirst"] + unknown)