OMPE-88188: Add frame stacking support for explicit temporal info #5574
Status: Open

jmart-nv wants to merge 3 commits into isaac-sim:develop from jmart-nv:jmart/frame-stacking.
Changelog entry:

Added
^^^^^

* Added :class:`~isaaclab.envs.utils.FrameStackBuffer`, a ring buffer that stacks the last
  ``N`` rendered frames along the channel dimension for tasks that need explicit temporal
  observations.
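For intuition, the stacked output described above has the same layout you would get by concatenating the last ``N`` frames along the last (channel) dimension, oldest to newest. A minimal illustrative snippet, not part of the PR (the buffer below produces this layout without allocating a new tensor via ``torch.cat`` each step):

import torch

# Illustration only: what "stacking along the channel dimension" means.
num_envs, H, W, C, N = 4, 8, 8, 3, 3
frames = [torch.full((num_envs, H, W, C), v, dtype=torch.uint8) for v in (1, 2, 3)]
stacked = torch.cat(frames, dim=-1)  # oldest to newest along the last dim
assert stacked.shape == (num_envs, H, W, C * N)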
New module (``isaaclab.envs.utils.frame_stack``):

# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

"""Frame-stacking helper for camera-based RL tasks.

Provides :class:`FrameStackBuffer`, a ring buffer over the last ``N`` rendered frames
that tasks can use to supply explicit temporal observations to a policy.
"""

from __future__ import annotations

from collections.abc import Sequence

import torch


class FrameStackBuffer:
Contributor review comment (attached here): I think we can avoid a … This can become: …
| """Ring buffer that stacks the last ``frame_stack`` rendered frames along the channel dim. | ||
|
|
||
| Example:: | ||
|
|
||
| self._stack = FrameStackBuffer( | ||
| single_frame_shape=(self.num_envs, H, W, C), | ||
| frame_stack=self.cfg.frame_stack, | ||
| device=self.device, | ||
| ) | ||
| # in _get_observations: | ||
| stacked = self._stack.update(rgb) | ||
| # in _reset_idx: | ||
| self._stack.reset(env_ids) | ||
|
|
||
| Args: | ||
| single_frame_shape: Shape of one rendered frame, ``(num_envs, H, W, C)``. | ||
| frame_stack: Number of frames to keep. Must be ``>= 1``; ``1`` is a passthrough. | ||
| device: Torch device for the internal buffers. | ||
| dtype: Torch dtype for the internal buffers. Defaults to :obj:`torch.uint8`. | ||
| """ | ||
|
|
||
| def __init__( | ||
| self, | ||
| single_frame_shape: tuple[int, ...], | ||
| frame_stack: int, | ||
| device: str | torch.device, | ||
| dtype: torch.dtype = torch.uint8, | ||
| ): | ||
| if frame_stack < 1: | ||
| raise ValueError(f"frame_stack must be >= 1, got {frame_stack}.") | ||
| if len(single_frame_shape) < 2: | ||
| raise ValueError( | ||
| f"single_frame_shape must have at least 2 dims (envs + channels), got {single_frame_shape}." | ||
| ) | ||
| self.frame_stack: int = frame_stack | ||
| self._single_shape: tuple[int, ...] = tuple(int(d) for d in single_frame_shape) | ||
| self._num_envs: int = self._single_shape[0] | ||
| self._channels: int = self._single_shape[-1] | ||
| self._device = torch.device(device) if isinstance(device, str) else device | ||
| self._dtype = dtype | ||
|
|
||
| self._history: torch.Tensor = torch.zeros((frame_stack, *self._single_shape), device=self._device, dtype=dtype) | ||
| self._stacked: torch.Tensor = torch.zeros( | ||
| (*self._single_shape[:-1], self._channels * frame_stack), device=self._device, dtype=dtype | ||
| ) | ||
| self._frame_idx: int = 0 | ||
| self._needs_init: torch.Tensor = torch.ones(self._num_envs, device=self._device, dtype=torch.bool) | ||
| # CPU-side mirror of _needs_init.any() — avoids a GPU→CPU sync on the steady-state path. | ||
| self._needs_init_cpu: bool = True | ||
|
|
||
| @property | ||
| def output_shape(self) -> tuple[int, ...]: | ||
| """Shape of the tensor returned by :meth:`update`, ``(num_envs, H, W, C * frame_stack)``.""" | ||
| return (*self._single_shape[:-1], self._channels * self.frame_stack) | ||
|
|
||
| @property | ||
| def output_channels(self) -> int: | ||
| """Channel count of the stacked output (``= single_channels * frame_stack``).""" | ||
| return self._channels * self.frame_stack | ||
|
|
||
| def update(self, single_frame: torch.Tensor) -> torch.Tensor: | ||
| """Push a new frame and return the stacked output. | ||
|
|
||
| On the first :meth:`update` after construction or :meth:`reset` for an env, all | ||
| history slots for that env are filled with ``single_frame`` so the policy never | ||
| sees zero-padded warmup data. | ||
|
|
||
| Args: | ||
| single_frame: New rendered frame, shape ``(num_envs, H, W, C)``. | ||
|
|
||
| Returns: | ||
| Stacked tensor ``(num_envs, H, W, C * frame_stack)`` in oldest-to-newest | ||
| channel order. This is the buffer's own storage — do not mutate it. | ||
| """ | ||
|
        if single_frame.shape != self._single_shape:
            raise ValueError(
                f"single_frame shape {tuple(single_frame.shape)} does not match expected "
                f"{self._single_shape} (set at construction)."
            )

        if self._needs_init_cpu:
            init_ids = self._needs_init.nonzero(as_tuple=False).squeeze(-1)
            if init_ids.numel() > 0:
                for i in range(self.frame_stack):
                    self._history[i, init_ids] = single_frame[init_ids]
            self._needs_init.zero_()
            self._needs_init_cpu = False

        self._history[self._frame_idx].copy_(single_frame)

        # narrow + copy_ rebuild avoids per-frame torch.cat allocations.
        for i in range(self.frame_stack):
            src_slot = (self._frame_idx + 1 + i) % self.frame_stack
            self._stacked.narrow(-1, i * self._channels, self._channels).copy_(self._history[src_slot])

        self._frame_idx = (self._frame_idx + 1) % self.frame_stack
        return self._stacked

    def reset(self, env_ids: Sequence[int] | torch.Tensor | None = None) -> None:
| """Mark envs for history re-initialization on the next :meth:`update`. | ||
|
|
||
| Args: | ||
| env_ids: Indices of envs to reset. ``None`` resets all envs. | ||
| """ | ||
| if env_ids is None: | ||
| self._needs_init.fill_(True) | ||
| else: | ||
| if not isinstance(env_ids, torch.Tensor): | ||
| env_ids = torch.as_tensor(env_ids, device=self._device, dtype=torch.long) | ||
| self._needs_init[env_ids] = True | ||
| self._needs_init_cpu = True | ||
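A minimal, self-contained usage sketch of the lifecycle above (first update, steady state, per-env reset). It is not part of the PR; shapes and values are illustrative only, and the import path is the one used by the unit tests below.

import torch

from isaaclab.envs.utils import FrameStackBuffer

num_envs, H, W, C = 2, 4, 4, 3
buf = FrameStackBuffer((num_envs, H, W, C), frame_stack=3, device="cpu")

# First update: every history slot is filled with the first frame, so no zero padding.
obs = buf.update(torch.full((num_envs, H, W, C), 1, dtype=torch.uint8))
assert obs.shape == (num_envs, H, W, C * 3)

# Steady state: channel slices run oldest to newest.
buf.update(torch.full((num_envs, H, W, C), 2, dtype=torch.uint8))
obs = buf.update(torch.full((num_envs, H, W, C), 3, dtype=torch.uint8))
assert int(obs[0, 0, 0, 0]) == 1 and int(obs[0, 0, 0, -1]) == 3

# Per-env reset: env 0 is re-initialized on the next update, env 1 keeps its history.
buf.reset([0])
obs = buf.update(torch.full((num_envs, H, W, C), 9, dtype=torch.uint8))
assert int(obs[0, 0, 0, 0]) == 9
assert int(obs[1, 0, 0, 0]) == 2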
New test module:

# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

"""Unit tests for :mod:`isaaclab.envs.utils.frame_stack`. Pure tensor logic; no Kit launch."""

from __future__ import annotations

import pytest
import torch

from isaaclab.envs.utils import FrameStackBuffer

pytestmark = pytest.mark.isaacsim_ci

# Shorthand shape (num_envs, H, W, C) used across tests.
NUM_ENVS = 4
HEIGHT = 8
WIDTH = 8
CHANNELS = 3
SINGLE_SHAPE = (NUM_ENVS, HEIGHT, WIDTH, CHANNELS)


def _make_frame(value: int, dtype: torch.dtype = torch.uint8) -> torch.Tensor:
    """Build a constant-valued (N, H, W, C) tensor on CPU."""
    return torch.full(SINGLE_SHAPE, value, dtype=dtype)


class TestFrameStackBuffer:
    """Pure-tensor tests of the ring buffer."""

    def test_output_shape_and_channels(self):
        buf = FrameStackBuffer(SINGLE_SHAPE, frame_stack=3, device="cpu")
        assert buf.output_shape == (NUM_ENVS, HEIGHT, WIDTH, CHANNELS * 3)
        assert buf.output_channels == CHANNELS * 3
        # The narrow+copy_ rebuild writes into a single pre-allocated buffer; output must stay contiguous.
        stacked = buf.update(_make_frame(1))
        assert stacked.is_contiguous()

    def test_init_fills_all_slots_on_first_update(self):
        """First update post-construction fills every history slot with the new frame."""
        buf = FrameStackBuffer(SINGLE_SHAPE, frame_stack=2, device="cpu")
        f0 = _make_frame(7)
        stacked = buf.update(f0)
        # Both slots equal F0.
        assert torch.equal(stacked[..., :CHANNELS], f0)
        assert torch.equal(stacked[..., CHANNELS:], f0)

    def test_ring_buffer_shifts_correctly(self):
        """After the second update, oldest slot = first frame; newest slot = second frame."""
        buf = FrameStackBuffer(SINGLE_SHAPE, frame_stack=2, device="cpu")
        f0 = _make_frame(10)
        f1 = _make_frame(20)
        buf.update(f0)
        stacked = buf.update(f1)
        assert torch.equal(stacked[..., :CHANNELS], f0), "Oldest slot must be the previous frame"
        assert torch.equal(stacked[..., CHANNELS:], f1), "Newest slot must be the latest frame"

    def test_newest_slot_equals_latest_single(self):
        """Ring-buffer correctness invariant: newest slot post-update == the latest single input."""
        buf = FrameStackBuffer(SINGLE_SHAPE, frame_stack=2, device="cpu")
        buf.update(_make_frame(1))
        f_latest = _make_frame(99)
        stacked = buf.update(f_latest)
        assert torch.equal(stacked[..., CHANNELS:], f_latest)

    def test_three_frame_stack_oldest_to_newest_order(self):
        """frame_stack=3 produces oldest→newest across 3 channel slices."""
        buf = FrameStackBuffer(SINGLE_SHAPE, frame_stack=3, device="cpu")
        buf.update(_make_frame(10))  # init: all 3 slots = 10
        buf.update(_make_frame(20))  # slots: [10, 10, 20]
        stacked = buf.update(_make_frame(30))  # slots: [10, 20, 30]
        assert torch.equal(stacked[..., :CHANNELS], _make_frame(10))
        assert torch.equal(stacked[..., CHANNELS : 2 * CHANNELS], _make_frame(20))
        assert torch.equal(stacked[..., 2 * CHANNELS :], _make_frame(30))

    def test_reset_all_envs(self):
        """reset() with no args re-inits every env on the next update."""
        buf = FrameStackBuffer(SINGLE_SHAPE, frame_stack=2, device="cpu")
        buf.update(_make_frame(1))
        buf.update(_make_frame(2))  # ring filled
        buf.reset()  # mark all envs for init
        stacked = buf.update(_make_frame(50))
        # All slots filled with 50.
        assert torch.equal(stacked[..., :CHANNELS], _make_frame(50))
        assert torch.equal(stacked[..., CHANNELS:], _make_frame(50))

    def test_reset_partial_envs_preserves_others(self):
        """Resetting env 0 should re-init only env 0; other envs keep their history."""
        buf = FrameStackBuffer(SINGLE_SHAPE, frame_stack=2, device="cpu")
        buf.update(_make_frame(1))
        buf.update(_make_frame(2))
        buf.reset(torch.tensor([0]))
        stacked = buf.update(_make_frame(9))
        # Env 0: both slots == 9 (init).
        assert torch.equal(stacked[0, ..., :CHANNELS], torch.full((HEIGHT, WIDTH, CHANNELS), 9, dtype=torch.uint8))
        assert torch.equal(stacked[0, ..., CHANNELS:], torch.full((HEIGHT, WIDTH, CHANNELS), 9, dtype=torch.uint8))
        # Env 1: oldest == 2 (ring shifted from previous), newest == 9.
        assert torch.equal(stacked[1, ..., :CHANNELS], torch.full((HEIGHT, WIDTH, CHANNELS), 2, dtype=torch.uint8))
        assert torch.equal(stacked[1, ..., CHANNELS:], torch.full((HEIGHT, WIDTH, CHANNELS), 9, dtype=torch.uint8))

    def test_frame_stack_one_passthrough(self):
        """frame_stack=1 effectively echoes the input (single-slot ring)."""
        buf = FrameStackBuffer(SINGLE_SHAPE, frame_stack=1, device="cpu")
        assert buf.output_shape == SINGLE_SHAPE
        f = _make_frame(42)
        stacked = buf.update(f)
        assert torch.equal(stacked, f)

    def test_invalid_frame_stack_raises(self):
        with pytest.raises(ValueError, match="frame_stack must be >= 1"):
            FrameStackBuffer(SINGLE_SHAPE, frame_stack=0, device="cpu")

    def test_invalid_shape_raises(self):
        with pytest.raises(ValueError, match="at least 2 dims"):
            FrameStackBuffer((10,), frame_stack=2, device="cpu")

    def test_wrong_input_shape_raises(self):
        """update() rejects a frame whose shape doesn't match the construction shape."""
        buf = FrameStackBuffer(SINGLE_SHAPE, frame_stack=2, device="cpu")
        with pytest.raises(ValueError, match="does not match expected"):
            buf.update(torch.zeros((NUM_ENVS, HEIGHT, WIDTH, CHANNELS + 1), dtype=torch.uint8))

    def test_dtype_preserved_uint8(self):
        buf = FrameStackBuffer(SINGLE_SHAPE, frame_stack=2, device="cpu", dtype=torch.uint8)
        stacked = buf.update(_make_frame(5))
        assert stacked.dtype == torch.uint8

    def test_dtype_preserved_float32(self):
        buf = FrameStackBuffer(SINGLE_SHAPE, frame_stack=2, device="cpu", dtype=torch.float32)
        stacked = buf.update(_make_frame(5, dtype=torch.float32))
        assert stacked.dtype == torch.float32

    @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available in this env")
    def test_buffer_on_cuda(self):
        """Buffer allocates and operates correctly on a CUDA device."""
        buf = FrameStackBuffer(SINGLE_SHAPE, frame_stack=2, device="cuda")
        f0 = torch.full(SINGLE_SHAPE, 7, dtype=torch.uint8, device="cuda")
        stacked = buf.update(f0)
        assert stacked.device.type == "cuda"
        assert stacked.shape == (NUM_ENVS, HEIGHT, WIDTH, CHANNELS * 2)
        # Both slots filled with f0 on the init path.
        assert torch.equal(stacked[..., :CHANNELS], f0)
        assert torch.equal(stacked[..., CHANNELS:], f0)
        # Steady-state shift works on CUDA too.
        f1 = torch.full(SINGLE_SHAPE, 13, dtype=torch.uint8, device="cuda")
        stacked = buf.update(f1)
        assert torch.equal(stacked[..., :CHANNELS], f0)
        assert torch.equal(stacked[..., CHANNELS:], f1)

    def test_long_run_ring_stability(self):
        """After many updates exceeding frame_stack cycles, the oldest-to-newest layout stays correct."""
        buf = FrameStackBuffer(SINGLE_SHAPE, frame_stack=3, device="cpu")
        # Push 11 frames with values 0..10. After the last update, the ring slots should
        # hold the 3 most-recent frames: [8, 9, 10] in oldest-to-newest order.
        for i in range(11):
            stacked = buf.update(_make_frame(i))
        assert torch.equal(stacked[..., :CHANNELS], _make_frame(8))
        assert torch.equal(stacked[..., CHANNELS : 2 * CHANNELS], _make_frame(9))
        assert torch.equal(stacked[..., 2 * CHANNELS :], _make_frame(10))

    def test_reset_accepts_python_sequence(self):
        """reset() accepts a plain ``list[int]`` (the type DirectRLEnv hands to ``_reset_idx``)."""
        buf = FrameStackBuffer(SINGLE_SHAPE, frame_stack=2, device="cpu")
        buf.update(_make_frame(1))
        buf.update(_make_frame(2))
        buf.reset([0, 2])
        stacked = buf.update(_make_frame(9))
        per_env_shape = (HEIGHT, WIDTH, CHANNELS)
        nines = torch.full(per_env_shape, 9, dtype=torch.uint8)
        twos = torch.full(per_env_shape, 2, dtype=torch.uint8)
        for env_id in (0, 2):
            assert torch.equal(stacked[env_id, ..., :CHANNELS], nines), f"env {env_id} oldest"
            assert torch.equal(stacked[env_id, ..., CHANNELS:], nines), f"env {env_id} newest"
        for env_id in (1, 3):
            assert torch.equal(stacked[env_id, ..., :CHANNELS], twos), f"env {env_id} oldest"
            assert torch.equal(stacked[env_id, ..., CHANNELS:], nines), f"env {env_id} newest"

    def test_reset_multi_env_subset_preserves_unrelated(self):
        """Resetting envs [0, 2] should re-init only those; envs [1, 3] keep their history."""
        buf = FrameStackBuffer(SINGLE_SHAPE, frame_stack=2, device="cpu")
        buf.update(_make_frame(1))
        buf.update(_make_frame(2))  # ring filled
        buf.reset(torch.tensor([0, 2]))
        stacked = buf.update(_make_frame(9))
        per_env_shape = (HEIGHT, WIDTH, CHANNELS)
        nines = torch.full(per_env_shape, 9, dtype=torch.uint8)
        twos = torch.full(per_env_shape, 2, dtype=torch.uint8)
        # Reset envs: both slots = 9 (init).
        for env_id in (0, 2):
            assert torch.equal(stacked[env_id, ..., :CHANNELS], nines), f"env {env_id} oldest"
            assert torch.equal(stacked[env_id, ..., CHANNELS:], nines), f"env {env_id} newest"
        # Untouched envs: oldest = 2 (shifted from previous newest), newest = 9.
        for env_id in (1, 3):
            assert torch.equal(stacked[env_id, ..., :CHANNELS], twos), f"env {env_id} oldest"
            assert torch.equal(stacked[env_id, ..., CHANNELS:], nines), f"env {env_id} newest"
source/isaaclab_tasks/changelog.d/jmart-frame-stacking.minor.rst (8 additions, 0 deletions):

Added
^^^^^

* Added :class:`~isaaclab_tasks.direct.cartpole.cartpole_camera_presets_env.CartpoleCameraPresetsEnv`,
  a subclass of :class:`~isaaclab_tasks.direct.cartpole.cartpole_camera_env.CartpoleCameraEnv` that
  wires :class:`~isaaclab.envs.utils.FrameStackBuffer` into the ``Isaac-Cartpole-Camera-Presets-Direct-v0``
  task. ``frame_stack`` defaults to ``2`` for the Newton + Warp combo and ``1`` otherwise;
  CLI overrides via ``env.frame_stack=N`` are respected.
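The presets environment itself is not part of the captured diff, so the following is only a hypothetical sketch of the wiring the changelog describes: the class name, the ``_render_rgb`` helper, and the observation dict key are invented for illustration, and only ``FrameStackBuffer``'s API comes from the module above.

import torch

from isaaclab.envs.utils import FrameStackBuffer


class FrameStackedCameraTaskSketch:
    """Shows where a camera task could call update()/reset() on the buffer."""

    def __init__(self, num_envs: int = 2, height: int = 8, width: int = 8, channels: int = 3, frame_stack: int = 2):
        self.device = "cpu"
        self._shape = (num_envs, height, width, channels)
        # frame_stack=1 keeps the buffer as a passthrough, so the wiring can stay unconditional.
        self._frame_stack_buf = FrameStackBuffer(self._shape, frame_stack=frame_stack, device=self.device)
        # Size the policy's image observation from the stacked channel count.
        self.obs_channels = self._frame_stack_buf.output_channels

    def _render_rgb(self) -> torch.Tensor:
        # Stand-in for the task's camera render, one (num_envs, H, W, C) frame per step.
        return torch.randint(0, 255, self._shape, dtype=torch.uint8)

    def _get_observations(self) -> dict[str, torch.Tensor]:
        # Push the newest frame; the returned tensor holds the last N frames, oldest to newest.
        return {"policy": self._frame_stack_buf.update(self._render_rgb())}

    def _reset_idx(self, env_ids) -> None:
        # Re-initialize history for just the reset envs; others keep their frames.
        self._frame_stack_buf.reset(env_ids)


# Usage sketch:
task = FrameStackedCameraTaskSketch(frame_stack=2)
obs = task._get_observations()["policy"]
assert obs.shape[-1] == task.obs_channels
task._reset_idx([0])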
Reviewer comment: I think this would be better placed in https://isaac-sim.github.io/IsaacLab/develop/source/overview/core-concepts/renderers.html since it's a general note around the renderer behavior difference.