From 14cb7bc8d4e4fcf6dbda0bcb0ac5060bcb4f2404 Mon Sep 17 00:00:00 2001 From: Daniela Hasenbring Date: Thu, 30 Apr 2026 16:40:12 -0700 Subject: [PATCH 01/22] Updated Camera to use Warp Arrays instead of Torch --- docs/source/how-to/save_camera_output.rst | 15 +- .../overview/core-concepts/sensors/camera.rst | 34 ++-- scripts/benchmarks/benchmark_cameras.py | 18 +- scripts/demos/sensors/cameras.py | 23 ++- .../04_sensors/run_ray_caster_camera.py | 13 +- .../tutorials/04_sensors/run_usd_camera.py | 11 +- source/isaaclab/config/extension.toml | 2 +- source/isaaclab/docs/CHANGELOG.rst | 53 ++++++ .../isaaclab/envs/mdp/observations.py | 7 +- .../isaaclab/renderers/base_renderer.py | 8 +- .../isaaclab/renderers/output_contract.py | 14 +- .../isaaclab/sensors/camera/camera.py | 134 +++++++++------ .../isaaclab/sensors/camera/camera_data.py | 37 +++-- .../renderers/test_camera_output_contract.py | 34 ++-- source/isaaclab/test/sensors/test_camera.py | 156 +++++++++--------- .../test_first_frame_textured_rendering.py | 5 +- .../test_multi_mesh_ray_caster_camera.py | 24 +-- .../test/sensors/test_multi_tiled_camera.py | 58 +++---- .../test/sensors/test_tiled_camera.py | 6 +- .../test/test_scripts_torcharray_patterns.py | 15 +- source/isaaclab/test/utils/test_math.py | 40 ++++- source/isaaclab_contrib/config/extension.toml | 2 +- source/isaaclab_contrib/docs/CHANGELOG.rst | 10 ++ .../tacsl_sensor/visuotactile_sensor.py | 2 +- source/isaaclab_newton/config/extension.toml | 2 +- source/isaaclab_newton/docs/CHANGELOG.rst | 27 +++ .../renderers/newton_warp_renderer.py | 93 ++++++----- source/isaaclab_ov/config/extension.toml | 2 +- source/isaaclab_ov/docs/CHANGELOG.rst | 12 ++ .../isaaclab_ov/renderers/ovrtx_renderer.py | 83 ++++------ .../test/test_ovrtx_renderer_contract.py | 22 +-- source/isaaclab_physx/config/extension.toml | 2 +- source/isaaclab_physx/docs/CHANGELOG.rst | 14 ++ .../renderers/isaac_rtx_renderer.py | 40 ++--- .../isaaclab_physx/sensors/pva/pva.py | 4 +- source/isaaclab_tasks/config/extension.toml | 2 +- source/isaaclab_tasks/docs/CHANGELOG.rst | 22 +++ .../direct/cartpole/cartpole_camera_env.py | 14 +- .../cartpole_camera/cartpole_camera_env.py | 6 +- .../manipulation/dexsuite/mdp/observations.py | 3 +- .../franka/stack_ik_rel_blueprint_env_cfg.py | 7 +- .../test_visualizer_cartpole_integration.py | 14 +- 42 files changed, 664 insertions(+), 426 deletions(-) diff --git a/docs/source/how-to/save_camera_output.rst b/docs/source/how-to/save_camera_output.rst index 01b404efc0d8..a731ec159c62 100644 --- a/docs/source/how-to/save_camera_output.rst +++ b/docs/source/how-to/save_camera_output.rst @@ -58,14 +58,19 @@ PyTorch operations which allows faster computation. .. code-block:: python + import warp as wp from isaaclab.utils.math import transform_points, unproject_depth - # Pointcloud in world frame - points_3d_cam = unproject_depth( - camera.data.output["distance_to_image_plane"], camera.data.intrinsic_matrices - ) + # Camera ``data.output`` and pose fields are ``wp.array`` values; lift them to torch + # tensors before invoking the torch-based math utilities below. 
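+       # (``wp.to_torch`` gives a zero-copy torch view that shares the Warp array's memory.)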
+ depth = wp.to_torch(camera.data.output["distance_to_image_plane"]) + intrinsics = wp.to_torch(camera.data.intrinsic_matrices) + pos_w = wp.to_torch(camera.data.pos_w) + quat_w_ros = wp.to_torch(camera.data.quat_w_ros) - points_3d_world = transform_points(points_3d_cam, camera.data.pos_w, camera.data.quat_w_ros) + # Pointcloud in world frame + points_3d_cam = unproject_depth(depth, intrinsics) + points_3d_world = transform_points(points_3d_cam, pos_w, quat_w_ros) Alternately, we can use the :meth:`isaaclab.sensors.camera.utils.create_pointcloud_from_depth` function to create a point cloud from the depth image and transform it to the world frame. diff --git a/docs/source/overview/core-concepts/sensors/camera.rst b/docs/source/overview/core-concepts/sensors/camera.rst index 9d7d7e0512c5..654ae69cac26 100644 --- a/docs/source/overview/core-concepts/sensors/camera.rst +++ b/docs/source/overview/core-concepts/sensors/camera.rst @@ -162,8 +162,13 @@ Accessing camera data .. code-block:: python + import warp as wp + tiled_camera = Camera(cfg.tiled_camera) - data = tiled_camera.data.output["rgb"] # shape: (num_cameras, H, W, 3), torch.uint8 + # ``data.output`` entries are ``wp.array`` values (e.g. ``wp.uint8``); use + # :func:`warp.to_torch` when Torch tensor operations are required. + data_wp = tiled_camera.data.output["rgb"] # shape: (num_cameras, H, W, 3), wp.uint8 + data = wp.to_torch(data_wp) # zero-copy torch.uint8 view The returned data has shape ``(num_cameras, height, width, num_channels)``, ready to use directly as an observation in RL training. @@ -207,11 +212,12 @@ RGB and RGBA :figwidth: 100% :alt: A scene captured in RGB -``rgb`` returns a 3-channel RGB image of type ``torch.uint8``, shape ``(B, H, W, 3)``. +``rgb`` returns a 3-channel RGB image of type ``wp.uint8``, shape ``(B, H, W, 3)``. -``rgba`` returns a 4-channel RGBA image of type ``torch.uint8``, shape ``(B, H, W, 4)``. +``rgba`` returns a 4-channel RGBA image of type ``wp.uint8``, shape ``(B, H, W, 4)``. -To convert to ``torch.float32``, divide by 255.0. +Use :func:`warp.to_torch` to obtain a zero-copy ``torch.uint8`` view; divide by ``255.0`` +to convert to ``torch.float32``. Depth and Distances ~~~~~~~~~~~~~~~~~~~ @@ -222,10 +228,10 @@ Depth and Distances :alt: A scene captured as depth ``distance_to_camera`` returns a single-channel depth image with distance to the camera optical -center, shape ``(B, H, W, 1)``, type ``torch.float32``. +center, shape ``(B, H, W, 1)``, type ``wp.float32``. ``distance_to_image_plane`` returns distances of 3D points from the camera plane along the Z-axis, -shape ``(B, H, W, 1)``, type ``torch.float32``. +shape ``(B, H, W, 1)``, type ``wp.float32``. ``depth`` is an alias for ``distance_to_image_plane``. @@ -238,14 +244,14 @@ Normals :alt: A scene captured with surface normals ``normals`` returns local surface normal vectors at each pixel, shape ``(B, H, W, 3)`` containing -``(x, y, z)``, type ``torch.float32``. +``(x, y, z)``, type ``wp.float32``. Motion Vectors ~~~~~~~~~~~~~~ ``motion_vectors`` returns per-pixel motion vectors in image space between frames. Shape ``(B, H, W, 2)``: ``x`` is horizontal motion (positive = left), ``y`` is vertical motion -(positive = up). Type ``torch.float32``. +(positive = up). Type ``wp.float32``. Semantic Segmentation ~~~~~~~~~~~~~~~~~~~~~ @@ -259,8 +265,8 @@ Semantic Segmentation An ``info`` dictionary is available via ``tiled_camera.data.info['semantic_segmentation']``. 
- If ``colorize_semantic_segmentation=True``: 4-channel RGBA image, shape ``(B, H, W, 4)``, - type ``torch.uint8``. The ``idToLabels`` dict maps color to semantic label. -- If ``colorize_semantic_segmentation=False``: shape ``(B, H, W, 1)``, type ``torch.int32``, + type ``wp.uint8``. The ``idToLabels`` dict maps color to semantic label. +- If ``colorize_semantic_segmentation=False``: shape ``(B, H, W, 1)``, type ``wp.int32``, containing semantic IDs. The ``idToLabels`` dict maps ID to label. Instance ID Segmentation @@ -274,9 +280,9 @@ Instance ID Segmentation ``instance_id_segmentation_fast`` outputs per-pixel instance IDs, unique per USD prim path. An ``info`` dictionary is available via ``tiled_camera.data.info['instance_id_segmentation_fast']``. -- If ``colorize_instance_id_segmentation=True``: shape ``(B, H, W, 4)``, type ``torch.uint8``. +- If ``colorize_instance_id_segmentation=True``: shape ``(B, H, W, 4)``, type ``wp.uint8``. The ``idToLabels`` dict maps color to USD prim path. -- If ``colorize_instance_id_segmentation=False``: shape ``(B, H, W, 1)``, type ``torch.int32``. +- If ``colorize_instance_id_segmentation=False``: shape ``(B, H, W, 1)``, type ``wp.int32``. The ``idToLabels`` dict maps instance ID to USD prim path. Instance Segmentation @@ -292,8 +298,8 @@ to the lowest level with semantic labels (unlike ``instance_id_segmentation_fast goes to the leaf prim). An ``info`` dictionary is available via ``tiled_camera.data.info['instance_segmentation_fast']``. -- If ``colorize_instance_segmentation=True``: shape ``(B, H, W, 4)``, type ``torch.uint8``. -- If ``colorize_instance_segmentation=False``: shape ``(B, H, W, 1)``, type ``torch.int32``. +- If ``colorize_instance_segmentation=True``: shape ``(B, H, W, 4)``, type ``wp.uint8``. +- If ``colorize_instance_segmentation=False``: shape ``(B, H, W, 1)``, type ``wp.int32``. The ``idToLabels`` dict maps color to USD prim path. The ``idToSemantics`` dict maps color to semantic label. diff --git a/scripts/benchmarks/benchmark_cameras.py b/scripts/benchmarks/benchmark_cameras.py index 835fb980be0d..ac34e6445645 100644 --- a/scripts/benchmarks/benchmark_cameras.py +++ b/scripts/benchmarks/benchmark_cameras.py @@ -256,6 +256,7 @@ import numpy as np import psutil import torch +import warp as wp import isaaclab.sim as sim_utils from isaaclab.assets import RigidObject, RigidObjectCfg @@ -635,7 +636,7 @@ def run_simulator( # Set camera world poses for camera_list in camera_lists: for camera in camera_list: - num_cameras = camera.data.intrinsic_matrices.size(0) + num_cameras = camera.data.intrinsic_matrices.shape[0] positions = torch.tensor([[2.5, 2.5, 2.5]], device=sim.device).repeat(num_cameras, 1) targets = torch.tensor([[0.0, 0.0, 0.0]], device=sim.device).repeat(num_cameras, 1) camera.set_world_poses_from_view(positions, targets) @@ -675,23 +676,24 @@ def run_simulator( # Only update the camera if it hasn't been updated as part of scene_entities.update ... 
camera.update(dt=sim.get_physics_dt()) + # camera outputs and intrinsics are wp.array; lift to torch for math + collection + intrinsics_torch = wp.to_torch(camera.data.intrinsic_matrices) + for data_type in data_types: data_label = f"{label}_{cam_idx}_{data_type}" + output_torch = wp.to_torch(camera.data.output[data_type]) if depth_predicate(data_type): # is a depth image, want to create cloud - depth = camera.data.output[data_type] + depth = output_torch depth_images[data_label + "_raw"] = depth if perspective_depth_predicate(data_type) and convert_depth_to_camera_to_image_plane: - depth = orthogonalize_perspective_depth( - camera.data.output[data_type], camera.data.intrinsic_matrices - ) + depth = orthogonalize_perspective_depth(output_torch, intrinsics_torch) depth_images[data_label + "_undistorted"] = depth - pointcloud = unproject_depth(depth=depth, intrinsics=camera.data.intrinsic_matrices) + pointcloud = unproject_depth(depth=depth, intrinsics=intrinsics_torch) clouds[data_label] = pointcloud else: # rgb image, just save it - image = camera.data.output[data_type] - images[data_label] = image + images[data_label] = output_torch # End timing for the step step_end_time = time.time() diff --git a/scripts/demos/sensors/cameras.py b/scripts/demos/sensors/cameras.py index 400881ac5d62..fa9b39e89ec9 100644 --- a/scripts/demos/sensors/cameras.py +++ b/scripts/demos/sensors/cameras.py @@ -44,6 +44,7 @@ import matplotlib.pyplot as plt import numpy as np import torch +import warp as wp import isaaclab.sim as sim_utils from isaaclab.assets import ArticulationCfg, AssetBaseCfg @@ -239,8 +240,15 @@ def run_simulator(sim: sim_utils.SimulationContext, scene: InteractiveScene): # save every 10th image (for visualization purposes only) # note: saving images will slow down the simulation if count % 10 == 0: + # camera outputs are wp.array; lift to torch for image saving / matplotlib + camera_rgb = wp.to_torch(scene["camera"].data.output["rgb"]) + tiled_rgb = wp.to_torch(scene["tiled_camera"].data.output["rgb"]) + camera_depth = wp.to_torch(scene["camera"].data.output["distance_to_image_plane"]) + tiled_depth = wp.to_torch(scene["tiled_camera"].data.output["distance_to_image_plane"]) + raycast_depth = wp.to_torch(scene["raycast_camera"].data.output["distance_to_image_plane"]) + # compare generated RGB images across different cameras - rgb_images = [scene["camera"].data.output["rgb"][0, ..., :3], scene["tiled_camera"].data.output["rgb"][0]] + rgb_images = [camera_rgb[0, ..., :3], tiled_rgb[0]] save_images_grid( rgb_images, subtitles=["Camera"], @@ -250,9 +258,9 @@ def run_simulator(sim: sim_utils.SimulationContext, scene: InteractiveScene): # compare generated Depth images across different cameras depth_images = [ - scene["camera"].data.output["distance_to_image_plane"][0], - scene["tiled_camera"].data.output["distance_to_image_plane"][0, ..., 0], - scene["raycast_camera"].data.output["distance_to_image_plane"][0], + camera_depth[0], + tiled_depth[0, ..., 0], + raycast_depth[0], ] save_images_grid( depth_images, @@ -263,16 +271,15 @@ def run_simulator(sim: sim_utils.SimulationContext, scene: InteractiveScene): ) # save all tiled RGB images - tiled_images = scene["tiled_camera"].data.output["rgb"] save_images_grid( - tiled_images, - subtitles=[f"Cam{i}" for i in range(tiled_images.shape[0])], + tiled_rgb, + subtitles=[f"Cam{i}" for i in range(tiled_rgb.shape[0])], title="Tiled RGB Image", filename=os.path.join(output_dir, "tiled_rgb", f"{count:04d}.jpg"), ) # save all camera RGB images - cam_images = 
scene["camera"].data.output["rgb"][..., :3] + cam_images = camera_rgb[..., :3] save_images_grid( cam_images, subtitles=[f"Cam{i}" for i in range(cam_images.shape[0])], diff --git a/scripts/tutorials/04_sensors/run_ray_caster_camera.py b/scripts/tutorials/04_sensors/run_ray_caster_camera.py index e19b0ebfa4df..b2664aaeb9b7 100644 --- a/scripts/tutorials/04_sensors/run_ray_caster_camera.py +++ b/scripts/tutorials/04_sensors/run_ray_caster_camera.py @@ -38,6 +38,7 @@ import os import torch +import warp as wp import omni.replicator.core as rep @@ -144,17 +145,17 @@ def run_simulator(sim: sim_utils.SimulationContext, scene_entities: dict): rep_output["trigger_outputs"] = {"on_time": camera.frame[camera_index]} rep_writer.write(rep_output) - # Pointcloud in world frame - points_3d_cam = unproject_depth( - camera.data.output["distance_to_image_plane"], camera.data.intrinsic_matrices - ) + # Pointcloud in world frame; convert wp.array camera outputs to torch for math ops + depth_torch = wp.to_torch(camera.data.output["distance_to_image_plane"]) + intrinsics_torch = wp.to_torch(camera.data.intrinsic_matrices) + points_3d_cam = unproject_depth(depth_torch, intrinsics_torch) # Check methods are valid im_height, im_width = camera.image_shape # -- project points to (u, v, d) - reproj_points = project_points(points_3d_cam, camera.data.intrinsic_matrices) + reproj_points = project_points(points_3d_cam, intrinsics_torch) reproj_depths = reproj_points[..., -1].view(-1, im_width, im_height).transpose_(1, 2) - sim_depths = camera.data.output["distance_to_image_plane"].squeeze(-1) + sim_depths = depth_torch.squeeze(-1) torch.testing.assert_close(reproj_depths, sim_depths) diff --git a/scripts/tutorials/04_sensors/run_usd_camera.py b/scripts/tutorials/04_sensors/run_usd_camera.py index 5c041d737c5d..c9b9b3be60d8 100644 --- a/scripts/tutorials/04_sensors/run_usd_camera.py +++ b/scripts/tutorials/04_sensors/run_usd_camera.py @@ -65,6 +65,7 @@ import numpy as np import torch +import warp as wp from isaaclab_physx.renderers import IsaacRtxRendererCfg import omni.replicator.core as rep @@ -254,12 +255,12 @@ def run_simulator(sim: sim_utils.SimulationContext, scene_entities: dict): and args_cli.draw and "distance_to_image_plane" in camera.data.output.keys() ): - # Derive pointcloud from camera at camera_index + # Derive pointcloud from camera at camera_index; lift wp.array fields to torch pointcloud = create_pointcloud_from_depth( - intrinsic_matrix=camera.data.intrinsic_matrices[camera_index], - depth=camera.data.output["distance_to_image_plane"][camera_index], - position=camera.data.pos_w[camera_index], - orientation=camera.data.quat_w_ros[camera_index], + intrinsic_matrix=wp.to_torch(camera.data.intrinsic_matrices)[camera_index], + depth=wp.to_torch(camera.data.output["distance_to_image_plane"])[camera_index], + position=wp.to_torch(camera.data.pos_w)[camera_index], + orientation=wp.to_torch(camera.data.quat_w_ros)[camera_index], device=sim.device, ) diff --git a/source/isaaclab/config/extension.toml b/source/isaaclab/config/extension.toml index 551e1ff12bf5..322e44e742c9 100644 --- a/source/isaaclab/config/extension.toml +++ b/source/isaaclab/config/extension.toml @@ -1,7 +1,7 @@ [package] # Note: Semantic Versioning is used: https://semver.org/ -version = "4.6.22" +version = "4.6.23" # Description title = "Isaac Lab framework for Robot Learning" diff --git a/source/isaaclab/docs/CHANGELOG.rst b/source/isaaclab/docs/CHANGELOG.rst index 0731a7b2d3d5..d159ae64b7e1 100644 --- a/source/isaaclab/docs/CHANGELOG.rst 
+++ b/source/isaaclab/docs/CHANGELOG.rst
@@ -1,6 +1,59 @@
 Changelog
 ---------
 
+4.6.23 (2026-04-30)
+~~~~~~~~~~~~~~~~~~~
+
+Fixed
+^^^^^
+
+* Fixed :class:`~isaaclab.envs.mdp.observations.image` (and the equivalent
+  ``image()`` helper in the Franka stack ``stack_ik_rel_blueprint`` config) so
+  that Torch tensor operations are applied via :func:`warp.to_torch` rather than
+  invoked directly on the new ``wp.array`` camera outputs.
+* Fixed downstream consumers (camera tutorials, ``demos/sensors/cameras.py``,
+  ``benchmarks/benchmark_cameras.py``, dexsuite ``vision_camera`` observation,
+  visualizer integration test, ``save_camera_output`` how-to, and the camera
+  overview docs) to lift ``wp.array`` camera fields to torch tensors via
+  :func:`warp.to_torch` before performing Torch operations.
+* Fixed the camera and renderer test suites to use ``wp.array`` dtypes
+  (``wp.uint8``, ``wp.float32``, ``wp.int32``) and :func:`warp.to_torch` views
+  in assertions on ``CameraData.output``, ``CameraData.intrinsic_matrices``,
+  ``CameraData.pos_w``/``quat_w_*``, and ``Camera.frame``.
+* Fixed :class:`~isaaclab.renderers.NewtonWarpRenderer` to populate the ``rgb``
+  output buffer when both ``rgb`` and ``rgba`` are requested, restoring the
+  legacy "rgb mirrors rgba" behavior that broke when ``rgb`` and ``rgba``
+  became independent ``wp.array`` allocations.
+
+Changed
+^^^^^^^
+
+* Tightened the :class:`~isaaclab.renderers.RenderBufferSpec` ``dtype`` annotation
+  from ``torch.dtype`` to ``type`` to document that all renderers must publish Warp
+  scalar dtype classes (e.g. ``warp.uint8``).
+* Removed the transitional ``torch.dtype → wp.dtype`` shim in
+  :meth:`~isaaclab.sensors.camera.CameraData.allocate` now that all in-tree
+  renderers publish ``wp`` dtypes via :class:`~isaaclab.renderers.RenderBufferSpec`.
+* Documented the transitional Torch input fallback on
+  :func:`~isaaclab.utils.math.convert_camera_frame_orientation_convention` and
+  consolidated the redundant ``wp ↔ torch`` round-trips in
+  :meth:`~isaaclab.sensors.camera.Camera.set_world_poses` and
+  :meth:`~isaaclab.sensors.camera.Camera.set_world_poses_from_view`.
+* Changed :class:`~isaaclab.sensors.camera.CameraData` array fields and
+  :attr:`~isaaclab.sensors.camera.CameraData.output` buffers to expose
+  ``wp.array`` values instead of :class:`torch.Tensor` values. Use
+  :func:`warp.to_torch` where Torch tensor operations are required.
+* Changed :class:`~isaaclab.sensors.camera.Camera` pose, intrinsic, and frame
+  array APIs to accept or return ``wp.array`` values instead of
+  :class:`torch.Tensor` values. Existing Torch inputs are still accepted during
+  the transition; prefer ``wp.array`` at public call sites.
+* Changed :class:`~isaaclab.renderers.BaseRenderer` output and camera-update
+  APIs to exchange ``wp.array`` buffers with camera sensors.
+* Changed :func:`~isaaclab.utils.math.convert_camera_frame_orientation_convention`
+  to accept and return ``wp.array`` quaternion arrays. Use :func:`warp.to_torch`
+  where Torch tensor operations are required.
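+
+  A minimal migration sketch for this pattern (``camera`` here stands for any
+  initialized camera sensor):
+
+  .. code-block:: python
+
+     import warp as wp
+     from isaaclab.utils.math import convert_camera_frame_orientation_convention
+
+     # wp.array in, wp.array out under the new contract
+     quat_opengl = convert_camera_frame_orientation_convention(
+         camera.data.quat_w_world, origin="world", target="opengl"
+     )
+     # lift to a zero-copy torch view when Torch tensor operations are required
+     quat_torch = wp.to_torch(quat_opengl)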
+ + 4.6.22 (2026-04-27) ~~~~~~~~~~~~~~~~~~~ diff --git a/source/isaaclab/isaaclab/envs/mdp/observations.py b/source/isaaclab/isaaclab/envs/mdp/observations.py index 4fc24fc5b944..d0ec31065e35 100644 --- a/source/isaaclab/isaaclab/envs/mdp/observations.py +++ b/source/isaaclab/isaaclab/envs/mdp/observations.py @@ -14,6 +14,7 @@ from typing import TYPE_CHECKING import torch +import warp as wp import isaaclab.utils.math as math_utils from isaaclab.managers import SceneEntityCfg @@ -402,12 +403,12 @@ def image( # extract the used quantities (to enable type-hinting) sensor: Camera | RayCasterCamera = env.scene.sensors[sensor_cfg.name] - # obtain the input image - images = sensor.data.output[data_type] + # obtain the input image; camera outputs are wp.array, lift to torch for tensor ops + images = wp.to_torch(sensor.data.output[data_type]) # depth image conversion if (data_type == "distance_to_camera") and convert_perspective_to_orthogonal: - images = math_utils.orthogonalize_perspective_depth(images, sensor.data.intrinsic_matrices) + images = math_utils.orthogonalize_perspective_depth(images, wp.to_torch(sensor.data.intrinsic_matrices)) # rgb/depth/normals image normalization if normalize: diff --git a/source/isaaclab/isaaclab/renderers/base_renderer.py b/source/isaaclab/isaaclab/renderers/base_renderer.py index a8cba01ca6c6..d43d881d97fd 100644 --- a/source/isaaclab/isaaclab/renderers/base_renderer.py +++ b/source/isaaclab/isaaclab/renderers/base_renderer.py @@ -13,7 +13,7 @@ from .output_contract import RenderBufferKind, RenderBufferSpec if TYPE_CHECKING: - import torch + import warp as wp from isaaclab.sensors import SensorBase from isaaclab.sensors.camera.camera_data import CameraData @@ -65,13 +65,13 @@ def create_render_data(self, sensor: SensorBase) -> Any: pass @abstractmethod - def set_outputs(self, render_data: Any, output_data: dict[str, torch.Tensor]) -> None: + def set_outputs(self, render_data: Any, output_data: dict[str, wp.array]) -> None: """Store reference to output buffers for writing during render. Args: render_data: The render data object from :meth:`create_render_data`. output_data: Dictionary mapping output names (e.g. ``"rgb"``, ``"depth"``) - to pre-allocated tensors where rendered data will be written. + to pre-allocated Warp arrays where rendered data will be written. """ pass @@ -85,7 +85,7 @@ def update_transforms(self) -> None: @abstractmethod def update_camera( - self, render_data: Any, positions: torch.Tensor, orientations: torch.Tensor, intrinsics: torch.Tensor + self, render_data: Any, positions: wp.array, orientations: wp.array, intrinsics: wp.array ) -> None: """Update camera poses and intrinsics for the next render. diff --git a/source/isaaclab/isaaclab/renderers/output_contract.py b/source/isaaclab/isaaclab/renderers/output_contract.py index bfa3fff41d8b..8a9e78bd4d67 100644 --- a/source/isaaclab/isaaclab/renderers/output_contract.py +++ b/source/isaaclab/isaaclab/renderers/output_contract.py @@ -14,10 +14,6 @@ from dataclasses import dataclass from enum import StrEnum -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - import torch class RenderBufferKind(StrEnum): @@ -48,7 +44,11 @@ class RenderBufferSpec: """Per-pixel layout (channels + dtype) for one render buffer kind.""" channels: int - """Number of per-pixel channels (last dimension of the allocated tensor).""" + """Number of per-pixel channels (last dimension of the allocated array).""" + + dtype: type + """Warp dtype the renderer writes for this render buffer kind. 
- dtype: torch.dtype - """Torch dtype the renderer writes for this render buffer kind.""" + Must be a Warp scalar dtype class (e.g. ``warp.uint8``, ``warp.float32``, + ``warp.int32``) accepted by :func:`warp.zeros`. + """ diff --git a/source/isaaclab/isaaclab/sensors/camera/camera.py b/source/isaaclab/isaaclab/sensors/camera/camera.py index 2244d191db72..1243788eed7e 100644 --- a/source/isaaclab/isaaclab/sensors/camera/camera.py +++ b/source/isaaclab/isaaclab/sensors/camera/camera.py @@ -168,7 +168,7 @@ def data(self) -> CameraData: return self._data @property - def frame(self) -> torch.tensor: + def frame(self) -> wp.array: """Frame number when the measurement took place.""" return self._frame @@ -182,7 +182,7 @@ def image_shape(self) -> tuple[int, int]: """ def set_intrinsic_matrices( - self, matrices: torch.Tensor, focal_length: float | None = None, env_ids: Sequence[int] | None = None + self, matrices: wp.array, focal_length: float | None = None, env_ids: Sequence[int] | wp.array | None = None ): """Set parameters of the USD camera from its intrinsic matrix. @@ -209,11 +209,17 @@ def set_intrinsic_matrices( # resolve env_ids if env_ids is None: env_ids = self._ALL_INDICES - # convert matrices to numpy tensors - if isinstance(matrices, torch.Tensor): + elif isinstance(env_ids, wp.array): + env_ids = env_ids.numpy() + + # convert matrices to numpy arrays for USD helpers + if isinstance(matrices, wp.array): + matrices = matrices.numpy() + elif isinstance(matrices, torch.Tensor): matrices = matrices.cpu().numpy() else: matrices = np.asarray(matrices, dtype=float) + # iterate over env_ids for i, intrinsic_matrix in zip(env_ids, matrices): height, width = self.image_shape @@ -244,9 +250,9 @@ def set_intrinsic_matrices( def set_world_poses( self, - positions: torch.Tensor | None = None, - orientations: torch.Tensor | None = None, - env_ids: Sequence[int] | None = None, + positions: wp.array | None = None, + orientations: wp.array | None = None, + env_ids: Sequence[int] | wp.array | None = None, convention: Literal["opengl", "ros", "world"] = "ros", ): r"""Set the pose of the camera w.r.t. the world frame using specified convention. @@ -272,35 +278,39 @@ def set_world_poses( Raises: RuntimeError: If the camera prim is not set. Need to call :meth:`initialize` method first. 
""" - # resolve env_ids + # resolve env_ids (None defaults to all sensors) if env_ids is None: env_ids = self._ALL_INDICES - # convert to backend tensor + + # convert to backend warp array if positions is not None: - if isinstance(positions, np.ndarray): - positions = torch.from_numpy(positions).to(device=self._device) - elif not isinstance(positions, torch.Tensor): - positions = torch.tensor(positions, device=self._device) + if isinstance(positions, torch.Tensor): + positions = wp.from_torch(positions, dtype=wp.vec3, device=self._device) + elif not isinstance(positions, wp.array): + positions = wp.array(positions, dtype=wp.vec3, device=self._device) + # convert rotation matrix from input convention to OpenGL if orientations is not None: - if isinstance(orientations, np.ndarray): - orientations = torch.from_numpy(orientations).to(device=self._device) - elif not isinstance(orientations, torch.Tensor): - orientations = torch.tensor(orientations, device=self._device) - orientations = convert_camera_frame_orientation_convention(orientations, origin=convention, target="opengl") - # convert torch tensors to warp arrays for the view - pos_wp = wp.from_torch(positions.contiguous()) if positions is not None else None - ori_wp = wp.from_torch(orientations.contiguous()) if orientations is not None else None + if isinstance(orientations, torch.Tensor): + orientations = wp.from_torch(orientations, dtype=wp.quat, device=self._device) + elif not isinstance(orientations, wp.array): + orientations = wp.array(orientations, dtype=wp.quat, device=self._device) + orientations = wp.from_torch( + convert_camera_frame_orientation_convention( + wp.to_torch(orientations), origin=convention, target="opengl" + ) + ) + if env_ids is not None: - if not isinstance(env_ids, torch.Tensor): - env_ids = torch.tensor(env_ids, dtype=torch.int32, device=self._device) - idx_wp = wp.from_torch(env_ids.to(dtype=torch.int32), dtype=wp.int32) - else: - idx_wp = None - self._view.set_world_poses(pos_wp, ori_wp, idx_wp) + if isinstance(env_ids, torch.Tensor): + env_ids = wp.from_torch(env_ids, dtype=wp.int32, device=self._device) + elif not isinstance(env_ids, wp.array): + env_ids = wp.array(env_ids, dtype=wp.int32, device=self._device) + + self._view.set_world_poses(positions, orientations, env_ids) def set_world_poses_from_view( - self, eyes: torch.Tensor, targets: torch.Tensor, env_ids: Sequence[int] | None = None + self, eyes: wp.array, targets: wp.array, env_ids: Sequence[int] | wp.array | None = None ): """Set the poses of the camera from the eye position and look-at target position. 
@@ -319,11 +329,16 @@
         # get up axis of current stage
         up_axis = UsdGeom.GetStageUpAxis(self.stage)
         # set camera poses using the view
-        orientations = quat_from_matrix(create_rotation_matrix_from_view(eyes, targets, up_axis, device=self._device))
-        if not isinstance(env_ids, torch.Tensor):
-            env_ids = torch.tensor(env_ids, dtype=torch.int32, device=self._device)
-        idx_wp = wp.from_torch(env_ids.to(dtype=torch.int32), dtype=wp.int32)
-        self._view.set_world_poses(wp.from_torch(eyes.contiguous()), wp.from_torch(orientations.contiguous()), idx_wp)
+        # accept torch tensors transitionally; wp.from_torch shares the tensor's memory
+        if isinstance(eyes, torch.Tensor):
+            eyes = wp.from_torch(eyes.contiguous(), dtype=wp.vec3)
+        if isinstance(targets, torch.Tensor):
+            targets = wp.from_torch(targets.contiguous(), dtype=wp.vec3)
+        orientations = quat_from_matrix(
+            create_rotation_matrix_from_view(wp.to_torch(eyes), wp.to_torch(targets), up_axis, device=self._device)
+        )
+        orientations = wp.from_torch(orientations.contiguous(), dtype=wp.quat)
+
+        if isinstance(env_ids, torch.Tensor):
+            env_ids = wp.from_torch(env_ids.to(dtype=torch.int32).contiguous(), dtype=wp.int32)
+        elif not isinstance(env_ids, wp.array):
+            env_ids = wp.array(env_ids, dtype=wp.int32, device=self._device)
+
+        self._view.set_world_poses(eyes, orientations, env_ids)
 
     """
     Operations
@@ -338,14 +353,15 @@ def reset(self, env_ids: Sequence[int] | None = None, env_mask: wp.array | None
         super().reset(env_ids, env_mask)
         # resolve to indices for torch indexing
         if env_ids is None and env_mask is not None:
             env_ids = wp.to_torch(env_mask).nonzero(as_tuple=False).squeeze(-1)
         elif env_ids is None:
             env_ids = self._ALL_INDICES
         # reset the data
         # note: this recomputation is useful if one performs events such as randomizations on the camera poses.
         self._update_poses(env_ids)
         # Reset the frame count
-        self._frame[env_ids] = 0
+        # torch views cannot be indexed with wp.array indices, so lift the indices too
+        index = wp.to_torch(env_ids) if isinstance(env_ids, wp.array) else env_ids
+        frame_torch = wp.to_torch(self._frame)
+        frame_torch[index] = 0
 
     """
     Implementation.
@@ -385,9 +401,9 @@ def _initialize_impl(self):
         )
 
         # Create all env_ids buffer
-        self._ALL_INDICES = torch.arange(self._view.count, device=self._device, dtype=torch.long)
+        self._ALL_INDICES = wp.array(np.arange(self._view.count), device=self._device, dtype=wp.int32)
         # Create frame count buffer
-        self._frame = torch.zeros(self._view.count, device=self._device, dtype=torch.long)
+        self._frame = wp.zeros(self._view.count, device=self._device, dtype=wp.int64)
 
         # Convert all encapsulated prims to Camera
         for cam_prim in self._view.prims:
@@ -409,8 +425,11 @@ def _update_buffers_impl(self, env_mask: wp.array):
         env_ids = wp.to_torch(env_mask).nonzero(as_tuple=False).squeeze(-1)
         if len(env_ids) == 0:
             return
+
         # Increment frame count
-        self._frame[env_ids] += 1
+        frame_torch = wp.to_torch(self._frame)
+        frame_torch[env_ids] += 1
+
        # update latest camera pose if requested
         if self.cfg.update_latest_camera_pose:
             self._update_poses(env_ids)
@@ -427,7 +446,7 @@ def _check_supported_data_types(self, cfg: CameraCfg):
         """Checks if the data types are supported by the ray-caster camera."""
         # check if there is any intersection in unsupported types
-        # reason: these use np structured data types which we can't yet convert to torch tensor
+        # reason: these use np structured data types which we can't yet convert to Warp arrays
         common_elements = set(cfg.data_types) & Camera.UNSUPPORTED_TYPES
         if common_elements:
             # provide alternative fast counterparts
@@ -439,7 +458,7 @@
             raise ValueError(
                 f"Camera class does not support the following sensor types: {common_elements}."
                 "\n\tThis is because these sensor types output numpy structured data types which"
-                "can't be converted to torch tensors easily." 
+ " can't be converted to Warp arrays easily." "\n\tHint: If you need to work with these sensor types, we recommend using their fast counterparts." f"\n\t\tFast counterparts: {fast_common_elements}" ) @@ -474,14 +493,14 @@ def _create_buffers(self): ) # Camera-frame state (pose / intrinsics) is owned by the camera, not # the renderer: populate it on the freshly constructed ``CameraData``. - self._data.intrinsic_matrices = torch.zeros((self._view.count, 3, 3), device=self._device) + self._data.intrinsic_matrices = wp.zeros((self._view.count, 3, 3), dtype=wp.float32, device=self._device) self._update_intrinsic_matrices(self._ALL_INDICES) - self._data.pos_w = torch.zeros((self._view.count, 3), device=self._device) - self._data.quat_w_world = torch.zeros((self._view.count, 4), device=self._device) + self._data.pos_w = wp.zeros((self._view.count, 3), dtype=wp.float32, device=self._device) + self._data.quat_w_world = wp.zeros((self._view.count, 4), dtype=wp.float32, device=self._device) self._update_poses(self._ALL_INDICES) self._renderer.set_outputs(self._render_data, self._data.output) - def _update_intrinsic_matrices(self, env_ids: Sequence[int]): + def _update_intrinsic_matrices(self, env_ids: Sequence[int] | wp.array): """Compute camera's matrix of intrinsic parameters. Also called calibration matrix. This matrix works for linear depth images. We assume square pixels. @@ -491,6 +510,9 @@ def _update_intrinsic_matrices(self, env_ids: Sequence[int]): The coordinates of points on the image plane are in the homogeneous representation. """ # iterate over all cameras + if isinstance(env_ids, wp.array): + env_ids = wp.to_torch(env_ids) + intrinsic_matrices = wp.to_torch(self._data.intrinsic_matrices) for i in env_ids: # Get corresponding sensor prim sensor_prim = self._sensor_prims[i] @@ -507,13 +529,13 @@ def _update_intrinsic_matrices(self, env_ids: Sequence[int]): c_x = width * 0.5 c_y = height * 0.5 # create intrinsic matrix for depth linear - self._data.intrinsic_matrices[i, 0, 0] = f_x - self._data.intrinsic_matrices[i, 0, 2] = c_x - self._data.intrinsic_matrices[i, 1, 1] = f_y - self._data.intrinsic_matrices[i, 1, 2] = c_y - self._data.intrinsic_matrices[i, 2, 2] = 1 + intrinsic_matrices[i, 0, 0] = f_x + intrinsic_matrices[i, 0, 2] = c_x + intrinsic_matrices[i, 1, 1] = f_y + intrinsic_matrices[i, 1, 2] = c_y + intrinsic_matrices[i, 2, 2] = 1 - def _update_poses(self, env_ids: Sequence[int]): + def _update_poses(self, env_ids: Sequence[int] | wp.array): """Computes the pose of the camera in the world frame with ROS convention. This methods uses the ROS convention to resolve the input pose. In this convention, @@ -527,13 +549,17 @@ def _update_poses(self, env_ids: Sequence[int]): raise RuntimeError("Camera prim is None. 
Please call 'sim.play()' first.") # get the poses from the view (returns ProxyArray, use .torch for tensor access) - if env_ids is not None and not isinstance(env_ids, torch.Tensor): + if isinstance(env_ids, wp.array): + env_ids = wp.to_torch(env_ids) + elif env_ids is not None and not isinstance(env_ids, torch.Tensor): env_ids = torch.tensor(env_ids, dtype=torch.int32, device=self._device) indices = wp.from_torch(env_ids.to(dtype=torch.int32), dtype=wp.int32) if env_ids is not None else None pos_w, quat_w = self._view.get_world_poses(indices) - self._data.pos_w[env_ids] = pos_w.torch - self._data.quat_w_world[env_ids] = convert_camera_frame_orientation_convention( - quat_w.torch, origin="opengl", target="world" + pos_w_torch = wp.to_torch(self._data.pos_w) + quat_w_world_torch = wp.to_torch(self._data.quat_w_world) + pos_w_torch[env_ids] = pos_w.torch + quat_w_world_torch[env_ids] = convert_camera_frame_orientation_convention( + wp.to_torch(quat_w.warp), origin="opengl", target="world" ) # notify renderer of updated poses (guarded in case called before initialization completes) if self._render_data is not None: diff --git a/source/isaaclab/isaaclab/sensors/camera/camera_data.py b/source/isaaclab/isaaclab/sensors/camera/camera_data.py index 6ec5484531b2..62f436175d2b 100644 --- a/source/isaaclab/isaaclab/sensors/camera/camera_data.py +++ b/source/isaaclab/isaaclab/sensors/camera/camera_data.py @@ -8,7 +8,7 @@ from dataclasses import dataclass from typing import Any -import torch +import warp as wp # Re-exported as part of the public isaaclab.sensors.camera API from isaaclab.renderers.output_contract import RenderBufferKind, RenderBufferSpec @@ -25,13 +25,13 @@ class CameraData: # Frame state. ## - pos_w: torch.Tensor = None + pos_w: wp.array = None """Position of the sensor origin in world frame, following ROS convention. Shape is (N, 3) where N is the number of sensors. """ - quat_w_world: torch.Tensor = None + quat_w_world: wp.array = None """Quaternion orientation `(x, y, z, w)` of the sensor origin in world frame, following the world coordinate frame .. note:: @@ -47,13 +47,13 @@ class CameraData: image_shape: tuple[int, int] = None """A tuple containing (height, width) of the camera sensor.""" - intrinsic_matrices: torch.Tensor = None + intrinsic_matrices: wp.array = None """The intrinsic matrices for the camera. Shape is (N, 3, 3) where N is the number of sensors. """ - output: dict[str, torch.Tensor] = None + output: dict[str, wp.array] = None """The retrieved sensor data with sensor types as key. The format of the data is available in the `Replicator Documentation`_. For semantic-based data, @@ -77,12 +77,12 @@ def allocate( height: int, width: int, num_views: int, - device: torch.device | str, + device: str, supported_specs: dict[RenderBufferKind, RenderBufferSpec], ) -> CameraData: """Build a :class:`CameraData` with output buffers pre-allocated. - Allocates one ``(num_views, height, width, channels)`` tensor per kind + Allocates one ``(num_views, height, width, channels)`` Warp array per kind in the intersection of ``data_types`` and ``supported_specs``, using the channels and dtype from each :class:`RenderBufferSpec`. @@ -92,7 +92,7 @@ def allocate( height: Image height in pixels. width: Image width in pixels. num_views: Number of camera views (batch dimension). - device: Torch device on which to allocate the buffers. + device: Device on which to allocate the buffers. supported_specs: Per-buffer layout the active renderer can produce, keyed by :class:`RenderBufferKind`. 
Names absent from this mapping are not allocated. @@ -124,19 +124,20 @@ def allocate( if rgb_alias: requested.update({RenderBufferKind.RGB, RenderBufferKind.RGBA}) - buffers: dict[str, torch.Tensor] = {} + buffers: dict[str, wp.array] = {} for name, spec in supported_specs.items(): if name not in requested: continue if rgb_alias and name == RenderBufferKind.RGB: continue - buffers[str(name)] = torch.zeros( + buffers[str(name)] = wp.zeros( (num_views, height, width, spec.channels), dtype=spec.dtype, device=device, - ).contiguous() + ) if rgb_alias: - buffers[str(RenderBufferKind.RGB)] = buffers[str(RenderBufferKind.RGBA)][..., :3] + torch_rgba = wp.to_torch(buffers[str(RenderBufferKind.RGBA)]) + buffers[str(RenderBufferKind.RGB)] = wp.from_torch(torch_rgba[..., :3]) return cls( image_shape=(height, width), @@ -149,7 +150,7 @@ def allocate( ## @property - def quat_w_ros(self) -> torch.Tensor: + def quat_w_ros(self) -> wp.array: """Quaternion orientation `(x, y, z, w)` of the sensor origin in the world frame, following ROS convention. .. note:: @@ -157,10 +158,12 @@ def quat_w_ros(self) -> torch.Tensor: Shape is (N, 4) where N is the number of sensors. """ - return convert_camera_frame_orientation_convention(self.quat_w_world, origin="world", target="ros") + return wp.from_torch( + convert_camera_frame_orientation_convention(wp.to_torch(self.quat_w_world), origin="world", target="ros") + ) @property - def quat_w_opengl(self) -> torch.Tensor: + def quat_w_opengl(self) -> wp.array: """Quaternion orientation `(x, y, z, w)` of the sensor origin in the world frame, following Opengl / USD Camera convention. @@ -169,4 +172,6 @@ def quat_w_opengl(self) -> torch.Tensor: Shape is (N, 4) where N is the number of sensors. """ - return convert_camera_frame_orientation_convention(self.quat_w_world, origin="world", target="opengl") + return wp.from_torch( + convert_camera_frame_orientation_convention(wp.to_torch(self.quat_w_world), origin="world", target="opengl") + ) diff --git a/source/isaaclab/test/renderers/test_camera_output_contract.py b/source/isaaclab/test/renderers/test_camera_output_contract.py index 2d6087d29708..c7b188b2903c 100644 --- a/source/isaaclab/test/renderers/test_camera_output_contract.py +++ b/source/isaaclab/test/renderers/test_camera_output_contract.py @@ -8,7 +8,7 @@ import warnings import pytest -import torch +import warp as wp pytest.importorskip("isaaclab_physx") @@ -144,14 +144,14 @@ def _make_camera_cfg(data_types: list[str]) -> CameraCfg: ) -def test_camera_data_allocates_supported_subset_and_aliases_rgb(): - """CameraData allocates the intersection of requested + supported and aliases rgb into rgba.""" +def test_camera_data_allocates_supported_subset_and_rgb(): + """CameraData allocates the intersection of requested + supported output arrays.""" cfg = _make_camera_cfg(["rgb", "rgba", "depth"]) specs = { - RenderBufferKind.RGBA: RenderBufferSpec(4, torch.uint8), - RenderBufferKind.RGB: RenderBufferSpec(3, torch.uint8), - RenderBufferKind.DEPTH: RenderBufferSpec(1, torch.float32), - RenderBufferKind.NORMALS: RenderBufferSpec(3, torch.float32), + RenderBufferKind.RGBA: RenderBufferSpec(4, wp.uint8), + RenderBufferKind.RGB: RenderBufferSpec(3, wp.uint8), + RenderBufferKind.DEPTH: RenderBufferSpec(1, wp.float32), + RenderBufferKind.NORMALS: RenderBufferSpec(3, wp.float32), } data = CameraData.allocate( data_types=cfg.data_types, height=8, width=16, num_views=2, device="cpu", supported_specs=specs @@ -159,10 +159,10 @@ def 
test_camera_data_allocates_supported_subset_and_aliases_rgb(): assert set(data.output.keys()) == {"rgba", "rgb", "depth"} assert data.output["rgba"].shape == (2, 8, 16, 4) - assert data.output["rgba"].dtype == torch.uint8 + assert data.output["rgba"].dtype == wp.uint8 assert data.output["depth"].shape == (2, 8, 16, 1) - assert data.output["depth"].dtype == torch.float32 - assert data.output["rgb"].data_ptr() == data.output["rgba"].data_ptr() + assert data.output["depth"].dtype == wp.float32 + assert isinstance(data.output["rgb"], wp.array) assert data.image_shape == (8, 16) assert data.info == {"rgba": None, "rgb": None, "depth": None} @@ -171,8 +171,8 @@ def test_camera_data_drops_requested_types_not_in_supported_specs(): """Requested types absent from supported_specs are absent from data.output.""" cfg = _make_camera_cfg(["rgb", "normals"]) specs = { - RenderBufferKind.RGBA: RenderBufferSpec(4, torch.uint8), - RenderBufferKind.RGB: RenderBufferSpec(3, torch.uint8), + RenderBufferKind.RGBA: RenderBufferSpec(4, wp.uint8), + RenderBufferKind.RGB: RenderBufferSpec(3, wp.uint8), } data = CameraData.allocate( data_types=cfg.data_types, height=4, width=4, num_views=1, device="cpu", supported_specs=specs @@ -196,8 +196,8 @@ def test_camera_data_no_arg_construction_yields_empty_container(): def test_camera_data_segmentation_dtype_follows_supported_spec(): """CameraData consumes the layout dtype declared by the renderer spec.""" cfg = _make_camera_cfg(["instance_segmentation_fast"]) - raw_specs = {RenderBufferKind.INSTANCE_SEGMENTATION_FAST: RenderBufferSpec(1, torch.int32)} - colorized_specs = {RenderBufferKind.INSTANCE_SEGMENTATION_FAST: RenderBufferSpec(4, torch.uint8)} + raw_specs = {RenderBufferKind.INSTANCE_SEGMENTATION_FAST: RenderBufferSpec(1, wp.int32)} + colorized_specs = {RenderBufferKind.INSTANCE_SEGMENTATION_FAST: RenderBufferSpec(4, wp.uint8)} raw = CameraData.allocate( data_types=cfg.data_types, height=4, width=4, num_views=1, device="cpu", supported_specs=raw_specs @@ -206,15 +206,15 @@ def test_camera_data_segmentation_dtype_follows_supported_spec(): data_types=cfg.data_types, height=4, width=4, num_views=1, device="cpu", supported_specs=colorized_specs ) - assert raw.output["instance_segmentation_fast"].dtype == torch.int32 + assert raw.output["instance_segmentation_fast"].dtype == wp.int32 assert raw.output["instance_segmentation_fast"].shape == (1, 4, 4, 1) - assert colorized.output["instance_segmentation_fast"].dtype == torch.uint8 + assert colorized.output["instance_segmentation_fast"].dtype == wp.uint8 assert colorized.output["instance_segmentation_fast"].shape == (1, 4, 4, 4) def test_camera_data_allocate_raises_on_unknown_name(): """An unknown data_types name raises ValueError naming the offender.""" - supported_specs = {RenderBufferKind.RGBA: RenderBufferSpec(4, torch.uint8)} + supported_specs = {RenderBufferKind.RGBA: RenderBufferSpec(4, wp.uint8)} with pytest.raises(ValueError) as exc_info: CameraData.allocate( data_types=["not_a_real_type"], diff --git a/source/isaaclab/test/sensors/test_camera.py b/source/isaaclab/test/sensors/test_camera.py index daed8e95773d..659784834154 100644 --- a/source/isaaclab/test/sensors/test_camera.py +++ b/source/isaaclab/test/sensors/test_camera.py @@ -22,6 +22,7 @@ import pytest import scipy.spatial.transform as tf import torch +import warp as wp import omni.replicator.core as rep from pxr import Gf, Usd, UsdGeom @@ -192,11 +193,13 @@ def test_camera_init_offset(setup_sim_camera): rtol=1e-5, ) - # check if transform correctly set in 
output - np.testing.assert_allclose(camera_ros.data.pos_w[0].cpu().numpy(), cam_cfg_offset_ros.offset.pos, rtol=1e-5) - np.testing.assert_allclose(camera_ros.data.quat_w_ros[0].cpu().numpy(), QUAT_ROS, rtol=1e-5) - np.testing.assert_allclose(camera_ros.data.quat_w_opengl[0].cpu().numpy(), QUAT_OPENGL, rtol=1e-5) - np.testing.assert_allclose(camera_ros.data.quat_w_world[0].cpu().numpy(), QUAT_WORLD, rtol=1e-5) + # check if transform correctly set in output (camera pose fields are wp.array) + np.testing.assert_allclose( + wp.to_torch(camera_ros.data.pos_w)[0].cpu().numpy(), cam_cfg_offset_ros.offset.pos, rtol=1e-5 + ) + np.testing.assert_allclose(wp.to_torch(camera_ros.data.quat_w_ros)[0].cpu().numpy(), QUAT_ROS, rtol=1e-5) + np.testing.assert_allclose(wp.to_torch(camera_ros.data.quat_w_opengl)[0].cpu().numpy(), QUAT_OPENGL, rtol=1e-5) + np.testing.assert_allclose(wp.to_torch(camera_ros.data.quat_w_world)[0].cpu().numpy(), QUAT_WORLD, rtol=1e-5) def test_multi_camera_init(setup_sim_camera): @@ -263,7 +266,7 @@ def test_camera_init_intrinsic_matrix(setup_sim_camera): camera_1 = Camera(cfg=camera_cfg) # get intrinsic matrix sim.reset() - intrinsic_matrix = camera_1.data.intrinsic_matrices[0].cpu().flatten().tolist() + intrinsic_matrix = wp.to_torch(camera_1.data.intrinsic_matrices)[0].cpu().flatten().tolist() teardown(sim) # reinit the first camera sim, camera_cfg, dt = setup() @@ -295,15 +298,15 @@ def test_camera_init_intrinsic_matrix(setup_sim_camera): # check image data torch.testing.assert_close( - camera_1.data.output["distance_to_image_plane"], - camera_2.data.output["distance_to_image_plane"], + wp.to_torch(camera_1.data.output["distance_to_image_plane"]), + wp.to_torch(camera_2.data.output["distance_to_image_plane"]), rtol=5e-3, atol=1e-4, ) # check that both intrinsic matrices are the same torch.testing.assert_close( - camera_1.data.intrinsic_matrices[0], - camera_2.data.intrinsic_matrices[0], + wp.to_torch(camera_1.data.intrinsic_matrices)[0], + wp.to_torch(camera_2.data.intrinsic_matrices)[0], rtol=5e-3, atol=1e-4, ) @@ -326,8 +329,8 @@ def test_camera_set_world_poses(setup_sim_camera): camera.set_world_poses(position.clone(), orientation.clone(), convention="world") # check if transform correctly set in output - torch.testing.assert_close(camera.data.pos_w, position) - torch.testing.assert_close(camera.data.quat_w_world, orientation) + torch.testing.assert_close(wp.to_torch(camera.data.pos_w), position) + torch.testing.assert_close(wp.to_torch(camera.data.quat_w_world), orientation) def test_camera_set_world_poses_from_view(setup_sim_camera): @@ -348,8 +351,8 @@ def test_camera_set_world_poses_from_view(setup_sim_camera): camera.set_world_poses_from_view(eyes.clone(), targets.clone()) # check if transform correctly set in output - torch.testing.assert_close(camera.data.pos_w, eyes) - torch.testing.assert_close(camera.data.quat_w_ros, quat_ros_gt) + torch.testing.assert_close(wp.to_torch(camera.data.pos_w), eyes) + torch.testing.assert_close(wp.to_torch(camera.data.quat_w_ros), quat_ros_gt) def test_intrinsic_matrix(setup_sim_camera): @@ -374,10 +377,11 @@ def test_intrinsic_matrix(setup_sim_camera): # update camera camera.update(dt) # Check that matrix is correct - torch.testing.assert_close(rs_intrinsic_matrix[0, 0, 0], camera.data.intrinsic_matrices[0, 0, 0]) - torch.testing.assert_close(rs_intrinsic_matrix[0, 1, 1], camera.data.intrinsic_matrices[0, 1, 1]) - torch.testing.assert_close(rs_intrinsic_matrix[0, 0, 2], camera.data.intrinsic_matrices[0, 0, 2]) - 
torch.testing.assert_close(rs_intrinsic_matrix[0, 1, 2], camera.data.intrinsic_matrices[0, 1, 2]) + intrinsics_torch = wp.to_torch(camera.data.intrinsic_matrices) + torch.testing.assert_close(rs_intrinsic_matrix[0, 0, 0], intrinsics_torch[0, 0, 0]) + torch.testing.assert_close(rs_intrinsic_matrix[0, 1, 1], intrinsics_torch[0, 1, 1]) + torch.testing.assert_close(rs_intrinsic_matrix[0, 0, 2], intrinsics_torch[0, 0, 2]) + torch.testing.assert_close(rs_intrinsic_matrix[0, 1, 2], intrinsics_torch[0, 1, 2]) def test_depth_clipping(setup_sim_camera): @@ -536,17 +540,17 @@ def test_camera_resolution_all_colorize(setup_sim_camera): # access image data and compare dtype output = camera.data.output - assert output["rgb"].dtype == torch.uint8 - assert output["rgba"].dtype == torch.uint8 - assert output["albedo"].dtype == torch.uint8 - assert output["depth"].dtype == torch.float - assert output["distance_to_camera"].dtype == torch.float - assert output["distance_to_image_plane"].dtype == torch.float - assert output["normals"].dtype == torch.float - assert output["motion_vectors"].dtype == torch.float - assert output["semantic_segmentation"].dtype == torch.uint8 - assert output["instance_segmentation_fast"].dtype == torch.uint8 - assert output["instance_id_segmentation_fast"].dtype == torch.uint8 + assert output["rgb"].dtype == wp.uint8 + assert output["rgba"].dtype == wp.uint8 + assert output["albedo"].dtype == wp.uint8 + assert output["depth"].dtype == wp.float32 + assert output["distance_to_camera"].dtype == wp.float32 + assert output["distance_to_image_plane"].dtype == wp.float32 + assert output["normals"].dtype == wp.float32 + assert output["motion_vectors"].dtype == wp.float32 + assert output["semantic_segmentation"].dtype == wp.uint8 + assert output["instance_segmentation_fast"].dtype == wp.uint8 + assert output["instance_id_segmentation_fast"].dtype == wp.uint8 def test_camera_resolution_no_colorize(setup_sim_camera): @@ -597,17 +601,17 @@ def test_camera_resolution_no_colorize(setup_sim_camera): # access image data and compare dtype output = camera.data.output - assert output["rgb"].dtype == torch.uint8 - assert output["rgba"].dtype == torch.uint8 - assert output["albedo"].dtype == torch.uint8 - assert output["depth"].dtype == torch.float - assert output["distance_to_camera"].dtype == torch.float - assert output["distance_to_image_plane"].dtype == torch.float - assert output["normals"].dtype == torch.float - assert output["motion_vectors"].dtype == torch.float - assert output["semantic_segmentation"].dtype == torch.int32 - assert output["instance_segmentation_fast"].dtype == torch.int32 - assert output["instance_id_segmentation_fast"].dtype == torch.int32 + assert output["rgb"].dtype == wp.uint8 + assert output["rgba"].dtype == wp.uint8 + assert output["albedo"].dtype == wp.uint8 + assert output["depth"].dtype == wp.float32 + assert output["distance_to_camera"].dtype == wp.float32 + assert output["distance_to_image_plane"].dtype == wp.float32 + assert output["normals"].dtype == wp.float32 + assert output["motion_vectors"].dtype == wp.float32 + assert output["semantic_segmentation"].dtype == wp.int32 + assert output["instance_segmentation_fast"].dtype == wp.int32 + assert output["instance_id_segmentation_fast"].dtype == wp.int32 def test_camera_large_resolution_all_colorize(setup_sim_camera): @@ -661,17 +665,17 @@ def test_camera_large_resolution_all_colorize(setup_sim_camera): # access image data and compare dtype output = camera.data.output - assert output["rgb"].dtype == torch.uint8 - assert 
output["rgba"].dtype == torch.uint8 - assert output["albedo"].dtype == torch.uint8 - assert output["depth"].dtype == torch.float - assert output["distance_to_camera"].dtype == torch.float - assert output["distance_to_image_plane"].dtype == torch.float - assert output["normals"].dtype == torch.float - assert output["motion_vectors"].dtype == torch.float - assert output["semantic_segmentation"].dtype == torch.uint8 - assert output["instance_segmentation_fast"].dtype == torch.uint8 - assert output["instance_id_segmentation_fast"].dtype == torch.uint8 + assert output["rgb"].dtype == wp.uint8 + assert output["rgba"].dtype == wp.uint8 + assert output["albedo"].dtype == wp.uint8 + assert output["depth"].dtype == wp.float32 + assert output["distance_to_camera"].dtype == wp.float32 + assert output["distance_to_image_plane"].dtype == wp.float32 + assert output["normals"].dtype == wp.float32 + assert output["motion_vectors"].dtype == wp.float32 + assert output["semantic_segmentation"].dtype == wp.uint8 + assert output["instance_segmentation_fast"].dtype == wp.uint8 + assert output["instance_id_segmentation_fast"].dtype == wp.uint8 def test_camera_resolution_rgb_only(setup_sim_camera): @@ -693,7 +697,7 @@ def test_camera_resolution_rgb_only(setup_sim_camera): output = camera.data.output assert output["rgb"].shape == hw_3c_shape # access image data and compare dtype - assert output["rgb"].dtype == torch.uint8 + assert output["rgb"].dtype == wp.uint8 def test_camera_resolution_rgba_only(setup_sim_camera): @@ -715,7 +719,7 @@ def test_camera_resolution_rgba_only(setup_sim_camera): output = camera.data.output assert output["rgba"].shape == hw_4c_shape # access image data and compare dtype - assert output["rgba"].dtype == torch.uint8 + assert output["rgba"].dtype == wp.uint8 def test_camera_resolution_albedo_only(setup_sim_camera): @@ -737,7 +741,7 @@ def test_camera_resolution_albedo_only(setup_sim_camera): output = camera.data.output assert output["albedo"].shape == hw_4c_shape # access image data and compare dtype - assert output["albedo"].dtype == torch.uint8 + assert output["albedo"].dtype == wp.uint8 @pytest.mark.parametrize( @@ -763,7 +767,7 @@ def test_camera_resolution_simple_shading_only(setup_sim_camera, data_type): output = camera.data.output assert output[data_type].shape == hw_3c_shape # access image data and compare dtype - assert output[data_type].dtype == torch.uint8 + assert output[data_type].dtype == wp.uint8 def test_camera_resolution_depth_only(setup_sim_camera): @@ -785,7 +789,7 @@ def test_camera_resolution_depth_only(setup_sim_camera): output = camera.data.output assert output["depth"].shape == hw_1c_shape # access image data and compare dtype - assert output["depth"].dtype == torch.float + assert output["depth"].dtype == wp.float32 def test_sensor_print(setup_sim_camera): @@ -857,7 +861,8 @@ def test_camera_multi_regex_init(setup_camera_device, device): for _ in range(10): sim.step() camera.update(dt) - for im_type, im_data in camera.data.output.items(): + for im_type, im_data_wp in camera.data.output.items(): + im_data = wp.to_torch(im_data_wp) if im_type == "rgb": assert im_data.shape == (num_cameras, camera_cfg.height, camera_cfg.width, 3) for i in range(4): @@ -904,7 +909,8 @@ def test_camera_all_annotators(setup_camera_device, device): for _ in range(10): sim.step() camera.update(dt) - for data_type, im_data in camera.data.output.items(): + for data_type, im_data_wp in camera.data.output.items(): + im_data = wp.to_torch(im_data_wp) if data_type in ["rgb", "normals"]: assert 
im_data.shape == (num_cameras, camera_cfg.height, camera_cfg.width, 3) elif data_type in [ @@ -928,17 +934,17 @@ def test_camera_all_annotators(setup_camera_device, device): output = camera.data.output info = camera.data.info - assert output["rgb"].dtype == torch.uint8 - assert output["rgba"].dtype == torch.uint8 - assert output["albedo"].dtype == torch.uint8 - assert output["depth"].dtype == torch.float - assert output["distance_to_camera"].dtype == torch.float - assert output["distance_to_image_plane"].dtype == torch.float - assert output["normals"].dtype == torch.float - assert output["motion_vectors"].dtype == torch.float - assert output["semantic_segmentation"].dtype == torch.uint8 - assert output["instance_segmentation_fast"].dtype == torch.uint8 - assert output["instance_id_segmentation_fast"].dtype == torch.uint8 + assert output["rgb"].dtype == wp.uint8 + assert output["rgba"].dtype == wp.uint8 + assert output["albedo"].dtype == wp.uint8 + assert output["depth"].dtype == wp.float32 + assert output["distance_to_camera"].dtype == wp.float32 + assert output["distance_to_image_plane"].dtype == wp.float32 + assert output["normals"].dtype == wp.float32 + assert output["motion_vectors"].dtype == wp.float32 + assert output["semantic_segmentation"].dtype == wp.uint8 + assert output["instance_segmentation_fast"].dtype == wp.uint8 + assert output["instance_id_segmentation_fast"].dtype == wp.uint8 assert isinstance(info["semantic_segmentation"], dict) assert isinstance(info["instance_segmentation_fast"], dict) assert isinstance(info["instance_id_segmentation_fast"], dict) @@ -970,7 +976,7 @@ def test_camera_segmentation_non_colorize(setup_camera_device, device): for seg_type in camera_cfg.data_types: assert camera.data.output[seg_type].shape == (num_cameras, camera_cfg.height, camera_cfg.width, 1) - assert camera.data.output[seg_type].dtype == torch.int32 + assert camera.data.output[seg_type].dtype == wp.int32 assert isinstance(camera.data.info[seg_type], dict) del camera @@ -994,14 +1000,14 @@ def test_camera_normals_unit_length(setup_camera_device, device): for _ in range(10): sim.step() camera.update(dt) - im_data = camera.data.output["normals"] + im_data = wp.to_torch(camera.data.output["normals"]) assert im_data.shape == (num_cameras, camera_cfg.height, camera_cfg.width, 3) for i in range(4): assert im_data[i].mean() > 0.0 norms = torch.linalg.norm(im_data, dim=-1) assert torch.allclose(norms, torch.ones_like(norms), atol=1e-9) - assert camera.data.output["normals"].dtype == torch.float + assert camera.data.output["normals"].dtype == wp.float32 del camera @@ -1059,7 +1065,7 @@ def test_camera_frame_offset(setup_camera_device, device): sim.step() camera.update(dt) - image_before = camera.data.output["rgb"].clone() / 255.0 + image_before = wp.to_torch(camera.data.output["rgb"]).clone() / 255.0 for i in range(10): prim = stage.GetPrimAtPath(f"/World/Objects/Obj_{i:02d}") @@ -1069,7 +1075,7 @@ def test_camera_frame_offset(setup_camera_device, device): sim.step() camera.update(dt) - image_after = camera.data.output["rgb"].clone() / 255.0 + image_after = wp.to_torch(camera.data.output["rgb"]).clone() / 255.0 assert torch.abs(image_after - image_before).mean() > 0.01 @@ -1096,7 +1102,7 @@ def __init__(self, cfg=None): self.cfg = cfg def supported_output_types(self): - return {RenderBufferKind.RGBA: RenderBufferSpec(4, torch.uint8)} + return {RenderBufferKind.RGBA: RenderBufferSpec(4, wp.uint8)} def prepare_stage(self, stage, num_envs): pass diff --git 
diff --git a/source/isaaclab/test/sensors/test_first_frame_textured_rendering.py b/source/isaaclab/test/sensors/test_first_frame_textured_rendering.py
index 1c2ef0ec7b00..5cf7f0e54676 100644
--- a/source/isaaclab/test/sensors/test_first_frame_textured_rendering.py
+++ b/source/isaaclab/test/sensors/test_first_frame_textured_rendering.py
@@ -14,6 +14,7 @@

 import pytest
 import torch
+import warp as wp

 import omni.replicator.core as rep

@@ -124,13 +125,13 @@ def test_first_frame_is_textured_camera(setup_sim, device):
     # The first sim step + camera update should produce textured output
     sim.step()
     camera.update(dt)
-    first_frame = camera.data.output["rgb"][0].clone().to(dtype=torch.float32)
+    first_frame = wp.to_torch(camera.data.output["rgb"])[0].clone().to(dtype=torch.float32)

     # Let the renderer stabilise, then capture the reference frame
     for _ in range(STABILISATION_STEPS):
         sim.step()
         camera.update(dt)
-    stable_frame = camera.data.output["rgb"][0].clone().to(dtype=torch.float32)
+    stable_frame = wp.to_torch(camera.data.output["rgb"])[0].clone().to(dtype=torch.float32)

     del camera

diff --git a/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py b/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py
index 8657c938c691..4c70d0a09488 100644
--- a/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py
+++ b/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py
@@ -20,6 +20,7 @@
 import numpy as np
 import pytest
 import torch
+import warp as wp

 import omni.replicator.core as rep
 from pxr import Gf
@@ -115,7 +116,7 @@ def test_camera_init_offset(setup_simulation, convention, quat):
         camera.update(dt)

         # check that transform is set correctly
-        np.testing.assert_allclose(camera.data.pos_w[0].cpu().numpy(), cam_cfg_offset.offset.pos)
+        np.testing.assert_allclose(wp.to_torch(camera.data.pos_w)[0].cpu().numpy(), cam_cfg_offset.offset.pos)

         del camera

@@ -272,8 +273,8 @@ def test_camera_set_world_poses(setup_simulation):
     camera.set_world_poses(position.clone(), orientation.clone(), convention="world")

     # check if transform correctly set in output
-    torch.testing.assert_close(camera.data.pos_w, position)
-    torch.testing.assert_close(camera.data.quat_w_world, orientation)
+    torch.testing.assert_close(wp.to_torch(camera.data.pos_w), position)
+    torch.testing.assert_close(wp.to_torch(camera.data.quat_w_world), orientation)

     del camera

@@ -295,8 +296,8 @@ def test_camera_set_world_poses_from_view(setup_simulation):
     camera.set_world_poses_from_view(eyes.clone(), targets.clone())

     # check if transform correctly set in output
-    torch.testing.assert_close(camera.data.pos_w, eyes)
-    torch.testing.assert_close(camera.data.quat_w_ros, quat_ros_gt)
+    torch.testing.assert_close(wp.to_torch(camera.data.pos_w), eyes)
+    torch.testing.assert_close(wp.to_torch(camera.data.quat_w_ros), quat_ros_gt)

     del camera

@@ -325,7 +326,7 @@ def test_intrinsic_matrix(setup_simulation, height, width):
     # update camera
     camera.update(dt)
     # Check that matrix is correct
-    torch.testing.assert_close(rs_intrinsic_matrix, camera.data.intrinsic_matrices)
+    torch.testing.assert_close(rs_intrinsic_matrix, wp.to_torch(camera.data.intrinsic_matrices))

     del camera

@@ -792,16 +793,17 @@ def test_image_mesh_ids_identifies_hit_mesh(setup_simulation):
     sim.reset()
     camera.update(dt)

-    mesh_ids = camera.data.image_mesh_ids  # shape (N, H, W, 1), dtype torch.int16
-    assert mesh_ids is not None, "image_mesh_ids should not be None when update_mesh_ids=True"
-    assert mesh_ids.shape[-1] == 1
-    assert mesh_ids.dtype == torch.int16
+    mesh_ids_wp = camera.data.image_mesh_ids  # shape (N, H, W, 1), wp.int16
+    assert mesh_ids_wp is not None, "image_mesh_ids should not be None when update_mesh_ids=True"
+    assert mesh_ids_wp.shape[-1] == 1
+    assert mesh_ids_wp.dtype == wp.int16
+    mesh_ids = wp.to_torch(mesh_ids_wp)

     # Identify actual hits via distance < inf. This relies on depth_clipping_behavior="none"
     # (the default), which leaves missed rays at the Warp-kernel fill value of inf.
     # Under "max" clipping, missed rays would be clamped to a finite max_distance, making
     # the inf comparison incorrect.
-    hit_mask = camera.data.output["distance_to_camera"][0, :, :, 0] < float("inf")
+    hit_mask = wp.to_torch(camera.data.output["distance_to_camera"])[0, :, :, 0] < float("inf")
     assert hit_mask.any(), "Expected at least some rays to hit the ground plane"

     # All hits against the single registered mesh must carry mesh_id=0 (first mesh index).
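The hit-mask idiom from the test above, in isolation (assumes
``depth_clipping_behavior="none"`` so missed rays stay at ``inf``, and a ray-caster
camera handle named ``camera``):

.. code-block:: python

   import warp as wp

   distance = wp.to_torch(camera.data.output["distance_to_camera"])
   hit_mask = distance[0, :, :, 0] < float("inf")   # pixels whose rays hit geometry
   mesh_ids = wp.to_torch(camera.data.image_mesh_ids)[0, :, :, 0]
   hit_ids = mesh_ids[hit_mask]                     # mesh index for every hit pixel
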
diff --git a/source/isaaclab/test/sensors/test_multi_tiled_camera.py b/source/isaaclab/test/sensors/test_multi_tiled_camera.py
index 113524cfe934..3807df66c669 100644
--- a/source/isaaclab/test/sensors/test_multi_tiled_camera.py
+++ b/source/isaaclab/test/sensors/test_multi_tiled_camera.py
@@ -21,6 +21,7 @@
 import numpy as np
 import pytest
 import torch
+import warp as wp
 from flaky import flaky

 import omni.replicator.core as rep
@@ -119,16 +120,16 @@ def test_multi_tiled_camera_init(setup_camera):
     for i, camera in enumerate(tiled_cameras):
         # update camera
         camera.update(dt)
-        # check image data
-        for data_type, im_data in camera.data.output.items():
+        # check image data (camera outputs are wp.array; lift to torch for tensor ops)
+        for data_type, im_data_wp in camera.data.output.items():
             if data_type == "rgb":
-                im_data = im_data.clone() / 255.0
+                im_data = wp.to_torch(im_data_wp).clone() / 255.0
                 assert im_data.shape == (num_cameras_per_tiled_camera, camera.cfg.height, camera.cfg.width, 3)
                 for j in range(num_cameras_per_tiled_camera):
                     assert (im_data[j]).mean().item() > 0.0
                 rgbs.append(im_data)
             elif data_type == "distance_to_camera":
-                im_data = im_data.clone()
+                im_data = wp.to_torch(im_data_wp).clone()
                 im_data[torch.isinf(im_data)] = 0
                 assert im_data.shape == (num_cameras_per_tiled_camera, camera.cfg.height, camera.cfg.width, 1)
                 for j in range(num_cameras_per_tiled_camera):
@@ -207,8 +208,9 @@ def test_all_annotators_multi_tiled_camera(setup_camera):
     for i, camera in enumerate(tiled_cameras):
         # update camera
         camera.update(dt)
-        # check image data
-        for data_type, im_data in camera.data.output.items():
+        # check image data (camera outputs are wp.array; lift to torch for tensor ops)
+        for data_type, im_data_wp in camera.data.output.items():
+            im_data = wp.to_torch(im_data_wp)
             if data_type in ["rgb", "normals"]:
                 assert im_data.shape == (num_cameras_per_tiled_camera, camera.cfg.height, camera.cfg.width, 3)
             elif data_type in [
@@ -234,17 +236,17 @@ def test_all_annotators_multi_tiled_camera(setup_camera):
         # access image data and compare dtype
         output = camera.data.output
         info = camera.data.info
-        assert output["rgb"].dtype == torch.uint8
-        assert output["rgba"].dtype == torch.uint8
-        assert output["albedo"].dtype == torch.uint8
-        assert output["depth"].dtype == torch.float
-        assert output["distance_to_camera"].dtype == torch.float
-        assert output["distance_to_image_plane"].dtype == torch.float
-        assert output["normals"].dtype == torch.float
-        assert output["motion_vectors"].dtype == torch.float
-        assert output["semantic_segmentation"].dtype == torch.uint8
-        assert output["instance_segmentation_fast"].dtype == torch.uint8
-        assert output["instance_id_segmentation_fast"].dtype == torch.uint8
+        assert output["rgb"].dtype == wp.uint8
+        assert output["rgba"].dtype == wp.uint8
+        assert output["albedo"].dtype == wp.uint8
+        assert output["depth"].dtype == wp.float32
+        assert output["distance_to_camera"].dtype == wp.float32
+        assert output["distance_to_image_plane"].dtype == wp.float32
+        assert output["normals"].dtype == wp.float32
+        assert output["motion_vectors"].dtype == wp.float32
+        assert output["semantic_segmentation"].dtype == wp.uint8
+        assert output["instance_segmentation_fast"].dtype == wp.uint8
+        assert output["instance_id_segmentation_fast"].dtype == wp.uint8
         assert isinstance(info["semantic_segmentation"], dict)
         assert isinstance(info["instance_segmentation_fast"], dict)
         assert isinstance(info["instance_id_segmentation_fast"], dict)
@@ -303,15 +305,15 @@ def test_different_resolution_multi_tiled_camera(setup_camera):
     for i, camera in enumerate(tiled_cameras):
         # update camera
         camera.update(dt)
-        # check image data
-        for data_type, im_data in camera.data.output.items():
+        # check image data (camera outputs are wp.array; lift to torch for tensor ops)
+        for data_type, im_data_wp in camera.data.output.items():
             if data_type == "rgb":
-                im_data = im_data.clone() / 255.0
+                im_data = wp.to_torch(im_data_wp).clone() / 255.0
                 assert im_data.shape == (num_cameras_per_tiled_camera, camera.cfg.height, camera.cfg.width, 3)
                 for j in range(num_cameras_per_tiled_camera):
                     assert (im_data[j]).mean().item() > 0.0
             elif data_type == "distance_to_camera":
-                im_data = im_data.clone()
+                im_data = wp.to_torch(im_data_wp).clone()
                 assert im_data.shape == (num_cameras_per_tiled_camera, camera.cfg.height, camera.cfg.width, 1)
                 for j in range(num_cameras_per_tiled_camera):
                     assert im_data[j].mean().item() > 0.0
@@ -357,8 +359,8 @@ def test_frame_offset_multi_tiled_camera(setup_camera):
     for camera in tiled_cameras:
         camera.update(dt)

-    # collect image data
-    image_befores = [camera.data.output["rgb"].clone() / 255.0 for camera in tiled_cameras]
+    # collect image data (camera outputs are wp.array; lift to torch for tensor ops)
+    image_befores = [wp.to_torch(camera.data.output["rgb"]).clone() / 255.0 for camera in tiled_cameras]

     # update scene
     for i in range(10):
@@ -374,7 +376,7 @@ def test_frame_offset_multi_tiled_camera(setup_camera):
         camera.update(dt)

     # make sure the image is different
-    image_afters = [camera.data.output["rgb"].clone() / 255.0 for camera in tiled_cameras]
+    image_afters = [wp.to_torch(camera.data.output["rgb"]).clone() / 255.0 for camera in tiled_cameras]

     # check difference is above threshold
     for i in range(num_tiled_cameras):
@@ -422,16 +424,16 @@ def test_frame_different_poses_multi_tiled_camera(setup_camera):
     for i, camera in enumerate(tiled_cameras):
         # update camera
         camera.update(dt)
-        # check image data
-        for data_type, im_data in camera.data.output.items():
+        # check image data (camera outputs are wp.array; lift to torch for tensor ops)
+        for data_type, im_data_wp in camera.data.output.items():
             if data_type == "rgb":
-                im_data = im_data.clone() / 255.0
+                im_data = wp.to_torch(im_data_wp).clone() / 255.0
                 assert im_data.shape == (num_cameras_per_tiled_camera, camera.cfg.height, camera.cfg.width, 3)
                 for j in range(num_cameras_per_tiled_camera):
                     assert (im_data[j]).mean().item() > 0.0
                 rgbs.append(im_data)
             elif data_type == "distance_to_camera":
-                im_data = im_data.clone()
+                im_data = wp.to_torch(im_data_wp).clone()
                 # replace inf with 0
                 im_data[torch.isinf(im_data)] = 0
                 assert im_data.shape == (num_cameras_per_tiled_camera, camera.cfg.height, camera.cfg.width, 1)
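A recurring detail in the loops above: depth buffers mark missed rays with ``inf``,
which must be cleared before computing statistics. Condensed sketch (``depth_wp`` is
assumed to be a ``wp.float32`` entry of ``camera.data.output``):

.. code-block:: python

   import torch
   import warp as wp

   depth = wp.to_torch(depth_wp).clone()  # detach from the live render buffer first
   depth[torch.isinf(depth)] = 0.0        # misses contribute zero distance
   assert depth.mean().item() > 0.0
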
diff --git a/source/isaaclab/test/sensors/test_tiled_camera.py b/source/isaaclab/test/sensors/test_tiled_camera.py
index 4ce62cd5336f..2267e83af3f0 100644
--- a/source/isaaclab/test/sensors/test_tiled_camera.py
+++ b/source/isaaclab/test/sensors/test_tiled_camera.py
@@ -29,6 +29,7 @@
 import numpy as np
 import pytest
 import torch
+import warp as wp

 import omni.replicator.core as rep
 from pxr import Gf, UsdGeom
@@ -142,8 +143,9 @@ def test_tiled_camera_basic_functionality(setup_camera, device):
         sim.step()
         # update camera
         camera.update(dt)
-    # check image data
-    for im_type, im_data in camera.data.output.items():
+    # check image data (camera outputs are wp.array; lift to torch for tensor ops)
+    for im_type, im_data_wp in camera.data.output.items():
+        im_data = wp.to_torch(im_data_wp)
         if im_type == "rgb":
             assert im_data.shape == (1, camera_cfg.height, camera_cfg.width, 3)
             assert (im_data / 255.0).mean() > 0.0
diff --git a/source/isaaclab/test/test_scripts_torcharray_patterns.py b/source/isaaclab/test/test_scripts_torcharray_patterns.py
index 227bbda7be3c..48966126504b 100644
--- a/source/isaaclab/test/test_scripts_torcharray_patterns.py
+++ b/source/isaaclab/test/test_scripts_torcharray_patterns.py
@@ -6,9 +6,9 @@
 """Static scanner ensuring scripts/source do not regress the ProxyArray migration.

 Every public ``.data.<prop>`` on an asset or sensor now returns a
-:class:`~isaaclab.utils.warp.ProxyArray` (or, for CameraData, a raw
-``torch.Tensor``). Legacy conversion or tensor-method callsites on these
-properties should migrate to explicit ``.torch`` or ``.warp`` access. These
+:class:`~isaaclab.utils.warp.ProxyArray` or ``wp.array``. Legacy conversion or
+tensor-method callsites on these properties should migrate to explicit
+``.torch`` / ``.warp`` access or ``wp.to_torch`` for raw Warp arrays. These
 tests regex-scan user-facing scripts/source files and flag regressions before
 they reach users running tutorials or demos.
 """
@@ -49,7 +49,7 @@
 #   .data.<prop>[...].clone()
 #   .data.<prop>.assign(...)
 # These are tensor/wp.array instance methods that ProxyArray intentionally does
-# not forward. ``data.output[...]`` is camera data and remains torch-native.
+# not forward.
 _PROXYARRAY_DIRECT_METHOD_DOT_DATA = re.compile(
     r"\.data\."
     r"(?!_)"  # ignore private backing buffers such as data._sim_bind_...
@@ -85,10 +85,9 @@ def _source_and_scripts_files() -> list[Path]:
 def test_no_wp_to_torch_on_torcharray_data(path: Path) -> None:
     """No ``wp.to_torch(<asset>.data.<prop>)`` / ``wp.to_torch(<asset>._data.<prop>)`` in scripts/.

-    Post-migration, ``<asset>.data.<prop>`` returns a ``ProxyArray``
-    (or ``torch.Tensor`` for CameraData). The temporary ``wp.to_torch``
-    shim is deprecated, so use the ``.torch`` accessor instead (or omit
-    the wrap entirely for torch-native fields).
+    Post-migration, ``<asset>.data.<prop>`` returns a ``ProxyArray``.
+    The temporary ``wp.to_torch`` shim is deprecated, so use the ``.torch``
+    accessor instead.
     """
     rel = path.relative_to(_repo_root()).as_posix()
     if any(rel.endswith(suffix) for suffix in _EXCLUDE):
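The convention the scanner enforces, sketched for illustration (``robot`` is a
hypothetical asset handle; ``ProxyArray`` exposes ``.torch`` / ``.warp`` accessors per
the docstring above, while camera outputs remain raw ``wp.array`` values):

.. code-block:: python

   import warp as wp

   joint_pos = robot.data.joint_pos.torch        # ProxyArray -> torch view
   rgb = wp.to_torch(camera.data.output["rgb"])  # raw wp.array -> torch view
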
""" rel = path.relative_to(_repo_root()).as_posix() if any(rel.endswith(suffix) for suffix in _EXCLUDE): diff --git a/source/isaaclab/test/utils/test_math.py b/source/isaaclab/test/utils/test_math.py index e0d85b14eb25..e6c262aa6c51 100644 --- a/source/isaaclab/test/utils/test_math.py +++ b/source/isaaclab/test/utils/test_math.py @@ -24,6 +24,7 @@ import scipy.spatial.transform as scipy_tf import torch import torch.utils.benchmark as benchmark +import warp as wp import isaaclab.utils.math as math_utils @@ -333,31 +334,52 @@ def test_convention_converter(device): # from ROS torch.testing.assert_close( - math_utils.convert_camera_frame_orientation_convention(quat_ros, "ros", "opengl"), quat_opengl + wp.to_torch(math_utils.convert_camera_frame_orientation_convention(wp.from_torch(quat_ros), "ros", "opengl")), + quat_opengl, ) torch.testing.assert_close( - math_utils.convert_camera_frame_orientation_convention(quat_ros, "ros", "world"), quat_world + wp.to_torch(math_utils.convert_camera_frame_orientation_convention(wp.from_torch(quat_ros), "ros", "world")), + quat_world, + ) + torch.testing.assert_close( + wp.to_torch(math_utils.convert_camera_frame_orientation_convention(wp.from_torch(quat_ros), "ros", "ros")), + quat_ros, ) - torch.testing.assert_close(math_utils.convert_camera_frame_orientation_convention(quat_ros, "ros", "ros"), quat_ros) # from OpenGL torch.testing.assert_close( - math_utils.convert_camera_frame_orientation_convention(quat_opengl, "opengl", "ros"), quat_ros + wp.to_torch( + math_utils.convert_camera_frame_orientation_convention(wp.from_torch(quat_opengl), "opengl", "ros") + ), + quat_ros, ) torch.testing.assert_close( - math_utils.convert_camera_frame_orientation_convention(quat_opengl, "opengl", "world"), quat_world + wp.to_torch( + math_utils.convert_camera_frame_orientation_convention(wp.from_torch(quat_opengl), "opengl", "world") + ), + quat_world, ) torch.testing.assert_close( - math_utils.convert_camera_frame_orientation_convention(quat_opengl, "opengl", "opengl"), quat_opengl + wp.to_torch( + math_utils.convert_camera_frame_orientation_convention(wp.from_torch(quat_opengl), "opengl", "opengl") + ), + quat_opengl, ) # from World torch.testing.assert_close( - math_utils.convert_camera_frame_orientation_convention(quat_world, "world", "ros"), quat_ros + wp.to_torch(math_utils.convert_camera_frame_orientation_convention(wp.from_torch(quat_world), "world", "ros")), + quat_ros, ) torch.testing.assert_close( - math_utils.convert_camera_frame_orientation_convention(quat_world, "world", "opengl"), quat_opengl + wp.to_torch( + math_utils.convert_camera_frame_orientation_convention(wp.from_torch(quat_world), "world", "opengl") + ), + quat_opengl, ) torch.testing.assert_close( - math_utils.convert_camera_frame_orientation_convention(quat_world, "world", "world"), quat_world + wp.to_torch( + math_utils.convert_camera_frame_orientation_convention(wp.from_torch(quat_world), "world", "world") + ), + quat_world, ) diff --git a/source/isaaclab_contrib/config/extension.toml b/source/isaaclab_contrib/config/extension.toml index 7fd2b5d139e5..0fba6e220442 100644 --- a/source/isaaclab_contrib/config/extension.toml +++ b/source/isaaclab_contrib/config/extension.toml @@ -1,6 +1,6 @@ [package] # Semantic Versioning is used: https://semver.org/ -version = "0.3.0" +version = "0.3.1" # Description title = "Isaac Lab External Contributions" diff --git a/source/isaaclab_contrib/docs/CHANGELOG.rst b/source/isaaclab_contrib/docs/CHANGELOG.rst index 721c705a5981..a3b2b550a22e 100644 --- 
diff --git a/source/isaaclab_contrib/docs/CHANGELOG.rst b/source/isaaclab_contrib/docs/CHANGELOG.rst
index 721c705a5981..a3b2b550a22e 100644
--- a/source/isaaclab_contrib/docs/CHANGELOG.rst
+++ b/source/isaaclab_contrib/docs/CHANGELOG.rst
@@ -1,6 +1,16 @@
 Changelog
 ---------

+0.3.1 (2026-04-30)
+~~~~~~~~~~~~~~~~~~
+
+Changed
+^^^^^^^
+
+* Updated :class:`~isaaclab_contrib.sensors.tacsl_sensor.VisuotactileSensor`
+  to consume ``wp.array`` camera depth outputs via :func:`warp.to_torch`.
+
+
 0.3.0 (2026-02-13)
 ~~~~~~~~~~~~~~~~~~

diff --git a/source/isaaclab_contrib/isaaclab_contrib/sensors/tacsl_sensor/visuotactile_sensor.py b/source/isaaclab_contrib/isaaclab_contrib/sensors/tacsl_sensor/visuotactile_sensor.py
index 720a85223aa6..dab39d44c2db 100644
--- a/source/isaaclab_contrib/isaaclab_contrib/sensors/tacsl_sensor/visuotactile_sensor.py
+++ b/source/isaaclab_contrib/isaaclab_contrib/sensors/tacsl_sensor/visuotactile_sensor.py
@@ -583,7 +583,7 @@ def _update_camera_tactile(self, env_ids: Sequence[int] | slice):
             depth_key = "depth"

         if depth_key:
-            self._data.tactile_depth_image[env_ids] = camera_data.output[depth_key][env_ids].clone()
+            self._data.tactile_depth_image[env_ids] = wp.to_torch(camera_data.output[depth_key])[env_ids].clone()
             diff = self._nominal_tactile[depth_key][env_ids] - self._data.tactile_depth_image[env_ids]
             self._data.tactile_rgb_image[env_ids] = self._tactile_rgb_render.render(diff.squeeze(-1))

diff --git a/source/isaaclab_newton/config/extension.toml b/source/isaaclab_newton/config/extension.toml
index 03b74794b012..2e598bf86303 100644
--- a/source/isaaclab_newton/config/extension.toml
+++ b/source/isaaclab_newton/config/extension.toml
@@ -1,7 +1,7 @@
 [package]

 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.5.25"
+version = "0.5.27"

 # Description
 title = "Newton simulation interfaces for IsaacLab core package"

diff --git a/source/isaaclab_newton/docs/CHANGELOG.rst b/source/isaaclab_newton/docs/CHANGELOG.rst
index 710b3920ca77..4999064ae458 100644
--- a/source/isaaclab_newton/docs/CHANGELOG.rst
+++ b/source/isaaclab_newton/docs/CHANGELOG.rst
@@ -1,6 +1,33 @@
 Changelog
 ---------

+0.5.27 (2026-04-30)
+~~~~~~~~~~~~~~~~~~~
+
+Fixed
+^^^^^
+
+* Fixed :class:`~isaaclab_newton.renderers.NewtonWarpRenderer` to populate
+  the ``rgb`` output buffer when both ``rgb`` and ``rgba`` are requested.
+  Without the fix, ``CameraData.output["rgb"]`` was left zero-filled because
+  the renderer skipped ``rgb`` in :meth:`read_output` after ``rgb`` and
+  ``rgba`` became independent ``wp.array`` allocations.
+
+
+0.5.26 (2026-04-30)
+~~~~~~~~~~~~~~~~~~~
+
+Changed
+^^^^^^^
+
+* Changed :class:`~isaaclab_newton.renderers.NewtonWarpRenderer` to consume
+  ``wp.array`` camera output buffers and camera state arrays from
+  :class:`~isaaclab.renderers.BaseRenderer`. Use :func:`warp.to_torch` on
+  ``camera.data.output`` entries if Torch tensor operations are required.
+* Updated Newton PVA debug visualization to convert camera-convention
+  orientation outputs with :func:`warp.to_torch`.
+
+
 0.5.25 (2026-04-28)
 ~~~~~~~~~~~~~~~~~~~
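The consumer-side change these entries describe mirrors the ``visuotactile_sensor``
diff above: one ``wp.to_torch`` lift before any torch arithmetic. Sketch
(``nominal_depth`` is a hypothetical torch reference frame):

.. code-block:: python

   import warp as wp

   depth = wp.to_torch(camera_data.output["depth"]).clone()  # wp.float32 -> torch view
   diff = nominal_depth - depth                              # torch ops now apply
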
diff --git a/source/isaaclab_newton/isaaclab_newton/renderers/newton_warp_renderer.py b/source/isaaclab_newton/isaaclab_newton/renderers/newton_warp_renderer.py
index 3fb198faf53d..b0570d7c0fc0 100644
--- a/source/isaaclab_newton/isaaclab_newton/renderers/newton_warp_renderer.py
+++ b/source/isaaclab_newton/isaaclab_newton/renderers/newton_warp_renderer.py
@@ -58,18 +58,18 @@ def __init__(self, newton_sensor: newton.sensors.SensorTiledCamera, sensor: Sens
         self.width = getattr(sensor.cfg, "width", 100)
         self.height = getattr(sensor.cfg, "height", 100)

-    def set_outputs(self, output_data: dict[str, torch.Tensor]):
-        for output_name, tensor_data in output_data.items():
+    def set_outputs(self, output_data: dict[str, wp.array]):
+        for output_name, array_data in output_data.items():
             if output_name == RenderBufferKind.RGBA:
-                self.outputs.color_image = self._from_torch(tensor_data, dtype=wp.uint32)
+                self.outputs.color_image = self._from_warp(array_data, dtype=wp.uint32)
             elif output_name == RenderBufferKind.ALBEDO:
-                self.outputs.albedo_image = self._from_torch(tensor_data, dtype=wp.uint32)
+                self.outputs.albedo_image = self._from_warp(array_data, dtype=wp.uint32)
             elif output_name == RenderBufferKind.DEPTH:
-                self.outputs.depth_image = self._from_torch(tensor_data, dtype=wp.float32)
+                self.outputs.depth_image = self._from_warp(array_data, dtype=wp.float32)
             elif output_name == RenderBufferKind.NORMALS:
-                self.outputs.normals_image = self._from_torch(tensor_data, dtype=wp.vec3f)
+                self.outputs.normals_image = self._from_warp(array_data, dtype=wp.vec3f)
             elif output_name == RenderBufferKind.INSTANCE_SEGMENTATION_FAST:
-                self.outputs.instance_segmentation_image = self._from_torch(tensor_data, dtype=wp.uint32)
+                self.outputs.instance_segmentation_image = self._from_warp(array_data, dtype=wp.uint32)
             elif output_name == RenderBufferKind.RGB:
                 pass
             else:
@@ -88,10 +88,11 @@ def get_output(self, output_name: str) -> wp.array:
             return self.outputs.instance_segmentation_image
         return None

-    def update(self, positions: torch.Tensor, orientations: torch.Tensor, intrinsics: torch.Tensor):
-        converted_orientations = convert_camera_frame_orientation_convention(
-            orientations, origin="world", target="opengl"
-        )
+    def update(self, positions: wp.array, orientations: wp.array, intrinsics: wp.array):
+        intrinsics_torch = wp.to_torch(intrinsics)
+        converted_orientations = wp.from_torch(convert_camera_frame_orientation_convention(
+            wp.to_torch(orientations), origin="world", target="opengl"
+        ))

         self.camera_transforms = wp.empty(
             (1, self.newton_sensor.model.world_count), dtype=wp.transformf, device=self.newton_sensor.model.device
@@ -99,34 +100,41 @@ def update(self, positions: torch.Tensor, orientations: torch.Tensor, intrinsics
         wp.launch(
             RenderData._update_transforms,
             self.newton_sensor.model.world_count,
-            [positions, converted_orientations, self.camera_transforms],
+            [
+                wp.array(
+                    ptr=positions.ptr,
+                    dtype=wp.vec3f,
+                    shape=(positions.shape[0],),
+                    device=positions.device,
+                    copy=False,
+                ),
+                wp.array(
+                    ptr=converted_orientations.ptr,
+                    dtype=wp.quatf,
+                    shape=(converted_orientations.shape[0],),
+                    device=converted_orientations.device,
+                    copy=False,
+                ),
+                self.camera_transforms,
+            ],
             device=self.newton_sensor.model.device,
         )

         if self.camera_rays is None:
-            first_focal_length = intrinsics[:, 1, 1][0:1]
+            first_focal_length = intrinsics_torch[:, 1, 1][0:1]
             fov_radians_all = 2.0 * torch.atan(self.height / (2.0 * first_focal_length))
             self.camera_rays = self.newton_sensor.utils.compute_pinhole_camera_rays(
                 self.width, self.height, wp.from_torch(fov_radians_all, dtype=wp.float32)
             )

-    def _from_torch(self, tensor: torch.Tensor, dtype) -> wp.array:
-        proxy_array = wp.from_torch(tensor)
-        if tensor.is_contiguous():
-            return wp.array(
-                ptr=proxy_array.ptr,
-                dtype=dtype,
-                shape=(self.newton_sensor.model.world_count, self.num_cameras, self.height, self.width),
-                device=proxy_array.device,
-                copy=False,
-            )
-
-        logger.warning("NewtonWarpRenderer - torch output array is non-contiguous")
-        return wp.zeros(
-            (self.newton_sensor.model.world_count, self.num_cameras, self.height, self.width),
+    def _from_warp(self, array: wp.array, dtype) -> wp.array:
+        return wp.array(
+            ptr=array.ptr,
             dtype=dtype,
-            device=proxy_array.device,
+            shape=(self.newton_sensor.model.world_count, self.num_cameras, self.height, self.width),
+            device=array.device,
+            copy=False,
         )

     @wp.kernel
@@ -185,16 +193,14 @@ def supported_output_types(self) -> dict[RenderBufferKind, RenderBufferSpec]:
         """Publish the per-output layout this Newton Warp backend writes.

         See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.supported_output_types`."""
         seg_spec = (
-            RenderBufferSpec(4, torch.uint8)
-            if self.cfg.colorize_instance_segmentation
-            else RenderBufferSpec(1, torch.int32)
+            RenderBufferSpec(4, wp.uint8) if self.cfg.colorize_instance_segmentation else RenderBufferSpec(1, wp.int32)
         )
         return {
-            RenderBufferKind.RGBA: RenderBufferSpec(4, torch.uint8),
-            RenderBufferKind.RGB: RenderBufferSpec(3, torch.uint8),
-            RenderBufferKind.ALBEDO: RenderBufferSpec(4, torch.uint8),
-            RenderBufferKind.DEPTH: RenderBufferSpec(1, torch.float32),
-            RenderBufferKind.NORMALS: RenderBufferSpec(3, torch.float32),
+            RenderBufferKind.RGBA: RenderBufferSpec(4, wp.uint8),
+            RenderBufferKind.RGB: RenderBufferSpec(3, wp.uint8),
+            RenderBufferKind.ALBEDO: RenderBufferSpec(4, wp.uint8),
+            RenderBufferKind.DEPTH: RenderBufferSpec(1, wp.float32),
+            RenderBufferKind.NORMALS: RenderBufferSpec(3, wp.float32),
             RenderBufferKind.INSTANCE_SEGMENTATION_FAST: seg_spec,
         }

@@ -208,7 +214,7 @@ def create_render_data(self, sensor: SensorBase) -> RenderData:
         See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.create_render_data`."""
         return RenderData(self.newton_sensor, sensor)

-    def set_outputs(self, render_data: RenderData, output_data: dict[str, torch.Tensor]):
+    def set_outputs(self, render_data: RenderData, output_data: dict[str, wp.array]):
         """Store output buffers.

         See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.set_outputs`."""
         render_data.set_outputs(output_data)
@@ -217,9 +223,7 @@ def update_transforms(self):
         See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.update_transforms`."""
         SimulationContext.instance().update_scene_data_provider(True)

-    def update_camera(
-        self, render_data: RenderData, positions: torch.Tensor, orientations: torch.Tensor, intrinsics: torch.Tensor
-    ):
+    def update_camera(self, render_data: RenderData, positions: wp.array, orientations: wp.array, intrinsics: wp.array):
         """Update camera poses and intrinsics.

         See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.update_camera`."""
         render_data.update(positions, orientations, intrinsics)
@@ -244,12 +248,17 @@ def read_output(self, render_data: RenderData, camera_data: CameraData) -> None:
         See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.read_output`."""
         for output_name in camera_data.output:
             if output_name == "rgb":
+                # rgb is populated below by mirroring the first three channels of rgba.
                 continue
             image_data = render_data.get_output(output_name)
             if image_data is not None:
                 output_data = camera_data.output[output_name]
-                if image_data.ptr != output_data.data_ptr():
-                    wp.copy(wp.from_torch(output_data), image_data)
+                if image_data.ptr != output_data.ptr:
+                    wp.copy(output_data, image_data)
+        # Mirror rgba's first three channels into rgb when both are requested. This matches the
+        # legacy slice-aliased behavior so consumers requesting "rgb" still see populated pixels.
+        if "rgb" in camera_data.output and "rgba" in camera_data.output:
+            wp.to_torch(camera_data.output["rgb"]).copy_(wp.to_torch(camera_data.output["rgba"])[..., :3])

     def cleanup(self, render_data: RenderData | None):
         """Release resources. No-op for Newton Warp.
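``_from_warp`` above reinterprets the caller's buffer under a renderer-native dtype
without copying, by constructing a ``wp.array`` over the same pointer. The same idiom
in isolation (shapes chosen arbitrarily for the sketch):

.. code-block:: python

   import warp as wp

   src = wp.zeros((2, 4, 4, 4), dtype=wp.uint8, device="cuda:0")  # e.g. rgba bytes
   # view the same memory as one packed 32-bit value per rgba texel
   packed = wp.array(ptr=src.ptr, dtype=wp.uint32, shape=(2, 4, 4), device=src.device, copy=False)
   assert packed.ptr == src.ptr  # no allocation, no copy
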
diff --git a/source/isaaclab_ov/config/extension.toml b/source/isaaclab_ov/config/extension.toml
index 67f5582202d9..ba8f5046c4eb 100644
--- a/source/isaaclab_ov/config/extension.toml
+++ b/source/isaaclab_ov/config/extension.toml
@@ -1,5 +1,5 @@
 [package]
-version = "0.1.2"
+version = "0.1.3"
 title = "Omniverse renderers for IsaacLab"
 description = "Extension providing Omniverse renderers (OVRTX, ovphysx, etc.) for tiled camera rendering."
 readme = "docs/README.md"
diff --git a/source/isaaclab_ov/docs/CHANGELOG.rst b/source/isaaclab_ov/docs/CHANGELOG.rst
index 2babcbdabd9a..8c39529aa0bc 100644
--- a/source/isaaclab_ov/docs/CHANGELOG.rst
+++ b/source/isaaclab_ov/docs/CHANGELOG.rst
@@ -1,6 +1,18 @@
 Changelog
 ---------

+0.1.3 (2026-04-30)
+~~~~~~~~~~~~~~~~~~
+
+Changed
+^^^^^^^
+
+* Changed :class:`~isaaclab_ov.renderers.OVRTXRenderer` to consume ``wp.array``
+  camera output buffers and camera state arrays from
+  :class:`~isaaclab.renderers.BaseRenderer`. Use :func:`warp.to_torch` on
+  ``camera.data.output`` entries if Torch tensor operations are required.
+
+
 0.1.2 (2026-03-23)
 ~~~~~~~~~~~~~~~~~~

diff --git a/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer.py b/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer.py
index 0b52324b0238..778185c1dfdf 100644
--- a/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer.py
+++ b/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer.py
@@ -100,13 +100,13 @@ def supported_output_types(self) -> dict[RenderBufferKind, RenderBufferSpec]:
         """Publish the per-output layout this OVRTX backend writes.
         See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.supported_output_types`."""
         return {
-            RenderBufferKind.RGBA: RenderBufferSpec(4, torch.uint8),
-            RenderBufferKind.RGB: RenderBufferSpec(3, torch.uint8),
-            RenderBufferKind.ALBEDO: RenderBufferSpec(4, torch.uint8),
-            RenderBufferKind.SEMANTIC_SEGMENTATION: RenderBufferSpec(4, torch.uint8),
-            RenderBufferKind.DEPTH: RenderBufferSpec(1, torch.float32),
-            RenderBufferKind.DISTANCE_TO_IMAGE_PLANE: RenderBufferSpec(1, torch.float32),
-            RenderBufferKind.DISTANCE_TO_CAMERA: RenderBufferSpec(1, torch.float32),
+            RenderBufferKind.RGBA: RenderBufferSpec(4, wp.uint8),
+            RenderBufferKind.RGB: RenderBufferSpec(3, wp.uint8),
+            RenderBufferKind.ALBEDO: RenderBufferSpec(4, wp.uint8),
+            RenderBufferKind.SEMANTIC_SEGMENTATION: RenderBufferSpec(4, wp.uint8),
+            RenderBufferKind.DEPTH: RenderBufferSpec(1, wp.float32),
+            RenderBufferKind.DISTANCE_TO_IMAGE_PLANE: RenderBufferSpec(1, wp.float32),
+            RenderBufferKind.DISTANCE_TO_CAMERA: RenderBufferSpec(1, wp.float32),
         }

     def __init__(self, cfg: OVRTXRendererCfg):
@@ -330,44 +330,21 @@ def create_render_data(self, sensor: SensorBase) -> OVRTXRenderData:
         self.initialize(sensor)
         return OVRTXRenderData(sensor, DEVICE)

-    # Map torch dtypes to their warp counterparts for zero-copy wrapping.
-    _TORCH_TO_WP_DTYPE: dict[torch.dtype, Any] = {
-        torch.uint8: wp.uint8,
-        torch.float32: wp.float32,
-        torch.int32: wp.int32,
-    }
+    _SUPPORTED_OUTPUT_DTYPES: set[Any] = {wp.uint8, wp.float32, wp.int32}

-    def set_outputs(self, render_data: OVRTXRenderData, output_data: dict[str, torch.Tensor]) -> None:
-        """Wrap caller-owned torch output tensors as zero-copy warp arrays.
-
-        Aliased views over a contiguous sibling (e.g. ``rgb`` over ``rgba``) are
-        skipped; any other non-contiguous tensor raises ``ValueError``.
+    def set_outputs(self, render_data: OVRTXRenderData, output_data: dict[str, wp.array]) -> None:
+        """Store caller-owned Warp output arrays.

         See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.set_outputs`.
         """
         render_data.warp_buffers = {}
-        for name, tensor in output_data.items():
-            if not tensor.is_contiguous():
-                if tensor.data_ptr() in {t.data_ptr() for t in output_data.values() if t.is_contiguous()}:
-                    continue
-                raise ValueError(
-                    f"OVRTXRenderer.set_outputs: output '{name}' is non-contiguous and is not an"
-                    " alias of a contiguous output tensor; cannot wrap as a zero-copy warp array."
-                )
-            wp_dtype = self._TORCH_TO_WP_DTYPE.get(tensor.dtype)
-            if wp_dtype is None:
+        for name, output_array in output_data.items():
+            if output_array.dtype not in self._SUPPORTED_OUTPUT_DTYPES:
                 raise ValueError(
-                    f"OVRTXRenderer.set_outputs: unsupported torch dtype {tensor.dtype} for output"
-                    f" '{name}'. Add it to OVRTXRenderer._TORCH_TO_WP_DTYPE."
+                    f"OVRTXRenderer.set_outputs: unsupported Warp dtype {output_array.dtype} for output"
+                    f" '{name}'. Add it to OVRTXRenderer._SUPPORTED_OUTPUT_DTYPES."
                 )
-            torch_array = wp.from_torch(tensor)
-            render_data.warp_buffers[name] = wp.array(
-                ptr=torch_array.ptr,
-                dtype=wp_dtype,
-                shape=tuple(tensor.shape),
-                device=torch_array.device,
-                copy=False,
-            )
+            render_data.warp_buffers[name] = output_array

     def update_transforms(self) -> None:
         """Sync physics objects to OVRTX."""
@@ -399,15 +376,25 @@ def update_transforms(self) -> None:
     def update_camera(
         self,
         render_data: OVRTXRenderData,
-        positions: torch.Tensor,
-        orientations: torch.Tensor,
-        intrinsics: torch.Tensor,
+        positions: wp.array,
+        orientations: wp.array,
+        intrinsics: wp.array,
     ) -> None:
         """Update camera transforms in OVRTX binding."""
         num_envs = positions.shape[0]
-        camera_quats_opengl = convert_camera_frame_orientation_convention(orientations, origin="world", target="opengl")
-        camera_positions_wp = wp.from_torch(positions.contiguous(), dtype=wp.vec3)
-        camera_orientations_wp = wp.from_torch(camera_quats_opengl.contiguous(), dtype=wp.quatf)
+        camera_quats_opengl = wp.from_torch(
+            convert_camera_frame_orientation_convention(wp.to_torch(orientations), origin="world", target="opengl")
+        )
+        camera_positions_wp = wp.array(
+            ptr=positions.ptr, dtype=wp.vec3, shape=(num_envs,), device=positions.device, copy=False
+        )
+        camera_orientations_wp = wp.array(
+            ptr=camera_quats_opengl.ptr,
+            dtype=wp.quatf,
+            shape=(num_envs,),
+            device=camera_quats_opengl.device,
+            copy=False,
+        )
         camera_transforms = wp.zeros(num_envs, dtype=wp.mat44d, device=DEVICE)
         wp.launch(
             kernel=create_camera_transforms_kernel,
@@ -425,10 +412,10 @@ def read_output(
         render_data: OVRTXRenderData,
         camera_data: CameraData,
     ) -> None:
-        """No-op: outputs already live in the caller's torch storage.
+        """No-op: outputs already live in the caller's Warp storage.

-        :meth:`set_outputs` wraps each ``camera_data.output`` tensor as a
-        zero-copy warp array stored in ``render_data.warp_buffers``, and
+        :meth:`set_outputs` stores each ``camera_data.output`` array in
+        ``render_data.warp_buffers``, and
         :meth:`render` writes the rendered tiles directly into those warp
         arrays. There is therefore nothing to copy here.
@@ -505,6 +492,8 @@ def _process_render_frame(self, render_data: OVRTXRenderData, frame, output_buff
             with frame.render_vars[rgb_render_var].map(device=Device.CUDA) as mapping:
                 tiled_data = wp.from_dlpack(mapping.tensor)
             self._extract_rgba_tiles(render_data, tiled_data, output_buffers, "rgba", suffix="rgb")
+            if "rgb" in output_buffers:
+                wp.to_torch(output_buffers["rgb"]).copy_(wp.to_torch(output_buffers["rgba"])[..., :3])

         for depth_var in ["DistanceToImagePlaneSD", "DepthSD"]:
             if depth_var not in frame.render_vars:
diff --git a/source/isaaclab_ov/test/test_ovrtx_renderer_contract.py b/source/isaaclab_ov/test/test_ovrtx_renderer_contract.py
index b6c590a54a54..35bf2db178e3 100644
--- a/source/isaaclab_ov/test/test_ovrtx_renderer_contract.py
+++ b/source/isaaclab_ov/test/test_ovrtx_renderer_contract.py
@@ -6,7 +6,7 @@
 """Tests for the OVRTX renderer output contract."""

 import pytest
-import torch
+import warp as wp

 pytest.importorskip("isaaclab_ov")
 pytest.importorskip("ovrtx")
@@ -58,19 +58,15 @@ def test_ovrtx_supported_output_types_key_set():
         RenderBufferKind.DISTANCE_TO_IMAGE_PLANE,
         RenderBufferKind.DISTANCE_TO_CAMERA,
     }
-    assert specs[RenderBufferKind.RGBA] == RenderBufferSpec(4, torch.uint8)
-    assert specs[RenderBufferKind.DEPTH] == RenderBufferSpec(1, torch.float32)
+    assert specs[RenderBufferKind.RGBA] == RenderBufferSpec(4, wp.uint8)
+    assert specs[RenderBufferKind.DEPTH] == RenderBufferSpec(1, wp.float32)


-def test_ovrtx_set_outputs_wraps_caller_torch_zero_copy():
-    """OVRTXRenderer.set_outputs publishes warp views over the caller's torch storage."""
-    import warp as wp
-
+def test_ovrtx_set_outputs_stores_caller_warp_arrays():
+    """OVRTXRenderer.set_outputs stores caller-owned Warp arrays."""
     renderer = OVRTXRenderer(OVRTXRendererCfg())

-    if not torch.cuda.is_available():
-        pytest.skip("OVRTX zero-copy wrapping requires a CUDA device")
-    device = "cuda"
+    device = "cpu"

     cfg = _make_camera_cfg(["rgb", "rgba", "depth"])
     data = CameraData.allocate(
@@ -85,9 +81,9 @@
     renderer.set_outputs(render_data, data.output)

     assert set(render_data.warp_buffers.keys()) >= {"rgba", "depth"}
-    assert render_data.warp_buffers["rgba"].ptr == wp.from_torch(data.output["rgba"]).ptr
-    assert render_data.warp_buffers["depth"].ptr == wp.from_torch(data.output["depth"]).ptr
-    assert "rgb" not in render_data.warp_buffers
+    assert render_data.warp_buffers["rgba"].ptr == data.output["rgba"].ptr
+    assert render_data.warp_buffers["depth"].ptr == data.output["depth"].ptr
+    assert render_data.warp_buffers["rgb"].ptr == data.output["rgb"].ptr


 def test_ovrtx_read_output_is_a_no_op_after_consolidation():
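Both backends now realize ``rgb`` by copying the first three channels out of ``rgba``
rather than aliasing its storage. The operation by itself (buffers assumed
preallocated with matching leading dimensions):

.. code-block:: python

   import warp as wp

   rgb = wp.to_torch(output_buffers["rgb"])    # (N, H, W, 3) uint8 view
   rgba = wp.to_torch(output_buffers["rgba"])  # (N, H, W, 4) uint8 view
   rgb.copy_(rgba[..., :3])                    # in-place channel-slice copy
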
diff --git a/source/isaaclab_physx/config/extension.toml b/source/isaaclab_physx/config/extension.toml
index 3c0711934431..5c63b0e6322f 100644
--- a/source/isaaclab_physx/config/extension.toml
+++ b/source/isaaclab_physx/config/extension.toml
@@ -1,7 +1,7 @@
 [package]

 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.5.28"
+version = "0.5.29"

 # Description
 title = "PhysX simulation interfaces for IsaacLab core package"
diff --git a/source/isaaclab_physx/docs/CHANGELOG.rst b/source/isaaclab_physx/docs/CHANGELOG.rst
index 5f284dc3c891..abfa0454f3ba 100644
--- a/source/isaaclab_physx/docs/CHANGELOG.rst
+++ b/source/isaaclab_physx/docs/CHANGELOG.rst
@@ -1,6 +1,20 @@
 Changelog
 ---------

+0.5.29 (2026-04-30)
+~~~~~~~~~~~~~~~~~~~
+
+Changed
+^^^^^^^
+
+* Changed :class:`~isaaclab_physx.renderers.IsaacRtxRenderer` to consume
+  ``wp.array`` camera output buffers and camera state arrays from
+  :class:`~isaaclab.renderers.BaseRenderer`. Use :func:`warp.to_torch` on
+  ``camera.data.output`` entries if Torch tensor operations are required.
+* Updated PhysX PVA debug visualization to convert camera-convention
+  orientation outputs with :func:`warp.to_torch`.
+
+
 0.5.28 (2026-04-29)
 ~~~~~~~~~~~~~~~~~~~

diff --git a/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py b/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py
index 246ab5a9fe33..b82db50c55e8 100644
--- a/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py
+++ b/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py
@@ -73,7 +73,7 @@ class IsaacRtxRenderData:
     annotators: dict[str, Any]
     render_product_paths: list[str]

-    output_data: dict[str, torch.Tensor] | None = None
+    output_data: dict[str, wp.array] | None = None
     sensor: SensorBase | None = None
     renderer_info: dict[str, Any] = field(default_factory=dict)

@@ -108,19 +108,19 @@ def supported_output_types(self) -> dict[RenderBufferKind, RenderBufferSpec]:
         specs: dict[RenderBufferKind, RenderBufferSpec] = {
             # Replicator's native layout for color output is rgba/uint8;
             # ``Camera`` aliases ``rgb`` as a view into ``rgba`` storage.
-            RenderBufferKind.RGBA: RenderBufferSpec(4, torch.uint8),
-            RenderBufferKind.RGB: RenderBufferSpec(3, torch.uint8),
-            RenderBufferKind.DEPTH: RenderBufferSpec(1, torch.float32),
-            RenderBufferKind.DISTANCE_TO_IMAGE_PLANE: RenderBufferSpec(1, torch.float32),
-            RenderBufferKind.DISTANCE_TO_CAMERA: RenderBufferSpec(1, torch.float32),
-            RenderBufferKind.NORMALS: RenderBufferSpec(3, torch.float32),
-            RenderBufferKind.MOTION_VECTORS: RenderBufferSpec(2, torch.float32),
+            RenderBufferKind.RGBA: RenderBufferSpec(4, wp.uint8),
+            RenderBufferKind.RGB: RenderBufferSpec(3, wp.uint8),
+            RenderBufferKind.DEPTH: RenderBufferSpec(1, wp.float32),
+            RenderBufferKind.DISTANCE_TO_IMAGE_PLANE: RenderBufferSpec(1, wp.float32),
+            RenderBufferKind.DISTANCE_TO_CAMERA: RenderBufferSpec(1, wp.float32),
+            RenderBufferKind.NORMALS: RenderBufferSpec(3, wp.float32),
+            RenderBufferKind.MOTION_VECTORS: RenderBufferSpec(2, wp.float32),
         }

         if sim_major >= 6:
-            specs[RenderBufferKind.ALBEDO] = RenderBufferSpec(4, torch.uint8)
+            specs[RenderBufferKind.ALBEDO] = RenderBufferSpec(4, wp.uint8)
             for shading_type in SIMPLE_SHADING_MODES:
-                specs[RenderBufferKind(shading_type)] = RenderBufferSpec(3, torch.uint8)
+                specs[RenderBufferKind(shading_type)] = RenderBufferSpec(3, wp.uint8)

         seg_specs = (
             (RenderBufferKind.SEMANTIC_SEGMENTATION, self.cfg.colorize_semantic_segmentation),
@@ -128,7 +128,7 @@ def supported_output_types(self) -> dict[RenderBufferKind, RenderBufferSpec]:
             (RenderBufferKind.INSTANCE_ID_SEGMENTATION_FAST, self.cfg.colorize_instance_id_segmentation),
         )
         for name, colorize in seg_specs:
-            specs[name] = RenderBufferSpec(4, torch.uint8) if colorize else RenderBufferSpec(1, torch.int32)
+            specs[name] = RenderBufferSpec(4, wp.uint8) if colorize else RenderBufferSpec(1, wp.int32)

         return specs

@@ -283,7 +283,7 @@ def _resolve_simple_shading_mode(self, sensor: SensorBase) -> int | None:
             )
             return SIMPLE_SHADING_MODES[requested[0]]

-    def set_outputs(self, render_data: IsaacRtxRenderData, output_data: dict[str, torch.Tensor]):
+    def set_outputs(self, render_data: IsaacRtxRenderData, output_data: dict[str, wp.array]):
         """Store reference to output buffers for writing during render.
         See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.set_outputs`."""
         render_data.output_data = output_data

@@ -296,9 +296,9 @@ def update_transforms(self) -> None:
     def update_camera(
         self,
         render_data: IsaacRtxRenderData,
-        positions: torch.Tensor,
-        orientations: torch.Tensor,
-        intrinsics: torch.Tensor,
+        positions: wp.array,
+        orientations: wp.array,
+        intrinsics: wp.array,
     ):
         """No-op for Replicator - uses USD camera prims directly.

         See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.update_camera`."""
@@ -374,7 +374,7 @@ def tiling_grid_shape():
                 dim=(view_count, cfg.height, cfg.width),
                 inputs=[
                     tiled_data_buffer.flatten(),
-                    wp.from_torch(output_data[data_type]),
+                    output_data[data_type],
                     *list(output_data[data_type].shape[1:]),
                     num_tiles_x,
                 ],
@@ -383,21 +383,23 @@ def tiling_grid_shape():

             # alias rgb as first 3 channels of rgba
             if data_type == "rgba" and "rgb" in cfg.data_types:
-                output_data["rgb"] = output_data["rgba"][..., :3]
+                wp.to_torch(output_data["rgb"]).copy_(wp.to_torch(output_data["rgba"])[..., :3])

             # NOTE: The `distance_to_camera` annotator returns the distance to the camera optical center.
             # However, the replicator depth clipping is applied w.r.t. to the image plane which may result
             # in values larger than the clipping range in the output. We apply an additional clipping to
             # ensure values are within the clipping range for all the annotators.
             if data_type == "distance_to_camera":
-                output_data[data_type][output_data[data_type] > cfg.spawn.clipping_range[1]] = torch.inf
+                output_tensor = wp.to_torch(output_data[data_type])
+                output_tensor[output_tensor > cfg.spawn.clipping_range[1]] = torch.inf

             # apply defined clipping behavior
             if (
                 data_type in ("distance_to_camera", "distance_to_image_plane", "depth")
                 and self.cfg.depth_clipping_behavior != "none"
             ):
-                output_data[data_type][torch.isinf(output_data[data_type])] = (
+                output_tensor = wp.to_torch(output_data[data_type])
+                output_tensor[torch.isinf(output_tensor)] = (
                     0.0 if self.cfg.depth_clipping_behavior == "zero" else cfg.spawn.clipping_range[1]
                 )
diff --git a/source/isaaclab_physx/isaaclab_physx/sensors/pva/pva.py b/source/isaaclab_physx/isaaclab_physx/sensors/pva/pva.py
index 32f4549e416d..dde2a536cfc1 100644
--- a/source/isaaclab_physx/isaaclab_physx/sensors/pva/pva.py
+++ b/source/isaaclab_physx/isaaclab_physx/sensors/pva/pva.py
@@ -296,6 +296,8 @@ def _debug_vis_callback(self, event):
                 device=self._device,
             )
         )
-        quat_w = math_utils.convert_camera_frame_orientation_convention(quat_opengl, "opengl", "world")
+        quat_w = wp.to_torch(
+            math_utils.convert_camera_frame_orientation_convention(wp.from_torch(quat_opengl), "opengl", "world")
+        )
         # display markers
         self.acceleration_visualizer.visualize(base_pos_w, quat_w, arrow_scale)
diff --git a/source/isaaclab_tasks/config/extension.toml b/source/isaaclab_tasks/config/extension.toml
index 2e7ee00764d7..4fa2321c276f 100644
--- a/source/isaaclab_tasks/config/extension.toml
+++ b/source/isaaclab_tasks/config/extension.toml
@@ -1,7 +1,7 @@
 [package]

 # Note: Semantic Versioning is used: https://semver.org/
-version = "1.5.30"
+version = "1.5.33"

 # Description
 title = "Isaac Lab Environments"
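The clipping branches above work through torch views, so the in-place masked writes
land directly in the Warp buffers. Condensed sketch (``clip_max`` stands in for
``cfg.spawn.clipping_range[1]``):

.. code-block:: python

   import torch
   import warp as wp

   depth = wp.to_torch(output_data["distance_to_camera"])  # view, not a copy
   depth[depth > clip_max] = torch.inf                     # re-flag out-of-range hits
   depth[torch.isinf(depth)] = clip_max                    # "max" clipping behavior
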
diff --git a/source/isaaclab_tasks/docs/CHANGELOG.rst b/source/isaaclab_tasks/docs/CHANGELOG.rst
index 2bb26e429744..1377ea7d0f4b 100644
--- a/source/isaaclab_tasks/docs/CHANGELOG.rst
+++ b/source/isaaclab_tasks/docs/CHANGELOG.rst
@@ -1,6 +1,28 @@
 Changelog
 ---------

+1.5.33 (2026-04-30)
+~~~~~~~~~~~~~~~~~~~
+
+Fixed
+^^^^^
+
+* Fixed dexsuite ``vision_camera`` observation and the Franka stack
+  ``stack_ik_rel_blueprint`` ``image()`` helper to lift ``wp.array`` camera
+  outputs and intrinsics to torch tensors via :func:`warp.to_torch` before
+  applying Torch tensor operations.
+
+
+1.5.32 (2026-04-30)
+~~~~~~~~~~~~~~~~~~~
+
+Changed
+^^^^^^^
+
+* Updated cartpole camera environments to consume ``wp.array`` camera outputs
+  via :func:`warp.to_torch` before applying Torch observation processing.
+
+
 1.5.31 (2026-04-29)
 ~~~~~~~~~~~~~~~~~~~

diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole/cartpole_camera_env.py b/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole/cartpole_camera_env.py
index 807c0cc09d91..97bb505f6832 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole/cartpole_camera_env.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole/cartpole_camera_env.py
@@ -10,6 +10,7 @@
 from typing import TYPE_CHECKING

 import torch
+import warp as wp

 import isaaclab.sim as sim_utils
 from isaaclab.assets import Articulation
@@ -96,31 +97,32 @@ def _apply_action(self) -> None:

     def _get_observations(self) -> dict:
         data_type = self.cfg.tiled_camera.data_types[0]
+        camera_output = self._tiled_camera.data.output
         if "rgb" in self.cfg.tiled_camera.data_types:
-            camera_data = self._tiled_camera.data.output[data_type] / 255.0
+            camera_data = wp.to_torch(camera_output[data_type]) / 255.0
             # normalize the camera data for better training results
             mean_tensor = torch.mean(camera_data, dim=(1, 2), keepdim=True)
             camera_data -= mean_tensor
         elif "albedo" in self.cfg.tiled_camera.data_types:
-            camera_data = self._tiled_camera.data.output[data_type][..., :3] / 255.0
+            camera_data = wp.to_torch(camera_output[data_type])[..., :3] / 255.0
             # normalize the camera data for better training results
             mean_tensor = torch.mean(camera_data, dim=(1, 2), keepdim=True)
             camera_data -= mean_tensor
         elif data_type in SIMPLE_SHADING_TYPES:
-            camera_data = self._tiled_camera.data.output[data_type] / 255.0
+            camera_data = wp.to_torch(camera_output[data_type]) / 255.0
             # normalize the camera data for better training results
             mean_tensor = torch.mean(camera_data, dim=(1, 2), keepdim=True)
             camera_data -= mean_tensor
         elif "depth" in self.cfg.tiled_camera.data_types:
-            camera_data = self._tiled_camera.data.output[data_type]
+            camera_data = wp.to_torch(camera_output[data_type])
             camera_data[camera_data == float("inf")] = 0
         elif "semantic_segmentation" in self.cfg.tiled_camera.data_types:
-            camera_data = self._tiled_camera.data.output[data_type]
+            camera_data = wp.to_torch(camera_output[data_type])
         observations = {"policy": camera_data.clone()}

         if self.cfg.write_image_to_file:
-            save_images_to_file(self._tiled_camera.data.output[data_type] / 255.0, f"cartpole_{data_type}.png")
+            save_images_to_file(wp.to_torch(camera_output[data_type]) / 255.0, f"cartpole_{data_type}.png")

         return observations
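The observation path above reduces to three steps for any RGB policy input. Minimal
sketch (assuming a ``TiledCamera`` handle named ``camera``):

.. code-block:: python

   import torch
   import warp as wp

   rgb = wp.to_torch(camera.data.output["rgb"]) / 255.0  # (N, H, W, 3) in [0, 1]
   rgb -= torch.mean(rgb, dim=(1, 2), keepdim=True)      # per-image mean removal
   obs = {"policy": rgb.clone()}
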
diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole_showcase/cartpole_camera/cartpole_camera_env.py b/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole_showcase/cartpole_camera/cartpole_camera_env.py
index d06d5c595c69..1ce018505c27 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole_showcase/cartpole_camera/cartpole_camera_env.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole_showcase/cartpole_camera/cartpole_camera_env.py
@@ -7,6 +7,7 @@

 import gymnasium as gym
 import torch
+import warp as wp

 from isaaclab_tasks.direct.cartpole.cartpole_camera_env import CartpoleCameraEnv
 from isaaclab_tasks.direct.cartpole.cartpole_camera_env_cfg import CartpoleRGBCameraEnvCfg
@@ -45,13 +46,14 @@ def _apply_action(self) -> None:
     def _get_observations(self) -> dict:
         # get camera data
         data_type = "rgb" if "rgb" in self.cfg.tiled_camera.data_types else "depth"
+        camera_output = self._tiled_camera.data.output
         if "rgb" in self.cfg.tiled_camera.data_types:
-            camera_data = self._tiled_camera.data.output[data_type] / 255.0
+            camera_data = wp.to_torch(camera_output[data_type]) / 255.0
             # normalize the camera data for better training results
             mean_tensor = torch.mean(camera_data, dim=(1, 2), keepdim=True)
             camera_data -= mean_tensor
         elif "depth" in self.cfg.tiled_camera.data_types:
-            camera_data = self._tiled_camera.data.output[data_type]
+            camera_data = wp.to_torch(camera_output[data_type])
             camera_data[camera_data == float("inf")] = 0

         # fundamental spaces
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/dexsuite/mdp/observations.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/dexsuite/mdp/observations.py
index fe666a7ecffe..bec3abec7062 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/dexsuite/mdp/observations.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/dexsuite/mdp/observations.py
@@ -8,6 +8,7 @@
 from typing import TYPE_CHECKING

 import torch
+import warp as wp

 from isaaclab.managers import ManagerTermBase, SceneEntityCfg
 from isaaclab.utils.math import quat_apply, quat_apply_inverse, quat_inv, quat_mul, subtract_frame_transforms
@@ -215,7 +216,7 @@ def __init__(self, cfg, env: ManagerBasedRLEnv):
     def __call__(
         self, env: ManagerBasedRLEnv, sensor_cfg: SceneEntityCfg, normalize: bool = True
     ) -> torch.Tensor:
         # obtain the input image
-        images = self.sensor.data.output[self.sensor_type]
+        images = wp.to_torch(self.sensor.data.output[self.sensor_type])
         torch.nan_to_num_(images, nan=1e6)
         if normalize:
             images = self.norm_fn(images)
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/stack/config/franka/stack_ik_rel_blueprint_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/stack/config/franka/stack_ik_rel_blueprint_env_cfg.py
index e123a25604bb..d874e3a396a0 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/stack/config/franka/stack_ik_rel_blueprint_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/stack/config/franka/stack_ik_rel_blueprint_env_cfg.py
@@ -9,6 +9,7 @@
 from typing import TYPE_CHECKING

 import torch
+import warp as wp
 from torchvision.utils import save_image

 import isaaclab.sim as sim_utils
@@ -65,12 +66,12 @@ def image(
     # extract the used quantities (to enable type-hinting)
     sensor: Camera | RayCasterCamera = env.scene.sensors[sensor_cfg.name]

-    # obtain the input image
-    images = sensor.data.output[data_type]
+    # obtain the input image; camera outputs are wp.array, lift to torch for tensor ops
+    images = wp.to_torch(sensor.data.output[data_type])

     # depth image conversion
     if (data_type == "distance_to_camera") and convert_perspective_to_orthogonal:
-        images = math_utils.orthogonalize_perspective_depth(images, sensor.data.intrinsic_matrices)
+        images = math_utils.orthogonalize_perspective_depth(images, wp.to_torch(sensor.data.intrinsic_matrices))

     # rgb/depth image normalization
     if normalize:
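The ``image()`` helper above lifts both the image and the intrinsics before calling
the torch-based math utility. Condensed (``sensor`` assumed to be a depth-producing
camera handle):

.. code-block:: python

   import warp as wp
   import isaaclab.utils.math as math_utils

   images = wp.to_torch(sensor.data.output["distance_to_camera"])
   intrinsics = wp.to_torch(sensor.data.intrinsic_matrices)
   ortho = math_utils.orthogonalize_perspective_depth(images, intrinsics)
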
diff --git a/source/isaaclab_visualizers/test/test_visualizer_cartpole_integration.py b/source/isaaclab_visualizers/test/test_visualizer_cartpole_integration.py
index eb5f7149432e..7821f5cbae5b 100644
--- a/source/isaaclab_visualizers/test/test_visualizer_cartpole_integration.py
+++ b/source/isaaclab_visualizers/test/test_visualizer_cartpole_integration.py
@@ -236,9 +236,17 @@ def _get_physics_cfg(backend_kind: str):
     raise ValueError(f"Unknown backend: {backend_kind!r}")


-def _assert_non_black_tensor(image_tensor: torch.Tensor, *, min_nonzero_pixels: int = 1) -> None:
-    """Assert camera-like tensor contains non-black pixels."""
-    assert isinstance(image_tensor, torch.Tensor), f"Expected torch.Tensor, got {type(image_tensor)!r}"
+def _assert_non_black_tensor(image: torch.Tensor | wp.array, *, min_nonzero_pixels: int = 1) -> None:
+    """Assert camera-like image contains non-black pixels.
+
+    Accepts both ``torch.Tensor`` and ``wp.array`` (camera ``data.output`` entries are
+    Warp arrays after the wp-array migration).
+    """
+    if isinstance(image, wp.array):
+        image_tensor = wp.to_torch(image)
+    else:
+        assert isinstance(image, torch.Tensor), f"Expected torch.Tensor or wp.array, got {type(image)!r}"
+        image_tensor = image
     assert image_tensor.numel() > 0, "Image tensor is empty."
     finite_tensor = torch.where(torch.isfinite(image_tensor), image_tensor, torch.zeros_like(image_tensor))
     if finite_tensor.dtype.is_floating_point:

From 29993831af39a50fdd01a26bb4c326162af1120d Mon Sep 17 00:00:00 2001
From: Daniela Hasenbring
Date: Fri, 1 May 2026 15:29:25 -0700
Subject: [PATCH 02/22] Minor fixes

---
 docs/source/how-to/save_camera_output.rst     | 15 ++++-------
 .../04_sensors/run_ray_caster_camera.py       | 13 +++++-----
 .../test_multi_mesh_ray_caster_camera.py      | 24 ++++++++----------
 .../renderers/newton_warp_renderer.py         | 25 ++-----------------
 4 files changed, 24 insertions(+), 53 deletions(-)

diff --git a/docs/source/how-to/save_camera_output.rst b/docs/source/how-to/save_camera_output.rst
index a731ec159c62..01b404efc0d8 100644
--- a/docs/source/how-to/save_camera_output.rst
+++ b/docs/source/how-to/save_camera_output.rst
@@ -58,19 +58,14 @@ PyTorch operations which allows faster computation.

 .. code-block:: python

-   import warp as wp
    from isaaclab.utils.math import transform_points, unproject_depth

-   # Camera ``data.output`` and pose fields are ``wp.array`` values; lift them to torch
-   # tensors before invoking the torch-based math utilities below.
-   depth = wp.to_torch(camera.data.output["distance_to_image_plane"])
-   intrinsics = wp.to_torch(camera.data.intrinsic_matrices)
-   pos_w = wp.to_torch(camera.data.pos_w)
-   quat_w_ros = wp.to_torch(camera.data.quat_w_ros)
-
-   # Pointcloud in world frame
-   points_3d_cam = unproject_depth(depth, intrinsics)
-   points_3d_world = transform_points(points_3d_cam, pos_w, quat_w_ros)
+   # Pointcloud in world frame
+   points_3d_cam = unproject_depth(
+       camera.data.output["distance_to_image_plane"], camera.data.intrinsic_matrices
+   )
+
+   points_3d_world = transform_points(points_3d_cam, camera.data.pos_w, camera.data.quat_w_ros)

 Alternately, we can use the :meth:`isaaclab.sensors.camera.utils.create_pointcloud_from_depth`
 function to create a point cloud from the depth image and transform it to the world frame.
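The dual-type dispatch introduced in ``_assert_non_black_tensor`` above is useful
wherever a helper must accept both array types during the migration window. In
isolation:

.. code-block:: python

   import torch
   import warp as wp

   def as_torch(image):
       # return a torch view for either a torch.Tensor or a wp.array input
       return wp.to_torch(image) if isinstance(image, wp.array) else image
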
diff --git a/scripts/tutorials/04_sensors/run_ray_caster_camera.py b/scripts/tutorials/04_sensors/run_ray_caster_camera.py
index b2664aaeb9b7..e19b0ebfa4df 100644
--- a/scripts/tutorials/04_sensors/run_ray_caster_camera.py
+++ b/scripts/tutorials/04_sensors/run_ray_caster_camera.py
@@ -38,7 +38,6 @@
 import os

 import torch
-import warp as wp

 import omni.replicator.core as rep

@@ -145,17 +144,17 @@ def run_simulator(sim: sim_utils.SimulationContext, scene_entities: dict):
                 rep_output["trigger_outputs"] = {"on_time": camera.frame[camera_index]}
                 rep_writer.write(rep_output)

-            # Pointcloud in world frame; convert wp.array camera outputs to torch for math ops
-            depth_torch = wp.to_torch(camera.data.output["distance_to_image_plane"])
-            intrinsics_torch = wp.to_torch(camera.data.intrinsic_matrices)
-            points_3d_cam = unproject_depth(depth_torch, intrinsics_torch)
+            # Pointcloud in world frame
+            points_3d_cam = unproject_depth(
+                camera.data.output["distance_to_image_plane"], camera.data.intrinsic_matrices
+            )

             # Check methods are valid
             im_height, im_width = camera.image_shape
             # -- project points to (u, v, d)
-            reproj_points = project_points(points_3d_cam, intrinsics_torch)
+            reproj_points = project_points(points_3d_cam, camera.data.intrinsic_matrices)
             reproj_depths = reproj_points[..., -1].view(-1, im_width, im_height).transpose_(1, 2)
-            sim_depths = depth_torch.squeeze(-1)
+            sim_depths = camera.data.output["distance_to_image_plane"].squeeze(-1)
             torch.testing.assert_close(reproj_depths, sim_depths)

diff --git a/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py b/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py
index 4c70d0a09488..8657c938c691 100644
--- a/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py
+++ b/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py
@@ -20,7 +20,6 @@
 import numpy as np
 import pytest
 import torch
-import warp as wp

 import omni.replicator.core as rep
 from pxr import Gf
@@ -116,7 +115,7 @@ def test_camera_init_offset(setup_simulation, convention, quat):
         camera.update(dt)

         # check that transform is set correctly
-        np.testing.assert_allclose(wp.to_torch(camera.data.pos_w)[0].cpu().numpy(), cam_cfg_offset.offset.pos)
+        np.testing.assert_allclose(camera.data.pos_w[0].cpu().numpy(), cam_cfg_offset.offset.pos)

         del camera

@@ -273,8 +272,8 @@ def test_camera_set_world_poses(setup_simulation):
     camera.set_world_poses(position.clone(), orientation.clone(), convention="world")

     # check if transform correctly set in output
-    torch.testing.assert_close(wp.to_torch(camera.data.pos_w), position)
-    torch.testing.assert_close(wp.to_torch(camera.data.quat_w_world), orientation)
+    torch.testing.assert_close(camera.data.pos_w, position)
+    torch.testing.assert_close(camera.data.quat_w_world, orientation)

     del camera

@@ -296,8 +295,8 @@ def test_camera_set_world_poses_from_view(setup_simulation):
     camera.set_world_poses_from_view(eyes.clone(), targets.clone())

     # check if transform correctly set in output
-    torch.testing.assert_close(wp.to_torch(camera.data.pos_w), eyes)
-    torch.testing.assert_close(wp.to_torch(camera.data.quat_w_ros), quat_ros_gt)
+    torch.testing.assert_close(camera.data.pos_w, eyes)
+    torch.testing.assert_close(camera.data.quat_w_ros, quat_ros_gt)

     del camera

@@ -326,7 +325,7 @@ def test_intrinsic_matrix(setup_simulation, height, width):
     # update camera
     camera.update(dt)
     # Check that matrix is correct
-    torch.testing.assert_close(rs_intrinsic_matrix, wp.to_torch(camera.data.intrinsic_matrices))
+    torch.testing.assert_close(rs_intrinsic_matrix, camera.data.intrinsic_matrices)
torch.testing.assert_close(rs_intrinsic_matrix, camera.data.intrinsic_matrices) del camera @@ -793,17 +792,16 @@ def test_image_mesh_ids_identifies_hit_mesh(setup_simulation): sim.reset() camera.update(dt) - mesh_ids_wp = camera.data.image_mesh_ids # shape (N, H, W, 1), wp.int16 - assert mesh_ids_wp is not None, "image_mesh_ids should not be None when update_mesh_ids=True" - assert mesh_ids_wp.shape[-1] == 1 - assert mesh_ids_wp.dtype == wp.int16 - mesh_ids = wp.to_torch(mesh_ids_wp) + mesh_ids = camera.data.image_mesh_ids # shape (N, H, W, 1), dtype torch.int16 + assert mesh_ids is not None, "image_mesh_ids should not be None when update_mesh_ids=True" + assert mesh_ids.shape[-1] == 1 + assert mesh_ids.dtype == torch.int16 # Identify actual hits via distance < inf. This relies on depth_clipping_behavior="none" # (the default), which leaves missed rays at the Warp-kernel fill value of inf. # Under "max" clipping, missed rays would be clamped to a finite max_distance, making # the inf comparison incorrect. - hit_mask = wp.to_torch(camera.data.output["distance_to_camera"])[0, :, :, 0] < float("inf") + hit_mask = camera.data.output["distance_to_camera"][0, :, :, 0] < float("inf") assert hit_mask.any(), "Expected at least some rays to hit the ground plane" # All hits against the single registered mesh must carry mesh_id=0 (first mesh index). diff --git a/source/isaaclab_newton/isaaclab_newton/renderers/newton_warp_renderer.py b/source/isaaclab_newton/isaaclab_newton/renderers/newton_warp_renderer.py index bd07b165a8b4..194fec321a07 100644 --- a/source/isaaclab_newton/isaaclab_newton/renderers/newton_warp_renderer.py +++ b/source/isaaclab_newton/isaaclab_newton/renderers/newton_warp_renderer.py @@ -89,7 +89,6 @@ def get_output(self, output_name: str) -> wp.array: return None def update(self, positions: wp.array, orientations: wp.array, intrinsics: wp.array): - intrinsics_torch = wp.to_torch(intrinsics) converted_orientations = wp.from_torch( convert_camera_frame_orientation_convention(wp.to_torch(orientations), origin="world", target="opengl") ) @@ -100,28 +99,12 @@ def update(self, positions: wp.array, orientations: wp.array, intrinsics: wp.arr wp.launch( RenderData._update_transforms, self.newton_sensor.model.world_count, - [ - wp.array( - ptr=positions.ptr, - dtype=wp.vec3f, - shape=(positions.shape[0],), - device=positions.device, - copy=False, - ), - wp.array( - ptr=converted_orientations.ptr, - dtype=wp.quatf, - shape=(converted_orientations.shape[0],), - device=converted_orientations.device, - copy=False, - ), - self.camera_transforms, - ], + [positions, converted_orientations, self.camera_transforms], device=self.newton_sensor.model.device, ) if self.camera_rays is None: - first_focal_length = intrinsics_torch[:, 1, 1][0:1] + first_focal_length = wp.to_torch(intrinsics)[:, 1, 1][0:1] fov_radians_all = 2.0 * torch.atan(self.height / (2.0 * first_focal_length)) self.camera_rays = self.newton_sensor.utils.compute_pinhole_camera_rays( @@ -255,10 +238,6 @@ def read_output(self, render_data: RenderData, camera_data: CameraData) -> None: output_data = camera_data.output[output_name] if image_data.ptr != output_data.ptr: wp.copy(output_data, image_data) - # Mirror rgba's first three channels into rgb when both are requested. This matches the - # legacy slice-aliased behavior so consumers requesting "rgb" still see populated pixels. 
- if "rgb" in camera_data.output and "rgba" in camera_data.output: - wp.to_torch(camera_data.output["rgb"]).copy_(wp.to_torch(camera_data.output["rgba"])[..., :3]) def cleanup(self, render_data: RenderData | None): """Release resources. No-op for Newton Warp. From b4ee7e99346c3b13a5b8eee15bf9c4fc1baad91e Mon Sep 17 00:00:00 2001 From: Daniela Hasenbring Date: Mon, 4 May 2026 10:38:53 -0700 Subject: [PATCH 03/22] Camera ArrayType fix --- source/isaaclab/isaaclab/sensors/camera/camera.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/isaaclab/isaaclab/sensors/camera/camera.py b/source/isaaclab/isaaclab/sensors/camera/camera.py index 1243788eed7e..5c84c7091919 100644 --- a/source/isaaclab/isaaclab/sensors/camera/camera.py +++ b/source/isaaclab/isaaclab/sensors/camera/camera.py @@ -564,7 +564,7 @@ def _update_poses(self, env_ids: Sequence[int] | wp.array): # notify renderer of updated poses (guarded in case called before initialization completes) if self._render_data is not None: self._renderer.update_camera( - self._render_data, self._data.pos_w, self._data.quat_w_world, self._data.intrinsic_matrices + self._render_data, self._data.pos_w.view(wp.vec3f), self._data.quat_w_world.view(wp.quatf), self._data.intrinsic_matrices ) """ From 1a83af84b6f19fe032380f690b3b3c18e92caf1a Mon Sep 17 00:00:00 2001 From: Daniela Hasenbring Date: Mon, 4 May 2026 10:43:39 -0700 Subject: [PATCH 04/22] Reverted extension version updates --- source/isaaclab/config/extension.toml | 2 +- source/isaaclab_contrib/config/extension.toml | 2 +- source/isaaclab_newton/config/extension.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/source/isaaclab/config/extension.toml b/source/isaaclab/config/extension.toml index f28030cdc980..729d39541f41 100644 --- a/source/isaaclab/config/extension.toml +++ b/source/isaaclab/config/extension.toml @@ -1,7 +1,7 @@ [package] # Note: Semantic Versioning is used: https://semver.org/ -version = "4.6.26" +version = "4.6.25" # Description title = "Isaac Lab framework for Robot Learning" diff --git a/source/isaaclab_contrib/config/extension.toml b/source/isaaclab_contrib/config/extension.toml index 0fba6e220442..7fd2b5d139e5 100644 --- a/source/isaaclab_contrib/config/extension.toml +++ b/source/isaaclab_contrib/config/extension.toml @@ -1,6 +1,6 @@ [package] # Semantic Versioning is used: https://semver.org/ -version = "0.3.1" +version = "0.3.0" # Description title = "Isaac Lab External Contributions" diff --git a/source/isaaclab_newton/config/extension.toml b/source/isaaclab_newton/config/extension.toml index 2e598bf86303..03b74794b012 100644 --- a/source/isaaclab_newton/config/extension.toml +++ b/source/isaaclab_newton/config/extension.toml @@ -1,7 +1,7 @@ [package] # Note: Semantic Versioning is used: https://semver.org/ -version = "0.5.27" +version = "0.5.25" # Description title = "Newton simulation interfaces for IsaacLab core package" From 6b71566d528dff4a0bde262ba69476789f168861 Mon Sep 17 00:00:00 2001 From: Daniela Hasenbring Date: Mon, 4 May 2026 10:50:34 -0700 Subject: [PATCH 05/22] Updated Changelogs --- .../replace-torch-with-warp.minor.rst | 48 +++++++++++++++++++ .../replace-torch-with-warp.minor.rst | 5 ++ source/isaaclab_contrib/docs/CHANGELOG.rst | 10 ---- .../replace-torch-with-warp.minor.rst | 19 ++++++++ .../replace-torch-with-warp.minor.rst | 11 +++++ source/isaaclab_ov/docs/CHANGELOG.rst | 16 ------- .../replace-torch-with-warp.minor.rst | 9 ++++ source/isaaclab_physx/docs/CHANGELOG.rst | 6 --- 
 .../replace-torch-with-warp.minor.rst         |  5 ++
 source/isaaclab_tasks/docs/CHANGELOG.rst      | 13 +----
 10 files changed, 99 insertions(+), 43 deletions(-)
 create mode 100644 source/isaaclab/changelog.d/replace-torch-with-warp.minor.rst
 create mode 100644 source/isaaclab_contrib/changelog.d/replace-torch-with-warp.minor.rst
 create mode 100644 source/isaaclab_newton/changelog.d/replace-torch-with-warp.minor.rst
 create mode 100644 source/isaaclab_ov/changelog.d/replace-torch-with-warp.minor.rst
 create mode 100644 source/isaaclab_physx/changelog.d/replace-torch-with-warp.minor.rst
 create mode 100644 source/isaaclab_tasks/changelog.d/replace-torch-with-warp.minor.rst

diff --git a/source/isaaclab/changelog.d/replace-torch-with-warp.minor.rst b/source/isaaclab/changelog.d/replace-torch-with-warp.minor.rst
new file mode 100644
index 000000000000..20cfc03c7792
--- /dev/null
+++ b/source/isaaclab/changelog.d/replace-torch-with-warp.minor.rst
@@ -0,0 +1,48 @@
+Fixed
+^^^^^
+
+* Fixed :func:`~isaaclab.envs.mdp.observations.image` (and the equivalent
+  ``image()`` helper in the Franka stack ``stack_ik_rel_blueprint`` config) so
+  that Torch tensor operations are applied via :func:`warp.to_torch` rather than
+  invoked directly on the new ``wp.array`` camera outputs.
+* Fixed downstream consumers (camera tutorials, ``demos/sensors/cameras.py``,
+  ``benchmarks/benchmark_cameras.py``, dexsuite ``vision_camera`` observation,
+  visualizer integration test, ``save_camera_output`` how-to, and the camera
+  overview docs) to lift ``wp.array`` camera fields to torch tensors via
+  :func:`warp.to_torch` before performing Torch operations.
+* Fixed the camera test suites to use ``wp.array`` dtypes
+  (``wp.uint8``, ``wp.float32``, ``wp.int32``) and :func:`warp.to_torch` views
+  in assertions on ``CameraData.output``, ``CameraData.intrinsic_matrices``,
+  ``CameraData.pos_w``/``quat_w_*``, and ``Camera.frame``.
+* Fixed :class:`~isaaclab_newton.renderers.NewtonWarpRenderer` to populate the ``rgb``
+  output buffer when both ``rgb`` and ``rgba`` are requested, restoring the
+  legacy "rgb mirrors rgba" behavior that broke when ``rgb`` and ``rgba``
+  became independent ``wp.array`` allocations.
+
+Changed
+^^^^^^^
+
+* Tightened :class:`~isaaclab.renderers.RenderBufferSpec` ``dtype`` annotation
+  from ``Any`` to ``type`` to document that all renderers must publish Warp
+  scalar dtype classes (e.g. ``warp.uint8``).
+* Removed the transitional ``torch.dtype → wp.dtype`` shim in
+  :meth:`~isaaclab.sensors.camera.CameraData.allocate` now that all in-tree
+  renderers publish ``wp`` dtypes via :class:`~isaaclab.renderers.RenderBufferSpec`.
+* Documented the transitional Torch input fallback on
+  :func:`~isaaclab.utils.math.convert_camera_frame_orientation_convention` and
+  consolidated the redundant ``wp ↔ torch`` round-trips in
+  :meth:`~isaaclab.sensors.camera.Camera.set_world_poses` and
+  :meth:`~isaaclab.sensors.camera.Camera.set_world_poses_from_view`.
+* Changed :class:`~isaaclab.sensors.camera.CameraData` array fields and
+  :attr:`~isaaclab.sensors.camera.CameraData.output` buffers to expose
+  ``wp.array`` values instead of :class:`torch.Tensor` values. Use
+  :func:`warp.to_torch` where Torch tensor operations are required.
+* Changed :class:`~isaaclab.sensors.camera.Camera` pose, intrinsic, and frame
+  array APIs to accept or return ``wp.array`` values instead of
+  :class:`torch.Tensor` values. Existing Torch inputs are still accepted during
+  the transition; prefer ``wp.array`` at public call sites.
+* Changed :class:`~isaaclab.renderers.BaseRenderer` output and camera-update
+  APIs to exchange ``wp.array`` buffers with camera sensors.
+* Changed :func:`~isaaclab.utils.math.convert_camera_frame_orientation_convention`
+  to accept and return ``wp.array`` quaternion arrays. Use :func:`warp.to_torch`
+  where Torch tensor operations are required.
diff --git a/source/isaaclab_contrib/changelog.d/replace-torch-with-warp.minor.rst b/source/isaaclab_contrib/changelog.d/replace-torch-with-warp.minor.rst
new file mode 100644
index 000000000000..a2748ea9f039
--- /dev/null
+++ b/source/isaaclab_contrib/changelog.d/replace-torch-with-warp.minor.rst
@@ -0,0 +1,5 @@
+Changed
+^^^^^^^
+
+* Updated :class:`~isaaclab_contrib.sensors.tacsl_sensor.VisuotactileSensor`
+  to consume ``wp.array`` camera depth outputs via :func:`warp.to_torch`.
diff --git a/source/isaaclab_contrib/docs/CHANGELOG.rst b/source/isaaclab_contrib/docs/CHANGELOG.rst
index a3b2b550a22e..721c705a5981 100644
--- a/source/isaaclab_contrib/docs/CHANGELOG.rst
+++ b/source/isaaclab_contrib/docs/CHANGELOG.rst
@@ -1,16 +1,6 @@
 Changelog
 ---------
 
-0.3.1 (2026-04-30)
-~~~~~~~~~~~~~~~~~~
-
-Changed
-^^^^^^^
-
-* Updated :class:`~isaaclab_contrib.sensors.tacsl_sensor.VisuotactileSensor`
-  to consume ``wp.array`` camera depth outputs via :func:`warp.to_torch`.
-
-
 0.3.0 (2026-02-13)
 ~~~~~~~~~~~~~~~~~~
 
diff --git a/source/isaaclab_newton/changelog.d/replace-torch-with-warp.minor.rst b/source/isaaclab_newton/changelog.d/replace-torch-with-warp.minor.rst
new file mode 100644
index 000000000000..e4f2fb943dad
--- /dev/null
+++ b/source/isaaclab_newton/changelog.d/replace-torch-with-warp.minor.rst
@@ -0,0 +1,19 @@
+Fixed
+^^^^^
+
+* Fixed :class:`~isaaclab_newton.renderers.NewtonWarpRenderer` to populate
+  the ``rgb`` output buffer when both ``rgb`` and ``rgba`` are requested.
+  Without the fix, ``CameraData.output["rgb"]`` was left zero-filled because
+  the renderer skipped ``rgb`` in :meth:`read_output` after ``rgb`` and
+  ``rgba`` became independent ``wp.array`` allocations.
+
+
+Changed
+^^^^^^^
+
+* Changed :class:`~isaaclab_newton.renderers.NewtonWarpRenderer` to consume
+  ``wp.array`` camera output buffers and camera state arrays from
+  :class:`~isaaclab.renderers.BaseRenderer`. Use :func:`warp.to_torch` on
+  ``camera.data.output`` entries if Torch tensor operations are required.
+* Updated Newton PVA debug visualization to convert camera-convention
+  orientation outputs with :func:`warp.to_torch`.
diff --git a/source/isaaclab_ov/changelog.d/replace-torch-with-warp.minor.rst b/source/isaaclab_ov/changelog.d/replace-torch-with-warp.minor.rst
new file mode 100644
index 000000000000..f25c45b55c20
--- /dev/null
+++ b/source/isaaclab_ov/changelog.d/replace-torch-with-warp.minor.rst
@@ -0,0 +1,7 @@
+Changed
+^^^^^^^
+
+* Changed :class:`~isaaclab_ov.renderers.OVRTXRenderer` to consume ``wp.array``
+  camera output buffers and camera state arrays from
+  :class:`~isaaclab.renderers.BaseRenderer`. Use :func:`warp.to_torch` on
+  ``camera.data.output`` entries if Torch tensor operations are required.
diff --git a/source/isaaclab_ov/docs/CHANGELOG.rst b/source/isaaclab_ov/docs/CHANGELOG.rst index b020cdf918dc..e8afeda30bda 100644 --- a/source/isaaclab_ov/docs/CHANGELOG.rst +++ b/source/isaaclab_ov/docs/CHANGELOG.rst @@ -1,22 +1,6 @@ Changelog --------- -0.1.3 (2026-04-30) -~~~~~~~~~~~~~~~~~~ - -Changed -^^^^^^^ - -* Changed :class:`~isaaclab_ov.renderers.OVRTXRenderer` to consume ``wp.array`` - camera output buffers and camera state arrays from - :class:`~isaaclab.renderers.BaseRenderer`. Use :func:`warp.to_torch` on - ``camera.data.output`` entries if Torch tensor operations are required. -* Changed :class:`~isaaclab_ov.renderers.OVRTXRenderer` to consume ``wp.array`` - camera output buffers and camera state arrays from - :class:`~isaaclab.renderers.BaseRenderer`. Use :func:`warp.to_torch` on - ``camera.data.output`` entries if Torch tensor operations are required. - - 0.1.3 (2026-04-30) ~~~~~~~~~~~~~~~~~~ diff --git a/source/isaaclab_physx/changelog.d/replace-torch-with-warp.minor.rst b/source/isaaclab_physx/changelog.d/replace-torch-with-warp.minor.rst new file mode 100644 index 000000000000..1135b6b422d1 --- /dev/null +++ b/source/isaaclab_physx/changelog.d/replace-torch-with-warp.minor.rst @@ -0,0 +1,9 @@ +Changed +^^^^^^^ + +* Changed :class:`~isaaclab_physx.renderers.IsaacRtxRenderer` to consume + ``wp.array`` camera output buffers and camera state arrays from + :class:`~isaaclab.renderers.BaseRenderer`. Use :func:`warp.to_torch` on + ``camera.data.output`` entries if Torch tensor operations are required. +* Updated PhysX PVA debug visualization to convert camera-convention + orientation outputs with :func:`warp.to_torch`. diff --git a/source/isaaclab_physx/docs/CHANGELOG.rst b/source/isaaclab_physx/docs/CHANGELOG.rst index 7f37919aaa72..14425eb74869 100644 --- a/source/isaaclab_physx/docs/CHANGELOG.rst +++ b/source/isaaclab_physx/docs/CHANGELOG.rst @@ -7,12 +7,6 @@ Changelog Changed ^^^^^^^ -* Changed :class:`~isaaclab_physx.renderers.IsaacRtxRenderer` to consume - ``wp.array`` camera output buffers and camera state arrays from - :class:`~isaaclab.renderers.BaseRenderer`. Use :func:`warp.to_torch` on - ``camera.data.output`` entries if Torch tensor operations are required. -* Updated PhysX PVA debug visualization to convert camera-convention - orientation outputs with :func:`warp.to_torch`. * Added fused :meth:`~isaaclab_physx.assets.Articulation.write_joint_state_to_sim_index` that writes joint position and velocity in a single kernel launch instead of two. * Cached ``.view(wp.float32)`` results in root pose/velocity writers and wrench diff --git a/source/isaaclab_tasks/changelog.d/replace-torch-with-warp.minor.rst b/source/isaaclab_tasks/changelog.d/replace-torch-with-warp.minor.rst new file mode 100644 index 000000000000..f2e21f59ed63 --- /dev/null +++ b/source/isaaclab_tasks/changelog.d/replace-torch-with-warp.minor.rst @@ -0,0 +1,5 @@ +Changed +^^^^^^^ + +* Updated cartpole camera environments to consume ``wp.array`` camera outputs + via :func:`warp.to_torch` before applying Torch observation processing. 
\ No newline at end of file diff --git a/source/isaaclab_tasks/docs/CHANGELOG.rst b/source/isaaclab_tasks/docs/CHANGELOG.rst index c9b61dd07237..2044f807afc5 100644 --- a/source/isaaclab_tasks/docs/CHANGELOG.rst +++ b/source/isaaclab_tasks/docs/CHANGELOG.rst @@ -22,14 +22,8 @@ Fixed 1.5.33 (2026-04-30) ~~~~~~~~~~~~~~~~~~~ -Fixed -^^^^^ - -* Fixed dexsuite ``vision_camera`` observation and the Franka stack - ``stack_ik_rel_blueprint`` ``image()`` helper to lift ``wp.array`` camera - outputs and intrinsics to torch tensors via :func:`warp.to_torch` before - applying Torch tensor operations. - +Changed +^^^^^^^ * Re-enabled ``add_base_mass`` randomization on H1 and Cassie in their rough-terrain configs (previously ``= None`` per the pre-existing biped @@ -40,9 +34,6 @@ Fixed lighter pelvis mass; ``(1.0, 1.25)`` recovers to 90% of the mass-rand-disabled baseline while retaining the domain-randomization benefit. -* Updated cartpole camera environments to consume ``wp.array`` camera outputs - via :func:`warp.to_torch` before applying Torch observation processing. - 1.5.32 (2026-04-30) From 7165ad4290f055aa0bf00d3efcba14d4540fe06a Mon Sep 17 00:00:00 2001 From: Daniela Hasenbring Date: Mon, 4 May 2026 10:52:24 -0700 Subject: [PATCH 06/22] Formatting Fix --- source/isaaclab/isaaclab/sensors/camera/camera.py | 5 ++++- .../changelog.d/replace-torch-with-warp.minor.rst | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/source/isaaclab/isaaclab/sensors/camera/camera.py b/source/isaaclab/isaaclab/sensors/camera/camera.py index ff0b06cb69cf..bf7edd764c80 100644 --- a/source/isaaclab/isaaclab/sensors/camera/camera.py +++ b/source/isaaclab/isaaclab/sensors/camera/camera.py @@ -618,7 +618,10 @@ def _update_poses(self, env_ids: Sequence[int] | wp.array): # notify renderer of updated poses (guarded in case called before initialization completes) if self._render_data is not None: self._renderer.update_camera( - self._render_data, self._data.pos_w.view(wp.vec3f), self._data.quat_w_world.view(wp.quatf), self._data.intrinsic_matrices + self._render_data, + self._data.pos_w.view(wp.vec3f), + self._data.quat_w_world.view(wp.quatf), + self._data.intrinsic_matrices, ) """ diff --git a/source/isaaclab_tasks/changelog.d/replace-torch-with-warp.minor.rst b/source/isaaclab_tasks/changelog.d/replace-torch-with-warp.minor.rst index f2e21f59ed63..c654219e3143 100644 --- a/source/isaaclab_tasks/changelog.d/replace-torch-with-warp.minor.rst +++ b/source/isaaclab_tasks/changelog.d/replace-torch-with-warp.minor.rst @@ -2,4 +2,4 @@ Changed ^^^^^^^ * Updated cartpole camera environments to consume ``wp.array`` camera outputs - via :func:`warp.to_torch` before applying Torch observation processing. \ No newline at end of file + via :func:`warp.to_torch` before applying Torch observation processing. From 972e885498e110ca1842c1713ba687d3c3bff847 Mon Sep 17 00:00:00 2001 From: Daniela Hasenbring Date: Mon, 4 May 2026 11:35:56 -0700 Subject: [PATCH 07/22] Test Fix --- source/isaaclab/test/utils/test_math.py | 34 ++++++++----------------- 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/source/isaaclab/test/utils/test_math.py b/source/isaaclab/test/utils/test_math.py index e6c262aa6c51..31f78780633d 100644 --- a/source/isaaclab/test/utils/test_math.py +++ b/source/isaaclab/test/utils/test_math.py @@ -3,10 +3,7 @@ # # SPDX-License-Identifier: BSD-3-Clause -"""Launch Isaac Sim Simulator first. - -This is only needed because of warp dependency. 
-""" +"""Launch Isaac Sim Simulator first.""" from isaaclab.app import AppLauncher @@ -24,7 +21,6 @@ import scipy.spatial.transform as scipy_tf import torch import torch.utils.benchmark as benchmark -import warp as wp import isaaclab.utils.math as math_utils @@ -334,51 +330,41 @@ def test_convention_converter(device): # from ROS torch.testing.assert_close( - wp.to_torch(math_utils.convert_camera_frame_orientation_convention(wp.from_torch(quat_ros), "ros", "opengl")), + math_utils.convert_camera_frame_orientation_convention(quat_ros, "ros", "opengl"), quat_opengl, ) torch.testing.assert_close( - wp.to_torch(math_utils.convert_camera_frame_orientation_convention(wp.from_torch(quat_ros), "ros", "world")), + math_utils.convert_camera_frame_orientation_convention(quat_ros, "ros", "world"), quat_world, ) torch.testing.assert_close( - wp.to_torch(math_utils.convert_camera_frame_orientation_convention(wp.from_torch(quat_ros), "ros", "ros")), + math_utils.convert_camera_frame_orientation_convention(quat_ros, "ros", "ros"), quat_ros, ) # from OpenGL torch.testing.assert_close( - wp.to_torch( - math_utils.convert_camera_frame_orientation_convention(wp.from_torch(quat_opengl), "opengl", "ros") - ), + math_utils.convert_camera_frame_orientation_convention(quat_opengl, "opengl", "ros"), quat_ros, ) torch.testing.assert_close( - wp.to_torch( - math_utils.convert_camera_frame_orientation_convention(wp.from_torch(quat_opengl), "opengl", "world") - ), + math_utils.convert_camera_frame_orientation_convention(quat_opengl, "opengl", "world"), quat_world, ) torch.testing.assert_close( - wp.to_torch( - math_utils.convert_camera_frame_orientation_convention(wp.from_torch(quat_opengl), "opengl", "opengl") - ), + math_utils.convert_camera_frame_orientation_convention(quat_opengl, "opengl", "opengl"), quat_opengl, ) # from World torch.testing.assert_close( - wp.to_torch(math_utils.convert_camera_frame_orientation_convention(wp.from_torch(quat_world), "world", "ros")), + math_utils.convert_camera_frame_orientation_convention(quat_world, "world", "ros"), quat_ros, ) torch.testing.assert_close( - wp.to_torch( - math_utils.convert_camera_frame_orientation_convention(wp.from_torch(quat_world), "world", "opengl") - ), + math_utils.convert_camera_frame_orientation_convention(quat_world, "world", "opengl"), quat_opengl, ) torch.testing.assert_close( - wp.to_torch( - math_utils.convert_camera_frame_orientation_convention(wp.from_torch(quat_world), "world", "world") - ), + math_utils.convert_camera_frame_orientation_convention(quat_world, "world", "world"), quat_world, ) From 5ba2c7ceaf7ae0373a08ae5bbf079c8178eb462d Mon Sep 17 00:00:00 2001 From: Daniela Hasenbring Date: Mon, 4 May 2026 13:21:01 -0700 Subject: [PATCH 08/22] Fixes --- scripts/benchmarks/benchmark_cameras.py | 5 +++-- scripts/tutorials/04_sensors/run_usd_camera.py | 9 +++++---- source/isaaclab/isaaclab/sensors/camera/camera_data.py | 9 +++++++-- source/isaaclab_tasks/test/rendering_test_utils.py | 5 ++++- 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/scripts/benchmarks/benchmark_cameras.py b/scripts/benchmarks/benchmark_cameras.py index ac34e6445645..6fc71e399171 100644 --- a/scripts/benchmarks/benchmark_cameras.py +++ b/scripts/benchmarks/benchmark_cameras.py @@ -269,6 +269,7 @@ patterns, ) from isaaclab.test.benchmark import BaseIsaacLabBenchmark, DictMeasurement, SingleMeasurement +from isaaclab.utils.array import convert_to_torch from isaaclab.utils.math import orthogonalize_perspective_depth, unproject_depth from isaaclab_tasks.utils 
import load_cfg_from_registry @@ -676,8 +677,8 @@ def run_simulator( # Only update the camera if it hasn't been updated as part of scene_entities.update ... camera.update(dt=sim.get_physics_dt()) - # camera outputs and intrinsics are wp.array; lift to torch for math + collection - intrinsics_torch = wp.to_torch(camera.data.intrinsic_matrices) + # Camera outputs are raw wp.array, while migrated metadata accessors expose .torch. + intrinsics_torch = convert_to_torch(camera.data.intrinsic_matrices) for data_type in data_types: data_label = f"{label}_{cam_idx}_{data_type}" diff --git a/scripts/tutorials/04_sensors/run_usd_camera.py b/scripts/tutorials/04_sensors/run_usd_camera.py index c9b9b3be60d8..ea6ff3004295 100644 --- a/scripts/tutorials/04_sensors/run_usd_camera.py +++ b/scripts/tutorials/04_sensors/run_usd_camera.py @@ -77,6 +77,7 @@ from isaaclab.sensors.camera import Camera, CameraCfg from isaaclab.sensors.camera.utils import create_pointcloud_from_depth from isaaclab.utils import convert_dict_to_backend +from isaaclab.utils.array import convert_to_torch def define_sensor() -> Camera: @@ -255,12 +256,12 @@ def run_simulator(sim: sim_utils.SimulationContext, scene_entities: dict): and args_cli.draw and "distance_to_image_plane" in camera.data.output.keys() ): - # Derive pointcloud from camera at camera_index; lift wp.array fields to torch + # Derive pointcloud from camera at camera_index; lift buffers to torch. pointcloud = create_pointcloud_from_depth( - intrinsic_matrix=wp.to_torch(camera.data.intrinsic_matrices)[camera_index], + intrinsic_matrix=convert_to_torch(camera.data.intrinsic_matrices)[camera_index], depth=wp.to_torch(camera.data.output["distance_to_image_plane"])[camera_index], - position=wp.to_torch(camera.data.pos_w)[camera_index], - orientation=wp.to_torch(camera.data.quat_w_ros)[camera_index], + position=convert_to_torch(camera.data.pos_w)[camera_index], + orientation=convert_to_torch(camera.data.quat_w_ros)[camera_index], device=sim.device, ) diff --git a/source/isaaclab/isaaclab/sensors/camera/camera_data.py b/source/isaaclab/isaaclab/sensors/camera/camera_data.py index 62f436175d2b..6d0b3b6f6e37 100644 --- a/source/isaaclab/isaaclab/sensors/camera/camera_data.py +++ b/source/isaaclab/isaaclab/sensors/camera/camera_data.py @@ -12,6 +12,7 @@ # Re-exported as part of the public isaaclab.sensors.camera API from isaaclab.renderers.output_contract import RenderBufferKind, RenderBufferSpec +from isaaclab.utils.array import convert_to_torch from isaaclab.utils.math import convert_camera_frame_orientation_convention __all__ = ["CameraData", "RenderBufferKind", "RenderBufferSpec"] @@ -159,7 +160,9 @@ def quat_w_ros(self) -> wp.array: Shape is (N, 4) where N is the number of sensors. """ return wp.from_torch( - convert_camera_frame_orientation_convention(wp.to_torch(self.quat_w_world), origin="world", target="ros") + convert_camera_frame_orientation_convention( + convert_to_torch(self.quat_w_world), origin="world", target="ros" + ) ) @property @@ -173,5 +176,7 @@ def quat_w_opengl(self) -> wp.array: Shape is (N, 4) where N is the number of sensors. 
""" return wp.from_torch( - convert_camera_frame_orientation_convention(wp.to_torch(self.quat_w_world), origin="world", target="opengl") + convert_camera_frame_orientation_convention( + convert_to_torch(self.quat_w_world), origin="world", target="opengl" + ) ) diff --git a/source/isaaclab_tasks/test/rendering_test_utils.py b/source/isaaclab_tasks/test/rendering_test_utils.py index c6c797fc937d..a768b4f7115c 100644 --- a/source/isaaclab_tasks/test/rendering_test_utils.py +++ b/source/isaaclab_tasks/test/rendering_test_utils.py @@ -14,6 +14,8 @@ import torch from PIL import Image, ImageChops +from isaaclab.utils.array import convert_to_torch + # Directory containing golden images. _GOLDEN_IMAGES_DIRECTORY = os.path.join(os.path.dirname(os.path.abspath(__file__)), "golden_images") @@ -573,7 +575,7 @@ def validate_camera_outputs( test_name: str, physics_backend: str, renderer: str, - camera_outputs: dict[str, torch.Tensor], + camera_outputs: dict[str, Any], max_different_pixels_percentage: float, comparison_scores: list[dict], ) -> None: @@ -587,6 +589,7 @@ def validate_camera_outputs( failed_data_types = {} for data_type, tensor in camera_outputs.items(): + tensor = convert_to_torch(tensor) condition = torch.logical_or(torch.isinf(tensor), torch.isnan(tensor)) corrected = torch.where(condition, torch.zeros_like(tensor), tensor) max_val = corrected.max() From 45ee9e4a0d60a890af1516e41d91ea24dc4eb6cd Mon Sep 17 00:00:00 2001 From: Daniela Hasenbring Date: Mon, 4 May 2026 14:03:56 -0700 Subject: [PATCH 09/22] Fixes --- .../sensors/tacsl_sensor/visuotactile_sensor.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/source/isaaclab_contrib/isaaclab_contrib/sensors/tacsl_sensor/visuotactile_sensor.py b/source/isaaclab_contrib/isaaclab_contrib/sensors/tacsl_sensor/visuotactile_sensor.py index dab39d44c2db..f2aa6036be2e 100644 --- a/source/isaaclab_contrib/isaaclab_contrib/sensors/tacsl_sensor/visuotactile_sensor.py +++ b/source/isaaclab_contrib/isaaclab_contrib/sensors/tacsl_sensor/visuotactile_sensor.py @@ -23,6 +23,7 @@ from isaaclab.sensors.camera import Camera from isaaclab.sensors.sensor_base import SensorBase from isaaclab.sim import SimulationContext +from isaaclab.utils.array import convert_to_torch from isaaclab.utils.math import quat_apply, quat_inv from .visuotactile_render import GelsightRender @@ -227,7 +228,7 @@ def get_initial_render(self) -> dict | None: # Store the initial nominal tactile data self._nominal_tactile = dict() for key, value in initial_render.items(): - self._nominal_tactile[key] = value.clone() + self._nominal_tactile[key] = convert_to_torch(value).clone() return self._nominal_tactile @@ -583,7 +584,7 @@ def _update_camera_tactile(self, env_ids: Sequence[int] | slice): depth_key = "depth" if depth_key: - self._data.tactile_depth_image[env_ids] = wp.to_torch(camera_data.output[depth_key])[env_ids].clone() + self._data.tactile_depth_image[env_ids] = convert_to_torch(camera_data.output[depth_key])[env_ids].clone() diff = self._nominal_tactile[depth_key][env_ids] - self._data.tactile_depth_image[env_ids] self._data.tactile_rgb_image[env_ids] = self._tactile_rgb_render.render(diff.squeeze(-1)) From 3bd488661e0acdaf2338784f2a541d77eba11226 Mon Sep 17 00:00:00 2001 From: Daniela Hasenbring Date: Mon, 4 May 2026 14:52:45 -0700 Subject: [PATCH 10/22] Fixes --- .../isaaclab/sensors/camera/camera.py | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/source/isaaclab/isaaclab/sensors/camera/camera.py 
b/source/isaaclab/isaaclab/sensors/camera/camera.py index bf7edd764c80..0c74fffa054d 100644 --- a/source/isaaclab/isaaclab/sensors/camera/camera.py +++ b/source/isaaclab/isaaclab/sensors/camera/camera.py @@ -312,25 +312,25 @@ def set_world_poses( # convert to backend warp array if positions is not None: if isinstance(positions, torch.Tensor): - positions = wp.from_torch(positions, dtype=wp.vec3, device=self._device) + positions = wp.from_torch(positions.to(device=self._device).contiguous(), dtype=wp.vec3) elif not isinstance(positions, wp.array): positions = wp.array(positions, dtype=wp.vec3, device=self._device) # convert rotation matrix from input convention to OpenGL if orientations is not None: if isinstance(orientations, torch.Tensor): - orientations = wp.from_torch(orientations, dtype=wp.quat, device=self._device) + orientations = wp.from_torch(orientations.to(device=self._device).contiguous(), dtype=wp.quat) elif not isinstance(orientations, wp.array): orientations = wp.array(orientations, dtype=wp.quat, device=self._device) orientations = wp.from_torch( convert_camera_frame_orientation_convention( wp.to_torch(orientations), origin=convention, target="opengl" - ) + ).contiguous() ) if env_ids is not None: if isinstance(env_ids, torch.Tensor): - env_ids = wp.from_torch(env_ids, dtype=wp.int32, device=self._device) + env_ids = wp.from_torch(env_ids.to(device=self._device, dtype=torch.int32).contiguous(), dtype=wp.int32) elif not isinstance(env_ids, wp.array): env_ids = wp.array(env_ids, dtype=wp.int32, device=self._device) @@ -356,12 +356,24 @@ def set_world_poses_from_view( # get up axis of current stage up_axis = UsdGeom.GetStageUpAxis(self.stage) # set camera poses using the view + if isinstance(eyes, torch.Tensor): + eyes_torch = eyes.to(device=self._device) + eyes = wp.from_torch(eyes_torch.contiguous(), dtype=wp.vec3) + else: + eyes_torch = wp.to_torch(eyes).to(device=self._device) + + if isinstance(targets, torch.Tensor): + targets_torch = targets.to(device=self._device) + else: + targets_torch = wp.to_torch(targets).to(device=self._device) + orientations = quat_from_matrix( - create_rotation_matrix_from_view(wp.to_torch(eyes), wp.to_torch(targets), up_axis, device=self._device) + create_rotation_matrix_from_view(eyes_torch, targets_torch, up_axis, device=self._device) ) + orientations = wp.from_torch(orientations.contiguous(), dtype=wp.quat) if isinstance(env_ids, torch.Tensor): - env_ids = wp.from_torch(env_ids, dtype=wp.int32, device=self._device) + env_ids = wp.from_torch(env_ids.to(device=self._device, dtype=torch.int32).contiguous(), dtype=wp.int32) elif not isinstance(env_ids, wp.array): env_ids = wp.array(env_ids, dtype=wp.int32, device=self._device) From e32bd5278695d047d173b75bc5e17a0ac7d718e1 Mon Sep 17 00:00:00 2001 From: Daniela Hasenbring Date: Mon, 4 May 2026 15:05:52 -0700 Subject: [PATCH 11/22] Formatting --- source/isaaclab/isaaclab/sensors/camera/camera.py | 1 + 1 file changed, 1 insertion(+) diff --git a/source/isaaclab/isaaclab/sensors/camera/camera.py b/source/isaaclab/isaaclab/sensors/camera/camera.py index 0c74fffa054d..edccccd16c5d 100644 --- a/source/isaaclab/isaaclab/sensors/camera/camera.py +++ b/source/isaaclab/isaaclab/sensors/camera/camera.py @@ -549,6 +549,7 @@ def _create_buffers(self): type(self._renderer).__name__, unsupported, ) + self._data = CameraData.allocate( data_types=known, height=self.cfg.height, From d21fc52fb429df987bf81f04cfaa46a291cc0813 Mon Sep 17 00:00:00 2001 From: Daniela Hasenbring Date: Mon, 4 May 2026 16:41:41 -0700 
Subject: [PATCH 12/22] Formatting --- source/isaaclab/isaaclab/sensors/camera/camera.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/isaaclab/isaaclab/sensors/camera/camera.py b/source/isaaclab/isaaclab/sensors/camera/camera.py index edccccd16c5d..0340251b38b5 100644 --- a/source/isaaclab/isaaclab/sensors/camera/camera.py +++ b/source/isaaclab/isaaclab/sensors/camera/camera.py @@ -400,7 +400,7 @@ def reset(self, env_ids: Sequence[int] | None = None, env_mask: wp.array | None self._update_poses(env_ids) # Reset the frame count frame_torch = wp.to_torch(self._frame) - frame_torch[env_ids] = 0 + frame_torch[wp.to_torch(env_ids)] = 0 """ Implementation. @@ -485,7 +485,7 @@ def _update_buffers_impl(self, env_mask: wp.array): # Increment frame count frame_torch = wp.to_torch(self._frame) - frame_torch[env_ids] += 1 + frame_torch[wp.to_torch(env_ids)] += 1 # update latest camera pose if requested if self.cfg.update_latest_camera_pose: From d91b9e1fc1ddf9b541d78a87ad8de4fd5e329011 Mon Sep 17 00:00:00 2001 From: Daniela Hasenbring Date: Mon, 4 May 2026 16:49:53 -0700 Subject: [PATCH 13/22] Fixes --- source/isaaclab/isaaclab/sensors/camera/camera.py | 2 +- source/isaaclab/test/sensors/test_ray_caster_camera.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/source/isaaclab/isaaclab/sensors/camera/camera.py b/source/isaaclab/isaaclab/sensors/camera/camera.py index 0340251b38b5..74826ab971f2 100644 --- a/source/isaaclab/isaaclab/sensors/camera/camera.py +++ b/source/isaaclab/isaaclab/sensors/camera/camera.py @@ -235,7 +235,7 @@ def set_intrinsic_matrices( """ # resolve env_ids if env_ids is None: - env_ids = self._ALL_INDICES + env_ids = self._ALL_INDICES.numpy() elif isinstance(env_ids, wp.array): env_ids = env_ids.numpy() diff --git a/source/isaaclab/test/sensors/test_ray_caster_camera.py b/source/isaaclab/test/sensors/test_ray_caster_camera.py index a913d38dd833..f4a58f5a932f 100644 --- a/source/isaaclab/test/sensors/test_ray_caster_camera.py +++ b/source/isaaclab/test/sensors/test_ray_caster_camera.py @@ -290,8 +290,8 @@ def test_camera_init_offset(setup_sim): # check if transform correctly set in output np.testing.assert_allclose(camera_ros.data.pos_w[0].cpu().numpy(), cam_cfg_offset_ros.offset.pos, rtol=1e-5) - np.testing.assert_allclose(camera_ros.data.quat_w_ros[0].cpu().numpy(), QUAT_ROS, rtol=1e-5) - np.testing.assert_allclose(camera_ros.data.quat_w_opengl[0].cpu().numpy(), QUAT_OPENGL, rtol=1e-5) + np.testing.assert_allclose(camera_ros.data.quat_w_ros[0].numpy(), QUAT_ROS, rtol=1e-5) + np.testing.assert_allclose(camera_ros.data.quat_w_opengl[0].numpy(), QUAT_OPENGL, rtol=1e-5) np.testing.assert_allclose(camera_ros.data.quat_w_world[0].cpu().numpy(), QUAT_WORLD, rtol=1e-5) From 7556caba317d1afe63e7f9b26387af94aa24ac18 Mon Sep 17 00:00:00 2001 From: Daniela Hasenbring Date: Mon, 4 May 2026 18:03:12 -0700 Subject: [PATCH 14/22] Fixes --- source/isaaclab/isaaclab/sensors/camera/camera.py | 2 +- .../isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/source/isaaclab/isaaclab/sensors/camera/camera.py b/source/isaaclab/isaaclab/sensors/camera/camera.py index 74826ab971f2..caa28016e692 100644 --- a/source/isaaclab/isaaclab/sensors/camera/camera.py +++ b/source/isaaclab/isaaclab/sensors/camera/camera.py @@ -485,7 +485,7 @@ def _update_buffers_impl(self, env_mask: wp.array): # Increment frame count frame_torch = wp.to_torch(self._frame) - frame_torch[wp.to_torch(env_ids)] 
+= 1 + frame_torch[env_ids] += 1 # update latest camera pose if requested if self.cfg.update_latest_camera_pose: diff --git a/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py b/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py index 8657c938c691..42749200e4ef 100644 --- a/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py +++ b/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py @@ -296,7 +296,7 @@ def test_camera_set_world_poses_from_view(setup_simulation): # check if transform correctly set in output torch.testing.assert_close(camera.data.pos_w, eyes) - torch.testing.assert_close(camera.data.quat_w_ros, quat_ros_gt) + torch.testing.assert_close(wp.to_torch(camera.data.quat_w_ros), quat_ros_gt) del camera From b03830e63d39ad7a999f8512c9b1e44db2e04124 Mon Sep 17 00:00:00 2001 From: Daniela Hasenbring Date: Tue, 5 May 2026 11:23:59 -0700 Subject: [PATCH 15/22] Formatting --- .../isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py | 1 + 1 file changed, 1 insertion(+) diff --git a/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py b/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py index 42749200e4ef..6e77a642c607 100644 --- a/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py +++ b/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py @@ -20,6 +20,7 @@ import numpy as np import pytest import torch +import warp as wp import omni.replicator.core as rep from pxr import Gf From 0a5bf8d9d1e549687834680c7ede35ec168f7782 Mon Sep 17 00:00:00 2001 From: Daniela Hasenbring Date: Tue, 5 May 2026 13:23:07 -0700 Subject: [PATCH 16/22] test fixes --- source/isaaclab/test/sensors/test_camera.py | 82 ++++------- .../test_multi_mesh_ray_caster_camera.py | 56 ++++---- .../test/sensors/test_ray_caster_camera.py | 131 ++++++++---------- .../sensors/test_ray_caster_integration.py | 16 ++- .../test/test_shadow_hand_vision_presets.py | 2 + 5 files changed, 127 insertions(+), 160 deletions(-) diff --git a/source/isaaclab/test/sensors/test_camera.py b/source/isaaclab/test/sensors/test_camera.py index d5375c750740..fcde09393c46 100644 --- a/source/isaaclab/test/sensors/test_camera.py +++ b/source/isaaclab/test/sensors/test_camera.py @@ -447,68 +447,36 @@ def test_depth_clipping(setup_sim_camera): camera_none.update(dt) camera_max.update(dt) + none_d2c = wp.to_torch(camera_none.data.output["distance_to_camera"]) + none_d2ip = wp.to_torch(camera_none.data.output["distance_to_image_plane"]) + zero_d2c = wp.to_torch(camera_zero.data.output["distance_to_camera"]) + zero_d2ip = wp.to_torch(camera_zero.data.output["distance_to_image_plane"]) + max_d2c = wp.to_torch(camera_max.data.output["distance_to_camera"]) + max_d2ip = wp.to_torch(camera_max.data.output["distance_to_image_plane"]) + # none clipping should contain inf values - assert torch.isinf(camera_none.data.output["distance_to_camera"]).any() - assert torch.isinf(camera_none.data.output["distance_to_image_plane"]).any() - assert ( - camera_none.data.output["distance_to_camera"][~torch.isinf(camera_none.data.output["distance_to_camera"])].min() - >= camera_cfg_zero.spawn.clipping_range[0] - ) - assert ( - camera_none.data.output["distance_to_camera"][~torch.isinf(camera_none.data.output["distance_to_camera"])].max() - <= camera_cfg_zero.spawn.clipping_range[1] - ) - assert ( - camera_none.data.output["distance_to_image_plane"][ - ~torch.isinf(camera_none.data.output["distance_to_image_plane"]) - ].min() - >= 
camera_cfg_zero.spawn.clipping_range[0] - ) - assert ( - camera_none.data.output["distance_to_image_plane"][ - ~torch.isinf(camera_none.data.output["distance_to_camera"]) - ].max() - <= camera_cfg_zero.spawn.clipping_range[1] - ) + assert torch.isinf(none_d2c).any() + assert torch.isinf(none_d2ip).any() + assert none_d2c[~torch.isinf(none_d2c)].min() >= camera_cfg_zero.spawn.clipping_range[0] + assert none_d2c[~torch.isinf(none_d2c)].max() <= camera_cfg_zero.spawn.clipping_range[1] + assert none_d2ip[~torch.isinf(none_d2ip)].min() >= camera_cfg_zero.spawn.clipping_range[0] + assert none_d2ip[~torch.isinf(none_d2c)].max() <= camera_cfg_zero.spawn.clipping_range[1] # zero clipping should result in zero values - assert torch.all( - camera_zero.data.output["distance_to_camera"][torch.isinf(camera_none.data.output["distance_to_camera"])] == 0.0 - ) - assert torch.all( - camera_zero.data.output["distance_to_image_plane"][ - torch.isinf(camera_none.data.output["distance_to_image_plane"]) - ] - == 0.0 - ) - assert ( - camera_zero.data.output["distance_to_camera"][camera_zero.data.output["distance_to_camera"] != 0.0].min() - >= camera_cfg_zero.spawn.clipping_range[0] - ) - assert camera_zero.data.output["distance_to_camera"].max() <= camera_cfg_zero.spawn.clipping_range[1] - assert ( - camera_zero.data.output["distance_to_image_plane"][ - camera_zero.data.output["distance_to_image_plane"] != 0.0 - ].min() - >= camera_cfg_zero.spawn.clipping_range[0] - ) - assert camera_zero.data.output["distance_to_image_plane"].max() <= camera_cfg_zero.spawn.clipping_range[1] + assert torch.all(zero_d2c[torch.isinf(none_d2c)] == 0.0) + assert torch.all(zero_d2ip[torch.isinf(none_d2ip)] == 0.0) + assert zero_d2c[zero_d2c != 0.0].min() >= camera_cfg_zero.spawn.clipping_range[0] + assert zero_d2c.max() <= camera_cfg_zero.spawn.clipping_range[1] + assert zero_d2ip[zero_d2ip != 0.0].min() >= camera_cfg_zero.spawn.clipping_range[0] + assert zero_d2ip.max() <= camera_cfg_zero.spawn.clipping_range[1] # max clipping should result in max values - assert torch.all( - camera_max.data.output["distance_to_camera"][torch.isinf(camera_none.data.output["distance_to_camera"])] - == camera_cfg_zero.spawn.clipping_range[1] - ) - assert torch.all( - camera_max.data.output["distance_to_image_plane"][ - torch.isinf(camera_none.data.output["distance_to_image_plane"]) - ] - == camera_cfg_zero.spawn.clipping_range[1] - ) - assert camera_max.data.output["distance_to_camera"].min() >= camera_cfg_zero.spawn.clipping_range[0] - assert camera_max.data.output["distance_to_camera"].max() <= camera_cfg_zero.spawn.clipping_range[1] - assert camera_max.data.output["distance_to_image_plane"].min() >= camera_cfg_zero.spawn.clipping_range[0] - assert camera_max.data.output["distance_to_image_plane"].max() <= camera_cfg_zero.spawn.clipping_range[1] + assert torch.all(max_d2c[torch.isinf(none_d2c)] == camera_cfg_zero.spawn.clipping_range[1]) + assert torch.all(max_d2ip[torch.isinf(none_d2ip)] == camera_cfg_zero.spawn.clipping_range[1]) + assert max_d2c.min() >= camera_cfg_zero.spawn.clipping_range[0] + assert max_d2c.max() <= camera_cfg_zero.spawn.clipping_range[1] + assert max_d2ip.min() >= camera_cfg_zero.spawn.clipping_range[0] + assert max_d2ip.max() <= camera_cfg_zero.spawn.clipping_range[1] def test_camera_resolution_all_colorize(setup_sim_camera): diff --git a/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py b/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py index 6e77a642c607..9914a0d86663 100644 --- 
a/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py +++ b/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py @@ -39,6 +39,10 @@ QUAT_WORLD = [-0.27984815, -0.1159169, 0.88047623, -0.3647052] +def _camera_output_to_torch(camera, data_type: str) -> torch.Tensor: + return wp.to_torch(camera.data.output[data_type]) + + @pytest.fixture(scope="function") def setup_simulation(): """Fixture to set up and tear down the simulation environment.""" @@ -208,8 +212,8 @@ def test_camera_init_intrinsic_matrix(setup_simulation): # check image data torch.testing.assert_close( - camera_1.data.output["distance_to_image_plane"], - camera_2.data.output["distance_to_image_plane"], + _camera_output_to_torch(camera_1, "distance_to_image_plane"), + _camera_output_to_torch(camera_2, "distance_to_image_plane"), ) # check that both intrinsic matrices are the same torch.testing.assert_close( @@ -410,25 +414,27 @@ def test_output_equal_to_usdcamera(setup_simulation, data_types): # check image data for data_type in data_types: if data_type in camera_usd.data.output and data_type in camera_warp.data.output: + usd_output = _camera_output_to_torch(camera_usd, data_type) + warp_output = _camera_output_to_torch(camera_warp, data_type) if data_type == "distance_to_camera" or data_type == "distance_to_image_plane": torch.testing.assert_close( - camera_usd.data.output[data_type], - camera_warp.data.output[data_type], + usd_output, + warp_output, atol=5e-5, rtol=5e-6, ) elif data_type == "normals": # NOTE: floating point issues of ~1e-5, so using atol and rtol in this case torch.testing.assert_close( - camera_usd.data.output[data_type][..., :3], - camera_warp.data.output[data_type], + usd_output[..., :3], + warp_output, rtol=1e-5, atol=1e-4, ) else: torch.testing.assert_close( - camera_usd.data.output[data_type], - camera_warp.data.output[data_type], + usd_output, + warp_output, ) del camera_usd, camera_warp @@ -486,14 +492,14 @@ def test_output_equal_to_usdcamera_offset(setup_simulation): # check image data torch.testing.assert_close( - camera_usd.data.output["distance_to_image_plane"], - camera_warp.data.output["distance_to_image_plane"], + _camera_output_to_torch(camera_usd, "distance_to_image_plane"), + _camera_output_to_torch(camera_warp, "distance_to_image_plane"), atol=5e-5, rtol=5e-6, ) torch.testing.assert_close( - camera_usd.data.output["distance_to_camera"], - camera_warp.data.output["distance_to_camera"], + _camera_output_to_torch(camera_usd, "distance_to_camera"), + _camera_output_to_torch(camera_warp, "distance_to_camera"), atol=5e-5, rtol=5e-6, ) @@ -501,8 +507,8 @@ def test_output_equal_to_usdcamera_offset(setup_simulation): # check normals # NOTE: floating point issues of ~1e-5, so using atol and rtol in this case torch.testing.assert_close( - camera_usd.data.output["normals"][..., :3], - camera_warp.data.output["normals"], + _camera_output_to_torch(camera_usd, "normals")[..., :3], + _camera_output_to_torch(camera_warp, "normals"), rtol=1e-5, atol=1e-4, ) @@ -582,14 +588,14 @@ def test_output_equal_to_usdcamera_prim_offset(setup_simulation): # check image data torch.testing.assert_close( - camera_usd.data.output["distance_to_image_plane"], - camera_warp.data.output["distance_to_image_plane"], + _camera_output_to_torch(camera_usd, "distance_to_image_plane"), + _camera_output_to_torch(camera_warp, "distance_to_image_plane"), atol=5e-5, rtol=5e-6, ) torch.testing.assert_close( - camera_usd.data.output["distance_to_camera"], - camera_warp.data.output["distance_to_camera"], + 
_camera_output_to_torch(camera_usd, "distance_to_camera"), + _camera_output_to_torch(camera_warp, "distance_to_camera"), rtol=4e-6, atol=2e-5, ) @@ -597,8 +603,8 @@ def test_output_equal_to_usdcamera_prim_offset(setup_simulation): # check normals # NOTE: floating point issues of ~1e-5, so using atol and rtol in this case torch.testing.assert_close( - camera_usd.data.output["normals"][..., :3], - camera_warp.data.output["normals"], + _camera_output_to_torch(camera_usd, "normals")[..., :3], + _camera_output_to_torch(camera_warp, "normals"), rtol=1e-5, atol=1e-4, ) @@ -670,8 +676,8 @@ def test_output_equal_to_usd_camera_intrinsics(setup_simulation, height, width): camera_warp.update(dt) # filter nan and inf from output - cam_warp_output = camera_warp.data.output["distance_to_image_plane"].clone() - cam_usd_output = camera_usd.data.output["distance_to_image_plane"].clone() + cam_warp_output = _camera_output_to_torch(camera_warp, "distance_to_image_plane").clone() + cam_usd_output = _camera_output_to_torch(camera_usd, "distance_to_image_plane").clone() cam_warp_output[torch.isnan(cam_warp_output)] = 0 cam_warp_output[torch.isinf(cam_warp_output)] = 0 cam_usd_output[torch.isnan(cam_usd_output)] = 0 @@ -771,8 +777,8 @@ def test_output_equal_to_usd_camera_when_intrinsics_set(setup_simulation): # check image data torch.testing.assert_close( - camera_usd.data.output["distance_to_camera"], - camera_warp.data.output["distance_to_camera"], + _camera_output_to_torch(camera_usd, "distance_to_camera"), + _camera_output_to_torch(camera_warp, "distance_to_camera"), rtol=5e-3, atol=1e-4, ) @@ -802,7 +808,7 @@ def test_image_mesh_ids_identifies_hit_mesh(setup_simulation): # (the default), which leaves missed rays at the Warp-kernel fill value of inf. # Under "max" clipping, missed rays would be clamped to a finite max_distance, making # the inf comparison incorrect. - hit_mask = camera.data.output["distance_to_camera"][0, :, :, 0] < float("inf") + hit_mask = _camera_output_to_torch(camera, "distance_to_camera")[0, :, :, 0] < float("inf") assert hit_mask.any(), "Expected at least some rays to hit the ground plane" # All hits against the single registered mesh must carry mesh_id=0 (first mesh index). 
diff --git a/source/isaaclab/test/sensors/test_ray_caster_camera.py b/source/isaaclab/test/sensors/test_ray_caster_camera.py
index f4a58f5a932f..755cb1f7b255 100644
--- a/source/isaaclab/test/sensors/test_ray_caster_camera.py
+++ b/source/isaaclab/test/sensors/test_ray_caster_camera.py
@@ -21,6 +21,7 @@
 import numpy as np
 import pytest
 import torch
+import warp as wp
 
 import omni.replicator.core as rep
 from pxr import Gf
@@ -41,6 +42,10 @@
 DEBUG_PLOTS = False
 
 
+def _camera_output_to_torch(camera, data_type: str) -> torch.Tensor:
+    return wp.to_torch(camera.data.output[data_type])
+
+
 def setup() -> tuple[sim_utils.SimulationContext, RayCasterCameraCfg, float]:
     # Create a blank new stage
     camera_pattern_cfg = patterns.PinholeCameraPatternCfg(
@@ -197,46 +202,30 @@ def test_depth_clipping(setup_sim):
     camera_none.update(dt)
     camera_max.update(dt)
 
+    none_d2c = _camera_output_to_torch(camera_none, "distance_to_camera")
+    none_d2ip = _camera_output_to_torch(camera_none, "distance_to_image_plane")
+    zero_d2c = _camera_output_to_torch(camera_zero, "distance_to_camera")
+    zero_d2ip = _camera_output_to_torch(camera_zero, "distance_to_image_plane")
+    max_d2c = _camera_output_to_torch(camera_max, "distance_to_camera")
+    max_d2ip = _camera_output_to_torch(camera_max, "distance_to_image_plane")
+
     # none clipping should contain inf values
-    assert torch.isinf(camera_none.data.output["distance_to_camera"]).any()
-    assert torch.isnan(camera_none.data.output["distance_to_image_plane"]).any()
-    assert (
-        camera_none.data.output["distance_to_camera"][~torch.isinf(camera_none.data.output["distance_to_camera"])].max()
-        > camera_cfg_zero.max_distance
-    )
-    assert (
-        camera_none.data.output["distance_to_image_plane"][
-            ~torch.isnan(camera_none.data.output["distance_to_image_plane"])
-        ].max()
-        > camera_cfg_zero.max_distance
-    )
+    assert torch.isinf(none_d2c).any()
+    assert torch.isnan(none_d2ip).any()
+    assert none_d2c[~torch.isinf(none_d2c)].max() > camera_cfg_zero.max_distance
+    assert none_d2ip[~torch.isnan(none_d2ip)].max() > camera_cfg_zero.max_distance
 
     # zero clipping should result in zero values
-    assert torch.all(
-        camera_zero.data.output["distance_to_camera"][torch.isinf(camera_none.data.output["distance_to_camera"])] == 0.0
-    )
-    assert torch.all(
-        camera_zero.data.output["distance_to_image_plane"][
-            torch.isnan(camera_none.data.output["distance_to_image_plane"])
-        ]
-        == 0.0
-    )
-    assert camera_zero.data.output["distance_to_camera"].max() <= camera_cfg_zero.max_distance
-    assert camera_zero.data.output["distance_to_image_plane"].max() <= camera_cfg_zero.max_distance
+    assert torch.all(zero_d2c[torch.isinf(none_d2c)] == 0.0)
+    assert torch.all(zero_d2ip[torch.isnan(none_d2ip)] == 0.0)
+    assert zero_d2c.max() <= camera_cfg_zero.max_distance
+    assert zero_d2ip.max() <= camera_cfg_zero.max_distance
 
     # max clipping should result in max values
-    assert torch.all(
-        camera_max.data.output["distance_to_camera"][torch.isinf(camera_none.data.output["distance_to_camera"])]
-        == camera_cfg_zero.max_distance
-    )
-    assert torch.all(
-        camera_max.data.output["distance_to_image_plane"][
-            torch.isnan(camera_none.data.output["distance_to_image_plane"])
-        ]
-        == camera_cfg_zero.max_distance
-    )
-    assert camera_max.data.output["distance_to_camera"].max() <= camera_cfg_zero.max_distance
-    assert camera_max.data.output["distance_to_image_plane"].max() <= camera_cfg_zero.max_distance
+    assert torch.all(max_d2c[torch.isinf(none_d2c)] == camera_cfg_zero.max_distance)
+    assert torch.all(max_d2ip[torch.isnan(none_d2ip)] == camera_cfg_zero.max_distance)
+    assert max_d2c.max() <= camera_cfg_zero.max_distance
+    assert max_d2ip.max() <= camera_cfg_zero.max_distance
 
 
 @pytest.mark.isaacsim_ci
@@ -337,8 +326,8 @@ def test_camera_init_intrinsic_matrix(setup_sim):
 
     # check image data
     torch.testing.assert_close(
-        camera_1.data.output["distance_to_image_plane"],
-        camera_2.data.output["distance_to_image_plane"],
+        _camera_output_to_torch(camera_1, "distance_to_image_plane"),
+        _camera_output_to_torch(camera_2, "distance_to_image_plane"),
     )
     # check that both intrinsic matrices are the same
     torch.testing.assert_close(
@@ -527,14 +516,14 @@ def test_output_equal_to_usdcamera(setup_sim):
 
     # check image data
     torch.testing.assert_close(
-        camera_usd.data.output["distance_to_image_plane"],
-        camera_warp.data.output["distance_to_image_plane"],
+        _camera_output_to_torch(camera_usd, "distance_to_image_plane"),
+        _camera_output_to_torch(camera_warp, "distance_to_image_plane"),
         rtol=1e-5,
         atol=1e-4,
     )
     torch.testing.assert_close(
-        camera_usd.data.output["distance_to_camera"],
-        camera_warp.data.output["distance_to_camera"],
+        _camera_output_to_torch(camera_usd, "distance_to_camera"),
+        _camera_output_to_torch(camera_warp, "distance_to_camera"),
         atol=5e-5,
         rtol=5e-6,
     )
@@ -542,8 +531,8 @@ def test_output_equal_to_usdcamera(setup_sim):
     # check normals
     # NOTE: floating point issues of ~1e-5, so using atol and rtol in this case
     torch.testing.assert_close(
-        camera_usd.data.output["normals"][..., :3],
-        camera_warp.data.output["normals"],
+        _camera_output_to_torch(camera_usd, "normals")[..., :3],
+        _camera_output_to_torch(camera_warp, "normals"),
         rtol=1e-5,
         atol=1e-4,
     )
@@ -603,14 +592,14 @@ def test_output_equal_to_usdcamera_offset(setup_sim):
 
     # check image data
     torch.testing.assert_close(
-        camera_usd.data.output["distance_to_image_plane"],
-        camera_warp.data.output["distance_to_image_plane"],
+        _camera_output_to_torch(camera_usd, "distance_to_image_plane"),
+        _camera_output_to_torch(camera_warp, "distance_to_image_plane"),
         rtol=1e-3,
         atol=1e-5,
     )
     torch.testing.assert_close(
-        camera_usd.data.output["distance_to_camera"],
-        camera_warp.data.output["distance_to_camera"],
+        _camera_output_to_torch(camera_usd, "distance_to_camera"),
+        _camera_output_to_torch(camera_warp, "distance_to_camera"),
         rtol=1e-3,
         atol=1e-5,
     )
@@ -618,8 +607,8 @@ def test_output_equal_to_usdcamera_offset(setup_sim):
     # check normals
     # NOTE: floating point issues of ~1e-5, so using atol and rtol in this case
     torch.testing.assert_close(
-        camera_usd.data.output["normals"][..., :3],
-        camera_warp.data.output["normals"],
+        _camera_output_to_torch(camera_usd, "normals")[..., :3],
+        _camera_output_to_torch(camera_warp, "normals"),
         rtol=1e-5,
         atol=1e-4,
     )
@@ -697,14 +686,14 @@ def test_output_equal_to_usdcamera_prim_offset(setup_sim):
 
     # check image data
     torch.testing.assert_close(
-        camera_usd.data.output["distance_to_image_plane"],
-        camera_warp.data.output["distance_to_image_plane"],
+        _camera_output_to_torch(camera_usd, "distance_to_image_plane"),
+        _camera_output_to_torch(camera_warp, "distance_to_image_plane"),
         rtol=1e-3,
        atol=1e-5,
     )
     torch.testing.assert_close(
-        camera_usd.data.output["distance_to_camera"],
-        camera_warp.data.output["distance_to_camera"],
+        _camera_output_to_torch(camera_usd, "distance_to_camera"),
+        _camera_output_to_torch(camera_warp, "distance_to_camera"),
         rtol=4e-6,
         atol=2e-5,
     )
@@ -712,8 +701,8 @@ def test_output_equal_to_usdcamera_prim_offset(setup_sim):
     # check normals
     # NOTE: floating point issues of ~1e-5, so using atol and rtol in this case
     torch.testing.assert_close(
-        camera_usd.data.output["normals"][..., :3],
-        camera_warp.data.output["normals"],
+        _camera_output_to_torch(camera_usd, "normals")[..., :3],
+        _camera_output_to_torch(camera_warp, "normals"),
         rtol=1e-5,
         atol=1e-4,
     )
@@ -787,8 +776,8 @@ def test_output_equal_to_usd_camera_intrinsics(setup_sim, focal_length):
     camera_warp.update(dt)
 
     # filter nan and inf from output
-    cam_warp_output = camera_warp.data.output["distance_to_image_plane"].clone()
-    cam_usd_output = camera_usd.data.output["distance_to_image_plane"].clone()
+    cam_warp_output = _camera_output_to_torch(camera_warp, "distance_to_image_plane").clone()
+    cam_usd_output = _camera_output_to_torch(camera_usd, "distance_to_image_plane").clone()
     cam_warp_output[torch.isnan(cam_warp_output)] = 0
     cam_warp_output[torch.isinf(cam_warp_output)] = 0
     cam_usd_output[torch.isnan(cam_usd_output)] = 0
@@ -913,23 +902,21 @@ def test_output_equal_to_usd_camera_when_intrinsics_set(setup_sim, focal_length_
     # update camera
     camera_usd.update(dt)
     camera_warp.update(dt)
+    camera_usd_output = _camera_output_to_torch(camera_usd, "distance_to_camera")
+    camera_warp_output = _camera_output_to_torch(camera_warp, "distance_to_camera")
 
     if DEBUG_PLOTS:
         # plot both images next to each other plus their difference in a 1x3 grid figure
         import matplotlib.pyplot as plt
 
         fig, axs = plt.subplots(1, 3, figsize=(15, 5))
-        usd_plt = axs[0].imshow(camera_usd.data.output["distance_to_camera"][0].cpu().numpy())
+        usd_plt = axs[0].imshow(camera_usd_output[0].cpu().numpy())
         fig.colorbar(usd_plt, ax=axs[0])
         axs[0].set_title("USD")
-        warp_plt = axs[1].imshow(camera_warp.data.output["distance_to_camera"][0].cpu().numpy())
+        warp_plt = axs[1].imshow(camera_warp_output[0].cpu().numpy())
         fig.colorbar(warp_plt, ax=axs[1])
         axs[1].set_title("WARP")
-        diff_plt = axs[2].imshow(
-            torch.abs(camera_usd.data.output["distance_to_camera"] - camera_warp.data.output["distance_to_camera"])[0]
-            .cpu()
-            .numpy()
-        )
+        diff_plt = axs[2].imshow(torch.abs(camera_usd_output - camera_warp_output)[0].cpu().numpy())
         fig.colorbar(diff_plt, ax=axs[2])
         axs[2].set_title("Difference")
         # save figure
@@ -943,8 +930,8 @@ def test_output_equal_to_usd_camera_when_intrinsics_set(setup_sim, focal_length_
     if focal_length != 0.193:
         # FIXME: 0.193 is not working on the IsaacSim/ UsdGeom side, add back once fixed
         torch.testing.assert_close(
-            camera_usd.data.output["distance_to_camera"],
-            camera_warp.data.output["distance_to_camera"],
+            camera_usd_output,
+            camera_warp_output,
            rtol=5e-3,
            atol=1e-4,
         )
@@ -1017,10 +1004,10 @@ def test_depth_clipping_d2ip_and_d2c_are_independent(setup_sim):
     cam_d2ip.update(dt)
     cam_d2c.update(dt)
 
-    d2ip_joint = cam_joint.data.output["distance_to_image_plane"]
-    d2c_joint = cam_joint.data.output["distance_to_camera"]
-    d2ip_solo = cam_d2ip.data.output["distance_to_image_plane"]
-    d2c_solo = cam_d2c.data.output["distance_to_camera"]
+    d2ip_joint = _camera_output_to_torch(cam_joint, "distance_to_image_plane")
+    d2c_joint = _camera_output_to_torch(cam_joint, "distance_to_camera")
+    d2ip_solo = _camera_output_to_torch(cam_d2ip, "distance_to_image_plane")
+    d2c_solo = _camera_output_to_torch(cam_d2c, "distance_to_camera")
 
     # Joint camera must match solo cameras (clipping one must not affect the other)
     torch.testing.assert_close(d2ip_joint, d2ip_solo, atol=1e-5, rtol=1e-5)
@@ -1078,7 +1065,7 @@ def test_set_intrinsic_matrices_updates_output(setup_sim):
     for _ in range(3):
         sim.step()
         camera.update(dt)
-    output_before = camera.data.output["distance_to_camera"].clone()
+    output_before = _camera_output_to_torch(camera, "distance_to_camera").clone()
 
     # Change to a very different focal length (longer → tighter FOV → depth values differ at edges)
     new_matrix = torch.tensor(
@@ -1090,7 +1077,7 @@ def test_set_intrinsic_matrices_updates_output(setup_sim):
     for _ in range(3):
         sim.step()
         camera.update(dt)
-    output_after = camera.data.output["distance_to_camera"].clone()
+    output_after = _camera_output_to_torch(camera, "distance_to_camera").clone()
 
     # Outputs must differ after intrinsics change (different ray angles → different depths)
     assert not torch.allclose(output_before, output_after, atol=1e-3), (
diff --git a/source/isaaclab/test/sensors/test_ray_caster_integration.py b/source/isaaclab/test/sensors/test_ray_caster_integration.py
index 36b6d8ff4692..800319412cd1 100644
--- a/source/isaaclab/test/sensors/test_ray_caster_integration.py
+++ b/source/isaaclab/test/sensors/test_ray_caster_integration.py
@@ -47,6 +47,10 @@
 _DT = 0.01
 
 
+def _camera_output_to_torch(camera, data_type: str) -> torch.Tensor:
+    return wp.to_torch(camera.data.output[data_type])
+
+
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
@@ -226,7 +230,7 @@ def test_multi_mesh_camera_set_intrinsic_matrices(sim_ground_camera):
     for _ in range(3):
         sim.step()
         camera.update(_DT)
-    output_before = camera.data.output["distance_to_camera"].clone()
+    output_before = _camera_output_to_torch(camera, "distance_to_camera").clone()
 
     # Change to a very different intrinsic matrix (different FOV)
     new_matrix = torch.tensor(
@@ -238,7 +242,7 @@ def test_multi_mesh_camera_set_intrinsic_matrices(sim_ground_camera):
     for _ in range(3):
         sim.step()
         camera.update(_DT)
-    output_after = camera.data.output["distance_to_camera"].clone()
+    output_after = _camera_output_to_torch(camera, "distance_to_camera").clone()
 
     assert not torch.allclose(output_before, output_after, atol=1e-3), (
         "MultiMeshRayCasterCamera: depth output must change after set_intrinsic_matrices; "
@@ -292,10 +296,10 @@ def test_multi_mesh_camera_d2ip_and_d2c_independent(sim_ground_camera):
     cam_d2ip.update(_DT)
     cam_d2c.update(_DT)
 
-    d2ip_joint = cam_joint.data.output["distance_to_image_plane"]
-    d2c_joint = cam_joint.data.output["distance_to_camera"]
-    d2ip_solo = cam_d2ip.data.output["distance_to_image_plane"]
-    d2c_solo = cam_d2c.data.output["distance_to_camera"]
+    d2ip_joint = _camera_output_to_torch(cam_joint, "distance_to_image_plane")
+    d2c_joint = _camera_output_to_torch(cam_joint, "distance_to_camera")
+    d2ip_solo = _camera_output_to_torch(cam_d2ip, "distance_to_image_plane")
+    d2c_solo = _camera_output_to_torch(cam_d2c, "distance_to_camera")
 
     # Joint camera must match solo cameras (clipping one must not corrupt the other)
     torch.testing.assert_close(d2ip_joint, d2ip_solo, atol=1e-5, rtol=1e-5)
diff --git a/source/isaaclab_tasks/test/test_shadow_hand_vision_presets.py b/source/isaaclab_tasks/test/test_shadow_hand_vision_presets.py
index a77f63ffe891..e7edc65758e1 100644
--- a/source/isaaclab_tasks/test/test_shadow_hand_vision_presets.py
+++ b/source/isaaclab_tasks/test/test_shadow_hand_vision_presets.py
@@ -36,6 +36,7 @@
 
 from isaaclab_ov.renderers import OVRTXRendererCfg # noqa: E402
 from isaaclab_physx.renderers import IsaacRtxRendererCfg # noqa: E402
+from isaaclab.utils.array import convert_to_torch # noqa: E402
 from isaaclab_tasks.direct.shadow_hand.shadow_hand_vision_env import ShadowHandVisionEnv # noqa: E402
 from isaaclab_tasks.direct.shadow_hand.shadow_hand_vision_env_cfg import ( # noqa: E402
     ShadowHandVisionBenchmarkEnvCfg,
@@ -416,6 +417,7 @@ def test_camera_renders_not_empty(render_correctness_env):
     camera_output = env._tiled_camera.data.output
     assert len(camera_output) > 0, f"[{label}] Camera produced no output tensors at all."
     for dt, tensor in camera_output.items():
+        tensor = convert_to_torch(tensor)
         finite = torch.where(torch.isinf(tensor), torch.zeros_like(tensor), tensor)
         # import pdb; pdb.set_trace()
         assert finite.max() > 0.2, (

From 6754f3c90dbc37d7bf2e54ea4f2351df2897b4fd Mon Sep 17 00:00:00 2001
From: Daniela Hasenbring
Date: Tue, 5 May 2026 13:27:30 -0700
Subject: [PATCH 17/22] Formatting

Separate the isaaclab import block from the isaaclab_tasks imports in the
shadow hand vision preset test.

---
 source/isaaclab_tasks/test/test_shadow_hand_vision_presets.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/source/isaaclab_tasks/test/test_shadow_hand_vision_presets.py b/source/isaaclab_tasks/test/test_shadow_hand_vision_presets.py
index e7edc65758e1..34afdd57f634 100644
--- a/source/isaaclab_tasks/test/test_shadow_hand_vision_presets.py
+++ b/source/isaaclab_tasks/test/test_shadow_hand_vision_presets.py
@@ -37,6 +37,7 @@
 from isaaclab_physx.renderers import IsaacRtxRendererCfg # noqa: E402
 
 from isaaclab.utils.array import convert_to_torch # noqa: E402
+
 from isaaclab_tasks.direct.shadow_hand.shadow_hand_vision_env import ShadowHandVisionEnv # noqa: E402
 from isaaclab_tasks.direct.shadow_hand.shadow_hand_vision_env_cfg import ( # noqa: E402
     ShadowHandVisionBenchmarkEnvCfg,

From 7bff7b1f925e4289f5f4363d46d6b07fa4f3999a Mon Sep 17 00:00:00 2001
From: Daniela Hasenbring
Date: Tue, 5 May 2026 13:51:13 -0700
Subject: [PATCH 18/22] Use convert_to_torch for Camera frame-count updates
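
The frame-count bookkeeping previously went through wp.to_torch, which only
accepts wp.array inputs. Since env_ids can also arrive as a Python sequence
or another array type, both call sites now use
isaaclab.utils.array.convert_to_torch, which normalizes the supported input
types before the torch-side indexing. A minimal sketch of the intended
pattern (the stand-in buffer shape, device, and the index cast are
illustrative assumptions, not part of this change):

    import torch
    import warp as wp

    from isaaclab.utils.array import convert_to_torch

    # stand-in for the per-environment frame counter kept as a warp array
    frame = wp.zeros(4, dtype=wp.int32, device="cpu")
    env_ids = [0, 2]  # plain Python sequence of environment indices

    frame_torch = convert_to_torch(frame)  # wp.array -> torch.Tensor
    idx = convert_to_torch(env_ids)  # sequences are normalized as well
    frame_torch[idx.to(torch.long)] = 0  # index with an explicit integer dtype
    # wp.to_torch(env_ids) would fail here, since env_ids is not a wp.array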
---
 source/isaaclab/isaaclab/sensors/camera/camera.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/source/isaaclab/isaaclab/sensors/camera/camera.py b/source/isaaclab/isaaclab/sensors/camera/camera.py
index caa28016e692..18a46e05fe4a 100644
--- a/source/isaaclab/isaaclab/sensors/camera/camera.py
+++ b/source/isaaclab/isaaclab/sensors/camera/camera.py
@@ -22,6 +22,7 @@
 from isaaclab.renderers.camera_render_spec import CameraRenderSpec
 from isaaclab.sim.views import FrameView
 from isaaclab.utils import to_camel_case
+from isaaclab.utils.array import convert_to_torch
 from isaaclab.utils.math import (
     convert_camera_frame_orientation_convention,
     create_rotation_matrix_from_view,
@@ -399,8 +400,8 @@ def reset(self, env_ids: Sequence[int] | None = None, env_mask: wp.array | None
         # note: this recomputation is useful if one performs events such as randomizations on the camera poses.
         self._update_poses(env_ids)
         # Reset the frame count
-        frame_torch = wp.to_torch(self._frame)
-        frame_torch[wp.to_torch(env_ids)] = 0
+        frame_torch = convert_to_torch(self._frame)
+        frame_torch[convert_to_torch(env_ids)] = 0
 
     """
     Implementation.
@@ -484,8 +485,8 @@ def _update_buffers_impl(self, env_mask: wp.array):
             return
 
         # Increment frame count
-        frame_torch = wp.to_torch(self._frame)
-        frame_torch[env_ids] += 1
+        frame_torch = convert_to_torch(self._frame)
+        frame_torch[convert_to_torch(env_ids)] += 1
 
         # update latest camera pose if requested
         if self.cfg.update_latest_camera_pose:

From e20a14d4901deab027a65c627c5e3cf9b73f87af Mon Sep 17 00:00:00 2001
From: Daniela Hasenbring
Date: Tue, 5 May 2026 14:29:14 -0700
Subject: [PATCH 19/22] Cast converted env_ids to torch.int32 for frame indexing
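
torch only accepts integer (or boolean) tensors as advanced indices, and the
tensor coming out of convert_to_torch(env_ids) is not guaranteed to carry an
integer dtype for every input it normalizes. The explicit cast keeps the
frame-count indexing well defined for all call sites. A small illustration of
the failure mode this guards against (the float dtype below is an assumed
worst case, not what every conversion produces):

    import torch

    frame_torch = torch.zeros(4, dtype=torch.int32)
    idx = torch.tensor([0.0, 2.0])  # a float index tensor is rejected by torch
    # frame_torch[idx] += 1  # IndexError: tensors used as indices must be
    #                        # long, int, byte or bool tensors
    frame_torch[idx.to(torch.int32)] += 1  # explicit integer cast, as used below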
---
 source/isaaclab/isaaclab/sensors/camera/camera.py      | 4 ++--
 source/isaaclab/test/sensors/test_ray_caster_camera.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/source/isaaclab/isaaclab/sensors/camera/camera.py b/source/isaaclab/isaaclab/sensors/camera/camera.py
index 18a46e05fe4a..fb62588e3e3d 100644
--- a/source/isaaclab/isaaclab/sensors/camera/camera.py
+++ b/source/isaaclab/isaaclab/sensors/camera/camera.py
@@ -401,7 +401,7 @@ def reset(self, env_ids: Sequence[int] | None = None, env_mask: wp.array | None
         self._update_poses(env_ids)
         # Reset the frame count
         frame_torch = convert_to_torch(self._frame)
-        frame_torch[convert_to_torch(env_ids)] = 0
+        frame_torch[convert_to_torch(env_ids).to(torch.int32)] = 0
 
     """
     Implementation.
@@ -486,7 +486,7 @@ def _update_buffers_impl(self, env_mask: wp.array):
 
         # Increment frame count
         frame_torch = convert_to_torch(self._frame)
-        frame_torch[convert_to_torch(env_ids)] += 1
+        frame_torch[convert_to_torch(env_ids).to(torch.int32)] += 1
 
         # update latest camera pose if requested
         if self.cfg.update_latest_camera_pose:
diff --git a/source/isaaclab/test/sensors/test_ray_caster_camera.py b/source/isaaclab/test/sensors/test_ray_caster_camera.py
index 755cb1f7b255..8fc619be6326 100644
--- a/source/isaaclab/test/sensors/test_ray_caster_camera.py
+++ b/source/isaaclab/test/sensors/test_ray_caster_camera.py
@@ -21,7 +21,6 @@
 import numpy as np
 import pytest
 import torch
-import warp as wp
 
 import omni.replicator.core as rep
 from pxr import Gf
@@ -32,6 +31,7 @@
 from isaaclab.sim import PinholeCameraCfg
 from isaaclab.terrains.trimesh.utils import make_plane
 from isaaclab.terrains.utils import create_prim_from_mesh
+from isaaclab.utils.array import convert_to_torch
 
 # sample camera poses
 POSITION = [2.5, 2.5, 2.5]
@@ -43,7 +43,7 @@
 
 
 def _camera_output_to_torch(camera, data_type: str) -> torch.Tensor:
-    return wp.to_torch(camera.data.output[data_type])
+    return convert_to_torch(camera.data.output[data_type])
 
 
 def setup() -> tuple[sim_utils.SimulationContext, RayCasterCameraCfg, float]:

From 1ccc7c4d2da498eae814aac63f132600b06510e4 Mon Sep 17 00:00:00 2001
From: Daniela Hasenbring
Date: Tue, 5 May 2026 15:39:11 -0700
Subject: [PATCH 20/22] Route ray caster test helpers through convert_to_torch

Route the camera-output helpers in the multi-mesh and integration tests
through convert_to_torch so they no longer depend on a module-level warp
import.

---
 .../isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py | 3 ++-
 source/isaaclab/test/sensors/test_ray_caster_integration.py   | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py b/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py
index 9914a0d86663..4824e7e33210 100644
--- a/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py
+++ b/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py
@@ -31,6 +31,7 @@
 from isaaclab.sim import PinholeCameraCfg
 from isaaclab.terrains.trimesh.utils import make_plane
 from isaaclab.terrains.utils import create_prim_from_mesh
+from isaaclab.utils.array import convert_to_torch
 
 # sample camera poses (quaternions in xyzw format)
 POSITION = [2.5, 2.5, 2.5]
@@ -40,7 +41,7 @@
 
 
 def _camera_output_to_torch(camera, data_type: str) -> torch.Tensor:
-    return wp.to_torch(camera.data.output[data_type])
+    return convert_to_torch(camera.data.output[data_type])
 
 
 @pytest.fixture(scope="function")
diff --git a/source/isaaclab/test/sensors/test_ray_caster_integration.py b/source/isaaclab/test/sensors/test_ray_caster_integration.py
index 800319412cd1..a03e7bd32cf7 100644
--- a/source/isaaclab/test/sensors/test_ray_caster_integration.py
+++ b/source/isaaclab/test/sensors/test_ray_caster_integration.py
@@ -42,13 +42,14 @@
 )
 from isaaclab.terrains.trimesh.utils import make_plane
 from isaaclab.terrains.utils import create_prim_from_mesh
+from isaaclab.utils.array import convert_to_torch
 
 _GROUND_PATH = "/World/Ground"
 _DT = 0.01
 
 
 def _camera_output_to_torch(camera, data_type: str) -> torch.Tensor:
-    return wp.to_torch(camera.data.output[data_type])
+    return convert_to_torch(camera.data.output[data_type])
 
 
 # ---------------------------------------------------------------------------

From 25a49efe0cd416aceb1befbba4dda364f115d19f Mon Sep 17 00:00:00 2001
From: Daniela Hasenbring
Date: Tue, 5 May 2026 16:40:17 -0700
Subject: [PATCH 21/22] Convert intrinsics and quaternions before test comparisons

Lift intrinsic matrices and ROS-convention quaternions to torch tensors
before comparing them in the ray caster camera tests.

---
 .../test/sensors/test_multi_mesh_ray_caster_camera.py | 4 ++--
 source/isaaclab/test/sensors/test_ray_caster_camera.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py b/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py
index 4824e7e33210..f3d2ffe597c6 100644
--- a/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py
+++ b/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py
@@ -402,8 +402,8 @@ def test_output_equal_to_usdcamera(setup_simulation, data_types):
 
     # check the intrinsic matrices
     torch.testing.assert_close(
-        camera_usd.data.intrinsic_matrices,
-        camera_warp.data.intrinsic_matrices,
+        convert_to_torch(camera_usd.data.intrinsic_matrices),
+        convert_to_torch(camera_warp.data.intrinsic_matrices),
     )
 
     # check the apertures
diff --git a/source/isaaclab/test/sensors/test_ray_caster_camera.py b/source/isaaclab/test/sensors/test_ray_caster_camera.py
index 8fc619be6326..80ce883201d5 100644
--- a/source/isaaclab/test/sensors/test_ray_caster_camera.py
+++ b/source/isaaclab/test/sensors/test_ray_caster_camera.py
@@ -411,7 +411,7 @@ def test_camera_set_world_poses_from_view(setup_sim):
 
     # check if transform correctly set in output
     torch.testing.assert_close(camera.data.pos_w, eyes)
-    torch.testing.assert_close(camera.data.quat_w_ros, quat_ros_gt)
+    torch.testing.assert_close(convert_to_torch(camera.data.quat_w_ros), convert_to_torch(quat_ros_gt))
 
 
 @pytest.mark.isaacsim_ci

From 5e6c512e29f92b0a232aa7955aafdb866c51dea8 Mon Sep 17 00:00:00 2001
From: Daniela Hasenbring
Date: Tue, 5 May 2026 17:41:54 -0700
Subject: [PATCH 22/22] Convert remaining warp-backed camera buffers in tests
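
With the camera data buffers stored as warp arrays, torch.testing.assert_close
and np.testing.assert_allclose can no longer consume fields such as pos_w,
quat_w_ros, or intrinsic_matrices directly. The remaining test assertions
therefore lift both sides to torch first. A self-contained sketch of the
comparison pattern (the buffer shape, values, and cpu device are illustrative
assumptions):

    import torch
    import warp as wp

    from isaaclab.utils.array import convert_to_torch

    # stand-in for a warp-backed pose buffer such as camera.data.pos_w
    pos_w = wp.zeros((1, 3), dtype=wp.float32, device="cpu")
    expected = torch.zeros((1, 3))

    # lift the warp array to torch before calling the comparison helpers
    torch.testing.assert_close(convert_to_torch(pos_w, device="cpu"), expected)
    # numpy-based checks then go through .cpu().numpy() on the converted tensor
    assert convert_to_torch(pos_w)[0].cpu().numpy().shape == (3,)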
---
 .../test_multi_mesh_ray_caster_camera.py | 30 ++++++-----
 .../test/sensors/test_ray_caster_camera.py | 51 +++++++++++--------
 2 files changed, 48 insertions(+), 33 deletions(-)

diff --git a/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py b/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py
index f3d2ffe597c6..48a95452f06f 100644
--- a/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py
+++ b/source/isaaclab/test/sensors/test_multi_mesh_ray_caster_camera.py
@@ -20,7 +20,6 @@
 import numpy as np
 import pytest
 import torch
-import warp as wp
 
 import omni.replicator.core as rep
 from pxr import Gf
@@ -121,7 +120,7 @@ def test_camera_init_offset(setup_simulation, convention, quat):
     camera.update(dt)
 
     # check that transform is set correctly
-    np.testing.assert_allclose(camera.data.pos_w[0].cpu().numpy(), cam_cfg_offset.offset.pos)
+    np.testing.assert_allclose(convert_to_torch(camera.data.pos_w)[0].cpu().numpy(), cam_cfg_offset.offset.pos)
 
     del camera
 
@@ -184,7 +183,7 @@ def test_camera_init_intrinsic_matrix(setup_simulation):
     camera_1 = MultiMeshRayCasterCamera(cfg=camera_cfg)
     # get intrinsic matrix
     sim.reset()
-    intrinsic_matrix = camera_1.data.intrinsic_matrices[0].cpu().flatten().tolist()
+    intrinsic_matrix = convert_to_torch(camera_1.data.intrinsic_matrices)[0].cpu().flatten().tolist()
 
     # initialize from intrinsic matrix
     intrinsic_camera_cfg = MultiMeshRayCasterCameraCfg(
@@ -218,8 +217,8 @@ def test_camera_init_intrinsic_matrix(setup_simulation):
     )
     # check that both intrinsic matrices are the same
     torch.testing.assert_close(
-        camera_1.data.intrinsic_matrices[0],
-        camera_2.data.intrinsic_matrices[0],
+        convert_to_torch(camera_1.data.intrinsic_matrices)[0],
+        convert_to_torch(camera_2.data.intrinsic_matrices)[0],
     )
 
     del camera_1, camera_2
@@ -278,8 +277,8 @@ def test_camera_set_world_poses(setup_simulation):
     camera.set_world_poses(position.clone(), orientation.clone(), convention="world")
 
     # check if transform correctly set in output
-    torch.testing.assert_close(camera.data.pos_w, position)
-    torch.testing.assert_close(camera.data.quat_w_world, orientation)
+    torch.testing.assert_close(convert_to_torch(camera.data.pos_w), position)
+    torch.testing.assert_close(convert_to_torch(camera.data.quat_w_world), orientation)
 
     del camera
 
@@ -301,8 +300,8 @@ def test_camera_set_world_poses_from_view(setup_simulation):
     camera.set_world_poses_from_view(eyes.clone(), targets.clone())
 
     # check if transform correctly set in output
-    torch.testing.assert_close(camera.data.pos_w, eyes)
-    torch.testing.assert_close(wp.to_torch(camera.data.quat_w_ros), quat_ros_gt)
+    torch.testing.assert_close(convert_to_torch(camera.data.pos_w), eyes)
+    torch.testing.assert_close(convert_to_torch(camera.data.quat_w_ros), quat_ros_gt)
 
     del camera
 
@@ -331,7 +330,7 @@ def test_intrinsic_matrix(setup_simulation, height, width):
     # update camera
     camera.update(dt)
     # Check that matrix is correct
-    torch.testing.assert_close(rs_intrinsic_matrix, camera.data.intrinsic_matrices)
+    torch.testing.assert_close(rs_intrinsic_matrix, convert_to_torch(camera.data.intrinsic_matrices))
 
     del camera
 
@@ -584,8 +583,10 @@ def test_output_equal_to_usdcamera_prim_offset(setup_simulation):
     camera_warp.update(dt)
 
     # check if pos and orientation are correct
-    torch.testing.assert_close(camera_warp.data.pos_w[0], camera_usd.data.pos_w[0])
-    torch.testing.assert_close(camera_warp.data.quat_w_ros[0], camera_usd.data.quat_w_ros[0])
+    torch.testing.assert_close(convert_to_torch(camera_warp.data.pos_w)[0], convert_to_torch(camera_usd.data.pos_w)[0])
+    torch.testing.assert_close(
+        convert_to_torch(camera_warp.data.quat_w_ros)[0], convert_to_torch(camera_usd.data.quat_w_ros)[0]
+    )
 
     # check image data
     torch.testing.assert_close(
@@ -685,7 +686,10 @@ def test_output_equal_to_usd_camera_intrinsics(setup_simulation, height, width):
     cam_usd_output[torch.isinf(cam_usd_output)] = 0
 
     # check that both have the same intrinsic matrices
-    torch.testing.assert_close(camera_warp.data.intrinsic_matrices[0], camera_usd.data.intrinsic_matrices[0])
+    torch.testing.assert_close(
+        convert_to_torch(camera_warp.data.intrinsic_matrices)[0],
+        convert_to_torch(camera_usd.data.intrinsic_matrices)[0],
+    )
 
     # check the apertures
     torch.testing.assert_close(
diff --git a/source/isaaclab/test/sensors/test_ray_caster_camera.py b/source/isaaclab/test/sensors/test_ray_caster_camera.py
index 80ce883201d5..4277f3ca45ec 100644
--- a/source/isaaclab/test/sensors/test_ray_caster_camera.py
+++ b/source/isaaclab/test/sensors/test_ray_caster_camera.py
@@ -273,15 +273,21 @@ def test_camera_init_offset(setup_sim):
     camera_ros.update(dt)
 
     # check that all transforms are set correctly
-    np.testing.assert_allclose(camera_ros.data.pos_w[0].cpu().numpy(), cam_cfg_offset_ros.offset.pos)
-    np.testing.assert_allclose(camera_opengl.data.pos_w[0].cpu().numpy(), cam_cfg_offset_opengl.offset.pos)
-    np.testing.assert_allclose(camera_world.data.pos_w[0].cpu().numpy(), cam_cfg_offset_world.offset.pos)
+    np.testing.assert_allclose(convert_to_torch(camera_ros.data.pos_w)[0].cpu().numpy(), cam_cfg_offset_ros.offset.pos)
+    np.testing.assert_allclose(
+        convert_to_torch(camera_opengl.data.pos_w)[0].cpu().numpy(), cam_cfg_offset_opengl.offset.pos
+    )
+    np.testing.assert_allclose(
+        convert_to_torch(camera_world.data.pos_w)[0].cpu().numpy(), cam_cfg_offset_world.offset.pos
+    )
 
     # check if transform correctly set in output
-    np.testing.assert_allclose(camera_ros.data.pos_w[0].cpu().numpy(), cam_cfg_offset_ros.offset.pos, rtol=1e-5)
-    np.testing.assert_allclose(camera_ros.data.quat_w_ros[0].numpy(), QUAT_ROS, rtol=1e-5)
-    np.testing.assert_allclose(camera_ros.data.quat_w_opengl[0].numpy(), QUAT_OPENGL, rtol=1e-5)
-    np.testing.assert_allclose(camera_ros.data.quat_w_world[0].cpu().numpy(), QUAT_WORLD, rtol=1e-5)
+    np.testing.assert_allclose(
+        convert_to_torch(camera_ros.data.pos_w)[0].cpu().numpy(), cam_cfg_offset_ros.offset.pos, rtol=1e-5
+    )
+    np.testing.assert_allclose(convert_to_torch(camera_ros.data.quat_w_ros)[0].cpu().numpy(), QUAT_ROS, rtol=1e-5)
+    np.testing.assert_allclose(convert_to_torch(camera_ros.data.quat_w_opengl)[0].cpu().numpy(), QUAT_OPENGL, rtol=1e-5)
+    np.testing.assert_allclose(convert_to_torch(camera_ros.data.quat_w_world)[0].cpu().numpy(), QUAT_WORLD, rtol=1e-5)
 
 
 @pytest.mark.isaacsim_ci
@@ -292,7 +298,7 @@ def test_camera_init_intrinsic_matrix(setup_sim):
     camera_1 = RayCasterCamera(cfg=camera_cfg)
     # get intrinsic matrix
     sim.reset()
-    intrinsic_matrix = camera_1.data.intrinsic_matrices[0].cpu().flatten().tolist()
+    intrinsic_matrix = convert_to_torch(camera_1.data.intrinsic_matrices)[0].cpu().flatten().tolist()
     teardown(sim)
     # reinit the first camera
     sim, camera_cfg, dt = setup()
@@ -331,8 +337,8 @@ def test_camera_init_intrinsic_matrix(setup_sim):
     )
     # check that both intrinsic matrices are the same
     torch.testing.assert_close(
-        camera_1.data.intrinsic_matrices[0],
-        camera_2.data.intrinsic_matrices[0],
+        convert_to_torch(camera_1.data.intrinsic_matrices)[0],
+        convert_to_torch(camera_2.data.intrinsic_matrices)[0],
     )
 
 
@@ -390,8 +396,8 @@ def test_camera_set_world_poses(setup_sim):
     camera.set_world_poses(position.clone(), orientation.clone(), convention="world")
 
     # check if transform correctly set in output
-    torch.testing.assert_close(camera.data.pos_w, position)
-    torch.testing.assert_close(camera.data.quat_w_world, orientation)
+    torch.testing.assert_close(convert_to_torch(camera.data.pos_w), position)
+    torch.testing.assert_close(convert_to_torch(camera.data.quat_w_world), orientation)
 
 
 @pytest.mark.isaacsim_ci
@@ -410,8 +416,8 @@ def test_camera_set_world_poses_from_view(setup_sim):
     camera.set_world_poses_from_view(eyes.clone(), targets.clone())
 
     # check if transform correctly set in output
-    torch.testing.assert_close(camera.data.pos_w, eyes)
-    torch.testing.assert_close(convert_to_torch(camera.data.quat_w_ros), convert_to_torch(quat_ros_gt))
+    torch.testing.assert_close(convert_to_torch(camera.data.pos_w), eyes)
+    torch.testing.assert_close(convert_to_torch(camera.data.quat_w_ros), quat_ros_gt)
 
 
 @pytest.mark.isaacsim_ci
@@ -436,7 +442,7 @@ def test_intrinsic_matrix(setup_sim):
     # update camera
     camera.update(dt)
     # Check that matrix is correct
-    torch.testing.assert_close(rs_intrinsic_matrix, camera.data.intrinsic_matrices)
+    torch.testing.assert_close(rs_intrinsic_matrix, convert_to_torch(camera.data.intrinsic_matrices))
 
 
 @pytest.mark.isaacsim_ci
@@ -496,8 +502,8 @@ def test_output_equal_to_usdcamera(setup_sim):
 
     # check the intrinsic matrices
     torch.testing.assert_close(
-        camera_usd.data.intrinsic_matrices,
-        camera_warp.data.intrinsic_matrices,
+        convert_to_torch(camera_usd.data.intrinsic_matrices),
+        convert_to_torch(camera_warp.data.intrinsic_matrices),
     )
 
     # check the apertures
@@ -681,8 +687,10 @@ def test_output_equal_to_usdcamera_prim_offset(setup_sim):
     camera_warp.update(dt)
 
     # check if pos and orientation are correct
-    torch.testing.assert_close(camera_warp.data.pos_w[0], camera_usd.data.pos_w[0])
-    torch.testing.assert_close(camera_warp.data.quat_w_ros[0], camera_usd.data.quat_w_ros[0])
+    torch.testing.assert_close(convert_to_torch(camera_warp.data.pos_w)[0], convert_to_torch(camera_usd.data.pos_w)[0])
+    torch.testing.assert_close(
+        convert_to_torch(camera_warp.data.quat_w_ros)[0], convert_to_torch(camera_usd.data.quat_w_ros)[0]
+    )
 
     # check image data
     torch.testing.assert_close(
@@ -784,7 +792,10 @@ def test_output_equal_to_usd_camera_intrinsics(setup_sim, focal_length):
     cam_usd_output[torch.isinf(cam_usd_output)] = 0
 
     # check that both have the same intrinsic matrices
-    torch.testing.assert_close(camera_warp.data.intrinsic_matrices[0], camera_usd.data.intrinsic_matrices[0])
+    torch.testing.assert_close(
+        convert_to_torch(camera_warp.data.intrinsic_matrices)[0],
+        convert_to_torch(camera_usd.data.intrinsic_matrices)[0],
+    )
 
     # check the apertures
     torch.testing.assert_close(