diff --git a/.gitignore b/.gitignore index b3a54b003..9119d3cea 100644 --- a/.gitignore +++ b/.gitignore @@ -39,6 +39,9 @@ *.*~ TAGS +# Vim swap files +*.sw* + # Build files *build *install diff --git a/.vscode/settings.json b/.vscode/settings.json index 8e4075da1..de64c7d30 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -2,17 +2,28 @@ "python.analysis.extraPaths": [ "${workspaceFolder}", "${workspaceFolder}/submodules/IsaacLab/source/isaaclab", + "${workspaceFolder}/submodules/IsaacLab/source/isaaclab_tasks", "${workspaceFolder}/submodules/IsaacLab/source/isaaclab_assets", "${workspaceFolder}/submodules/IsaacLab/source/isaaclab_rl", "${workspaceFolder}/submodules/IsaacLab/source/isaaclab_mimic", + "${workspaceFolder}/submodules/Isaac-GR00T" + ], + "python.autoComplete.extraPaths": [ + "${workspaceFolder}", + "${workspaceFolder}/submodules/IsaacLab/source/isaaclab", "${workspaceFolder}/submodules/IsaacLab/source/isaaclab_tasks", + "${workspaceFolder}/submodules/IsaacLab/source/isaaclab_assets", + "${workspaceFolder}/submodules/IsaacLab/source/isaaclab_rl", + "${workspaceFolder}/submodules/IsaacLab/source/isaaclab_mimic", + "${workspaceFolder}/submodules/Isaac-GR00T" ], "cursorpyright.analysis.extraPaths": [ "${workspaceFolder}", "${workspaceFolder}/submodules/IsaacLab/source/isaaclab", + "${workspaceFolder}/submodules/IsaacLab/source/isaaclab_tasks", "${workspaceFolder}/submodules/IsaacLab/source/isaaclab_assets", "${workspaceFolder}/submodules/IsaacLab/source/isaaclab_rl", "${workspaceFolder}/submodules/IsaacLab/source/isaaclab_mimic", - "${workspaceFolder}/submodules/IsaacLab/source/isaaclab_tasks", + "${workspaceFolder}/submodules/Isaac-GR00T" ] } diff --git a/docker/Dockerfile.vln_server b/docker/Dockerfile.vln_server new file mode 100644 index 000000000..c63459fa5 --- /dev/null +++ b/docker/Dockerfile.vln_server @@ -0,0 +1,38 @@ +FROM nvcr.io/nvidia/pytorch:24.02-py3 + +WORKDIR /workspace + +RUN apt-get update && apt-get install -y git && rm 
-rf /var/lib/apt/lists/* + +RUN pip install --no-cache-dir pyzmq msgpack Pillow + +# NaVILA / LLaVA — clone and install per upstream instructions: +# https://github.com/yang-zj1026/NaVILA-Bench#vla-evaluation +RUN git clone https://github.com/AnjieCheng/NaVILA.git /workspace/NaVILA && \ + cd /workspace/NaVILA && \ + pip install --no-cache-dir -e . && \ + pip install --no-cache-dir -e ".[train]" && \ + pip install --no-cache-dir -e ".[eval]" && \ + pip install --no-cache-dir git+https://github.com/huggingface/transformers@v4.37.2 && \ + SITE_PKG=$(python -c 'import site; print(site.getsitepackages()[0])') && \ + cp -rv ./llava/train/transformers_replace/* "$SITE_PKG/transformers/" && \ + cp -rv ./llava/train/deepspeed_replace/* "$SITE_PKG/deepspeed/" + +# NaVILA pins torch==2.3.0 (PyPI release) which replaces the pre-release +# torch 2.3.0a0 from the base image, breaking any native extensions that +# were compiled against the pre-release. Remove them and rebuild. +RUN pip uninstall -y transformer-engine transformer-engine-extensions 2>/dev/null; true + +# flash-attn must be installed AFTER NaVILA so it compiles against the +# correct PyTorch version (2.3.0 release, not the pre-release 2.3.0a0). 
+RUN pip install --no-cache-dir flash-attn==2.5.8 || \ + echo "WARNING: flash-attn build failed, VLM will run without it" + +# Arena code last so that code-only changes rebuild in seconds +COPY isaaclab_arena/remote_policy /workspace/isaaclab_arena/remote_policy +COPY isaaclab_arena/__init__.py /workspace/isaaclab_arena/__init__.py +COPY isaaclab_arena_navila /workspace/isaaclab_arena_navila + +ENV PYTHONPATH=/workspace + +ENTRYPOINT ["python", "-u", "-m", "isaaclab_arena.remote_policy.remote_policy_server_runner"] diff --git a/docker/run_vln_server.sh b/docker/run_vln_server.sh new file mode 100644 index 000000000..715134a40 --- /dev/null +++ b/docker/run_vln_server.sh @@ -0,0 +1,200 @@ +#!/usr/bin/env bash +set -euo pipefail + +# ------------------------- +# User-configurable defaults +# ------------------------- + +# Default mount directories on the host machine +DATASETS_DIR="${DATASETS_DIR:-$HOME/datasets}" +MODELS_DIR="${MODELS_DIR:-$HOME/models}" +EVAL_DIR="${EVAL_DIR:-$HOME/eval}" + +# Docker image name and tag for the VLN policy server +DOCKER_IMAGE_NAME="${DOCKER_IMAGE_NAME:-vln_policy_server}" +DOCKER_VERSION_TAG="${DOCKER_VERSION_TAG:-latest}" + +# Rebuild controls +FORCE_REBUILD="${FORCE_REBUILD:-false}" +NO_CACHE="" + +# Server parameters (can also be overridden via environment variables) +HOST="${HOST:-0.0.0.0}" +PORT="${PORT:-5555}" +API_TOKEN="${API_TOKEN:-}" +TIMEOUT_MS="${TIMEOUT_MS:-15000}" +POLICY_TYPE="${POLICY_TYPE:-isaaclab_arena_navila.navila_server_policy.NaVilaServerPolicy}" + +# GPU selection for docker --gpus (can also be overridden via environment variables) +# Examples: +# all -> use all GPUs +# 1 -> use 1 GPU (count) +# "device=0" -> use GPU 0 +# "device=0,1" -> use GPU 0 and 1 +GPUS="${GPUS:-all}" + +# ------------------------- +# Help message +# ------------------------- +usage() { + script_name=$(basename "$0") + cat < Path to datasets on the host. Default: "$DATASETS_DIR". + -m Path to models on the host. Default: "$MODELS_DIR". 
+ -e Path to evaluation data on the host. Default: "$EVAL_DIR". + -n Docker image name. Default: "$DOCKER_IMAGE_NAME". + -g GPU selection for docker --gpus. Default: "all". + Examples: "all", "1", "device=0", "device=0,1". + -r Force rebuilding of the Docker image. + -R Force rebuilding of the Docker image, without cache. + +Server-specific options (passed through to the policy server entrypoint): + --host HOST + --port PORT + --api_token TOKEN + --timeout_ms MS + --policy_type TYPE + --model_path PATH + --num_video_frames N + --conv_mode MODE + +Examples: + # Minimal: use all defaults (model at default path, all GPUs, port 5555) + bash $script_name + + # Custom model path and single GPU + bash $script_name -m /path/to/navila-checkpoint -g "device=0" + + # Custom port + bash $script_name --port 6000 +EOF +} + +# ------------------------- +# Parse all options +# ------------------------- +# Server parameters that can be overridden individually via --flags. +# Unset values will be filled with defaults after parsing. +MODEL_PATH="" + +while [[ $# -gt 0 ]]; do + case "$1" in + -v) set -x; shift 1 ;; + -d) DATASETS_DIR="$2"; shift 2 ;; + -m) MODELS_DIR="$2"; shift 2 ;; + -e) EVAL_DIR="$2"; shift 2 ;; + -n) DOCKER_IMAGE_NAME="$2"; shift 2 ;; + -g) GPUS="$2"; shift 2 ;; + -r) FORCE_REBUILD="true"; shift 1 ;; + -R) FORCE_REBUILD="true"; NO_CACHE="--no-cache"; shift 1 ;; + -h|--help) usage; exit 0 ;; + --host) HOST="$2"; shift 2 ;; + --port) PORT="$2"; shift 2 ;; + --api_token) API_TOKEN="$2"; shift 2 ;; + --timeout_ms) TIMEOUT_MS="$2"; shift 2 ;; + --policy_type) POLICY_TYPE="$2"; shift 2 ;; + --model_path) MODEL_PATH="$2"; shift 2 ;; + --num_video_frames) NUM_VIDEO_FRAMES="$2"; shift 2 ;; + --conv_mode) CONV_MODE="$2"; shift 2 ;; + --policy_device) POLICY_DEVICE="$2"; shift 2 ;; + *) + echo "Unknown option: $1" >&2 + usage + exit 1 + ;; + esac +done + +# Build server args: always include all parameters, using defaults for +# anything the user did not override. 
+SERVER_ARGS=( + --host "${HOST}" + --port "${PORT}" + --timeout_ms "${TIMEOUT_MS}" + --policy_type "${POLICY_TYPE}" + --model_path "${MODEL_PATH:-/models}" +) +[ -n "${API_TOKEN}" ] && SERVER_ARGS+=(--api_token "${API_TOKEN}") +[ -n "${NUM_VIDEO_FRAMES+x}" ] && SERVER_ARGS+=(--num_video_frames "${NUM_VIDEO_FRAMES}") +[ -n "${CONV_MODE+x}" ] && SERVER_ARGS+=(--conv_mode "${CONV_MODE}") +[ -n "${POLICY_DEVICE+x}" ] && SERVER_ARGS+=(--policy_device "${POLICY_DEVICE}") + +echo "Host paths:" +echo " DATASETS_DIR = ${DATASETS_DIR}" +echo " MODELS_DIR = ${MODELS_DIR}" +echo " EVAL_DIR = ${EVAL_DIR}" +echo "Docker image:" +echo " ${DOCKER_IMAGE_NAME}:${DOCKER_VERSION_TAG}" +echo "GPU:" +echo " --gpus ${GPUS}" +echo "Rebuild:" +echo " FORCE_REBUILD = ${FORCE_REBUILD}, NO_CACHE = '${NO_CACHE}'" +echo "Server args:" +printf ' %q ' "${SERVER_ARGS[@]}"; echo + +# ------------------------- +# 1) Build the Docker image +# ------------------------- + +IMAGE_TAG_FULL="${DOCKER_IMAGE_NAME}:${DOCKER_VERSION_TAG}" + +SHOULD_BUILD=false + +if [ "${FORCE_REBUILD}" = "true" ]; then + SHOULD_BUILD=true +else + if [ -z "$(docker images -q "${IMAGE_TAG_FULL}")" ]; then + SHOULD_BUILD=true + fi +fi + +if [ "${SHOULD_BUILD}" = "true" ]; then + echo "Building Docker image ${IMAGE_TAG_FULL}..." + # Use existing image layers as cache source (BuildKit may GC intermediate + # layer cache, but the final image layers can still be reused). + CACHE_FROM_ARGS="" + if [ -n "$(docker images -q "${IMAGE_TAG_FULL}" 2>/dev/null)" ]; then + CACHE_FROM_ARGS="--cache-from ${IMAGE_TAG_FULL}" + fi + docker build \ + ${NO_CACHE} \ + ${CACHE_FROM_ARGS} \ + --network host \ + -f docker/Dockerfile.vln_server \ + -t "${IMAGE_TAG_FULL}" \ + . +else + echo "Docker image ${IMAGE_TAG_FULL} already exists. Skipping rebuild." + echo "Use -r or -R to force rebuilding the image." 
+fi + +# ------------------------- +# 2) Run the container +# ------------------------- + +DOCKER_RUN_ARGS=( + --rm + --gpus "${GPUS}" + --net host + --name vln_policy_server_container + -v "${MODELS_DIR}":/models +) + +if [ -d "${DATASETS_DIR}" ]; then + DOCKER_RUN_ARGS+=(-v "${DATASETS_DIR}":/datasets) +fi + +if [ -d "${EVAL_DIR}" ]; then + DOCKER_RUN_ARGS+=(-v "${EVAL_DIR}":/eval) +fi + +docker run "${DOCKER_RUN_ARGS[@]}" \ + "${IMAGE_TAG_FULL}" \ + "${SERVER_ARGS[@]}" diff --git a/isaaclab_arena/assets/matterport_background.py b/isaaclab_arena/assets/matterport_background.py new file mode 100644 index 000000000..06a8f896b --- /dev/null +++ b/isaaclab_arena/assets/matterport_background.py @@ -0,0 +1,285 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Matterport 3D scene background for VLN tasks. + +This background can be spawned as a shared global scene at +``/World/matterport`` so the Matterport USD is not cloned per environment. +We keep the invisible ground plane as an optional fallback while validating +whether the referenced USD mesh participates in PhysX collision reliably. + +For collision experiments we support: + - Explicit colliders on the referenced visual USD. + - A legacy single collision overlay. + - Split floor / obstacle collision overlays built from a cleaner raw mesh. 
+""" + +from __future__ import annotations + +import isaaclab.sim as sim_utils +from isaaclab.assets import AssetBaseCfg +from isaaclab.sim.utils import clone +from pxr import Usd, UsdGeom + +from isaaclab_arena.assets.background_library import LibraryBackground +from isaaclab_arena.assets.register import register_asset +from isaaclab_arena.utils.pose import Pose + + +def _get_mesh_collision_cfg(mesh_collider_approximation: str): + """Return the IsaacLab mesh-collision config for the requested approximation.""" + if mesh_collider_approximation == "triangle": + return sim_utils.TriangleMeshPropertiesCfg() + if mesh_collider_approximation == "convex_decomposition": + return sim_utils.ConvexDecompositionPropertiesCfg() + if mesh_collider_approximation == "sdf": + return sim_utils.SDFMeshPropertiesCfg() + raise ValueError( + "Unsupported mesh collider approximation " + f"'{mesh_collider_approximation}'. Expected one of: triangle, convex_decomposition, sdf." + ) + + +def _iter_descendant_prims(root_prim): + """Yield descendant prims, including children hidden behind instance proxies.""" + for child in root_prim.GetFilteredChildren(Usd.TraverseInstanceProxies()): + yield child + yield from _iter_descendant_prims(child) + + +def _apply_explicit_mesh_colliders(stage, root_prim_path: str, physics_material_path: str, approximation: str) -> int: + """Apply explicit collider schemas to all descendant Mesh prims.""" + mesh_collision_cfg = _get_mesh_collision_cfg(approximation) + root_prim = stage.GetPrimAtPath(root_prim_path) + if not root_prim.IsValid(): + return 0 + mesh_count = 0 + for prim in _iter_descendant_prims(root_prim): + if not prim.IsA(UsdGeom.Mesh): + continue + prim_path = str(prim.GetPath()) + sim_utils.define_collision_properties( + prim_path, + sim_utils.CollisionPropertiesCfg(collision_enabled=True), + stage=stage, + ) + sim_utils.define_mesh_collision_properties(prim_path, mesh_collision_cfg, stage=stage) + sim_utils.bind_physics_material(prim_path, 
physics_material_path, stage=stage) + mesh_count += 1 + return mesh_count + + +def _get_collision_overlay_specs(cfg) -> list[tuple[str, str, str]]: + """Return collision overlay specs as ``(prim_name, usd_path, label)`` tuples.""" + combined_overlay = getattr(cfg, "collision_overlay_usd_path", None) + floor_overlay = getattr(cfg, "floor_collision_usd_path", None) + obstacle_overlay = getattr(cfg, "obstacle_collision_usd_path", None) + + if combined_overlay and (floor_overlay or obstacle_overlay): + raise ValueError( + "Legacy combined collision overlay cannot be used together with split " + "floor / obstacle collision overlays." + ) + + specs: list[tuple[str, str, str]] = [] + if combined_overlay: + specs.append(("collision", combined_overlay, "combined")) + if floor_overlay: + specs.append(("collisionFloor", floor_overlay, "floor")) + if obstacle_overlay: + specs.append(("collisionObstacle", obstacle_overlay, "obstacle")) + return specs + + +def _spawn_collision_overlay( + prim_path: str, + stage, + usd_path: str, + physics_material_path: str, + approximation: str, + overlay_prim_name: str, +) -> int: + """Spawn a hidden collision-only USD and apply mesh colliders to it.""" + from isaaclab.sim.spawners.from_files.from_files import _spawn_from_usd_file + + collision_prim_path = f"{prim_path}/{overlay_prim_name}" + collision_cfg = sim_utils.UsdFileCfg(usd_path=usd_path) + collision_prim = _spawn_from_usd_file(collision_prim_path, usd_path, collision_cfg) + if collision_prim.IsValid(): + UsdGeom.Imageable(collision_prim).MakeInvisible() + sim_utils.define_collision_properties( + collision_prim_path, + sim_utils.CollisionPropertiesCfg(collision_enabled=True), + stage=stage, + ) + sim_utils.bind_physics_material(collision_prim_path, physics_material_path, stage=stage) + return _apply_explicit_mesh_colliders( + stage=stage, + root_prim_path=collision_prim_path, + physics_material_path=physics_material_path, + approximation=approximation, + ) + + +@clone +def 
_spawn_matterport_with_ground(prim_path, cfg, *args, **kwargs): + """Spawn Matterport USD + optional ground plane + indoor lighting. + + Matterport 3D scans contain geometry and textures but no light + sources. This function adds everything needed to render and + simulate inside a Matterport scene: + - The USD scene itself (visual mesh with textures). + - Optional invisible ground plane for fallback collision support. + - Indoor lighting matching NaVILA-Bench: + DomeLight(500) — ambient fill under ceilings + DistantLight(1000) — directional light + DiskLight x2(10000) — area lights at ceiling height + """ + from isaaclab.sim.spawners.from_files.from_files import _spawn_from_usd_file + + prim = _spawn_from_usd_file(prim_path, cfg.usd_path, cfg, *args, **kwargs) + + # Try the same top-level collider setup used by NaVILA-Bench. + collider_cfg = sim_utils.CollisionPropertiesCfg(collision_enabled=True) + sim_utils.define_collision_properties(prim.GetPrimPath(), collider_cfg) + + physics_mat = sim_utils.RigidBodyMaterialCfg( + static_friction=1.0, + dynamic_friction=1.0, + friction_combine_mode="multiply", + restitution_combine_mode="multiply", + ) + mat_path = f"{prim_path}/matterportPhysicsMaterial" + physics_mat.func(mat_path, physics_mat) + sim_utils.bind_physics_material(prim.GetPrimPath(), mat_path) + + explicit_mesh_colliders = getattr(cfg, "explicit_mesh_colliders", False) + mesh_collider_approximation = getattr(cfg, "mesh_collider_approximation", "triangle") + explicit_mesh_count = 0 + if explicit_mesh_colliders: + explicit_mesh_count = _apply_explicit_mesh_colliders( + stage=prim.GetStage(), + root_prim_path=str(prim.GetPrimPath()), + physics_material_path=mat_path, + approximation=mesh_collider_approximation, + ) + + collision_overlay_stats: list[str] = [] + for overlay_prim_name, overlay_usd_path, overlay_label in _get_collision_overlay_specs(cfg): + collision_overlay_mesh_count = _spawn_collision_overlay( + prim_path=prim_path, + stage=prim.GetStage(), + 
usd_path=overlay_usd_path, + physics_material_path=mat_path, + approximation=mesh_collider_approximation, + overlay_prim_name=overlay_prim_name, + ) + collision_overlay_stats.append( + f"{overlay_label}-overlay({collision_overlay_mesh_count}, {mesh_collider_approximation})" + ) + + ground_plane_z = getattr(cfg, "ground_plane_z", 0.0) + if ground_plane_z is not None: + ground_cfg = sim_utils.GroundPlaneCfg( + physics_material=physics_mat, + visible=False, + ) + ground_cfg.func("/World/GroundPlane", ground_cfg) + + # Indoor lighting — Matterport scenes have no built-in lights. + # Matches NaVILA-Bench h1_matterport_base_cfg.py lighting setup. + sim_utils.DomeLightCfg(intensity=500.0, color=(1.0, 1.0, 1.0)).func( + "/World/MatterportDomeLight", sim_utils.DomeLightCfg(intensity=500.0, color=(1.0, 1.0, 1.0)) + ) + sim_utils.DistantLightCfg(intensity=1000.0, color=(1.0, 1.0, 1.0)).func( + "/World/MatterportDistantLight", sim_utils.DistantLightCfg(intensity=1000.0, color=(1.0, 1.0, 1.0)) + ) + disk_cfg = sim_utils.DiskLightCfg(intensity=10000.0, color=(1.0, 1.0, 1.0), radius=50.0) + disk_cfg.func("/World/MatterportDisk1", disk_cfg) + disk_cfg.func("/World/MatterportDisk2", disk_cfg) + # Position the disk lights at ceiling height + from pxr import Gf + stage = prim.GetStage() + for path, pos in [("/World/MatterportDisk1", (0.0, 0.0, 2.6)), ("/World/MatterportDisk2", (-1.0, 0.0, 2.6))]: + disk_prim = stage.GetPrimAtPath(path) + if disk_prim.IsValid(): + xformable = UsdGeom.Xformable(disk_prim) + ops = xformable.GetOrderedXformOps() + for op in ops: + if op.GetOpName() == "xformOp:translate": + op.Set(Gf.Vec3d(*pos)) + break + else: + xformable.AddTranslateOp().Set(Gf.Vec3d(*pos)) + + ground_str = f"ground(z={ground_plane_z})" if ground_plane_z is not None else "ground(disabled)" + collider_str = "top-level collision" + if explicit_mesh_colliders: + collider_str += f" + explicit-mesh({explicit_mesh_count}, {mesh_collider_approximation})" + if collision_overlay_stats: + 
collider_str += " + " + " + ".join(collision_overlay_stats) + print(f"[MatterportBackground] {prim_path} + {collider_str} + {ground_str} + lighting") + return prim + + +@register_asset +class MatterportBackground(LibraryBackground): + """Matterport 3D scene shared globally across environments.""" + + name = "matterport" + tags = ["background"] + usd_path = None + initial_pose = Pose.identity() + object_min_z = -0.5 + + def __init__( + self, + usd_path: str, + ground_plane_z: float | None = 0.0, + use_global_prim: bool = False, + explicit_mesh_colliders: bool = False, + mesh_collider_approximation: str = "triangle", + collision_overlay_usd_path: str | None = None, + floor_collision_usd_path: str | None = None, + obstacle_collision_usd_path: str | None = None, + ): + if collision_overlay_usd_path and (floor_collision_usd_path or obstacle_collision_usd_path): + raise ValueError( + "Use either the legacy combined collision overlay or the split floor / " + "obstacle overlays, not both." + ) + self.usd_path = usd_path + self.ground_plane_z = ground_plane_z + self.use_global_prim = use_global_prim + self.explicit_mesh_colliders = explicit_mesh_colliders + self.mesh_collider_approximation = mesh_collider_approximation + self.collision_overlay_usd_path = collision_overlay_usd_path + self.floor_collision_usd_path = floor_collision_usd_path + self.obstacle_collision_usd_path = obstacle_collision_usd_path + self.spawn_cfg_addon = {"func": _spawn_matterport_with_ground} + self.asset_cfg_addon = {"collision_group": -1} if use_global_prim else {} + prim_path = "/World/matterport" if use_global_prim else None + super().__init__(prim_path=prim_path) + + def _generate_base_cfg(self) -> AssetBaseCfg: + """Generate an AssetBaseCfg, optionally using a shared global prim path.""" + prim_path = "/World/matterport" if self.use_global_prim else self.prim_path + object_cfg = AssetBaseCfg( + prim_path=prim_path, + spawn=sim_utils.UsdFileCfg( + usd_path=self.usd_path, + scale=self.scale, + 
**self.spawn_cfg_addon, + ), + **self.asset_cfg_addon, + ) + object_cfg = self._add_initial_pose_to_cfg(object_cfg) + object_cfg.spawn.ground_plane_z = self.ground_plane_z + object_cfg.spawn.explicit_mesh_colliders = self.explicit_mesh_colliders + object_cfg.spawn.mesh_collider_approximation = self.mesh_collider_approximation + object_cfg.spawn.collision_overlay_usd_path = self.collision_overlay_usd_path + object_cfg.spawn.floor_collision_usd_path = self.floor_collision_usd_path + object_cfg.spawn.obstacle_collision_usd_path = self.obstacle_collision_usd_path + return object_cfg diff --git a/isaaclab_arena/embodiments/__init__.py b/isaaclab_arena/embodiments/__init__.py index 5f3052fd2..7ab4b7da6 100644 --- a/isaaclab_arena/embodiments/__init__.py +++ b/isaaclab_arena/embodiments/__init__.py @@ -9,3 +9,4 @@ from .g1.g1 import * from .galbot.galbot import * from .gr1t2.gr1t2 import * +from .h1.h1 import * diff --git a/isaaclab_arena/embodiments/h1/__init__.py b/isaaclab_arena/embodiments/h1/__init__.py new file mode 100644 index 000000000..d373688a3 --- /dev/null +++ b/isaaclab_arena/embodiments/h1/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 diff --git a/isaaclab_arena/embodiments/h1/h1.py b/isaaclab_arena/embodiments/h1/h1.py new file mode 100644 index 000000000..24f40516f --- /dev/null +++ b/isaaclab_arena/embodiments/h1/h1.py @@ -0,0 +1,143 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Standard H1 humanoid embodiment for IsaacLab Arena. + +Provides the Unitree H1 robot hardware configuration (articulation, +actuators, contact sensors) that can be shared across different tasks. +Task-specific extensions (observation layout, cameras, commands) are +defined in task-specific modules (e.g. ``isaaclab_arena.embodiments.h1.h1_vln``). 
+ +Usage:: + + from isaaclab_arena.embodiments.h1.h1 import H1SceneCfg, H1CameraCfg + + class MyH1Embodiment(EmbodimentBase): + def __init__(self): + self.scene_config = H1SceneCfg() + self.camera_config = H1CameraCfg() + ... +""" + +from __future__ import annotations + +from dataclasses import MISSING + +import isaaclab.sim as sim_utils +from isaaclab.assets.articulation.articulation_cfg import ArticulationCfg +from isaaclab.sensors import CameraCfg, ContactSensorCfg, TiledCameraCfg +from isaaclab.utils import configclass +from isaaclab_assets import H1_MINIMAL_CFG + +from isaaclab_arena.utils.pose import Pose + + +# ========================================================================== # +# Scene: H1 robot articulation + contact sensors # +# ========================================================================== # + + +@configclass +class H1SceneCfg: + """Scene configuration for the H1 humanoid. + + Uses the official ``H1_MINIMAL_CFG`` from ``isaaclab_assets`` to ensure + joint names, init state, and actuator PD gains are consistent across + all H1-based tasks. + """ + + robot: ArticulationCfg = H1_MINIMAL_CFG.replace( + prim_path="{ENV_REGEX_NS}/Robot", + ) + + contact_forces: ContactSensorCfg = ContactSensorCfg( + prim_path="{ENV_REGEX_NS}/Robot/.*", + history_length=3, + track_air_time=True, + debug_vis=False, + ) + + +# ========================================================================== # +# Camera: configurable head camera with optional follow camera # +# ========================================================================== # + +_DEFAULT_H1_CAMERA_OFFSET = Pose( + position_xyz=(0.1, 0.0, 0.5), + rotation_wxyz=(-0.5, 0.5, -0.5, 0.5), +) + +_DEFAULT_H1_FOLLOW_CAMERA_OFFSET = Pose( + position_xyz=(-1.0, 0.0, 0.57), + rotation_wxyz=(-0.5, 0.5, -0.5, 0.5), +) + + +@configclass +class H1CameraCfg: + """Configurable camera setup for the H1 humanoid. + + Supports: + - ``robot_head_cam``: first-person camera on pelvis (always created). 
+ - ``robot_follow_cam``: third-person follow camera behind the robot + (created only when ``_enable_follow_cam=True``). + + Both camera positions can be customized via ``_camera_offset`` and + ``_follow_camera_offset``. Use ``_is_tiled_camera=True`` for parallel + multi-env evaluation. + """ + + robot_head_cam: CameraCfg | TiledCameraCfg = MISSING + robot_follow_cam: CameraCfg | TiledCameraCfg | None = None + + def __post_init__(self): + is_tiled = getattr(self, "_is_tiled_camera", False) + cam_offset = getattr(self, "_camera_offset", _DEFAULT_H1_CAMERA_OFFSET) + enable_follow = getattr(self, "_enable_follow_cam", False) + follow_offset = getattr(self, "_follow_camera_offset", _DEFAULT_H1_FOLLOW_CAMERA_OFFSET) + head_camera_data_types = getattr(self, "_head_camera_data_types", ["rgb"]) + follow_camera_data_types = getattr(self, "_follow_camera_data_types", ["rgb"]) + + CameraClass = TiledCameraCfg if is_tiled else CameraCfg + OffsetClass = CameraClass.OffsetCfg + + cam_res = getattr(self, "_camera_resolution", 512) + + # First-person head camera + self.robot_head_cam = CameraClass( + prim_path="{ENV_REGEX_NS}/Robot/pelvis/HeadCamera", + offset=OffsetClass( + pos=cam_offset.position_xyz, + rot=cam_offset.rotation_wxyz, + convention="ros", + ), + update_period=0.0, + height=cam_res, + width=cam_res, + data_types=list(head_camera_data_types), + spawn=sim_utils.PinholeCameraCfg( + horizontal_aperture=54.0, + clipping_range=(0.1, 10.0), + ), + ) + + # Third-person follow camera (optional) + if enable_follow: + self.robot_follow_cam = CameraClass( + prim_path="{ENV_REGEX_NS}/Robot/pelvis/FollowCamera", + offset=OffsetClass( + pos=follow_offset.position_xyz, + rot=follow_offset.rotation_wxyz, + convention="ros", + ), + update_period=0.0, + height=cam_res, + width=cam_res, + data_types=list(follow_camera_data_types), + spawn=sim_utils.PinholeCameraCfg( + horizontal_aperture=100.0, + clipping_range=(0.1, 20.0), + ), + ) diff --git a/isaaclab_arena/embodiments/h1/h1_vln.py 
b/isaaclab_arena/embodiments/h1/h1_vln.py new file mode 100644 index 000000000..196a70cf8 --- /dev/null +++ b/isaaclab_arena/embodiments/h1/h1_vln.py @@ -0,0 +1,253 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""H1 humanoid embodiment configured for VLN navigation. + +Extends the standard H1 embodiment (``isaaclab_arena.embodiments.h1``) +with VLN-specific configuration: + - Observation layout matching the NaVILA low-level locomotion policy. + - Velocity command generator for VLM → LL policy integration. + - VLN-specific event handling. + +The observation vector layout is dictated by the pre-trained RSL-RL +locomotion checkpoint and **cannot be reordered**:: + + [0:3] base_lin_vel (3) + [3:6] base_ang_vel (3) + [6:9] projected_gravity (3) + [9:12] velocity_commands (3) ← VLM commands injected here + [12:31] joint_pos_rel (19) + [31:50] joint_vel_rel (19) + [50:69] last_action (19) + +Camera and lighting are provided by the standard H1 and Matterport +background respectively; this module only adds the observation/command +configuration. 
+""" + +from __future__ import annotations + +import math + +import isaaclab.envs.mdp as base_mdp +from isaaclab.managers import EventTermCfg as EventTerm +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.managers import ObservationTermCfg as ObsTerm +from isaaclab.managers import SceneEntityCfg +from isaaclab.sensors import RayCasterCfg, patterns +from isaaclab.utils import configclass + +from isaaclab_arena.assets.register import register_asset +from isaaclab_arena.embodiments.embodiment_base import EmbodimentBase +from isaaclab_arena.embodiments.h1.h1 import ( + H1CameraCfg, + H1SceneCfg, + _DEFAULT_H1_CAMERA_OFFSET, +) +from isaaclab_arena.utils.pose import Pose + + +# ========================================================================== # +# VLN observations # +# ========================================================================== # +# Layout must match the NaVILA RSL-RL locomotion policy training config. +# See ``env.yaml`` in the training checkpoint directory for the canonical +# definition. Changing the order will break the pre-trained policy. 
+ + +@configclass +class H1VlnObservationsCfg: + """Observation groups for the H1 VLN embodiment.""" + + @configclass + class PolicyCfg(ObsGroup): + """Proprioceptive observations consumed by the low-level locomotion policy.""" + + base_lin_vel = ObsTerm(func=base_mdp.base_lin_vel) + base_ang_vel = ObsTerm(func=base_mdp.base_ang_vel) + projected_gravity = ObsTerm(func=base_mdp.projected_gravity) + velocity_commands = ObsTerm( + func=base_mdp.generated_commands, + params={"command_name": "base_velocity"}, + ) + joint_pos = ObsTerm(func=base_mdp.joint_pos_rel) + joint_vel = ObsTerm(func=base_mdp.joint_vel_rel) + actions = ObsTerm(func=base_mdp.last_action) + + def __post_init__(self): + self.enable_corruption = False + self.concatenate_terms = True + + @configclass + class ProprioCfg(ObsGroup): + """Duplicate proprio group for the history wrapper (NaVILA compatibility).""" + + base_lin_vel = ObsTerm(func=base_mdp.base_lin_vel) + base_ang_vel = ObsTerm(func=base_mdp.base_ang_vel) + projected_gravity = ObsTerm(func=base_mdp.projected_gravity) + velocity_commands = ObsTerm( + func=base_mdp.generated_commands, + params={"command_name": "base_velocity"}, + ) + joint_pos = ObsTerm(func=base_mdp.joint_pos_rel) + joint_vel = ObsTerm(func=base_mdp.joint_vel_rel) + actions = ObsTerm(func=base_mdp.last_action) + + def __post_init__(self): + self.enable_corruption = False + self.concatenate_terms = True + + policy: PolicyCfg = PolicyCfg() + proprio: ProprioCfg = ProprioCfg() + + +# ========================================================================== # +# VLN actions # +# ========================================================================== # + + +@configclass +class H1VlnActionCfg: + """Joint-position action space matching the NaVILA training config.""" + + joint_pos: base_mdp.JointPositionActionCfg = base_mdp.JointPositionActionCfg( + asset_name="robot", + joint_names=[".*"], + scale=0.5, + use_default_offset=True, + ) + + +# 
========================================================================== # +# VLN commands # +# ========================================================================== # + + +@configclass +class H1VlnCommandsCfg: + """Velocity command generator. + + Provides the ``base_velocity`` command term that the proprioceptive + observation ``velocity_commands`` reads from. During VLN evaluation + the actual velocity command is injected by ``VlnPolicy`` directly + into the observation buffer (indices 9:12). + """ + + base_velocity: base_mdp.UniformVelocityCommandCfg = base_mdp.UniformVelocityCommandCfg( + asset_name="robot", + resampling_time_range=(10.0, 10.0), + rel_standing_envs=0.02, + rel_heading_envs=1.0, + heading_command=True, + heading_control_stiffness=0.5, + debug_vis=False, + ranges=base_mdp.UniformVelocityCommandCfg.Ranges( + lin_vel_x=(-1.0, 1.0), + lin_vel_y=(-1.0, 1.0), + ang_vel_z=(-1.0, 1.0), + heading=(-math.pi, math.pi), + ), + ) + + +# ========================================================================== # +# VLN events # +# ========================================================================== # + + +@configclass +class H1VlnEventCfg: + """Reset events for the H1 VLN embodiment.""" + + reset_robot_joints: EventTerm = EventTerm( + func=base_mdp.reset_joints_by_offset, + mode="reset", + params={ + "position_range": (0.0, 0.0), + "velocity_range": (0.0, 0.0), + "asset_cfg": SceneEntityCfg("robot"), + }, + ) + + +@configclass +class H1VlnSceneCfg(H1SceneCfg): + """H1 scene config with optional experimental debug sensors.""" + + height_scanner: RayCasterCfg | None = None + + +# ========================================================================== # +# VLN Embodiment # +# ========================================================================== # + + +@register_asset +class H1VlnEmbodiment(EmbodimentBase): + """H1 humanoid embodiment for Vision-Language Navigation. 
+ + Extends the standard ``H1SceneCfg`` and ``H1CameraCfg`` from + ``isaaclab_arena.embodiments.h1`` with VLN-specific observations, + commands, and events. + + Args: + enable_cameras: Enable camera sensors. + initial_pose: Robot initial pose. + camera_offset: First-person camera position on pelvis. + enable_follow_camera: Add a third-person follow camera for + visualization. Default True for VLN. + follow_camera_offset: Follow camera position on pelvis. + use_tiled_camera: Use TiledCamera for parallel evaluation. + """ + + name = "h1_vln" + + def __init__( + self, + enable_cameras: bool = True, + initial_pose: Pose | None = None, + camera_offset: Pose | None = _DEFAULT_H1_CAMERA_OFFSET, + enable_follow_camera: bool = True, + follow_camera_offset: Pose | None = None, + use_tiled_camera: bool = False, + camera_resolution: int = 512, + enable_head_depth: bool = False, + enable_height_scanner: bool = False, + height_scanner_debug_vis: bool = False, + ): + super().__init__(enable_cameras=enable_cameras, initial_pose=initial_pose) + + # Robot hardware from standard H1 + self.scene_config = H1VlnSceneCfg() + if enable_height_scanner: + self.scene_config.height_scanner = RayCasterCfg( + prim_path="{ENV_REGEX_NS}/Robot/pelvis", + offset=RayCasterCfg.OffsetCfg(pos=(0.0, 0.0, 0.5)), + attach_yaw_only=True, + pattern_cfg=patterns.GridPatternCfg(resolution=0.1, size=[1.6, 1.0]), + debug_vis=height_scanner_debug_vis, + mesh_prim_paths=["/World/matterport"], + ) + + # Cameras: head (required) + follow (optional). + # Private attributes must be set BEFORE construction because + # @configclass __post_init__ reads them at init time. 
+ cam_cfg = H1CameraCfg.__new__(H1CameraCfg) + cam_cfg._is_tiled_camera = use_tiled_camera + cam_cfg._camera_resolution = camera_resolution + cam_cfg._camera_offset = camera_offset + cam_cfg._enable_follow_cam = enable_follow_camera + cam_cfg._head_camera_data_types = ["rgb", "distance_to_image_plane"] if enable_head_depth else ["rgb"] + cam_cfg._follow_camera_data_types = ["rgb"] + if follow_camera_offset is not None: + cam_cfg._follow_camera_offset = follow_camera_offset + cam_cfg.__init__() + self.camera_config = cam_cfg + + # VLN-specific configs + self.action_config = H1VlnActionCfg() + self.observation_config = H1VlnObservationsCfg() + self.event_config = H1VlnEventCfg() + self.command_config = H1VlnCommandsCfg() diff --git a/isaaclab_arena/evaluation/policy_runner.py b/isaaclab_arena/evaluation/policy_runner.py index dec7e2ea2..6d03d760f 100644 --- a/isaaclab_arena/evaluation/policy_runner.py +++ b/isaaclab_arena/evaluation/policy_runner.py @@ -25,28 +25,32 @@ def get_policy_cls(policy_type: str) -> type["PolicyBase"]: """Get the policy class for the given policy type name. Note that this function: - - first: checks for a registered policy type in the PolicyRegistry - - if not found, it tries to dynamically import the policy class, treating - the policy_type argument as a string representing the module path and class name. + - prefers dynamically importing dotted module paths directly, which avoids + forcing unrelated asset libraries to register + - otherwise: checks for a registered policy type in the PolicyRegistry """ - from isaaclab_arena.assets.asset_registry import PolicyRegistry - - policy_registry = PolicyRegistry() - if policy_registry.is_registered(policy_type): - return policy_registry.get_policy(policy_type) - else: - print(f"Policy {policy_type} is not registered. Dynamically importing from path: {policy_type}") + if "." in policy_type: + print(f"Dynamically importing policy from path: {policy_type}") assert "." 
in policy_type, ( "policy_type must be a dotted Python import path of the form 'module.submodule.ClassName', got:" f" {policy_type}" ) - # Dynamically import the class from the string path module_path, class_name = policy_type.rsplit(".", 1) module = import_module(module_path) policy_cls = getattr(module, class_name) return policy_cls + from isaaclab_arena.assets.asset_registry import PolicyRegistry + + policy_registry = PolicyRegistry() + if policy_registry.is_registered(policy_type): + return policy_registry.get_policy(policy_type) + + raise ValueError( + f"Policy {policy_type!r} is neither a registered policy name nor a dotted Python import path." + ) + def is_distributed(args_cli: argparse.Namespace) -> bool: return ( diff --git a/isaaclab_arena/metrics/vln_metrics.py b/isaaclab_arena/metrics/vln_metrics.py new file mode 100644 index 000000000..62b1308d9 --- /dev/null +++ b/isaaclab_arena/metrics/vln_metrics.py @@ -0,0 +1,427 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Standard Vision-Language Navigation metrics. + +Metrics implemented: + - PathLength: mean cumulative Euclidean distance traversed in XY. + - DistanceToGoal: approximate geodesic distance from the final robot + position to the goal, estimated via the ground-truth + reference path and a KDTree. + - OracleSuccess: fraction of episodes where the trajectory comes within + the success radius of the goal at least once. + - Success: fraction of episodes where the agent explicitly + issues STOP and the final DistanceToGoal < radius. + - SPL: Success weighted by path efficiency. + +All metrics rely on a single recorder that logs the robot base position +plus whether the agent has emitted STOP at every simulation step. 
+""" + +from __future__ import annotations + +from typing import List, Sequence + +import numpy as np +import torch +from scipy.spatial import KDTree + +from isaaclab.envs.manager_based_rl_env import ManagerBasedEnv +from isaaclab.managers.recorder_manager import RecorderTerm, RecorderTermCfg +from isaaclab.utils import configclass + +from isaaclab_arena.metrics.metric_base import MetricBase + + +# ========================================================================== # +# Recorder: robot position per step # +# ========================================================================== # + + +class RobotPositionRecorder(RecorderTerm): + """Record robot root position plus STOP state after each step. + + The exported tensor has shape ``[N, 4]`` and stores: + - ``[..., 0:3]``: world-frame root position ``[x, y, z]`` + - ``[..., 3]``: STOP flag as ``0.0`` or ``1.0`` + + Keeping the STOP state in the same recorder lets offline metrics follow + the Anderson et al. protocol: an episode is successful only if the agent + explicitly signals completion. 
+ """ + + name = "robot_position_w" + + def __init__(self, cfg: RecorderTermCfg, env: ManagerBasedEnv): + super().__init__(cfg, env) + + def record_post_step(self): + """Return (key, data) after each simulation step.""" + root_pos = self._env.scene["robot"].data.root_pos_w + stop_flag = self._env.extras.get("vln_stop_called") + if stop_flag is None: + stop_flag = np.zeros((self._env.num_envs,), dtype=np.float32) + if hasattr(stop_flag, "to"): + stop_tensor = stop_flag.to(device=root_pos.device, dtype=root_pos.dtype) + else: + stop_tensor = torch.as_tensor(stop_flag, device=root_pos.device, dtype=root_pos.dtype) + stop_tensor = stop_tensor.unsqueeze(-1) + return self.name, torch.cat((root_pos, stop_tensor), dim=-1).clone() + + +@configclass +class RobotPositionRecorderCfg(RecorderTermCfg): + """Config for :class:`RobotPositionRecorder`.""" + + class_type: type[RecorderTerm] = RobotPositionRecorder + + +# ========================================================================== # +# Helpers # +# ========================================================================== # + + +def _episode_state_array(episode_data: np.ndarray) -> np.ndarray: + """Normalize recorded per-step state data to ``[T, D]``.""" + arr = np.asarray(episode_data) + if arr.ndim == 3: + # [T, num_envs, D] -> take env 0 + arr = arr[:, 0, :] + return arr + + +def _episode_pos_array(episode_data: np.ndarray) -> np.ndarray: + """Extract recorded positions as an array of shape ``[T, 3]``.""" + arr = _episode_state_array(episode_data) + if arr.ndim != 2 or arr.shape[-1] < 3: + raise ValueError(f"Expected episode data with at least 3 columns, got shape {arr.shape}") + return arr[:, :3] + + +def _episode_stop_called(episode_data: np.ndarray) -> bool | None: + """Return whether STOP was emitted in this episode. + + Returns ``None`` when reading legacy recordings that do not contain the + STOP flag column. New recordings export STOP in column 3. 
+ """ + arr = _episode_state_array(episode_data) + if arr.ndim != 2 or arr.shape[0] == 0 or arr.shape[-1] < 4: + return None + return bool(np.any(arr[:, 3] > 0.5)) + + +def _xy(arr: np.ndarray) -> np.ndarray: + """Project positions to the XY (horizontal) plane. + + **Why XY only?** + The robot's ``root_pos_w`` reports the *pelvis* height (~0.9m above + ground), while the reference-path waypoints from the VLN-CE dataset + record *floor-level* positions (~0.17m). Using full 3-D distance + would inflate the KDTree matching error and make geodesic distances + unreliable. + + **Current dataset**: ``vln_ce_isaac_v1.json.gz`` contains only + single-floor navigation episodes (start-z values vary by < 0.1m + within each scene). XY distance is the standard metric for + single-floor VLN benchmarks (consistent with Habitat VLN-CE). + + **Future multi-floor support**: If episodes with stairs or + multiple floors are added, this function should be replaced by + a proper 3-D geodesic computation that accounts for the + pelvis-to-floor offset (subtract ~0.88m from robot z before + comparing with waypoint z). 
+ """ + return arr[..., :2] + + +def _path_length_xy(pos: np.ndarray) -> float: + """Return cumulative XY path length for one episode.""" + pos_xy = _xy(pos) + if pos_xy.shape[0] < 2: + return 0.0 + return float(np.linalg.norm(pos_xy[1:] - pos_xy[:-1], axis=-1).sum()) + + +def _reference_path_suffix_lengths(gt_wps_xy: np.ndarray) -> np.ndarray: + """Return suffix lengths from each waypoint to the goal.""" + if gt_wps_xy.shape[0] == 0: + return np.zeros((0,), dtype=np.float32) + suffix = np.zeros((gt_wps_xy.shape[0],), dtype=np.float32) + if gt_wps_xy.shape[0] > 1: + segment_lengths = np.linalg.norm(gt_wps_xy[1:] - gt_wps_xy[:-1], axis=-1) + suffix[:-1] = np.cumsum(segment_lengths[::-1], dtype=np.float32)[::-1] + return suffix + + +def _approx_goal_distances_along_reference(pos: np.ndarray, gt_wps: np.ndarray) -> np.ndarray: + """Approximate geodesic distances from episode positions to the goal. + + For each recorded position, we: + 1. Find the closest reference waypoint in XY via KDTree. + 2. Add the remaining reference-path suffix length from that waypoint. 
+ """ + pos_xy = _xy(pos) + if pos_xy.shape[0] == 0: + return np.zeros((0,), dtype=np.float32) + + gt_wps_xy = _xy(gt_wps) + if gt_wps_xy.shape[0] == 0: + return np.zeros((pos_xy.shape[0],), dtype=np.float32) + + tree = KDTree(gt_wps_xy) + suffix_lengths = _reference_path_suffix_lengths(gt_wps_xy) + closest_dist, closest_idx = tree.query(pos_xy) + closest_idx = np.asarray(closest_idx, dtype=np.int64) + return np.asarray(closest_dist, dtype=np.float32) + suffix_lengths[closest_idx] + + +# ========================================================================== # +# Path Length # +# ========================================================================== # + + +class PathLengthMetric(MetricBase): + """Mean cumulative Euclidean path length over recorded episodes.""" + + name = "path_length" + recorder_term_name = RobotPositionRecorder.name + + def get_recorder_term_cfg(self) -> RecorderTermCfg: + return RobotPositionRecorderCfg() + + def compute_metric_from_recording(self, recorded_metric_data: List[np.ndarray]) -> float: + if not recorded_metric_data: + return 0.0 + + lengths: List[float] = [] + for ep_data in recorded_metric_data: + pos = _episode_pos_array(ep_data) + lengths.append(_path_length_xy(pos)) + return float(np.mean(lengths)) + + +# ========================================================================== # +# Distance-To-Goal # +# ========================================================================== # + + +class DistanceToGoalMetric(MetricBase): + """Approximate geodesic distance from the final robot position to the goal. + + Habitat-Lab defines distance-to-goal as a simulator geodesic query. In the + current Isaac Sim benchmark we do not yet have an equivalent navmesh + backend, so we estimate the distance by: + 1. Finding the closest ground-truth waypoint to the robot via KDTree. + 2. Summing the segment lengths from that waypoint to the last waypoint. 
+ """ + + name = "distance_to_goal" + recorder_term_name = RobotPositionRecorder.name + + def __init__(self, gt_waypoints_per_episode: Sequence[Sequence[Sequence[float]]]): + """ + Args: + gt_waypoints_per_episode: For each episode ``i``, a list of 3-D + waypoints ``[[x, y, z], ...]`` describing the reference path. + """ + super().__init__() + self._gt_waypoints = [np.asarray(wps, dtype=np.float32) for wps in gt_waypoints_per_episode] + + def get_recorder_term_cfg(self) -> RecorderTermCfg: + return RobotPositionRecorderCfg() + + def compute_metric_from_recording(self, recorded_metric_data: List[np.ndarray]) -> float: + if not recorded_metric_data: + return 0.0 + num_eps = min(len(recorded_metric_data), len(self._gt_waypoints)) + + distances: List[float] = [] + for ep_idx in range(num_eps): + ep_data = recorded_metric_data[ep_idx] + pos = _episode_pos_array(ep_data) + dists = _approx_goal_distances_along_reference(pos, self._gt_waypoints[ep_idx]) + distances.append(float(dists[-1]) if dists.size > 0 else 0.0) + + return float(np.mean(distances)) + + +# ========================================================================== # +# Oracle Success # +# ========================================================================== # + + +class OracleSuccessMetric(MetricBase): + """Oracle Success from Habitat/VLN-CE-style distance-to-goal traces. + + This follows the standard embodied-navigation definition: + + ``oracle_success = 1[min_t d_t < r]`` + + where ``d_t`` is the per-step distance-to-goal measure and ``r`` is the + success radius. In this Arena benchmark, ``d_t`` currently comes from the + same approximate distance surrogate used by :class:`DistanceToGoalMetric`. 
+ """ + + name = "oracle_success" + recorder_term_name = RobotPositionRecorder.name + + def __init__( + self, + gt_waypoints_per_episode: Sequence[Sequence[Sequence[float]]], + success_radius_per_episode: Sequence[float], + ): + super().__init__() + assert len(gt_waypoints_per_episode) == len(success_radius_per_episode) + self._gt_waypoints = [np.asarray(wps, dtype=np.float32) for wps in gt_waypoints_per_episode] + self._radii = list(success_radius_per_episode) + + def get_recorder_term_cfg(self) -> RecorderTermCfg: + return RobotPositionRecorderCfg() + + def compute_metric_from_recording(self, recorded_metric_data: List[np.ndarray]) -> float: + if not recorded_metric_data: + return 0.0 + num_eps = min(len(recorded_metric_data), len(self._radii)) + + successes: List[float] = [] + for ep_idx in range(num_eps): + pos = _episode_pos_array(recorded_metric_data[ep_idx]) + dists = _approx_goal_distances_along_reference(pos, self._gt_waypoints[ep_idx]) + if dists.size == 0: + successes.append(0.0) + continue + oracle_dist = float(np.min(dists)) + successes.append(1.0 if oracle_dist < self._radii[ep_idx] else 0.0) + return float(np.mean(successes)) + + +# ========================================================================== # +# Success # +# ========================================================================== # + + +class SuccessMetric(MetricBase): + """Fraction of episodes that STOP near the goal. + + By default this follows the embodied-navigation recommendation from + Anderson et al.: an episode is successful only if the agent explicitly + emits STOP and its final distance-to-goal is within the success radius. + In this benchmark the final distance is currently the same approximate + surrogate used by :class:`DistanceToGoalMetric`. 
+ """ + + name = "success" + recorder_term_name = RobotPositionRecorder.name + + def __init__( + self, + gt_waypoints_per_episode: Sequence[Sequence[Sequence[float]]], + success_radius_per_episode: Sequence[float], + require_stop_signal: bool = True, + ): + super().__init__() + assert len(gt_waypoints_per_episode) == len(success_radius_per_episode) + self._dist_metric = DistanceToGoalMetric(gt_waypoints_per_episode) + self._radii = list(success_radius_per_episode) + self._require_stop_signal = require_stop_signal + + def get_recorder_term_cfg(self) -> RecorderTermCfg: + return self._dist_metric.get_recorder_term_cfg() + + def _compute_episode_success(self, ep_idx: int, ep_data: np.ndarray) -> float: + """Return success for one episode using the final recorded position.""" + stop_called = _episode_stop_called(ep_data) + if self._require_stop_signal and stop_called is False: + return 0.0 + + pos = _episode_pos_array(ep_data) + dists = _approx_goal_distances_along_reference(pos, self._dist_metric._gt_waypoints[ep_idx]) + if dists.size == 0: + return 0.0 + final_dist = float(dists[-1]) + return 1.0 if final_dist < self._radii[ep_idx] else 0.0 + + def compute_metric_from_recording(self, recorded_metric_data: List[np.ndarray]) -> float: + if not recorded_metric_data: + return 0.0 + num_eps = min(len(recorded_metric_data), len(self._radii)) + + successes: List[float] = [] + for ep_idx in range(num_eps): + successes.append(self._compute_episode_success(ep_idx, recorded_metric_data[ep_idx])) + return float(np.mean(successes)) + + +# ========================================================================== # +# SPL (Success weighted by Path Length) # +# ========================================================================== # + + +class SPLMetric(MetricBase): + r"""SPL = mean_i [ S_i * d_i / max(d_i, l_i) ]. 
+ + Where: + - S_i: binary success indicator + - d_i: shortest-path distance from episode start to goal + - l_i: actual path length + """ + + name = "spl" + recorder_term_name = RobotPositionRecorder.name + + def __init__( + self, + gt_waypoints_per_episode: Sequence[Sequence[Sequence[float]]], + success_radius_per_episode: Sequence[float], + shortest_path_distance_per_episode: Sequence[float] | None = None, + require_stop_signal: bool = True, + ): + super().__init__() + assert len(gt_waypoints_per_episode) == len(success_radius_per_episode) + + self._shortest: List[float] = [] + if shortest_path_distance_per_episode is not None: + assert len(shortest_path_distance_per_episode) == len(gt_waypoints_per_episode) + self._shortest = [max(0.0, float(d)) for d in shortest_path_distance_per_episode] + else: + # Fallback for datasets that do not expose the official start-goal + # geodesic distance. This uses the reference path length as a proxy. + for wps in gt_waypoints_per_episode: + arr = _xy(np.asarray(wps, dtype=np.float32)) + suffix_lengths = _reference_path_suffix_lengths(arr) + self._shortest.append(float(suffix_lengths[0]) if suffix_lengths.size > 0 else 0.0) + + self._success_metric = SuccessMetric( + gt_waypoints_per_episode, + success_radius_per_episode, + require_stop_signal=require_stop_signal, + ) + + def get_recorder_term_cfg(self) -> RecorderTermCfg: + return RobotPositionRecorderCfg() + + def compute_metric_from_recording(self, recorded_metric_data: List[np.ndarray]) -> float: + if not recorded_metric_data: + return 0.0 + num_eps = min(len(recorded_metric_data), len(self._shortest)) + + spl_values: List[float] = [] + for i in range(num_eps): + ep_data = recorded_metric_data[i] + pos = _episode_pos_array(ep_data) + + # Actual path length l_i (XY) + l_i = _path_length_xy(pos) + + # Success S_i + s_i = self._success_metric._compute_episode_success(i, ep_data) + + d_i = self._shortest[i] + if d_i <= 0.0: + spl_values.append(0.0) + else: + 
spl_values.append(float(s_i * d_i / max(d_i, l_i if l_i > 0.0 else d_i))) + + return float(np.mean(spl_values)) diff --git a/isaaclab_arena/policy/__init__.py b/isaaclab_arena/policy/__init__.py index 66caa51a1..bb2b811e8 100644 --- a/isaaclab_arena/policy/__init__.py +++ b/isaaclab_arena/policy/__init__.py @@ -3,8 +3,39 @@ # # SPDX-License-Identifier: Apache-2.0 +"""Policy package exports. + +This module avoids eager importing of all policy implementations because some +policies trigger asset registration side effects during import. Training code +that only needs lightweight helpers (for example `get_agent_cfg`) should not +pull in remote asset registry dependencies. +""" + +from __future__ import annotations + +from importlib import import_module + from .action_chunking import ActionChunkingState -from .action_chunking_client import * -from .replay_action_policy import * -from .rsl_rl_action_policy import * -from .zero_action_policy import * + +__all__ = [ + "ActionChunkingState", + "ActionChunkingClientSidePolicy", + "ReplayActionPolicy", + "RslRlActionPolicy", + "ZeroActionPolicy", +] + + +_LAZY_IMPORTS = { + "ActionChunkingClientSidePolicy": "isaaclab_arena.policy.action_chunking_client", + "ReplayActionPolicy": "isaaclab_arena.policy.replay_action_policy", + "RslRlActionPolicy": "isaaclab_arena.policy.rsl_rl_action_policy", + "ZeroActionPolicy": "isaaclab_arena.policy.zero_action_policy", +} + + +def __getattr__(name: str): + if name not in _LAZY_IMPORTS: + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + module = import_module(_LAZY_IMPORTS[name]) + return getattr(module, name) diff --git a/isaaclab_arena/policy/vln/__init__.py b/isaaclab_arena/policy/vln/__init__.py new file mode 100644 index 000000000..fb5d6f3b3 --- /dev/null +++ b/isaaclab_arena/policy/vln/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers. +# All rights reserved. 
+# +# SPDX-License-Identifier: Apache-2.0 + +from isaaclab_arena.policy.vln.vln_vlm_locomotion_policy import VlnVlmLocomotionPolicy # noqa: F401 diff --git a/isaaclab_arena/policy/vln/pretrained/h1_navila_agent.yaml b/isaaclab_arena/policy/vln/pretrained/h1_navila_agent.yaml new file mode 100644 index 000000000..32ed38c5b --- /dev/null +++ b/isaaclab_arena/policy/vln/pretrained/h1_navila_agent.yaml @@ -0,0 +1,53 @@ +seed: 42 +device: cuda:0 +num_steps_per_env: 32 +max_iterations: 1000 +empirical_normalization: null +obs_groups: + policy: + - policy + critic: + - policy +clip_actions: null +save_interval: 50 +experiment_name: h1_base_rough +run_name: '' +logger: tensorboard +neptune_project: isaaclab +wandb_project: isaaclab +resume: false +load_run: .* +load_checkpoint: model_.*.pt +class_name: OnPolicyRunner +policy: + class_name: ActorCritic + init_noise_std: 1.0 + noise_std_type: scalar + actor_obs_normalization: false + critic_obs_normalization: false + actor_hidden_dims: + - 512 + - 256 + - 128 + critic_hidden_dims: + - 512 + - 256 + - 128 + activation: elu +algorithm: + class_name: PPO + num_learning_epochs: 5 + num_mini_batches: 4 + learning_rate: 0.001 + schedule: adaptive + gamma: 0.99 + lam: 0.95 + entropy_coef: 0.005 + desired_kl: 0.01 + max_grad_norm: 1.0 + value_loss_coef: 1.0 + use_clipped_value_loss: true + clip_param: 0.2 + normalize_advantage_per_mini_batch: false + rnd_cfg: null + symmetry_cfg: null diff --git a/isaaclab_arena/policy/vln/pretrained/h1_navila_env.yaml b/isaaclab_arena/policy/vln/pretrained/h1_navila_env.yaml new file mode 100644 index 000000000..31a8869e7 --- /dev/null +++ b/isaaclab_arena/policy/vln/pretrained/h1_navila_env.yaml @@ -0,0 +1,1962 @@ +viewer: + eye: !!python/tuple + - 7.5 + - 7.5 + - 7.5 + lookat: !!python/tuple + - 0.0 + - 0.0 + - 0.0 + cam_prim_path: /OmniverseKit_Persp + resolution: !!python/tuple + - 1280 + - 720 + origin_type: world + env_index: 0 + asset_name: null + body_name: null +sim: + physics_prim_path: 
/physicsScene + device: cuda:0 + dt: 0.005 + render_interval: 4 + gravity: !!python/tuple + - 0.0 + - 0.0 + - -9.81 + enable_scene_query_support: false + use_fabric: true + physx: + solver_type: 1 + min_position_iteration_count: 1 + max_position_iteration_count: 255 + min_velocity_iteration_count: 0 + max_velocity_iteration_count: 255 + enable_ccd: false + enable_stabilization: true + enable_enhanced_determinism: false + bounce_threshold_velocity: 0.5 + friction_offset_threshold: 0.04 + friction_correlation_distance: 0.025 + gpu_max_rigid_contact_count: 8388608 + gpu_max_rigid_patch_count: 327680 + gpu_found_lost_pairs_capacity: 2097152 + gpu_found_lost_aggregate_pairs_capacity: 33554432 + gpu_total_aggregate_pairs_capacity: 2097152 + gpu_collision_stack_size: 67108864 + gpu_heap_capacity: 67108864 + gpu_temp_buffer_capacity: 16777216 + gpu_max_num_partitions: 8 + gpu_max_soft_body_contacts: 1048576 + gpu_max_particle_contacts: 1048576 + solve_articulation_contact_last: false + physics_material: + func: isaaclab.sim.spawners.materials.physics_materials:spawn_rigid_body_material + static_friction: 1.0 + dynamic_friction: 1.0 + restitution: 0.0 + friction_combine_mode: multiply + restitution_combine_mode: multiply + compliant_contact_stiffness: 0.0 + compliant_contact_damping: 0.0 + render: + enable_translucency: null + enable_reflections: null + enable_global_illumination: null + antialiasing_mode: null + enable_dlssg: null + enable_dl_denoiser: null + dlss_mode: null + enable_direct_lighting: null + samples_per_pixel: null + enable_shadows: null + enable_ambient_occlusion: null + dome_light_upper_lower_strategy: null + carb_settings: null + rendering_mode: null + create_stage_in_memory: false +ui_window_class_type: isaaclab.envs.ui.manager_based_rl_env_window:ManagerBasedRLEnvWindow +seed: 42 +decimation: 4 +scene: + num_envs: 4096 + env_spacing: 2.5 + lazy_sensor_update: true + replicate_physics: false + filter_collisions: true + clone_in_fabric: false + terrain: 
+ class_type: isaaclab.terrains.terrain_importer:TerrainImporter + collision_group: -1 + prim_path: /World/ground + num_envs: 4096 + terrain_type: generator + terrain_generator: + class_type: isaaclab.terrains.terrain_generator:TerrainGenerator + seed: null + curriculum: true + size: !!python/tuple + - 8.0 + - 8.0 + border_width: 20.0 + border_height: 1.0 + num_rows: 10 + num_cols: 20 + color_scheme: none + horizontal_scale: 0.1 + vertical_scale: 0.005 + slope_threshold: 0.75 + sub_terrains: + random_rough: + function: isaaclab.terrains.height_field.hf_terrains:random_uniform_terrain + proportion: 1.0 + size: !!python/tuple + - 8.0 + - 8.0 + flat_patch_sampling: null + border_width: 0.0 + horizontal_scale: 0.1 + vertical_scale: 0.005 + slope_threshold: 0.75 + noise_range: !!python/tuple + - 0.02 + - 0.1 + noise_step: 0.02 + downsampled_scale: null + difficulty_range: !!python/tuple + - 0.0 + - 1.0 + use_cache: false + cache_dir: /tmp/isaaclab/terrains + usd_path: null + env_spacing: 2.5 + visual_material: + func: isaaclab.sim.spawners.materials.visual_materials:spawn_from_mdl_file + mdl_path: https://omniverse-content-production.s3-us-west-2.amazonaws.com/Assets/Isaac/5.1/Isaac/IsaacLab/Materials/TilesMarbleSpiderWhiteBrickBondHoned/TilesMarbleSpiderWhiteBrickBondHoned.mdl + project_uvw: true + albedo_brightness: null + texture_scale: !!python/tuple + - 0.25 + - 0.25 + physics_material: + func: isaaclab.sim.spawners.materials.physics_materials:spawn_rigid_body_material + static_friction: 1.0 + dynamic_friction: 1.0 + restitution: 0.0 + friction_combine_mode: multiply + restitution_combine_mode: multiply + compliant_contact_stiffness: 0.0 + compliant_contact_damping: 0.0 + max_init_terrain_level: 5 + debug_vis: false + robot: + class_type: isaaclab.assets.articulation.articulation:Articulation + prim_path: /World/envs/env_.*/Robot + spawn: + func: isaaclab.sim.spawners.from_files.from_files:spawn_from_usd + visible: true + semantic_tags: null + copy_from_source: 
true + mass_props: null + deformable_props: null + rigid_props: + rigid_body_enabled: null + kinematic_enabled: null + disable_gravity: false + linear_damping: 0.0 + angular_damping: 0.0 + max_linear_velocity: 1000.0 + max_angular_velocity: 1000.0 + max_depenetration_velocity: 1.0 + max_contact_impulse: null + enable_gyroscopic_forces: null + retain_accelerations: false + solver_position_iteration_count: null + solver_velocity_iteration_count: null + sleep_threshold: null + stabilization_threshold: null + collision_props: null + activate_contact_sensors: true + scale: null + articulation_props: + articulation_enabled: null + enabled_self_collisions: false + solver_position_iteration_count: 4 + solver_velocity_iteration_count: 4 + sleep_threshold: null + stabilization_threshold: null + fix_root_link: null + fixed_tendons_props: null + spatial_tendons_props: null + joint_drive_props: null + visual_material_path: material + visual_material: null + usd_path: https://omniverse-content-production.s3-us-west-2.amazonaws.com/Assets/Isaac/5.1/Isaac/IsaacLab/Robots/Unitree/H1/h1_minimal.usd + variants: null + init_state: + pos: !!python/tuple + - 0.0 + - 0.0 + - 1.05 + rot: !!python/tuple + - 1.0 + - 0.0 + - 0.0 + - 0.0 + lin_vel: !!python/tuple + - 0.0 + - 0.0 + - 0.0 + ang_vel: !!python/tuple + - 0.0 + - 0.0 + - 0.0 + joint_pos: + .*_hip_yaw: 0.0 + .*_hip_roll: 0.0 + .*_hip_pitch: -0.28 + .*_knee: 0.79 + .*_ankle: -0.52 + torso: 0.0 + .*_shoulder_pitch: 0.28 + .*_shoulder_roll: 0.0 + .*_shoulder_yaw: 0.0 + .*_elbow: 0.52 + joint_vel: + .*: 0.0 + collision_group: 0 + debug_vis: false + articulation_root_prim_path: null + soft_joint_pos_limit_factor: 0.9 + actuators: + legs: + class_type: isaaclab.actuators.actuator_pd:ImplicitActuator + joint_names_expr: + - .*_hip_yaw + - .*_hip_roll + - .*_hip_pitch + - .*_knee + - torso + effort_limit: null + velocity_limit: null + effort_limit_sim: 300 + velocity_limit_sim: null + stiffness: + .*_hip_yaw: 150.0 + .*_hip_roll: 150.0 + 
.*_hip_pitch: 200.0 + .*_knee: 200.0 + torso: 200.0 + damping: + .*_hip_yaw: 5.0 + .*_hip_roll: 5.0 + .*_hip_pitch: 5.0 + .*_knee: 5.0 + torso: 5.0 + armature: null + friction: null + dynamic_friction: null + viscous_friction: null + feet: + class_type: isaaclab.actuators.actuator_pd:ImplicitActuator + joint_names_expr: + - .*_ankle + effort_limit: null + velocity_limit: null + effort_limit_sim: 100 + velocity_limit_sim: null + stiffness: + .*_ankle: 20.0 + damping: + .*_ankle: 4.0 + armature: null + friction: null + dynamic_friction: null + viscous_friction: null + arms: + class_type: isaaclab.actuators.actuator_pd:ImplicitActuator + joint_names_expr: + - .*_shoulder_pitch + - .*_shoulder_roll + - .*_shoulder_yaw + - .*_elbow + effort_limit: null + velocity_limit: null + effort_limit_sim: 300 + velocity_limit_sim: null + stiffness: + .*_shoulder_pitch: 40.0 + .*_shoulder_roll: 40.0 + .*_shoulder_yaw: 40.0 + .*_elbow: 40.0 + damping: + .*_shoulder_pitch: 10.0 + .*_shoulder_roll: 10.0 + .*_shoulder_yaw: 10.0 + .*_elbow: 10.0 + armature: null + friction: null + dynamic_friction: null + viscous_friction: null + actuator_value_resolution_debug_print: false + contact_forces: + class_type: isaaclab.sensors.contact_sensor.contact_sensor:ContactSensor + prim_path: /World/envs/env_.*/Robot/.* + update_period: 0.0 + history_length: 3 + debug_vis: false + track_pose: false + track_contact_points: false + max_contact_data_count_per_prim: 4 + track_air_time: true + force_threshold: 1.0 + filter_prim_paths_expr: [] + visualizer_cfg: + prim_path: /Visuals/ContactSensor + markers: + contact: + func: isaaclab.sim.spawners.shapes.shapes:spawn_sphere + visible: true + semantic_tags: null + copy_from_source: true + mass_props: null + rigid_props: null + collision_props: null + activate_contact_sensors: false + visual_material_path: material + visual_material: + func: isaaclab.sim.spawners.materials.visual_materials:spawn_preview_surface + diffuse_color: !!python/tuple + - 1.0 + - 0.0 
+ - 0.0 + emissive_color: !!python/tuple + - 0.0 + - 0.0 + - 0.0 + roughness: 0.5 + metallic: 0.0 + opacity: 1.0 + physics_material_path: material + physics_material: null + radius: 0.02 + no_contact: + func: isaaclab.sim.spawners.shapes.shapes:spawn_sphere + visible: false + semantic_tags: null + copy_from_source: true + mass_props: null + rigid_props: null + collision_props: null + activate_contact_sensors: false + visual_material_path: material + visual_material: + func: isaaclab.sim.spawners.materials.visual_materials:spawn_preview_surface + diffuse_color: !!python/tuple + - 0.0 + - 1.0 + - 0.0 + emissive_color: !!python/tuple + - 0.0 + - 0.0 + - 0.0 + roughness: 0.5 + metallic: 0.0 + opacity: 1.0 + physics_material_path: material + physics_material: null + radius: 0.02 + sky_light: + class_type: null + prim_path: /World/skyLight + spawn: + func: isaaclab.sim.spawners.lights.lights:spawn_light + visible: true + semantic_tags: null + copy_from_source: true + prim_type: DomeLight + color: !!python/tuple + - 1.0 + - 1.0 + - 1.0 + enable_color_temperature: false + color_temperature: 6500.0 + normalize: false + exposure: 0.0 + intensity: 750.0 + texture_file: https://omniverse-content-production.s3-us-west-2.amazonaws.com/Assets/Isaac/5.1/Isaac/Materials/Textures/Skies/PolyHaven/kloofendal_43d_clear_puresky_4k.hdr + texture_format: automatic + visible_in_primary_ray: true + init_state: + pos: !!python/tuple + - 0.0 + - 0.0 + - 0.0 + rot: !!python/tuple + - 1.0 + - 0.0 + - 0.0 + - 0.0 + collision_group: 0 + debug_vis: false +recorders: + dataset_file_handler_class_type: isaaclab.utils.datasets.hdf5_dataset_file_handler:HDF5DatasetFileHandler + dataset_export_dir_path: /tmp/isaaclab/logs + dataset_filename: dataset + dataset_export_mode: + _value_: 1 + _name_: EXPORT_ALL + _sort_order_: 1 + export_in_record_pre_reset: true +observations: + policy: + concatenate_terms: true + concatenate_dim: -1 + enable_corruption: true + history_length: null + flatten_history_dim: 
true + base_lin_vel: + func: isaaclab.envs.mdp.observations:base_lin_vel + params: {} + modifiers: null + noise: + func: isaaclab.utils.noise.noise_model:uniform_noise + operation: add + n_min: -0.1 + n_max: 0.1 + clip: null + scale: null + history_length: 0 + flatten_history_dim: true + base_ang_vel: + func: isaaclab.envs.mdp.observations:base_ang_vel + params: {} + modifiers: null + noise: + func: isaaclab.utils.noise.noise_model:uniform_noise + operation: add + n_min: -0.2 + n_max: 0.2 + clip: null + scale: null + history_length: 0 + flatten_history_dim: true + projected_gravity: + func: isaaclab.envs.mdp.observations:projected_gravity + params: {} + modifiers: null + noise: + func: isaaclab.utils.noise.noise_model:uniform_noise + operation: add + n_min: -0.05 + n_max: 0.05 + clip: null + scale: null + history_length: 0 + flatten_history_dim: true + velocity_commands: + func: isaaclab.envs.mdp.observations:generated_commands + params: + command_name: base_velocity + modifiers: null + noise: null + clip: null + scale: null + history_length: 0 + flatten_history_dim: true + joint_pos: + func: isaaclab.envs.mdp.observations:joint_pos_rel + params: {} + modifiers: null + noise: + func: isaaclab.utils.noise.noise_model:uniform_noise + operation: add + n_min: -0.01 + n_max: 0.01 + clip: null + scale: null + history_length: 0 + flatten_history_dim: true + joint_vel: + func: isaaclab.envs.mdp.observations:joint_vel_rel + params: {} + modifiers: null + noise: + func: isaaclab.utils.noise.noise_model:uniform_noise + operation: add + n_min: -1.5 + n_max: 1.5 + clip: null + scale: null + history_length: 0 + flatten_history_dim: true + actions: + func: isaaclab.envs.mdp.observations:last_action + params: {} + modifiers: null + noise: null + clip: null + scale: null + history_length: 0 + flatten_history_dim: true +actions: + joint_pos: + class_type: isaaclab.envs.mdp.actions.joint_actions:JointPositionAction + asset_name: robot + debug_vis: false + clip: null + joint_names: + 
- .* + scale: 0.5 + offset: 0.0 + preserve_order: false + use_default_offset: true +events: + physics_material: + func: isaaclab.envs.mdp.events:randomize_rigid_body_material + params: + asset_cfg: + name: robot + joint_names: null + joint_ids: !!python/object/apply:builtins.slice + - null + - null + - null + fixed_tendon_names: null + fixed_tendon_ids: !!python/object/apply:builtins.slice + - null + - null + - null + body_names: .* + body_ids: !!python/object/apply:builtins.slice + - null + - null + - null + object_collection_names: null + object_collection_ids: !!python/object/apply:builtins.slice + - null + - null + - null + preserve_order: false + static_friction_range: !!python/tuple + - 0.8 + - 0.8 + dynamic_friction_range: !!python/tuple + - 0.6 + - 0.6 + restitution_range: !!python/tuple + - 0.0 + - 0.0 + num_buckets: 64 + mode: startup + interval_range_s: null + is_global_time: false + min_step_count_between_reset: 0 + add_base_mass: null + base_com: null + base_external_force_torque: + func: isaaclab.envs.mdp.events:apply_external_force_torque + params: + asset_cfg: + name: robot + joint_names: null + joint_ids: !!python/object/apply:builtins.slice + - null + - null + - null + fixed_tendon_names: null + fixed_tendon_ids: !!python/object/apply:builtins.slice + - null + - null + - null + body_names: + - .*torso_link + body_ids: !!python/object/apply:builtins.slice + - null + - null + - null + object_collection_names: null + object_collection_ids: !!python/object/apply:builtins.slice + - null + - null + - null + preserve_order: false + force_range: !!python/tuple + - 0.0 + - 0.0 + torque_range: !!python/tuple + - -0.0 + - 0.0 + mode: reset + interval_range_s: null + is_global_time: false + min_step_count_between_reset: 0 + reset_base: + func: isaaclab.envs.mdp.events:reset_root_state_uniform + params: + pose_range: + x: !!python/tuple + - -0.5 + - 0.5 + y: !!python/tuple + - -0.5 + - 0.5 + yaw: !!python/tuple + - -3.14 + - 3.14 + velocity_range: + x: 
!!python/tuple + - 0.0 + - 0.0 + y: !!python/tuple + - 0.0 + - 0.0 + z: !!python/tuple + - 0.0 + - 0.0 + roll: !!python/tuple + - 0.0 + - 0.0 + pitch: !!python/tuple + - 0.0 + - 0.0 + yaw: !!python/tuple + - 0.0 + - 0.0 + mode: reset + interval_range_s: null + is_global_time: false + min_step_count_between_reset: 0 + reset_robot_joints: + func: isaaclab.envs.mdp.events:reset_joints_by_scale + params: + position_range: !!python/tuple + - 1.0 + - 1.0 + velocity_range: !!python/tuple + - 0.0 + - 0.0 + mode: reset + interval_range_s: null + is_global_time: false + min_step_count_between_reset: 0 + push_robot: null +rerender_on_reset: false +wait_for_textures: false +xr: null +teleop_devices: null +export_io_descriptors: false +log_dir: /workspaces/isaaclab_arena/logs/rsl_rl/h1_base_rough/2026-03-06_10-34-21 +is_finite_horizon: false +episode_length_s: 20.0 +rewards: + track_lin_vel_xy_exp: + func: isaaclab_tasks.manager_based.locomotion.velocity.mdp.rewards:track_lin_vel_xy_yaw_frame_exp + params: + command_name: base_velocity + std: 0.5 + weight: 1.0 + track_ang_vel_z_exp: + func: isaaclab_tasks.manager_based.locomotion.velocity.mdp.rewards:track_ang_vel_z_world_exp + params: + command_name: base_velocity + std: 0.5 + weight: 1.0 + lin_vel_z_l2: null + ang_vel_xy_l2: + func: isaaclab.envs.mdp.rewards:ang_vel_xy_l2 + params: {} + weight: -0.05 + dof_torques_l2: + func: isaaclab.envs.mdp.rewards:joint_torques_l2 + params: {} + weight: 0.0 + dof_acc_l2: + func: isaaclab.envs.mdp.rewards:joint_acc_l2 + params: {} + weight: -1.25e-07 + action_rate_l2: + func: isaaclab.envs.mdp.rewards:action_rate_l2 + params: {} + weight: -0.005 + feet_air_time: + func: isaaclab_tasks.manager_based.locomotion.velocity.mdp.rewards:feet_air_time_positive_biped + params: + command_name: base_velocity + sensor_cfg: + name: contact_forces + joint_names: null + joint_ids: !!python/object/apply:builtins.slice + - null + - null + - null + fixed_tendon_names: null + fixed_tendon_ids: 
!!python/object/apply:builtins.slice + - null + - null + - null + body_names: .*ankle_link + body_ids: !!python/object/apply:builtins.slice + - null + - null + - null + object_collection_names: null + object_collection_ids: !!python/object/apply:builtins.slice + - null + - null + - null + preserve_order: false + threshold: 0.4 + weight: 0.25 + undesired_contacts: null + flat_orientation_l2: + func: isaaclab.envs.mdp.rewards:flat_orientation_l2 + params: {} + weight: -1.0 + dof_pos_limits: + func: isaaclab.envs.mdp.rewards:joint_pos_limits + params: + asset_cfg: + name: robot + joint_names: .*_ankle + joint_ids: !!python/object/apply:builtins.slice + - null + - null + - null + fixed_tendon_names: null + fixed_tendon_ids: !!python/object/apply:builtins.slice + - null + - null + - null + body_names: null + body_ids: !!python/object/apply:builtins.slice + - null + - null + - null + object_collection_names: null + object_collection_ids: !!python/object/apply:builtins.slice + - null + - null + - null + preserve_order: false + weight: -1.0 + termination_penalty: + func: isaaclab.envs.mdp.rewards:is_terminated + params: {} + weight: -200.0 + feet_slide: + func: isaaclab_tasks.manager_based.locomotion.velocity.mdp.rewards:feet_slide + params: + sensor_cfg: + name: contact_forces + joint_names: null + joint_ids: !!python/object/apply:builtins.slice + - null + - null + - null + fixed_tendon_names: null + fixed_tendon_ids: !!python/object/apply:builtins.slice + - null + - null + - null + body_names: .*ankle_link + body_ids: !!python/object/apply:builtins.slice + - null + - null + - null + object_collection_names: null + object_collection_ids: !!python/object/apply:builtins.slice + - null + - null + - null + preserve_order: false + asset_cfg: + name: robot + joint_names: null + joint_ids: !!python/object/apply:builtins.slice + - null + - null + - null + fixed_tendon_names: null + fixed_tendon_ids: !!python/object/apply:builtins.slice + - null + - null + - null + body_names: 
.*ankle_link + body_ids: !!python/object/apply:builtins.slice + - null + - null + - null + object_collection_names: null + object_collection_ids: !!python/object/apply:builtins.slice + - null + - null + - null + preserve_order: false + weight: -0.25 + joint_deviation_hip: + func: isaaclab.envs.mdp.rewards:joint_deviation_l1 + params: + asset_cfg: + name: robot + joint_names: + - .*_hip_yaw + - .*_hip_roll + joint_ids: !!python/object/apply:builtins.slice + - null + - null + - null + fixed_tendon_names: null + fixed_tendon_ids: !!python/object/apply:builtins.slice + - null + - null + - null + body_names: null + body_ids: !!python/object/apply:builtins.slice + - null + - null + - null + object_collection_names: null + object_collection_ids: !!python/object/apply:builtins.slice + - null + - null + - null + preserve_order: false + weight: -0.2 + joint_deviation_arms: + func: isaaclab.envs.mdp.rewards:joint_deviation_l1 + params: + asset_cfg: + name: robot + joint_names: + - .*_shoulder_.* + - .*_elbow + joint_ids: !!python/object/apply:builtins.slice + - null + - null + - null + fixed_tendon_names: null + fixed_tendon_ids: !!python/object/apply:builtins.slice + - null + - null + - null + body_names: null + body_ids: !!python/object/apply:builtins.slice + - null + - null + - null + object_collection_names: null + object_collection_ids: !!python/object/apply:builtins.slice + - null + - null + - null + preserve_order: false + weight: -0.2 + joint_deviation_torso: + func: isaaclab.envs.mdp.rewards:joint_deviation_l1 + params: + asset_cfg: + name: robot + joint_names: torso + joint_ids: !!python/object/apply:builtins.slice + - null + - null + - null + fixed_tendon_names: null + fixed_tendon_ids: !!python/object/apply:builtins.slice + - null + - null + - null + body_names: null + body_ids: !!python/object/apply:builtins.slice + - null + - null + - null + object_collection_names: null + object_collection_ids: !!python/object/apply:builtins.slice + - null + - null + - null + 
preserve_order: false + weight: -0.1 +terminations: + time_out: + func: isaaclab.envs.mdp.terminations:time_out + params: {} + time_out: true + base_contact: + func: isaaclab.envs.mdp.terminations:illegal_contact + params: + sensor_cfg: + name: contact_forces + joint_names: null + joint_ids: !!python/object/apply:builtins.slice + - null + - null + - null + fixed_tendon_names: null + fixed_tendon_ids: !!python/object/apply:builtins.slice + - null + - null + - null + body_names: .*torso_link + body_ids: !!python/object/apply:builtins.slice + - null + - null + - null + object_collection_names: null + object_collection_ids: !!python/object/apply:builtins.slice + - null + - null + - null + preserve_order: false + threshold: 1.0 + time_out: false +curriculum: + terrain_levels: + func: isaaclab_tasks.manager_based.locomotion.velocity.mdp.curriculums:terrain_levels_vel + params: {} +commands: + base_velocity: + class_type: isaaclab.envs.mdp.commands.velocity_command:UniformVelocityCommand + resampling_time_range: !!python/tuple + - 10.0 + - 10.0 + debug_vis: false + asset_name: robot + heading_command: true + heading_control_stiffness: 0.5 + rel_standing_envs: 0.02 + rel_heading_envs: 1.0 + ranges: + lin_vel_x: !!python/tuple + - 0.0 + - 1.0 + lin_vel_y: !!python/tuple + - 0.0 + - 0.0 + ang_vel_z: !!python/tuple + - -1.0 + - 1.0 + heading: !!python/tuple + - -3.141592653589793 + - 3.141592653589793 + goal_vel_visualizer_cfg: + prim_path: /Visuals/Command/velocity_goal + markers: + arrow: + func: isaaclab.sim.spawners.from_files.from_files:spawn_from_usd + visible: true + semantic_tags: null + copy_from_source: true + mass_props: null + deformable_props: null + rigid_props: null + collision_props: null + activate_contact_sensors: false + scale: !!python/tuple + - 0.5 + - 0.5 + - 0.5 + articulation_props: null + fixed_tendons_props: null + spatial_tendons_props: null + joint_drive_props: null + visual_material_path: material + visual_material: + func: 
isaaclab.sim.spawners.materials.visual_materials:spawn_preview_surface + diffuse_color: !!python/tuple + - 0.0 + - 1.0 + - 0.0 + emissive_color: !!python/tuple + - 0.0 + - 0.0 + - 0.0 + roughness: 0.5 + metallic: 0.0 + opacity: 1.0 + usd_path: https://omniverse-content-production.s3-us-west-2.amazonaws.com/Assets/Isaac/5.1/Isaac/Props/UIElements/arrow_x.usd + variants: null + current_vel_visualizer_cfg: + prim_path: /Visuals/Command/velocity_current + markers: + arrow: + func: isaaclab.sim.spawners.from_files.from_files:spawn_from_usd + visible: true + semantic_tags: null + copy_from_source: true + mass_props: null + deformable_props: null + rigid_props: null + collision_props: null + activate_contact_sensors: false + scale: !!python/tuple + - 0.5 + - 0.5 + - 0.5 + articulation_props: null + fixed_tendons_props: null + spatial_tendons_props: null + joint_drive_props: null + visual_material_path: material + visual_material: + func: isaaclab.sim.spawners.materials.visual_materials:spawn_preview_surface + diffuse_color: !!python/tuple + - 0.0 + - 0.0 + - 1.0 + emissive_color: !!python/tuple + - 0.0 + - 0.0 + - 0.0 + roughness: 0.5 + metallic: 0.0 + opacity: 1.0 + usd_path: https://omniverse-content-production.s3-us-west-2.amazonaws.com/Assets/Isaac/5.1/Isaac/Props/UIElements/arrow_x.usd + variants: null +metrics: [] +isaaclab_arena_env: + name: h1_base_rough + scene: + assets: {} + observation_cfg: null + events_cfg: null + termination_cfg: null + rewards_cfg: null + curriculum_cfg: null + commands_cfg: null + embodiment: + enable_cameras: false + initial_pose: null + concatenate_observation_terms: false + arm_mode: null + scene_config: + terrain: + class_type: isaaclab.terrains.terrain_importer:TerrainImporter + collision_group: -1 + prim_path: /World/ground + num_envs: 1 + terrain_type: generator + terrain_generator: + class_type: isaaclab.terrains.terrain_generator:TerrainGenerator + seed: null + curriculum: true + size: !!python/tuple + - 8.0 + - 8.0 + 
border_width: 20.0 + border_height: 1.0 + num_rows: 10 + num_cols: 20 + color_scheme: none + horizontal_scale: 0.1 + vertical_scale: 0.005 + slope_threshold: 0.75 + sub_terrains: + random_rough: + function: isaaclab.terrains.height_field.hf_terrains:random_uniform_terrain + proportion: 1.0 + size: !!python/tuple + - 8.0 + - 8.0 + flat_patch_sampling: null + border_width: 0.0 + horizontal_scale: 0.1 + vertical_scale: 0.005 + slope_threshold: null + noise_range: !!python/tuple + - 0.02 + - 0.1 + noise_step: 0.02 + downsampled_scale: null + difficulty_range: !!python/tuple + - 0.0 + - 1.0 + use_cache: false + cache_dir: /tmp/isaaclab/terrains + usd_path: null + env_spacing: null + visual_material: + func: isaaclab.sim.spawners.materials.visual_materials:spawn_from_mdl_file + mdl_path: https://omniverse-content-production.s3-us-west-2.amazonaws.com/Assets/Isaac/5.1/Isaac/IsaacLab/Materials/TilesMarbleSpiderWhiteBrickBondHoned/TilesMarbleSpiderWhiteBrickBondHoned.mdl + project_uvw: true + albedo_brightness: null + texture_scale: !!python/tuple + - 0.25 + - 0.25 + physics_material: + func: isaaclab.sim.spawners.materials.physics_materials:spawn_rigid_body_material + static_friction: 1.0 + dynamic_friction: 1.0 + restitution: 0.0 + friction_combine_mode: multiply + restitution_combine_mode: multiply + compliant_contact_stiffness: 0.0 + compliant_contact_damping: 0.0 + max_init_terrain_level: 5 + debug_vis: false + robot: + class_type: isaaclab.assets.articulation.articulation:Articulation + prim_path: '{ENV_REGEX_NS}/Robot' + spawn: + func: isaaclab.sim.spawners.from_files.from_files:spawn_from_usd + visible: true + semantic_tags: null + copy_from_source: true + mass_props: null + deformable_props: null + rigid_props: + rigid_body_enabled: null + kinematic_enabled: null + disable_gravity: false + linear_damping: 0.0 + angular_damping: 0.0 + max_linear_velocity: 1000.0 + max_angular_velocity: 1000.0 + max_depenetration_velocity: 1.0 + max_contact_impulse: null + 
enable_gyroscopic_forces: null + retain_accelerations: false + solver_position_iteration_count: null + solver_velocity_iteration_count: null + sleep_threshold: null + stabilization_threshold: null + collision_props: null + activate_contact_sensors: true + scale: null + articulation_props: + articulation_enabled: null + enabled_self_collisions: false + solver_position_iteration_count: 4 + solver_velocity_iteration_count: 4 + sleep_threshold: null + stabilization_threshold: null + fix_root_link: null + fixed_tendons_props: null + spatial_tendons_props: null + joint_drive_props: null + visual_material_path: material + visual_material: null + usd_path: https://omniverse-content-production.s3-us-west-2.amazonaws.com/Assets/Isaac/5.1/Isaac/IsaacLab/Robots/Unitree/H1/h1_minimal.usd + variants: null + init_state: + pos: !!python/tuple + - 0.0 + - 0.0 + - 1.05 + rot: !!python/tuple + - 1.0 + - 0.0 + - 0.0 + - 0.0 + lin_vel: !!python/tuple + - 0.0 + - 0.0 + - 0.0 + ang_vel: !!python/tuple + - 0.0 + - 0.0 + - 0.0 + joint_pos: + .*_hip_yaw: 0.0 + .*_hip_roll: 0.0 + .*_hip_pitch: -0.28 + .*_knee: 0.79 + .*_ankle: -0.52 + torso: 0.0 + .*_shoulder_pitch: 0.28 + .*_shoulder_roll: 0.0 + .*_shoulder_yaw: 0.0 + .*_elbow: 0.52 + joint_vel: + .*: 0.0 + collision_group: 0 + debug_vis: false + articulation_root_prim_path: null + soft_joint_pos_limit_factor: 0.9 + actuators: + legs: + class_type: isaaclab.actuators.actuator_pd:ImplicitActuator + joint_names_expr: + - .*_hip_yaw + - .*_hip_roll + - .*_hip_pitch + - .*_knee + - torso + effort_limit: null + velocity_limit: null + effort_limit_sim: 300 + velocity_limit_sim: null + stiffness: + .*_hip_yaw: 150.0 + .*_hip_roll: 150.0 + .*_hip_pitch: 200.0 + .*_knee: 200.0 + torso: 200.0 + damping: + .*_hip_yaw: 5.0 + .*_hip_roll: 5.0 + .*_hip_pitch: 5.0 + .*_knee: 5.0 + torso: 5.0 + armature: null + friction: null + dynamic_friction: null + viscous_friction: null + feet: + class_type: isaaclab.actuators.actuator_pd:ImplicitActuator + 
joint_names_expr: + - .*_ankle + effort_limit: null + velocity_limit: null + effort_limit_sim: 100 + velocity_limit_sim: null + stiffness: + .*_ankle: 20.0 + damping: + .*_ankle: 4.0 + armature: null + friction: null + dynamic_friction: null + viscous_friction: null + arms: + class_type: isaaclab.actuators.actuator_pd:ImplicitActuator + joint_names_expr: + - .*_shoulder_pitch + - .*_shoulder_roll + - .*_shoulder_yaw + - .*_elbow + effort_limit: null + velocity_limit: null + effort_limit_sim: 300 + velocity_limit_sim: null + stiffness: + .*_shoulder_pitch: 40.0 + .*_shoulder_roll: 40.0 + .*_shoulder_yaw: 40.0 + .*_elbow: 40.0 + damping: + .*_shoulder_pitch: 10.0 + .*_shoulder_roll: 10.0 + .*_shoulder_yaw: 10.0 + .*_elbow: 10.0 + armature: null + friction: null + dynamic_friction: null + viscous_friction: null + actuator_value_resolution_debug_print: false + contact_forces: + class_type: isaaclab.sensors.contact_sensor.contact_sensor:ContactSensor + prim_path: '{ENV_REGEX_NS}/Robot/.*' + update_period: 0.0 + history_length: 3 + debug_vis: false + track_pose: false + track_contact_points: false + max_contact_data_count_per_prim: 4 + track_air_time: true + force_threshold: 1.0 + filter_prim_paths_expr: [] + visualizer_cfg: + prim_path: /Visuals/ContactSensor + markers: + contact: + func: isaaclab.sim.spawners.shapes.shapes:spawn_sphere + visible: true + semantic_tags: null + copy_from_source: true + mass_props: null + rigid_props: null + collision_props: null + activate_contact_sensors: false + visual_material_path: material + visual_material: + func: isaaclab.sim.spawners.materials.visual_materials:spawn_preview_surface + diffuse_color: !!python/tuple + - 1.0 + - 0.0 + - 0.0 + emissive_color: !!python/tuple + - 0.0 + - 0.0 + - 0.0 + roughness: 0.5 + metallic: 0.0 + opacity: 1.0 + physics_material_path: material + physics_material: null + radius: 0.02 + no_contact: + func: isaaclab.sim.spawners.shapes.shapes:spawn_sphere + visible: false + semantic_tags: null + 
copy_from_source: true + mass_props: null + rigid_props: null + collision_props: null + activate_contact_sensors: false + visual_material_path: material + visual_material: + func: isaaclab.sim.spawners.materials.visual_materials:spawn_preview_surface + diffuse_color: !!python/tuple + - 0.0 + - 1.0 + - 0.0 + emissive_color: !!python/tuple + - 0.0 + - 0.0 + - 0.0 + roughness: 0.5 + metallic: 0.0 + opacity: 1.0 + physics_material_path: material + physics_material: null + radius: 0.02 + sky_light: + class_type: null + prim_path: /World/skyLight + spawn: + func: isaaclab.sim.spawners.lights.lights:spawn_light + visible: true + semantic_tags: null + copy_from_source: true + prim_type: DomeLight + color: !!python/tuple + - 1.0 + - 1.0 + - 1.0 + enable_color_temperature: false + color_temperature: 6500.0 + normalize: false + exposure: 0.0 + intensity: 750.0 + texture_file: https://omniverse-content-production.s3-us-west-2.amazonaws.com/Assets/Isaac/5.1/Isaac/Materials/Textures/Skies/PolyHaven/kloofendal_43d_clear_puresky_4k.hdr + texture_format: automatic + visible_in_primary_ray: true + init_state: + pos: !!python/tuple + - 0.0 + - 0.0 + - 0.0 + rot: !!python/tuple + - 1.0 + - 0.0 + - 0.0 + - 0.0 + collision_group: 0 + debug_vis: false + camera_config: null + action_config: + joint_pos: + class_type: isaaclab.envs.mdp.actions.joint_actions:JointPositionAction + asset_name: robot + debug_vis: false + clip: null + joint_names: + - .* + scale: 0.5 + offset: 0.0 + preserve_order: false + use_default_offset: true + observation_config: + policy: + concatenate_terms: true + concatenate_dim: -1 + enable_corruption: true + history_length: null + flatten_history_dim: true + base_lin_vel: + func: isaaclab.envs.mdp.observations:base_lin_vel + params: {} + modifiers: null + noise: + func: isaaclab.utils.noise.noise_model:uniform_noise + operation: add + n_min: -0.1 + n_max: 0.1 + clip: null + scale: null + history_length: 0 + flatten_history_dim: true + base_ang_vel: + func: 
isaaclab.envs.mdp.observations:base_ang_vel + params: {} + modifiers: null + noise: + func: isaaclab.utils.noise.noise_model:uniform_noise + operation: add + n_min: -0.2 + n_max: 0.2 + clip: null + scale: null + history_length: 0 + flatten_history_dim: true + projected_gravity: + func: isaaclab.envs.mdp.observations:projected_gravity + params: {} + modifiers: null + noise: + func: isaaclab.utils.noise.noise_model:uniform_noise + operation: add + n_min: -0.05 + n_max: 0.05 + clip: null + scale: null + history_length: 0 + flatten_history_dim: true + velocity_commands: + func: isaaclab.envs.mdp.observations:generated_commands + params: + command_name: base_velocity + modifiers: null + noise: null + clip: null + scale: null + history_length: 0 + flatten_history_dim: true + joint_pos: + func: isaaclab.envs.mdp.observations:joint_pos_rel + params: {} + modifiers: null + noise: + func: isaaclab.utils.noise.noise_model:uniform_noise + operation: add + n_min: -0.01 + n_max: 0.01 + clip: null + scale: null + history_length: 0 + flatten_history_dim: true + joint_vel: + func: isaaclab.envs.mdp.observations:joint_vel_rel + params: {} + modifiers: null + noise: + func: isaaclab.utils.noise.noise_model:uniform_noise + operation: add + n_min: -1.5 + n_max: 1.5 + clip: null + scale: null + history_length: 0 + flatten_history_dim: true + actions: + func: isaaclab.envs.mdp.observations:last_action + params: {} + modifiers: null + noise: null + clip: null + scale: null + history_length: 0 + flatten_history_dim: true + event_config: null + reward_config: null + curriculum_config: null + command_config: null + mimic_env: null + xr: null + termination_cfg: null + task: + episode_length_s: 20.0 + task_description: Train a rough-terrain velocity tracking locomotion policy for + H1. 
+ _events_cfg: + physics_material: + func: isaaclab.envs.mdp.events:randomize_rigid_body_material + params: + asset_cfg: + name: robot + joint_names: null + joint_ids: !!python/object/apply:builtins.slice + - null + - null + - null + fixed_tendon_names: null + fixed_tendon_ids: !!python/object/apply:builtins.slice + - null + - null + - null + body_names: .* + body_ids: !!python/object/apply:builtins.slice + - null + - null + - null + object_collection_names: null + object_collection_ids: !!python/object/apply:builtins.slice + - null + - null + - null + preserve_order: false + static_friction_range: !!python/tuple + - 0.8 + - 0.8 + dynamic_friction_range: !!python/tuple + - 0.6 + - 0.6 + restitution_range: !!python/tuple + - 0.0 + - 0.0 + num_buckets: 64 + mode: startup + interval_range_s: null + is_global_time: false + min_step_count_between_reset: 0 + add_base_mass: null + base_com: null + base_external_force_torque: + func: isaaclab.envs.mdp.events:apply_external_force_torque + params: + asset_cfg: + name: robot + joint_names: null + joint_ids: !!python/object/apply:builtins.slice + - null + - null + - null + fixed_tendon_names: null + fixed_tendon_ids: !!python/object/apply:builtins.slice + - null + - null + - null + body_names: + - .*torso_link + body_ids: !!python/object/apply:builtins.slice + - null + - null + - null + object_collection_names: null + object_collection_ids: !!python/object/apply:builtins.slice + - null + - null + - null + preserve_order: false + force_range: !!python/tuple + - 0.0 + - 0.0 + torque_range: !!python/tuple + - -0.0 + - 0.0 + mode: reset + interval_range_s: null + is_global_time: false + min_step_count_between_reset: 0 + reset_base: + func: isaaclab.envs.mdp.events:reset_root_state_uniform + params: + pose_range: + x: !!python/tuple + - -0.5 + - 0.5 + y: !!python/tuple + - -0.5 + - 0.5 + yaw: !!python/tuple + - -3.14 + - 3.14 + velocity_range: + x: !!python/tuple + - 0.0 + - 0.0 + y: !!python/tuple + - 0.0 + - 0.0 + z: 
!!python/tuple + - 0.0 + - 0.0 + roll: !!python/tuple + - 0.0 + - 0.0 + pitch: !!python/tuple + - 0.0 + - 0.0 + yaw: !!python/tuple + - 0.0 + - 0.0 + mode: reset + interval_range_s: null + is_global_time: false + min_step_count_between_reset: 0 + reset_robot_joints: + func: isaaclab.envs.mdp.events:reset_joints_by_scale + params: + position_range: !!python/tuple + - 1.0 + - 1.0 + velocity_range: !!python/tuple + - 0.0 + - 0.0 + mode: reset + interval_range_s: null + is_global_time: false + min_step_count_between_reset: 0 + push_robot: null + _rewards_cfg: + track_lin_vel_xy_exp: + func: isaaclab_tasks.manager_based.locomotion.velocity.mdp.rewards:track_lin_vel_xy_yaw_frame_exp + params: + command_name: base_velocity + std: 0.5 + weight: 1.0 + track_ang_vel_z_exp: + func: isaaclab_tasks.manager_based.locomotion.velocity.mdp.rewards:track_ang_vel_z_world_exp + params: + command_name: base_velocity + std: 0.5 + weight: 1.0 + lin_vel_z_l2: null + ang_vel_xy_l2: + func: isaaclab.envs.mdp.rewards:ang_vel_xy_l2 + params: {} + weight: -0.05 + dof_torques_l2: + func: isaaclab.envs.mdp.rewards:joint_torques_l2 + params: {} + weight: 0.0 + dof_acc_l2: + func: isaaclab.envs.mdp.rewards:joint_acc_l2 + params: {} + weight: -1.25e-07 + action_rate_l2: + func: isaaclab.envs.mdp.rewards:action_rate_l2 + params: {} + weight: -0.005 + feet_air_time: + func: isaaclab_tasks.manager_based.locomotion.velocity.mdp.rewards:feet_air_time_positive_biped + params: + command_name: base_velocity + sensor_cfg: + name: contact_forces + joint_names: null + joint_ids: !!python/object/apply:builtins.slice + - null + - null + - null + fixed_tendon_names: null + fixed_tendon_ids: !!python/object/apply:builtins.slice + - null + - null + - null + body_names: .*ankle_link + body_ids: !!python/object/apply:builtins.slice + - null + - null + - null + object_collection_names: null + object_collection_ids: !!python/object/apply:builtins.slice + - null + - null + - null + preserve_order: false + threshold: 
0.4 + weight: 0.25 + undesired_contacts: null + flat_orientation_l2: + func: isaaclab.envs.mdp.rewards:flat_orientation_l2 + params: {} + weight: -1.0 + dof_pos_limits: + func: isaaclab.envs.mdp.rewards:joint_pos_limits + params: + asset_cfg: + name: robot + joint_names: .*_ankle + joint_ids: !!python/object/apply:builtins.slice + - null + - null + - null + fixed_tendon_names: null + fixed_tendon_ids: !!python/object/apply:builtins.slice + - null + - null + - null + body_names: null + body_ids: !!python/object/apply:builtins.slice + - null + - null + - null + object_collection_names: null + object_collection_ids: !!python/object/apply:builtins.slice + - null + - null + - null + preserve_order: false + weight: -1.0 + termination_penalty: + func: isaaclab.envs.mdp.rewards:is_terminated + params: {} + weight: -200.0 + feet_slide: + func: isaaclab_tasks.manager_based.locomotion.velocity.mdp.rewards:feet_slide + params: + sensor_cfg: + name: contact_forces + joint_names: null + joint_ids: !!python/object/apply:builtins.slice + - null + - null + - null + fixed_tendon_names: null + fixed_tendon_ids: !!python/object/apply:builtins.slice + - null + - null + - null + body_names: .*ankle_link + body_ids: !!python/object/apply:builtins.slice + - null + - null + - null + object_collection_names: null + object_collection_ids: !!python/object/apply:builtins.slice + - null + - null + - null + preserve_order: false + asset_cfg: + name: robot + joint_names: null + joint_ids: !!python/object/apply:builtins.slice + - null + - null + - null + fixed_tendon_names: null + fixed_tendon_ids: !!python/object/apply:builtins.slice + - null + - null + - null + body_names: .*ankle_link + body_ids: !!python/object/apply:builtins.slice + - null + - null + - null + object_collection_names: null + object_collection_ids: !!python/object/apply:builtins.slice + - null + - null + - null + preserve_order: false + weight: -0.25 + joint_deviation_hip: + func: isaaclab.envs.mdp.rewards:joint_deviation_l1 + 
params: + asset_cfg: + name: robot + joint_names: + - .*_hip_yaw + - .*_hip_roll + joint_ids: !!python/object/apply:builtins.slice + - null + - null + - null + fixed_tendon_names: null + fixed_tendon_ids: !!python/object/apply:builtins.slice + - null + - null + - null + body_names: null + body_ids: !!python/object/apply:builtins.slice + - null + - null + - null + object_collection_names: null + object_collection_ids: !!python/object/apply:builtins.slice + - null + - null + - null + preserve_order: false + weight: -0.2 + joint_deviation_arms: + func: isaaclab.envs.mdp.rewards:joint_deviation_l1 + params: + asset_cfg: + name: robot + joint_names: + - .*_shoulder_.* + - .*_elbow + joint_ids: !!python/object/apply:builtins.slice + - null + - null + - null + fixed_tendon_names: null + fixed_tendon_ids: !!python/object/apply:builtins.slice + - null + - null + - null + body_names: null + body_ids: !!python/object/apply:builtins.slice + - null + - null + - null + object_collection_names: null + object_collection_ids: !!python/object/apply:builtins.slice + - null + - null + - null + preserve_order: false + weight: -0.2 + joint_deviation_torso: + func: isaaclab.envs.mdp.rewards:joint_deviation_l1 + params: + asset_cfg: + name: robot + joint_names: torso + joint_ids: !!python/object/apply:builtins.slice + - null + - null + - null + fixed_tendon_names: null + fixed_tendon_ids: !!python/object/apply:builtins.slice + - null + - null + - null + body_names: null + body_ids: !!python/object/apply:builtins.slice + - null + - null + - null + object_collection_names: null + object_collection_ids: !!python/object/apply:builtins.slice + - null + - null + - null + preserve_order: false + weight: -0.1 + _terminations_cfg: + time_out: + func: isaaclab.envs.mdp.terminations:time_out + params: {} + time_out: true + base_contact: + func: isaaclab.envs.mdp.terminations:illegal_contact + params: + sensor_cfg: + name: contact_forces + joint_names: null + joint_ids: 
!!python/object/apply:builtins.slice + - null + - null + - null + fixed_tendon_names: null + fixed_tendon_ids: !!python/object/apply:builtins.slice + - null + - null + - null + body_names: .*torso_link + body_ids: !!python/object/apply:builtins.slice + - null + - null + - null + object_collection_names: null + object_collection_ids: !!python/object/apply:builtins.slice + - null + - null + - null + preserve_order: false + threshold: 1.0 + time_out: false + _curriculum_cfg: + terrain_levels: + func: isaaclab_tasks.manager_based.locomotion.velocity.mdp.curriculums:terrain_levels_vel + params: {} + _commands_cfg: + base_velocity: + class_type: isaaclab.envs.mdp.commands.velocity_command:UniformVelocityCommand + resampling_time_range: !!python/tuple + - 10.0 + - 10.0 + debug_vis: false + asset_name: robot + heading_command: true + heading_control_stiffness: 0.5 + rel_standing_envs: 0.02 + rel_heading_envs: 1.0 + ranges: + lin_vel_x: !!python/tuple + - 0.0 + - 1.0 + lin_vel_y: !!python/tuple + - 0.0 + - 0.0 + ang_vel_z: !!python/tuple + - -1.0 + - 1.0 + heading: !!python/tuple + - -3.141592653589793 + - 3.141592653589793 + goal_vel_visualizer_cfg: + prim_path: /Visuals/Command/velocity_goal + markers: + arrow: + func: isaaclab.sim.spawners.from_files.from_files:spawn_from_usd + visible: true + semantic_tags: null + copy_from_source: true + mass_props: null + deformable_props: null + rigid_props: null + collision_props: null + activate_contact_sensors: false + scale: !!python/tuple + - 0.5 + - 0.5 + - 0.5 + articulation_props: null + fixed_tendons_props: null + spatial_tendons_props: null + joint_drive_props: null + visual_material_path: material + visual_material: + func: isaaclab.sim.spawners.materials.visual_materials:spawn_preview_surface + diffuse_color: !!python/tuple + - 0.0 + - 1.0 + - 0.0 + emissive_color: !!python/tuple + - 0.0 + - 0.0 + - 0.0 + roughness: 0.5 + metallic: 0.0 + opacity: 1.0 + usd_path: 
https://omniverse-content-production.s3-us-west-2.amazonaws.com/Assets/Isaac/5.1/Isaac/Props/UIElements/arrow_x.usd + variants: null + current_vel_visualizer_cfg: + prim_path: /Visuals/Command/velocity_current + markers: + arrow: + func: isaaclab.sim.spawners.from_files.from_files:spawn_from_usd + visible: true + semantic_tags: null + copy_from_source: true + mass_props: null + deformable_props: null + rigid_props: null + collision_props: null + activate_contact_sensors: false + scale: !!python/tuple + - 0.5 + - 0.5 + - 0.5 + articulation_props: null + fixed_tendons_props: null + spatial_tendons_props: null + joint_drive_props: null + visual_material_path: material + visual_material: + func: isaaclab.sim.spawners.materials.visual_materials:spawn_preview_surface + diffuse_color: !!python/tuple + - 0.0 + - 0.0 + - 1.0 + emissive_color: !!python/tuple + - 0.0 + - 0.0 + - 0.0 + roughness: 0.5 + metallic: 0.0 + opacity: 1.0 + usd_path: https://omniverse-content-production.s3-us-west-2.amazonaws.com/Assets/Isaac/5.1/Isaac/Props/UIElements/arrow_x.usd + variants: null + teleop_device: null + orchestrator: null + env_cfg_callback: null diff --git a/isaaclab_arena/policy/vln/pretrained/h1_navila_locomotion.pt b/isaaclab_arena/policy/vln/pretrained/h1_navila_locomotion.pt new file mode 100644 index 000000000..7b45f45f3 Binary files /dev/null and b/isaaclab_arena/policy/vln/pretrained/h1_navila_locomotion.pt differ diff --git a/isaaclab_arena/policy/vln/vln_action_protocol.py b/isaaclab_arena/policy/vln/vln_action_protocol.py new file mode 100644 index 000000000..39f62297d --- /dev/null +++ b/isaaclab_arena/policy/vln/vln_action_protocol.py @@ -0,0 +1,17 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Re-export VlnVelocityActionProtocol from the Arena core package. 
+ +The VLN velocity-command ActionProtocol is defined in +``isaaclab_arena.remote_policy.action_protocol`` alongside the other +protocol types (ChunkingActionProtocol, etc.). This module re-exports +it for convenience. +""" + +from isaaclab_arena.remote_policy.action_protocol import ( # noqa: F401 + ActionMode, + VlnVelocityActionProtocol, +) diff --git a/isaaclab_arena/policy/vln/vln_vlm_locomotion_policy.py b/isaaclab_arena/policy/vln/vln_vlm_locomotion_policy.py new file mode 100644 index 000000000..472faa1fd --- /dev/null +++ b/isaaclab_arena/policy/vln/vln_vlm_locomotion_policy.py @@ -0,0 +1,588 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""VLN hierarchical policy: remote VLM + local RSL-RL locomotion. + +This composite policy has two layers: + - **High-level**: A remote VLM server (e.g. NaVILA, GR00T) that + produces velocity commands ``[vx, vy, yaw_rate]`` from RGB images. + - **Low-level**: A local RSL-RL locomotion policy that converts + velocity commands into joint-position actions. + +Works with Arena's ``policy_runner.py`` like any other policy. 
+ +Usage:: + + python -m isaaclab_arena.evaluation.policy_runner \\ + --policy_type isaaclab_arena.policy.vln.vln_vlm_locomotion_policy.VlnVlmLocomotionPolicy \\ + --remote_host localhost --remote_port 5555 \\ + --ll_checkpoint_path /path/to/rsl_rl/model.pt \\ + --ll_agent_cfg /path/to/agent.yaml \\ + --num_episodes 10 \\ + VLN_Benchmark \\ + --usd_path /path/to/scene.usd \\ + --r2r_dataset_path /path/to/dataset.json.gz +""" + +from __future__ import annotations + +import argparse +import os +import time +from typing import Any + +import gymnasium as gym +import numpy as np +import torch +from gymnasium.spaces.dict import Dict as GymSpacesDict +from PIL import Image as PILImage +from rsl_rl.runners import OnPolicyRunner + +from isaaclab.utils.io import load_yaml +from isaaclab_rl.rsl_rl import RslRlVecEnvWrapper + +from isaaclab_arena.policy.client_side_policy import ClientSidePolicy +from isaaclab_arena.remote_policy.action_protocol import VlnVelocityActionProtocol +from isaaclab_arena.remote_policy.remote_policy_config import RemotePolicyConfig + + +class VlnVlmLocomotionPolicy(ClientSidePolicy): + """VLN hierarchical policy: remote VLM + local RSL-RL locomotion. + + High-level layer queries a remote VLM server for velocity commands. + Low-level layer runs a pre-trained RSL-RL locomotion policy to + convert velocity commands into joint-position actions. + + Inherits from ``ClientSidePolicy`` to reuse the ZeroMQ handshake, + observation packing, and remote lifecycle management. 
+ """ + + def __init__( + self, + remote_config: RemotePolicyConfig, + ll_checkpoint_path: str, + ll_agent_cfg: str, + device: str = "cuda", + vel_cmd_obs_indices: tuple[int, int] = (9, 12), + warmup_steps: int = 200, + debug: bool = False, + save_interval: int = 0, + save_dir: str = "", + ignore_vlm_stop: bool = False, + min_vlm_stop_distance: float = -1.0, + ): + super().__init__( + config=None, + remote_config=remote_config, + protocol_cls=VlnVelocityActionProtocol, + ) + self._device = device + self._vel_cmd_indices = vel_cmd_obs_indices + self._warmup_steps = warmup_steps + self._debug = debug + self._save_interval = save_interval + self._save_dir = save_dir + self._ignore_vlm_stop = ignore_vlm_stop + self._min_vlm_stop_distance = min_vlm_stop_distance + + # RSL-RL low-level policy (loaded lazily in first get_action) + self._ll_checkpoint_path = ll_checkpoint_path + self._ll_agent_cfg = ll_agent_cfg + self._ll_policy = None + self._ll_obs_td = None + self._ll_vec_env = None + + # VLM scheduling state + self._step_count: int = 0 + self._target_step: int = 0 + self._last_vel_cmd = np.zeros(self.action_dim, dtype=np.float32) + self._env_dt: float | None = None + + # Track current instruction to detect episode changes + self._current_instruction: str | None = None + + self._vlm_query_count: int = 0 + + # ------------------------------------------------------------------ # + # PolicyBase interface # + # ------------------------------------------------------------------ # + + def get_action(self, env: gym.Env, observation: GymSpacesDict) -> torch.Tensor: + """Return joint-position actions for the environment. + + Internally: VLM query → velocity command → inject into obs → + RSL-RL forward pass → joint actions. 
+ """ + unwrapped = env.unwrapped if hasattr(env, "unwrapped") else env + self._last_unwrapped = unwrapped + + # Lazy init + if self._ll_policy is None: + self._load_low_level_policy(env) + if self._debug: + self._debug_save_dir = "/tmp/vln_debug_frames" + os.makedirs(self._debug_save_dir, exist_ok=True) + if self._save_interval > 0 and self._save_dir: + ts = time.strftime("%Y%m%d_%H%M%S") + self._save_dir = os.path.join(self._save_dir, ts) + os.makedirs(self._save_dir, exist_ok=True) + print(f"[VlnPolicy] Saving frames every {self._save_interval} steps to {self._save_dir}") + + if self._env_dt is None: + try: + self._env_dt = float(unwrapped.cfg.sim.dt * unwrapped.cfg.decimation) + except Exception: + self._env_dt = 0.02 + + # Detect per-episode instruction changes from env.extras + self._check_instruction_update(unwrapped) + + goal_dist = self._get_distance_to_goal(unwrapped) + + # Query VLM if scheduling says it's time + if self._step_count >= self._target_step: + packed_obs = self.pack_observation_for_server(observation) + resp = self.remote_client.get_action(observation=packed_obs) + + vel_cmd = np.asarray( + resp.get("action", np.zeros(self.action_dim)), dtype=np.float32 + ) + duration = float(resp.get("duration", self.protocol.default_duration)) + vlm_text = resp.get("vlm_text", "") + self._last_vel_cmd = vel_cmd + + self._vlm_query_count += 1 + if self._debug: + dist_str = f" dist={goal_dist:.2f}" if goal_dist is not None else "" + print( + f"[VlnPolicy VLM #{self._vlm_query_count}] step={self._step_count}" + f" cmd={vel_cmd} dur={duration:.1f}s{dist_str}" + f" vlm=\"{vlm_text[-80:] if vlm_text else ''}\"" + ) + + if self._env_dt > 0.0 and duration > 0.0: + steps_to_hold = max(1, int(duration / self._env_dt)) + else: + steps_to_hold = 1 + self._target_step = self._step_count + steps_to_hold + + # STOP: VLM returns zero velocity + zero duration + if np.allclose(vel_cmd, 0.0) and duration <= 0.0: + accept_stop, stop_reason = 
self._should_accept_vlm_stop(goal_dist) + if accept_stop: + extras = getattr(unwrapped, "extras", {}) + if "vln_stop_called" in extras: + extras["vln_stop_called"][:] = True + elif self._debug: + print( + f"[VlnPolicy STOP ignored] step={self._step_count} " + f"dist={goal_dist if goal_dist is not None else 'unknown'} " + f"reason={stop_reason}" + ) + + if self._debug: + self._save_debug_frame(observation, self._step_count) + if self._save_dir: + self._save_frame_to_dir(observation, unwrapped) + + if self._debug and self._step_count % 200 == 0: + self._save_debug_frame(observation, self._step_count) + + # Get the latest proprioceptive observation from the RSL-RL wrapper. + self._ll_obs_td = self._ll_vec_env.get_observations() + + # Inject velocity command into the policy observation in-place + i, j = self._vel_cmd_indices + cmd_tensor = torch.tensor(self._last_vel_cmd, device=self._device, dtype=torch.float32) + self._ll_obs_td["policy"][:, i:j] = cmd_tensor + + # Run low-level policy (pass full TensorDict, RSL-RL indexes by group) + with torch.inference_mode(): + joint_actions = self._ll_policy(self._ll_obs_td) + + if self._debug and self._step_count % 500 == 0: + try: + pos = unwrapped.scene["robot"].data.root_pos_w[0].cpu().numpy() + goal = unwrapped.extras.get("current_goal_pos") + dist_str = "" + if goal is not None: + goal_np = np.asarray(goal) + if goal_np.ndim == 2: + goal_np = goal_np[0] + dist_str = f" dist_to_goal={np.linalg.norm(pos - goal_np):.2f}" + ep_id = "" + ep_ids = unwrapped.extras.get("current_episode_id") + if ep_ids is not None: + ep_id = f" ep={ep_ids[0]}" + print(f"[VlnPolicy] step={self._step_count} robot=[{pos[0]:.1f},{pos[1]:.1f},{pos[2]:.2f}] cmd={self._last_vel_cmd}{dist_str}{ep_id}") + except Exception: + pass + + if self._save_interval > 0 and self._save_dir and self._step_count % self._save_interval == 0: + self._save_frame_to_dir(observation, unwrapped) + + self._step_count += 1 + return joint_actions + + def _save_debug_frame(self, 
observation, step): + """Save camera images with distance and instruction overlay.""" + try: + cam_dict = self._get_camera_obs_dict(observation, getattr(self, "_last_unwrapped", None)) + if cam_dict is None: + return + + goal_dist = self._get_distance_to_goal( + getattr(self, "_last_unwrapped", None) + ) + instruction = self._current_instruction or "" + + for cam_name, cam_data in cam_dict.items(): + if "rgb" not in cam_name: + continue + img_tensor = cam_data + if hasattr(img_tensor, 'cpu'): + img_np = img_tensor[0].cpu().numpy() if img_tensor.ndim == 4 else img_tensor.cpu().numpy() + else: + img_np = np.asarray(img_tensor) + if img_np.ndim == 4: + img_np = img_np[0] + if img_np.dtype in (np.float32, np.float64): + img_np = np.clip(img_np * 255.0 if img_np.max() <= 1.5 else img_np, 0, 255).astype(np.uint8) + else: + img_np = img_np.astype(np.uint8) + + img = PILImage.fromarray(img_np) + self._draw_overlay(img, goal_dist, instruction, step) + + tag = "head" if "head" in cam_name else "follow" + path = f"{self._debug_save_dir}/{tag}_{step:06d}.png" + img.save(path) + except Exception as e: + print(f"[VlnPolicy DEBUG] Failed to save frame: {e}") + + @staticmethod + def _get_overlay_fonts(img): + """Return readable overlay fonts scaled to the image resolution.""" + from PIL import ImageFont + + title_size = max(28, img.width // 28) + body_size = max(20, img.width // 45) + try: + font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", title_size) + font_sm = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", body_size) + except OSError: + font = ImageFont.load_default() + font_sm = font + return font, font_sm + + @staticmethod + def _draw_text_box(draw, xy, text, font, text_fill, box_fill=(0, 0, 0), padding=6): + """Draw text with a solid background box for video readability.""" + left, top, right, bottom = draw.textbbox(xy, text, font=font) + draw.rectangle( + (left - padding, top - padding, right + padding, bottom + 
padding), + fill=box_fill, + ) + draw.text(xy, text, fill=text_fill, font=font) + return bottom + padding + + @staticmethod + def _wrap_overlay_text(draw, text: str, font, max_width: int) -> list[str]: + """Wrap text by rendered width so long instructions stay readable.""" + lines: list[str] = [] + paragraphs = text.replace("\r", "\n").split("\n") + for paragraph in paragraphs: + words = paragraph.split() + if not words: + continue + line = words[0] + for word in words[1:]: + candidate = f"{line} {word}" + if draw.textlength(candidate, font=font) <= max_width: + line = candidate + else: + lines.append(line) + line = word + lines.append(line) + return lines + + @staticmethod + def _draw_overlay(img, goal_dist, instruction, step, pos_str: str | None = None): + """Draw readable distance, step, position, and instruction overlays.""" + from PIL import ImageDraw + + draw = ImageDraw.Draw(img) + font, font_sm = VlnVlmLocomotionPolicy._get_overlay_fonts(img) + margin = max(10, img.width // 64) + padding = max(6, img.width // 170) + gap = max(8, img.width // 128) + + # Top-left status stack + dist_text = f"Dist: {goal_dist:.2f}m" if goal_dist is not None else "Dist: --" + y = margin + y = VlnVlmLocomotionPolicy._draw_text_box( + draw, (margin, y), dist_text, font, (255, 80, 80), padding=padding + ) + gap + y = VlnVlmLocomotionPolicy._draw_text_box( + draw, (margin, y), f"Step: {step}", font_sm, (255, 255, 255), padding=padding + ) + gap + if pos_str is not None: + VlnVlmLocomotionPolicy._draw_text_box( + draw, (margin, y), f"Pos: {pos_str}", font_sm, (255, 255, 255), padding=padding + ) + + # Bottom instruction panel + if instruction: + max_width = img.width - 2 * margin + lines = VlnVlmLocomotionPolicy._wrap_overlay_text(draw, instruction, font_sm, max_width - 2 * padding) + lines = lines[:4] + line_boxes = [draw.textbbox((0, 0), line, font=font_sm) for line in lines] + line_height = max((box[3] - box[1] for box in line_boxes), default=font_sm.size) + gap // 2 + 
panel_height = padding * 2 + line_height * len(lines) + panel_top = img.height - margin - panel_height + draw.rectangle( + (margin - padding, panel_top, img.width - margin + padding, img.height - margin + padding), + fill=(0, 0, 0), + ) + text_y = panel_top + padding + for line in lines: + draw.text((margin, text_y), line, fill=(255, 110, 110), font=font_sm) + text_y += line_height + + def _save_frame_to_dir(self, observation, unwrapped) -> None: + """Save camera frames to the configured save directory.""" + try: + cam_dict = self._get_camera_obs_dict(observation, unwrapped) + if cam_dict is None: + return + goal_dist = self._get_distance_to_goal(unwrapped) + instruction = self._current_instruction or "" + try: + pos = unwrapped.scene["robot"].data.root_pos_w[0].cpu().numpy() + pos_str = f"[{pos[0]:.1f},{pos[1]:.1f},{pos[2]:.2f}]" + except Exception: + pos_str = "?" + ep_id = 0 + ep_ids = getattr(unwrapped, "extras", {}).get("current_episode_id") + if ep_ids is not None: + ep_id = ep_ids[0] + for cam_name, cam_data in cam_dict.items(): + if "rgb" not in cam_name: + continue + img_tensor = cam_data + if hasattr(img_tensor, 'cpu'): + img_np = img_tensor[0].cpu().numpy() if img_tensor.ndim == 4 else img_tensor.cpu().numpy() + else: + img_np = np.asarray(img_tensor) + if img_np.ndim == 4: + img_np = img_np[0] + if img_np.dtype in (np.float32, np.float64): + img_np = np.clip(img_np * 255.0 if img_np.max() <= 1.5 else img_np, 0, 255).astype(np.uint8) + else: + img_np = img_np.astype(np.uint8) + img = PILImage.fromarray(img_np) + self._draw_overlay(img, goal_dist, instruction, self._step_count, pos_str=pos_str) + tag = "head" if "head" in cam_name else "follow" + path = os.path.join(self._save_dir, f"ep{ep_id}_{tag}_{self._step_count:06d}.png") + img.save(path) + except Exception as e: + print(f"[VlnPolicy] frame save error: {e}") + + def _get_camera_obs_dict(self, observation, unwrapped) -> dict[str, Any] | None: + """Return a camera_obs dict from either the step 
observation or the env.""" + if isinstance(observation, dict): + cam_dict = observation.get("camera_obs") + if isinstance(cam_dict, dict): + return cam_dict + + obs_manager = getattr(unwrapped, "observation_manager", None) + if obs_manager is None: + return None + + try: + cam_dict = obs_manager.compute_group("camera_obs", update_history=False) + except Exception: + return None + return cam_dict if isinstance(cam_dict, dict) else None + + def _should_accept_vlm_stop(self, goal_dist: float | None) -> tuple[bool, str]: + """Return whether a zero-duration stop should terminate the episode.""" + reasons: list[str] = [] + if self._ignore_vlm_stop: + reasons.append("ignore_vlm_stop") + if ( + self._min_vlm_stop_distance >= 0.0 + and goal_dist is not None + and goal_dist > self._min_vlm_stop_distance + ): + reasons.append( + f"goal_dist={goal_dist:.2f} > min_vlm_stop_distance={self._min_vlm_stop_distance:.2f}" + ) + return (not reasons, ", ".join(reasons) if reasons else "accepted") + + def reset(self, env_ids: torch.Tensor | None = None) -> None: + """Reset VLM scheduling state and notify server.""" + super().reset(env_ids) + self._step_count = 0 + self._target_step = 0 + self._last_vel_cmd[:] = 0.0 + self._current_instruction = None + + def set_task_description(self, task_description: str | None) -> str: + """Forward task description to the VLM server.""" + self.task_description = task_description + if task_description is not None: + self.remote_client.call_endpoint( + "set_task_description", + data={"task_description": task_description}, + requires_input=True, + ) + return self.task_description or "" + + # ------------------------------------------------------------------ # + # Low-level policy loading # + # ------------------------------------------------------------------ # + + def _load_low_level_policy(self, env) -> None: + """Load the RSL-RL locomotion policy and do warmup.""" + unwrapped = env.unwrapped if hasattr(env, "unwrapped") else env + + if isinstance(env, 
RslRlVecEnvWrapper): + vec_env = env + else: + vec_env = RslRlVecEnvWrapper(unwrapped) + self._ll_vec_env = vec_env + + agent_cfg_dict = load_yaml(self._ll_agent_cfg) + device = agent_cfg_dict.get("device", "cuda") + + runner = OnPolicyRunner(vec_env, agent_cfg_dict, log_dir=None, device=device) + runner.load(self._ll_checkpoint_path) + self._ll_policy = runner.get_inference_policy(device=vec_env.unwrapped.device) + + # Warmup: do forward-only dry runs so we don't mutate the first episode + # before metrics/recording start. The previous implementation stepped + # the environment here, which polluted path-length metrics and could + # physically bump the robot into nearby geometry before the first VLM + # query was issued. + self._ll_obs_td = vec_env.get_observations() + zero_cmd = torch.zeros(self.action_dim, device=self._device) + i, j = self._vel_cmd_indices + + print(f"[VlnPolicy] Warming up ({self._warmup_steps} steps)...") + for step in range(self._warmup_steps): + self._ll_obs_td["policy"][:, i:j] = zero_cmd + with torch.inference_mode(): + _ = self._ll_policy(self._ll_obs_td) + print("[VlnPolicy] Warmup complete.") + + # ------------------------------------------------------------------ # + # Distance helpers # + # ------------------------------------------------------------------ # + + def _get_distance_to_goal(self, unwrapped) -> float | None: + """Return Euclidean distance from robot to goal, or None.""" + try: + pos = unwrapped.scene["robot"].data.root_pos_w[0].cpu().numpy() + goal = unwrapped.extras.get("current_goal_pos") + if goal is None: + return None + goal_np = np.asarray(goal) + if goal_np.ndim == 2: + goal_np = goal_np[0] + return float(np.linalg.norm(pos - goal_np)) + except Exception: + return None + + # ------------------------------------------------------------------ # + # Instruction tracking # + # ------------------------------------------------------------------ # + + def _check_instruction_update(self, unwrapped) -> None: + """Detect 
per-episode instruction changes from env.extras.""" + extras = getattr(unwrapped, "extras", {}) + instruction = extras.get("current_instruction") + if instruction is None: + return + if isinstance(instruction, list): + instruction = instruction[0] + + if instruction != self._current_instruction: + self._current_instruction = instruction + self.remote_client.call_endpoint( + "set_task_description", + data={"task_description": instruction}, + requires_input=True, + ) + self._step_count = 0 + self._target_step = 0 + self._last_vel_cmd[:] = 0.0 + + # ------------------------------------------------------------------ # + # CLI helpers # + # ------------------------------------------------------------------ # + + @staticmethod + def add_args_to_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParser: + parser = ClientSidePolicy.add_remote_args_to_parser(parser) + + ll = parser.add_argument_group("Low-Level Locomotion Policy") + ll.add_argument( + "--ll_checkpoint_path", type=str, required=True, + help="Path to the RSL-RL checkpoint (e.g. 
model_0.pt).", + ) + ll.add_argument( + "--ll_agent_cfg", type=str, required=True, + help="Path to the RSL-RL agent config YAML.", + ) + ll.add_argument( + "--warmup_steps", type=int, default=200, + help="Low-level policy warmup steps (default: 200).", + ) + ll.add_argument( + "--policy_device", type=str, default="cuda", + help="Device for policy inference (default: cuda).", + ) + ll.add_argument( + "--debug_vln", action="store_true", default=False, + help="Enable VLN debug logging and camera frame saving (default: off).", + ) + ll.add_argument( + "--save_interval", type=int, default=0, + help="Save camera frames every N steps to --save_dir (0=disabled).", + ) + ll.add_argument( + "--save_dir", type=str, default="", + help="Directory for saved camera frames.", + ) + ll.add_argument( + "--ignore_vlm_stop", + action="store_true", + default=False, + help="Diagnostic mode: never let VLM STOP terminate the episode.", + ) + ll.add_argument( + "--min_vlm_stop_distance", + type=float, + default=-1.0, + help="Diagnostic mode: ignore STOP while distance-to-goal is above this threshold; negative disables.", + ) + return parser + + @staticmethod + def from_args(args: argparse.Namespace) -> VlnVlmLocomotionPolicy: + remote_config = ClientSidePolicy.build_remote_config_from_args(args) + return VlnVlmLocomotionPolicy( + remote_config=remote_config, + ll_checkpoint_path=args.ll_checkpoint_path, + ll_agent_cfg=args.ll_agent_cfg, + device=getattr(args, "policy_device", "cuda"), + warmup_steps=getattr(args, "warmup_steps", 200), + debug=getattr(args, "debug_vln", False), + save_interval=getattr(args, "save_interval", 0), + save_dir=getattr(args, "save_dir", ""), + ignore_vlm_stop=getattr(args, "ignore_vlm_stop", False), + min_vlm_stop_distance=getattr(args, "min_vlm_stop_distance", -1.0), + ) diff --git a/isaaclab_arena/remote_policy/action_protocol.py b/isaaclab_arena/remote_policy/action_protocol.py index 5d4df2203..d6faf8b6a 100644 --- 
a/isaaclab_arena/remote_policy/action_protocol.py +++ b/isaaclab_arena/remote_policy/action_protocol.py @@ -14,11 +14,17 @@ class ActionMode(str, Enum): """Action output mode of a policy. - Currently only CHUNK is used. - Other modes can be added later if needed. + Attributes: + CHUNK: Action chunking mode (e.g. GR00T). The server returns a + sequence of ``action_horizon`` actions; the client consumes + ``action_chunk_length`` at a time. + VLN_VELOCITY: VLN velocity-command mode. The server returns a + single velocity command ``[vx, vy, yaw_rate]`` with a + ``duration`` (seconds) to hold it before re-querying. """ CHUNK = "chunk" + VLN_VELOCITY = "vln_velocity" @dataclass @@ -92,3 +98,36 @@ def to_dict(self) -> dict[str, Any]: "action_chunk_length": self.action_chunk_length, "action_horizon": self.action_horizon, } + + +@dataclass +class VlnVelocityActionProtocol(ActionProtocol): + """ActionProtocol for VLN_VELOCITY mode. + + The server returns a single velocity command ``[vx, vy, yaw_rate]`` and + a ``duration`` (seconds) for which the client should hold the command + before re-querying the server. + + default_duration: + Fallback duration when the server response does not include one. 
+ """ + + MODE: ClassVar[ActionMode] = ActionMode.VLN_VELOCITY + + default_duration: float = 0.5 + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> VlnVelocityActionProtocol: + return cls( + action_dim=int(data["action_dim"]), + observation_keys=list(data["observation_keys"]), + default_duration=float(data.get("default_duration", 0.5)), + ) + + def to_dict(self) -> dict[str, Any]: + return { + "action_mode": self.mode.value, + "action_dim": self.action_dim, + "observation_keys": self.observation_keys, + "default_duration": self.default_duration, + } diff --git a/isaaclab_arena/scripts/assets/build_matterport_collision_layers.py b/isaaclab_arena/scripts/assets/build_matterport_collision_layers.py new file mode 100644 index 000000000..4aa03d81c --- /dev/null +++ b/isaaclab_arena/scripts/assets/build_matterport_collision_layers.py @@ -0,0 +1,340 @@ +#!/usr/bin/env python3 +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Build collision-only USDA layers from a cleaned Matterport OBJ mesh. + +This script is intended for the recommended collision workflow: + +1. Start from a raw Matterport / MP3D mesh, not the already-converted visual USD. +2. Export or convert that raw mesh to a triangulated OBJ. +3. Optionally clean / simplify the OBJ in a mesh tool. +4. Run this script to emit floor-only, obstacle-only, and/or combined collision USDA assets. + +The floor split is heuristic and tuned for single-floor indoor scenes: + - triangle centroid z must lie within ``[floor_min_z, floor_max_z]`` + - triangle normal must point mostly upward + - tiny triangles can be filtered out via ``min_face_area`` + +For multi-floor scenes or heavily tilted geometry, treat this as a starting point +and adjust thresholds or manually clean the exported mesh. 
+""" + +from __future__ import annotations + +import argparse +import math +from pathlib import Path + + +def _parse_obj(obj_path: Path) -> tuple[list[tuple[float, float, float]], list[tuple[int, int, int]]]: + """Parse a triangulated OBJ file into vertices and triangle faces.""" + vertices: list[tuple[float, float, float]] = [] + faces: list[tuple[int, int, int]] = [] + + with obj_path.open("r", encoding="utf-8", errors="ignore") as file: + for line in file: + if line.startswith("v "): + _, x_str, y_str, z_str = line.strip().split()[:4] + vertices.append((float(x_str), float(y_str), float(z_str))) + elif line.startswith("f "): + tokens = line.strip().split()[1:] + face: list[int] = [] + for token in tokens: + vertex_index_str = token.split("/")[0] + if not vertex_index_str: + continue + vertex_index = int(vertex_index_str) + if vertex_index < 0: + vertex_index = len(vertices) + vertex_index + else: + vertex_index -= 1 + face.append(vertex_index) + if len(face) < 3: + continue + for i in range(1, len(face) - 1): + faces.append((face[0], face[i], face[i + 1])) + + if not vertices or not faces: + raise RuntimeError(f"OBJ parse produced no usable mesh data: {obj_path}") + + return vertices, faces + + +def _sub(a: tuple[float, float, float], b: tuple[float, float, float]) -> tuple[float, float, float]: + return (a[0] - b[0], a[1] - b[1], a[2] - b[2]) + + +def _cross(a: tuple[float, float, float], b: tuple[float, float, float]) -> tuple[float, float, float]: + return ( + a[1] * b[2] - a[2] * b[1], + a[2] * b[0] - a[0] * b[2], + a[0] * b[1] - a[1] * b[0], + ) + + +def _norm(v: tuple[float, float, float]) -> float: + return math.sqrt(v[0] * v[0] + v[1] * v[1] + v[2] * v[2]) + + +def _triangle_area( + vertices: list[tuple[float, float, float]], face: tuple[int, int, int] +) -> float: + p0, p1, p2 = (vertices[idx] for idx in face) + return 0.5 * _norm(_cross(_sub(p1, p0), _sub(p2, p0))) + + +def _triangle_centroid_z( + vertices: list[tuple[float, float, float]], face: 
tuple[int, int, int] +) -> float: + p0, p1, p2 = (vertices[idx] for idx in face) + return (p0[2] + p1[2] + p2[2]) / 3.0 + + +def _triangle_normal_z( + vertices: list[tuple[float, float, float]], face: tuple[int, int, int] +) -> float: + p0, p1, p2 = (vertices[idx] for idx in face) + cross = _cross(_sub(p1, p0), _sub(p2, p0)) + length = _norm(cross) + if length <= 1e-12: + return 0.0 + return cross[2] / length + + +def _split_floor_and_obstacles( + vertices: list[tuple[float, float, float]], + faces: list[tuple[int, int, int]], + *, + floor_min_z: float, + floor_max_z: float, + floor_normal_min_z: float, + min_face_area: float, +) -> tuple[list[tuple[int, int, int]], list[tuple[int, int, int]]]: + """Split triangles into floor-like and obstacle-like sets.""" + floor_faces: list[tuple[int, int, int]] = [] + obstacle_faces: list[tuple[int, int, int]] = [] + + for face in faces: + area = _triangle_area(vertices, face) + if area < min_face_area: + continue + + centroid_z = _triangle_centroid_z(vertices, face) + normal_z = _triangle_normal_z(vertices, face) + is_floor_like = ( + floor_min_z <= centroid_z <= floor_max_z and normal_z >= floor_normal_min_z + ) + if is_floor_like: + floor_faces.append(face) + else: + obstacle_faces.append(face) + + return floor_faces, obstacle_faces + + +def _compact_mesh( + vertices: list[tuple[float, float, float]], faces: list[tuple[int, int, int]] +) -> tuple[list[tuple[float, float, float]], list[int], list[int]]: + """Compact a mesh selection into a new vertex array and face indices.""" + used_indices: dict[int, int] = {} + compact_vertices: list[tuple[float, float, float]] = [] + compact_face_indices: list[int] = [] + compact_face_counts: list[int] = [] + + for face in faces: + compact_face_counts.append(len(face)) + for old_idx in face: + if old_idx not in used_indices: + used_indices[old_idx] = len(compact_vertices) + compact_vertices.append(vertices[old_idx]) + compact_face_indices.append(used_indices[old_idx]) + + return 
compact_vertices, compact_face_counts, compact_face_indices + + +def _format_int_array(values: list[int]) -> str: + return "[" + ", ".join(str(value) for value in values) + "]" + + +def _format_points(vertices: list[tuple[float, float, float]]) -> str: + return "[" + ", ".join(f"({x:.8f}, {y:.8f}, {z:.8f})" for x, y, z in vertices) + "]" + + +def _write_usda( + usd_path: Path, + vertices: list[tuple[float, float, float]], + face_vertex_counts: list[int], + face_vertex_indices: list[int], + mesh_prim_path: str, +) -> None: + """Write a simple USDA stage containing a single mesh.""" + prim_parts = [part for part in mesh_prim_path.split("/") if part] + if len(prim_parts) < 2 or prim_parts[0] != "World": + raise RuntimeError("mesh_prim_path must start with /World and include a mesh name.") + + mesh_name = prim_parts[-1] + xform_parts = prim_parts[:-1] + + lines: list[str] = [ + "#usda 1.0", + "(", + ' defaultPrim = "World"', + ' upAxis = "Z"', + ")", + "", + ] + + indent = "" + for index, prim_name in enumerate(xform_parts): + lines.append(f'{indent}def Xform "{prim_name}"') + lines.append(f"{indent}" + "{") + indent += " " + if index == len(xform_parts) - 1: + lines.append(f'{indent}def Mesh "{mesh_name}"') + lines.append(f"{indent}" + "{") + lines.append(f"{indent} int[] faceVertexCounts = {_format_int_array(face_vertex_counts)}") + lines.append(f"{indent} int[] faceVertexIndices = {_format_int_array(face_vertex_indices)}") + lines.append(f"{indent} point3f[] points = {_format_points(vertices)}") + lines.append(f'{indent} uniform token subdivisionScheme = "none"') + lines.append(f"{indent}" + "}") + + for close_idx in range(len(xform_parts)): + close_indent = " " * (len(xform_parts) - close_idx - 1) + lines.append(f"{close_indent}" + "}") + + usd_path.write_text("\n".join(lines) + "\n", encoding="utf-8") + + +def _write_selection( + *, + output_path: Path | None, + vertices: list[tuple[float, float, float]], + faces: list[tuple[int, int, int]], + mesh_prim_path: str, 
+ label: str, +) -> None: + if output_path is None: + return + if not faces: + print(f"[build_matterport_collision_layers] skipping {label}: no faces selected") + return + + output_path.parent.mkdir(parents=True, exist_ok=True) + compact_vertices, face_vertex_counts, face_vertex_indices = _compact_mesh(vertices, faces) + _write_usda( + usd_path=output_path, + vertices=compact_vertices, + face_vertex_counts=face_vertex_counts, + face_vertex_indices=face_vertex_indices, + mesh_prim_path=mesh_prim_path, + ) + print( + "[build_matterport_collision_layers] wrote " + f"{label} -> {output_path} " + f"(vertices={len(compact_vertices)} faces={len(face_vertex_counts)})" + ) + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Build floor / obstacle / combined collision USDA files from a cleaned OBJ mesh." + ) + parser.add_argument("--input_obj", type=Path, required=True, help="Input triangulated OBJ mesh path.") + parser.add_argument( + "--output_combined_usd", + type=Path, + default=None, + help="Optional path for a combined collision USDA output.", + ) + parser.add_argument( + "--output_floor_usd", + type=Path, + default=None, + help="Optional path for a floor-only collision USDA output.", + ) + parser.add_argument( + "--output_obstacle_usd", + type=Path, + default=None, + help="Optional path for an obstacle-only collision USDA output.", + ) + parser.add_argument( + "--mesh_prim_prefix", + type=str, + default="/World/CollisionProxy", + help="Prefix used for generated mesh prims.", + ) + parser.add_argument( + "--floor_min_z", + type=float, + default=-0.25, + help="Minimum triangle centroid z to be considered floor-like.", + ) + parser.add_argument( + "--floor_max_z", + type=float, + default=0.35, + help="Maximum triangle centroid z to be considered floor-like.", + ) + parser.add_argument( + "--floor_normal_min_z", + type=float, + default=0.9, + help="Minimum triangle normal z-component for floor classification.", + ) + parser.add_argument( + 
"--min_face_area", + type=float, + default=1e-6, + help="Drop tiny triangles below this area threshold before classification.", + ) + args = parser.parse_args() + + if not (args.output_combined_usd or args.output_floor_usd or args.output_obstacle_usd): + raise RuntimeError("At least one output path must be provided.") + + input_obj = args.input_obj.expanduser().resolve() + vertices, faces = _parse_obj(input_obj) + floor_faces, obstacle_faces = _split_floor_and_obstacles( + vertices, + faces, + floor_min_z=args.floor_min_z, + floor_max_z=args.floor_max_z, + floor_normal_min_z=args.floor_normal_min_z, + min_face_area=args.min_face_area, + ) + + print( + "[build_matterport_collision_layers] input=" + f"{input_obj} faces={len(faces)} floor_faces={len(floor_faces)} obstacle_faces={len(obstacle_faces)}" + ) + + prim_prefix = args.mesh_prim_prefix.rstrip("/") + _write_selection( + output_path=args.output_combined_usd.expanduser().resolve() if args.output_combined_usd else None, + vertices=vertices, + faces=faces, + mesh_prim_path=f"{prim_prefix}/combined", + label="combined", + ) + _write_selection( + output_path=args.output_floor_usd.expanduser().resolve() if args.output_floor_usd else None, + vertices=vertices, + faces=floor_faces, + mesh_prim_path=f"{prim_prefix}/floor", + label="floor", + ) + _write_selection( + output_path=args.output_obstacle_usd.expanduser().resolve() if args.output_obstacle_usd else None, + vertices=vertices, + faces=obstacle_faces, + mesh_prim_path=f"{prim_prefix}/obstacle", + label="obstacle", + ) + + +if __name__ == "__main__": + main() diff --git a/isaaclab_arena/scripts/assets/debug_matterport_collision_stage.py b/isaaclab_arena/scripts/assets/debug_matterport_collision_stage.py new file mode 100644 index 000000000..d9eae1646 --- /dev/null +++ b/isaaclab_arena/scripts/assets/debug_matterport_collision_stage.py @@ -0,0 +1,229 @@ +#!/usr/bin/env python3 +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers. +# All rights reserved. 
+# +# SPDX-License-Identifier: Apache-2.0 + +"""Build a debug stage for inspecting Matterport collision setup in Isaac Sim. + +This script mirrors the runtime collision wiring used by +`MatterportBackground` but is focused on inspection instead of evaluation. +It can: + +1. Spawn the visual Matterport USD. +2. Optionally enable explicit child-mesh colliders. +3. Optionally attach legacy or split collision overlays. +4. Make hidden collision overlays visible for viewport inspection. +5. Save the resulting stage to USD so it can be reopened in Isaac Sim. +""" + +from __future__ import annotations + +import argparse +from pathlib import Path + +from isaaclab.app import AppLauncher + + +def _path_or_none(path: Path | None) -> str | None: + if path is None: + return None + return str(path.expanduser().resolve()) + + +parser = argparse.ArgumentParser(description="Create a debug USD stage for Matterport collision inspection.") +parser.add_argument("--visual_usd_path", type=Path, required=True, help="Path to the visual Matterport USD.") +parser.add_argument( + "--output_usd_path", + type=Path, + default=None, + help="Optional path to export the composed debug stage.", +) +parser.add_argument( + "--combined_collision_usd_path", + type=Path, + default=None, + help="Optional legacy combined collision overlay USD.", +) +parser.add_argument( + "--floor_collision_usd_path", + type=Path, + default=None, + help="Optional floor-only collision overlay USD.", +) +parser.add_argument( + "--obstacle_collision_usd_path", + type=Path, + default=None, + help="Optional obstacle-only collision overlay USD.", +) +parser.add_argument( + "--prim_path", + type=str, + default="/World/matterport", + help="Prim path where the Matterport scene will be spawned.", +) +parser.add_argument( + "--mesh_collider_type", + type=str, + default="triangle", + choices=("triangle", "convex_decomposition", "sdf"), + help="Approximation used for explicit child-mesh colliders and overlays.", +) 
+parser.add_argument( + "--enable_child_mesh_colliders", + action="store_true", + default=False, + help="Apply explicit collision schemas to descendant visual USD meshes.", +) +parser.add_argument( + "--disable_ground_plane", + action="store_true", + default=False, + help="Do not spawn the fallback invisible z=0 ground plane.", +) +parser.add_argument( + "--show_collision_prims", + action="store_true", + default=False, + help="Make hidden collision overlay prims visible after spawning them.", +) +parser.add_argument( + "--show_ground_plane", + action="store_true", + default=False, + help="Make the fallback ground plane visible if it is spawned.", +) +parser.add_argument( + "--flatten_output", + action="store_true", + default=False, + help="Export a flattened stage instead of only the root layer.", +) +parser.add_argument( + "--settle_frames", + type=int, + default=4, + help="Number of app update frames after spawning before export.", +) +AppLauncher.add_app_launcher_args(parser) +args_cli = parser.parse_args() + +app_launcher = AppLauncher(args_cli) +simulation_app = app_launcher.app + + +def _iter_descendant_prims(root_prim): + from pxr import Usd + + for child in root_prim.GetFilteredChildren(Usd.TraverseInstanceProxies()): + yield child + yield from _iter_descendant_prims(child) + + +def _count_meshes(stage, prim_path: str) -> int: + from pxr import UsdGeom + + prim = stage.GetPrimAtPath(prim_path) + if not prim.IsValid(): + return 0 + return sum(1 for descendant in _iter_descendant_prims(prim) if descendant.IsA(UsdGeom.Mesh)) + + +def _set_visible(stage, prim_path: str) -> bool: + from pxr import UsdGeom + + prim = stage.GetPrimAtPath(prim_path) + if not prim.IsValid(): + return False + UsdGeom.Imageable(prim).MakeVisible() + return True + + +def _overlay_prim_names() -> list[str]: + names: list[str] = [] + if args_cli.combined_collision_usd_path is not None: + names.append("collision") + if args_cli.floor_collision_usd_path is not None: + 
names.append("collisionFloor") + if args_cli.obstacle_collision_usd_path is not None: + names.append("collisionObstacle") + return names + + +def main() -> None: + import omni.usd + import isaaclab.sim as sim_utils + + from isaaclab_arena.assets.matterport_background import _spawn_matterport_with_ground + + if args_cli.combined_collision_usd_path and ( + args_cli.floor_collision_usd_path or args_cli.obstacle_collision_usd_path + ): + raise ValueError("Use either the combined collision overlay or split floor/obstacle overlays, not both.") + + visual_usd_path = str(args_cli.visual_usd_path.expanduser().resolve()) + output_usd_path = args_cli.output_usd_path.expanduser().resolve() if args_cli.output_usd_path else None + + usd_context = omni.usd.get_context() + usd_context.new_stage() + stage = usd_context.get_stage() + if stage is None: + raise RuntimeError("Failed to create a fresh USD stage.") + + cfg = sim_utils.UsdFileCfg(usd_path=visual_usd_path) + cfg.ground_plane_z = None if args_cli.disable_ground_plane else 0.0 + cfg.explicit_mesh_colliders = args_cli.enable_child_mesh_colliders + cfg.mesh_collider_approximation = args_cli.mesh_collider_type + cfg.collision_overlay_usd_path = _path_or_none(args_cli.combined_collision_usd_path) + cfg.floor_collision_usd_path = _path_or_none(args_cli.floor_collision_usd_path) + cfg.obstacle_collision_usd_path = _path_or_none(args_cli.obstacle_collision_usd_path) + + _spawn_matterport_with_ground(args_cli.prim_path, cfg) + + for _ in range(max(args_cli.settle_frames, 0)): + simulation_app.update() + + visible_overlays: list[str] = [] + if args_cli.show_collision_prims: + for overlay_name in _overlay_prim_names(): + overlay_path = f"{args_cli.prim_path}/{overlay_name}" + if _set_visible(stage, overlay_path): + visible_overlays.append(overlay_path) + + ground_plane_visible = False + if args_cli.show_ground_plane: + ground_plane_visible = _set_visible(stage, "/World/GroundPlane") + + visual_mesh_count = _count_meshes(stage, 
args_cli.prim_path) + print(f"[debug_matterport_collision_stage] visual_root={args_cli.prim_path} meshes={visual_mesh_count}") + for overlay_name in _overlay_prim_names(): + overlay_path = f"{args_cli.prim_path}/{overlay_name}" + print(f"[debug_matterport_collision_stage] overlay={overlay_path} meshes={_count_meshes(stage, overlay_path)}") + + if visible_overlays: + print("[debug_matterport_collision_stage] visible_overlays=" + ", ".join(visible_overlays)) + if ground_plane_visible: + print("[debug_matterport_collision_stage] ground_plane made visible at /World/GroundPlane") + + if output_usd_path is not None: + output_usd_path.parent.mkdir(parents=True, exist_ok=True) + if args_cli.flatten_output: + stage.Export(str(output_usd_path)) + export_mode = "flattened stage" + else: + stage.GetRootLayer().Export(str(output_usd_path)) + export_mode = "root layer" + print(f"[debug_matterport_collision_stage] exported {export_mode} to {output_usd_path}") + + if not args_cli.headless: + print("[debug_matterport_collision_stage] GUI mode active. Inspect the stage and close Isaac Sim when done.") + while simulation_app.is_running(): + simulation_app.update() + + +if __name__ == "__main__": + try: + main() + finally: + simulation_app.close() diff --git a/isaaclab_arena/scripts/assets/export_matterport_collision_proxy.py b/isaaclab_arena/scripts/assets/export_matterport_collision_proxy.py new file mode 100644 index 000000000..47362212c --- /dev/null +++ b/isaaclab_arena/scripts/assets/export_matterport_collision_proxy.py @@ -0,0 +1,178 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Export a scene asset into a mesh file for collision authoring workflows. + +This script runs inside Isaac Sim so it can use Omniverse's asset converter. +The intended workflow is: + +1. Export a raw Matterport / MP3D mesh (preferred) or a visual USD fallback to OBJ. +2. 
Simplify / clean the OBJ in an external mesh tool if needed. +3. Convert the cleaned OBJ into one or more collision-only USDA files. +4. Feed those USDA files to the Matterport collision overlay CLI flags. +""" + +from __future__ import annotations + +import argparse +import asyncio +from pathlib import Path + +from isaaclab.app import AppLauncher + + +parser = argparse.ArgumentParser( + description="Export a scene asset (raw mesh or USD) to OBJ or USD via Omniverse asset converter." +) +parser.add_argument( + "--input_path", + type=Path, + required=True, + help="Input asset path (raw Matterport mesh, USD, or another asset-converter supported format).", +) +parser.add_argument("--output_path", type=Path, required=True, help="Output asset path (e.g. .obj or .usd).") +parser.add_argument( + "--merge_all_meshes", + action="store_true", + default=False, + help="Request Omniverse asset converter to merge meshes when supported.", +) +parser.add_argument( + "--ignore_materials", + action="store_true", + default=False, + help="Drop materials during export when supported by the converter.", +) +parser.add_argument( + "--export_hidden_props", + action="store_true", + default=False, + help="Include hidden prims in the exported asset when supported.", +) +AppLauncher.add_app_launcher_args(parser) +args_cli = parser.parse_args() + +app_launcher = AppLauncher(args_cli) +simulation_app = app_launcher.app + +try: + import omni.kit.asset_converter as asset_converter # noqa: E402 +except ModuleNotFoundError: + asset_converter = None + +from pxr import Gf, Usd, UsdGeom # noqa: E402 + + +async def _convert_asset(input_path: Path, output_path: Path) -> None: + """Convert an asset using Omniverse asset converter.""" + input_path = input_path.expanduser().resolve() + output_path = output_path.expanduser().resolve() + output_path.parent.mkdir(parents=True, exist_ok=True) + + converter = asset_converter.get_instance() + ctx = asset_converter.AssetConverterContext() + ctx.ignore_materials = 
args_cli.ignore_materials + ctx.export_hidden_props = args_cli.export_hidden_props + ctx.merge_all_meshes = args_cli.merge_all_meshes + + def _progress_callback(progress: float, total_steps: float) -> bool: + print(f"[asset_converter] progress={progress:.3f} total_steps={total_steps}") + return True + + task = converter.create_converter_task( + str(input_path), + str(output_path), + _progress_callback, + ctx, + ) + success = await task.wait_until_finished() + if not success: + raise RuntimeError( + f"Asset conversion failed: status={task.get_status()} error={task.get_error_message()}" + ) + print(f"[asset_converter] exported: {output_path}") + + +def _iter_mesh_prims(stage: Usd.Stage): + """Yield mesh prims from a USD stage.""" + for prim in stage.Traverse(): + if prim.IsA(UsdGeom.Mesh): + yield prim + + +def _transform_points(matrix: Gf.Matrix4d, points) -> list[tuple[float, float, float]]: + """Transform USD mesh points into world-space XYZ tuples.""" + xyz_points: list[tuple[float, float, float]] = [] + for point in points: + world = matrix.Transform(Gf.Vec3d(point[0], point[1], point[2])) + xyz_points.append((world[0], world[1], world[2])) + return xyz_points + + +def _write_obj_from_usd(input_path: Path, output_path: Path) -> None: + """Fallback exporter: compose the USD stage and dump all meshes to OBJ.""" + stage = Usd.Stage.Open(str(input_path)) + if stage is None: + raise RuntimeError(f"Failed to open USD stage: {input_path}") + + output_path.parent.mkdir(parents=True, exist_ok=True) + xform_cache = UsdGeom.XformCache() + vertex_offset = 1 + mesh_count = 0 + + with output_path.open("w", encoding="utf-8") as obj_file: + obj_file.write(f"# Exported from USD: {input_path}\n") + for prim in _iter_mesh_prims(stage): + mesh = UsdGeom.Mesh(prim) + points = mesh.GetPointsAttr().Get() + face_counts = mesh.GetFaceVertexCountsAttr().Get() + face_indices = mesh.GetFaceVertexIndicesAttr().Get() + if not points or not face_counts or not face_indices: + continue + + 
local_to_world = xform_cache.GetLocalToWorldTransform(prim) + world_points = _transform_points(local_to_world, points) + if not world_points: + continue + + mesh_count += 1 + obj_file.write(f"o mesh_{mesh_count}\n") + for x, y, z in world_points: + obj_file.write(f"v {x:.8f} {y:.8f} {z:.8f}\n") + + cursor = 0 + for face_count in face_counts: + face = [face_indices[cursor + i] + vertex_offset for i in range(face_count)] + cursor += face_count + if len(face) < 3: + continue + # Fan triangulation for non-triangle faces. + for i in range(1, len(face) - 1): + obj_file.write(f"f {face[0]} {face[i]} {face[i + 1]}\n") + + vertex_offset += len(world_points) + + if mesh_count == 0: + raise RuntimeError(f"No meshes found in USD stage: {input_path}") + print(f"[pxr_obj_export] exported {mesh_count} meshes to: {output_path}") + + +def main() -> None: + try: + if asset_converter is not None: + asyncio.get_event_loop().run_until_complete(_convert_asset(args_cli.input_path, args_cli.output_path)) + else: + if args_cli.output_path.suffix.lower() != ".obj": + raise RuntimeError( + "Asset converter is unavailable in this Isaac Sim image. " + "Fallback exporter currently supports only OBJ output." 
+ ) + _write_obj_from_usd(args_cli.input_path, args_cli.output_path) + finally: + simulation_app.close() + + +if __name__ == "__main__": + main() diff --git a/isaaclab_arena/scripts/assets/filter_collision_usda.py b/isaaclab_arena/scripts/assets/filter_collision_usda.py new file mode 100644 index 000000000..2a6f35b70 --- /dev/null +++ b/isaaclab_arena/scripts/assets/filter_collision_usda.py @@ -0,0 +1,294 @@ +#!/usr/bin/env python3 + +"""Filter simple collision USDA meshes and optionally add viewport colors.""" + +from __future__ import annotations + +import argparse +import math +import re +from collections import defaultdict, deque +from dataclasses import dataclass +from pathlib import Path + + +@dataclass +class MeshData: + mesh_name: str + vertices: list[tuple[float, float, float]] + faces: list[tuple[int, ...]] + + +@dataclass +class ComponentStats: + face_indices: list[int] + min_z: float + max_z: float + diag: float + + +def _parse_usda_mesh(path: Path) -> MeshData: + text = path.read_text(encoding="utf-8") + mesh_name_match = re.search(r'def Mesh "([^"]+)"', text) + points_match = re.search(r"point3f\[\] points = \[(.*?)\]\n", text, re.S) + counts_match = re.search(r"int\[\] faceVertexCounts = \[(.*?)\]\n", text, re.S) + indices_match = re.search(r"int\[\] faceVertexIndices = \[(.*?)\]\n", text, re.S) + if not (mesh_name_match and points_match and counts_match and indices_match): + raise RuntimeError(f"Failed to parse simple collision USDA mesh: {path}") + + vertices = [ + tuple(map(float, match)) + for match in re.findall(r"\(([-0-9.eE]+),\s*([-0-9.eE]+),\s*([-0-9.eE]+)\)", points_match.group(1)) + ] + face_counts = [int(value) for value in re.findall(r"-?\d+", counts_match.group(1))] + face_indices_flat = [int(value) for value in re.findall(r"-?\d+", indices_match.group(1))] + + faces: list[tuple[int, ...]] = [] + cursor = 0 + for face_count in face_counts: + faces.append(tuple(face_indices_flat[cursor : cursor + face_count])) + cursor += face_count + + 
return MeshData(mesh_name=mesh_name_match.group(1), vertices=vertices, faces=faces) + + +def _iter_connected_components(mesh: MeshData) -> list[ComponentStats]: + triangle_faces = [tuple(face) for face in mesh.faces if len(face) == 3] + vertex_to_faces: dict[int, list[int]] = defaultdict(list) + for face_index, face in enumerate(triangle_faces): + for vertex_index in face: + vertex_to_faces[vertex_index].append(face_index) + + seen = [False] * len(triangle_faces) + components: list[ComponentStats] = [] + for start_index in range(len(triangle_faces)): + if seen[start_index]: + continue + + queue = deque([start_index]) + seen[start_index] = True + component_faces: list[int] = [] + component_vertices: set[int] = set() + while queue: + face_index = queue.popleft() + component_faces.append(face_index) + for vertex_index in triangle_faces[face_index]: + component_vertices.add(vertex_index) + for neighbor_face_index in vertex_to_faces[vertex_index]: + if not seen[neighbor_face_index]: + seen[neighbor_face_index] = True + queue.append(neighbor_face_index) + + xs = [mesh.vertices[index][0] for index in component_vertices] + ys = [mesh.vertices[index][1] for index in component_vertices] + zs = [mesh.vertices[index][2] for index in component_vertices] + dx = max(xs) - min(xs) + dy = max(ys) - min(ys) + dz = max(zs) - min(zs) + components.append( + ComponentStats( + face_indices=component_faces, + min_z=min(zs), + max_z=max(zs), + diag=math.sqrt(dx * dx + dy * dy + dz * dz), + ) + ) + + return components + + +def _should_remove_ground_component(component: ComponentStats, args: argparse.Namespace) -> bool: + return ( + args.ground_max_faces > 0 + and len(component.face_indices) <= args.ground_max_faces + and component.max_z <= args.ground_max_top_z + and component.diag <= args.ground_max_diag + ) + + +def _should_remove_floating_component(component: ComponentStats, args: argparse.Namespace) -> bool: + return ( + args.floating_max_faces > 0 + and len(component.face_indices) <= 
args.floating_max_faces + and component.min_z >= args.floating_min_bottom_z + and (args.floating_max_top_z <= 0 or component.max_z <= args.floating_max_top_z) + and component.diag <= args.floating_max_diag + ) + + +def _compact_mesh( + vertices: list[tuple[float, float, float]], faces: list[tuple[int, ...]] +) -> tuple[list[tuple[float, float, float]], list[int], list[int]]: + used_indices: dict[int, int] = {} + compact_vertices: list[tuple[float, float, float]] = [] + compact_face_counts: list[int] = [] + compact_face_indices: list[int] = [] + + for face in faces: + compact_face_counts.append(len(face)) + for old_index in face: + if old_index not in used_indices: + used_indices[old_index] = len(compact_vertices) + compact_vertices.append(vertices[old_index]) + compact_face_indices.append(used_indices[old_index]) + + return compact_vertices, compact_face_counts, compact_face_indices + + +def _format_points(vertices: list[tuple[float, float, float]]) -> str: + return "[" + ", ".join(f"({x:.8f}, {y:.8f}, {z:.8f})" for x, y, z in vertices) + "]" + + +def _format_int_array(values: list[int]) -> str: + return "[" + ", ".join(str(value) for value in values) + "]" + + +def _build_output_text( + mesh_name: str, + vertices: list[tuple[float, float, float]], + face_counts: list[int], + face_indices: list[int], + display_color: tuple[float, float, float] | None, + display_opacity: float | None, +) -> str: + lines = [ + "#usda 1.0", + "(", + ' defaultPrim = "World"', + ' upAxis = "Z"', + ")", + "", + 'def Xform "World"', + "{", + ' def Xform "CollisionProxy"', + " {", + f' def Mesh "{mesh_name}"', + " {", + f" int[] faceVertexCounts = {_format_int_array(face_counts)}", + f" int[] faceVertexIndices = {_format_int_array(face_indices)}", + f" point3f[] points = {_format_points(vertices)}", + ] + if display_color is not None: + r, g, b = display_color + lines.append(f" color3f[] primvars:displayColor = [({r:.4f}, {g:.4f}, {b:.4f})]") + lines.append(' uniform token 
primvars:displayColor:interpolation = "constant"') + if display_opacity is not None: + lines.append(f" float[] primvars:displayOpacity = [{display_opacity:.4f}]") + lines.append(' uniform token primvars:displayOpacity:interpolation = "constant"') + lines.extend( + [ + ' uniform token subdivisionScheme = "none"', + " }", + " }", + "}", + "", + ] + ) + return "\n".join(lines) + + +def main() -> None: + parser = argparse.ArgumentParser(description="Filter simple collision USDA meshes and optionally add display color.") + parser.add_argument("--input_usd", type=Path, required=True, help="Input USDA produced by the collision pipeline.") + parser.add_argument("--output_usd", type=Path, required=True, help="Output USDA path.") + parser.add_argument( + "--display_color", + type=float, + nargs=3, + default=None, + metavar=("R", "G", "B"), + help="Optional constant displayColor for viewport visualization.", + ) + parser.add_argument( + "--display_opacity", + type=float, + default=None, + help="Optional constant displayOpacity for viewport visualization.", + ) + parser.add_argument( + "--ground_max_faces", + type=int, + default=0, + help="Remove connected components at ground height with face counts at or below this value (0 disables).", + ) + parser.add_argument( + "--ground_max_top_z", + type=float, + default=0.0, + help="Maximum component top z for ground-fragment removal.", + ) + parser.add_argument( + "--ground_max_diag", + type=float, + default=0.0, + help="Maximum component diagonal length for ground-fragment removal.", + ) + parser.add_argument( + "--floating_max_faces", + type=int, + default=0, + help="Remove floating connected components with face counts at or below this value (0 disables).", + ) + parser.add_argument( + "--floating_min_bottom_z", + type=float, + default=0.0, + help="Minimum component bottom z for floating-fragment removal.", + ) + parser.add_argument( + "--floating_max_diag", + type=float, + default=0.0, + help="Maximum component diagonal length 
for floating-fragment removal.", + ) + parser.add_argument( + "--floating_max_top_z", + type=float, + default=0.0, + help="Optional maximum component top z for floating-fragment removal (0 disables).", + ) + args = parser.parse_args() + + input_usd = args.input_usd.expanduser().resolve() + output_usd = args.output_usd.expanduser().resolve() + mesh = _parse_usda_mesh(input_usd) + components = _iter_connected_components(mesh) + + removed_face_indices: set[int] = set() + ground_removed = 0 + floating_removed = 0 + for component in components: + if _should_remove_ground_component(component, args): + removed_face_indices.update(component.face_indices) + ground_removed += 1 + continue + if _should_remove_floating_component(component, args): + removed_face_indices.update(component.face_indices) + floating_removed += 1 + + triangle_faces = [tuple(face) for face in mesh.faces if len(face) == 3] + kept_faces = [triangle_faces[index] for index in range(len(triangle_faces)) if index not in removed_face_indices] + compact_vertices, face_counts, face_indices = _compact_mesh(mesh.vertices, kept_faces) + output_usd.parent.mkdir(parents=True, exist_ok=True) + output_usd.write_text( + _build_output_text( + mesh_name=mesh.mesh_name, + vertices=compact_vertices, + face_counts=face_counts, + face_indices=face_indices, + display_color=tuple(args.display_color) if args.display_color is not None else None, + display_opacity=args.display_opacity, + ), + encoding="utf-8", + ) + + print( + "[filter_collision_usda] " + f"input={input_usd} output={output_usd} " + f"kept_faces={len(face_counts)} removed_faces={len(removed_face_indices)} " + f"removed_ground_components={ground_removed} removed_floating_components={floating_removed}" + ) + + +if __name__ == "__main__": + main() diff --git a/isaaclab_arena/scripts/assets/obj_to_usd_mesh.py b/isaaclab_arena/scripts/assets/obj_to_usd_mesh.py new file mode 100644 index 000000000..4417f1b8f --- /dev/null +++ 
b/isaaclab_arena/scripts/assets/obj_to_usd_mesh.py @@ -0,0 +1,139 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Convert a triangulated OBJ mesh into a simple USD mesh stage. + +This utility is intended for collision-proxy workflows where we already have an +OBJ export from the Matterport visual USD and want a lightweight USD mesh back +for ``--matterport_collision_usd_path``. +""" + +from __future__ import annotations + +import argparse +from pathlib import Path + +def _parse_obj(obj_path: Path) -> tuple[list[tuple[float, float, float]], list[int], list[int]]: + """Parse vertex and face data from a simple OBJ file.""" + vertices: list[tuple[float, float, float]] = [] + face_vertex_counts: list[int] = [] + face_vertex_indices: list[int] = [] + + with obj_path.open("r", encoding="utf-8", errors="ignore") as file: + for line in file: + if line.startswith("v "): + _, x_str, y_str, z_str = line.strip().split()[:4] + vertices.append((float(x_str), float(y_str), float(z_str))) + elif line.startswith("f "): + tokens = line.strip().split()[1:] + face: list[int] = [] + for token in tokens: + vertex_index_str = token.split("/")[0] + if not vertex_index_str: + continue + vertex_index = int(vertex_index_str) + if vertex_index < 0: + vertex_index = len(vertices) + vertex_index + else: + vertex_index -= 1 + face.append(vertex_index) + if len(face) < 3: + continue + face_vertex_counts.append(len(face)) + face_vertex_indices.extend(face) + + if not vertices or not face_vertex_counts: + raise RuntimeError(f"OBJ parse produced no usable mesh data: {obj_path}") + + return vertices, face_vertex_counts, face_vertex_indices + + +def _format_int_array(values: list[int]) -> str: + """Format a USD int array.""" + return "[" + ", ".join(str(value) for value in values) + "]" + + +def _format_points(vertices: list[tuple[float, float, float]]) -> str: + """Format a USD point3f array.""" + return "[" 
+ ", ".join(f"({x:.8f}, {y:.8f}, {z:.8f})" for x, y, z in vertices) + "]" + + +def _write_usda( + usd_path: Path, + vertices: list[tuple[float, float, float]], + face_vertex_counts: list[int], + face_vertex_indices: list[int], + mesh_prim_path: str, +) -> None: + """Write a simple USDA stage containing a single mesh.""" + prim_parts = [part for part in mesh_prim_path.split("/") if part] + if len(prim_parts) < 2 or prim_parts[0] != "World": + raise RuntimeError("mesh_prim_path must start with /World and include a mesh name.") + + mesh_name = prim_parts[-1] + xform_parts = prim_parts[:-1] + + lines: list[str] = [ + "#usda 1.0", + "(", + ' defaultPrim = "World"', + ' upAxis = "Z"', + ")", + "", + ] + + indent = "" + for index, prim_name in enumerate(xform_parts): + lines.append(f'{indent}def Xform "{prim_name}"') + lines.append(f"{indent}" + "{") + indent += " " + if index == len(xform_parts) - 1: + lines.append(f'{indent}def Mesh "{mesh_name}"') + lines.append(f"{indent}" + "{") + lines.append(f"{indent} int[] faceVertexCounts = {_format_int_array(face_vertex_counts)}") + lines.append(f"{indent} int[] faceVertexIndices = {_format_int_array(face_vertex_indices)}") + lines.append(f"{indent} point3f[] points = {_format_points(vertices)}") + lines.append(f'{indent} uniform token subdivisionScheme = "none"') + lines.append(f"{indent}" + "}") + + for close_idx in range(len(xform_parts)): + close_indent = " " * (len(xform_parts) - close_idx - 1) + lines.append(f"{close_indent}" + "}") + + usd_path.write_text("\n".join(lines) + "\n", encoding="utf-8") + + +def main() -> None: + parser = argparse.ArgumentParser(description="Convert OBJ mesh to a simple USD mesh.") + parser.add_argument("--input_obj", type=Path, required=True, help="Input OBJ mesh path.") + parser.add_argument("--output_usd", type=Path, required=True, help="Output USD path.") + parser.add_argument( + "--mesh_prim_path", + type=str, + default="/World/CollisionProxy/mesh", + help="Prim path for the generated 
mesh.", + ) + args = parser.parse_args() + + input_obj = args.input_obj.expanduser().resolve() + output_usd = args.output_usd.expanduser().resolve() + output_usd.parent.mkdir(parents=True, exist_ok=True) + + vertices, face_vertex_counts, face_vertex_indices = _parse_obj(input_obj) + _write_usda( + usd_path=output_usd, + vertices=vertices, + face_vertex_counts=face_vertex_counts, + face_vertex_indices=face_vertex_indices, + mesh_prim_path=args.mesh_prim_path, + ) + print( + f"[obj_to_usd_mesh] wrote {output_usd} " + f"(vertices={len(vertices)} faces={len(face_vertex_counts)})" + ) + + +if __name__ == "__main__": + main() diff --git a/isaaclab_arena/scripts/assets/open_usd_stage.py b/isaaclab_arena/scripts/assets/open_usd_stage.py new file mode 100644 index 000000000..265dc37e1 --- /dev/null +++ b/isaaclab_arena/scripts/assets/open_usd_stage.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Open a USD stage in Isaac Sim and keep the app alive for inspection.""" + +from __future__ import annotations + +import argparse +from pathlib import Path + +from isaaclab.app import AppLauncher + + +parser = argparse.ArgumentParser(description="Open a USD stage in Isaac Sim for manual inspection.") +parser.add_argument("--usd_path", type=Path, required=True, help="USD or USDA file to open in Isaac Sim.") +parser.add_argument( + "--wait_frames", + type=int, + default=4, + help="Number of app update frames after opening the stage before reporting success.", +) +AppLauncher.add_app_launcher_args(parser) +args_cli = parser.parse_args() + +app_launcher = AppLauncher(args_cli) +simulation_app = app_launcher.app + + +def main() -> None: + import omni.usd + + usd_path = args_cli.usd_path.expanduser().resolve() + if not usd_path.exists(): + raise FileNotFoundError(f"USD stage does not exist: {usd_path}") + + usd_context = omni.usd.get_context() + opened = 
usd_context.open_stage(str(usd_path)) + if not opened: + raise RuntimeError(f"Failed to open stage: {usd_path}") + + for _ in range(max(args_cli.wait_frames, 0)): + simulation_app.update() + + stage = usd_context.get_stage() + default_prim = stage.GetDefaultPrim().GetPath().pathString if stage and stage.GetDefaultPrim() else "" + print(f"[open_usd_stage] opened={usd_path}") + print(f"[open_usd_stage] default_prim={default_prim}") + + if not args_cli.headless: + print("[open_usd_stage] GUI mode active. Inspect the stage and close Isaac Sim when done.") + while simulation_app.is_running(): + simulation_app.update() + + +if __name__ == "__main__": + try: + main() + finally: + simulation_app.close() diff --git a/isaaclab_arena/scripts/reinforcement_learning/train.py b/isaaclab_arena/scripts/reinforcement_learning/train.py index a7ad52391..4e5a0c72a 100644 --- a/isaaclab_arena/scripts/reinforcement_learning/train.py +++ b/isaaclab_arena/scripts/reinforcement_learning/train.py @@ -33,9 +33,6 @@ default=Path("isaaclab_arena/policy/rl_policy/generic_policy.json"), help="Path to the RL agent configuration file.", ) -parser.add_argument( - "--distributed", action="store_true", default=False, help="Run training with multiple GPUs or nodes." -) parser.add_argument("--export_io_descriptors", action="store_true", default=False, help="Export IO descriptors.") # append RSL-RL cli arguments cli_args.add_rsl_rl_args(parser) diff --git a/isaaclab_arena/tasks/vln_r2r_matterport_task.py b/isaaclab_arena/tasks/vln_r2r_matterport_task.py new file mode 100644 index 000000000..3a18489d0 --- /dev/null +++ b/isaaclab_arena/tasks/vln_r2r_matterport_task.py @@ -0,0 +1,541 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""VLN navigation task for IsaacLab Arena. + +This module provides: + - ``VlnEpisodeCfg``: configuration for a single VLN episode. 
+ - ``VlnBenchmarkCfg``: sequential episode sampler (supports multi-env). + - ``VlnR2rMatterportTask``: TaskBase implementation that wires episodes, + terminations, events, and metrics together. + - Event / termination helpers that reset the robot pose and check goal + arrival at runtime. + + Multi-env support: + When ``num_envs > 1``, each environment gets its own episode, goal + position, and instruction. Episode data is stored in ``env.extras`` + with per-env arrays (e.g. ``current_goal_pos`` has shape ``[N, 3]``). + Note that each env also loads a full copy of the Matterport scene, + which may require significant GPU memory. + """ + + from __future__ import annotations + + import gzip + import json + import os + from typing import Any, Dict, List, Optional, Sequence, Tuple + + import numpy as np + import torch + from dataclasses import MISSING + + import isaaclab.envs.mdp as mdp_isaac_lab + from isaaclab.envs.common import ViewerCfg + from isaaclab.managers import EventTermCfg, SceneEntityCfg, TerminationTermCfg + from isaaclab.utils import configclass + + from isaaclab_arena.assets.asset import Asset + from isaaclab_arena.environments.isaaclab_arena_manager_based_env import IsaacLabArenaManagerBasedRLEnvCfg + from isaaclab_arena.metrics.metric_base import MetricBase + from isaaclab_arena.tasks.task_base import TaskBase + + from isaaclab_arena.metrics.vln_metrics import ( + DistanceToGoalMetric, + OracleSuccessMetric, + PathLengthMetric, + SPLMetric, + SuccessMetric, + ) + + + # ========================================================================== # + # Dataset loading # + # ========================================================================== # + + + def read_episodes(r2r_dataset_path: str) -> List[Dict[str, Any]]: + """Read R2R-style episodes from a gzipped JSON file. + + Expected format:: + + {"episodes": [ {episode_dict}, ...
]} + """ + with gzip.open(r2r_dataset_path, "rt", encoding="utf-8") as f: + data = json.load(f) + return data["episodes"] + + +# ========================================================================== # +# Episode configuration # +# ========================================================================== # + + +@configclass +class VlnEpisodeCfg: + """Configuration for a single VLN episode.""" + + scene_id: str = MISSING + episode_id: int = MISSING + start_pos: Tuple[float, float, float] = MISSING + start_quat_wxyz: Tuple[float, float, float, float] = MISSING + goal_pos: Tuple[float, float, float] = MISSING + goal_radius: float = MISSING + geodesic_distance: float = MISSING + instruction_text: str = MISSING + reference_path: List[Tuple[float, float, float]] = MISSING + + +@configclass +class VlnBenchmarkCfg: + """Sequential episode sampler wrapping a list of :class:`VlnEpisodeCfg`. + + Supports multi-env: ``sample_episodes(n)`` returns ``n`` episodes, + one per environment, advancing the internal index sequentially. 
+ """ + + episodes: list[VlnEpisodeCfg] = MISSING + current_index: int = 0 + robot_root_height_offset: float = 0.0 + + def sample_episode(self) -> VlnEpisodeCfg: + """Sample a single episode (for single-env or per-env calls).""" + ep = self.episodes[self.current_index] + self.current_index = (self.current_index + 1) % len(self.episodes) + return ep + + def sample_episodes(self, n: int) -> list[VlnEpisodeCfg]: + """Sample ``n`` episodes sequentially (one per env).""" + return [self.sample_episode() for _ in range(n)] + + +def build_vln_episode_from_raw(raw_ep: dict) -> VlnEpisodeCfg: + """Convert a raw R2R-style episode dict into :class:`VlnEpisodeCfg`.""" + scene_id = os.path.splitext(os.path.basename(raw_ep["scene_id"]))[0] + start_pos = tuple(raw_ep["start_position"]) + start_quat = tuple(raw_ep["start_rotation"]) + goals = raw_ep.get("goals") or [] + if goals: + goal_pos = tuple(goals[0]["position"]) + goal_radius = float(goals[0].get("radius", 3.0)) + else: + goal_pos = tuple(raw_ep["reference_path"][-1]) + goal_radius = 3.0 + geodesic_distance = float(raw_ep.get("info", {}).get("geodesic_distance", 0.0)) + instruction_text = raw_ep["instruction"]["instruction_text"] + reference_path = [tuple(p) for p in raw_ep["reference_path"]] + + return VlnEpisodeCfg( + scene_id=scene_id, + episode_id=int(raw_ep["episode_id"]), + start_pos=start_pos, + start_quat_wxyz=start_quat, + goal_pos=goal_pos, + goal_radius=goal_radius, + geodesic_distance=geodesic_distance, + instruction_text=instruction_text, + reference_path=reference_path, + ) + + +# ========================================================================== # +# Termination helpers # +# ========================================================================== # + + +def _log_episode_end(env, reason: str, env_ids: torch.Tensor) -> None: + """Log distance-to-goal when an episode ends.""" + if not env_ids.any(): + return + try: + ids = env_ids.nonzero(as_tuple=False).flatten().tolist() + root_pos = 
env.scene["robot"].data.root_pos_w.cpu().numpy() + goal_pos = env.extras.get("current_goal_pos") + ep_ids = env.extras.get("current_episode_id") + for i in ids: + pos = root_pos[i] + goal_str = "" + if goal_pos is not None: + g = np.asarray(goal_pos[i]) + dist = float(np.linalg.norm(pos - g)) + goal_str = f" goal=[{g[0]:.1f},{g[1]:.1f},{g[2]:.2f}] dist={dist:.2f}" + ep_str = f" ep={ep_ids[i]}" if ep_ids is not None else "" + print(f"[VlnNavTask] EPISODE END ({reason}) env={i}{ep_str} robot=[{pos[0]:.1f},{pos[1]:.1f},{pos[2]:.2f}]{goal_str}") + except Exception: + pass + + +def vln_stop_term(env) -> torch.Tensor: + """Termination term: True when the VLM policy signals STOP. + + The policy sets ``env.extras["vln_stop_called"]`` (a bool tensor of + shape ``[N]``) when the VLM outputs a stop command (zero velocity + and zero duration). + """ + flag = env.extras.get("vln_stop_called") + if flag is None: + return torch.zeros(env.num_envs, dtype=torch.bool, device=env.device) + result = torch.as_tensor(flag, device=env.device, dtype=torch.bool) + if result.any(): + _log_episode_end(env, "STOP", result) + return result + + +def vln_stuck_term(env, velocity_threshold: float = 0.1, patience: int = 1000) -> torch.Tensor: + """Termination term: True when the robot has been stuck for too long. + + A robot is considered stuck when its linear velocity magnitude stays + below ``velocity_threshold`` for ``patience`` consecutive steps. + Per-env counters are stored in ``env.extras["_vln_stuck_counter"]``. 
+ """ + num_envs = env.num_envs + device = env.device + + root_vel = env.scene["robot"].data.root_vel_w[:num_envs, :3] # [N, 3] + speed = torch.norm(root_vel, dim=-1) # [N] + + if "_vln_stuck_counter" not in env.extras: + env.extras["_vln_stuck_counter"] = torch.zeros(num_envs, dtype=torch.long, device=device) + + counter = env.extras["_vln_stuck_counter"] + is_slow = speed < velocity_threshold + counter = torch.where(is_slow, counter + 1, torch.zeros_like(counter)) + env.extras["_vln_stuck_counter"] = counter + + stuck_mask = counter >= patience + if stuck_mask.any(): + _log_episode_end(env, f"STUCK({patience}steps)", stuck_mask) + return stuck_mask + + +def vln_success_term(env, position_tolerance: float = 0.3) -> torch.Tensor: + """Termination term: True when the robot is within *position_tolerance* of the goal. + + Supports multi-env: ``env.extras["current_goal_pos"]`` should have + shape ``[N, 3]`` where ``N = num_envs``. + """ + num_envs = env.num_envs + root_state = env.scene["robot"].data.root_state_w # [N, 13] + goal_pos_np = env.extras.get("current_goal_pos") + if goal_pos_np is None: + return torch.zeros(num_envs, dtype=torch.bool, device=root_state.device) + + goal_pos = torch.as_tensor(goal_pos_np, device=root_state.device, dtype=root_state.dtype) + # Ensure shape [N, 3] + if goal_pos.ndim == 1: + goal_pos = goal_pos.unsqueeze(0).expand(num_envs, -1) + elif goal_pos.shape[0] == 1 and num_envs > 1: + goal_pos = goal_pos.expand(num_envs, -1) + + robot_pos = root_state[..., :3] + dist = torch.linalg.norm(robot_pos - goal_pos, dim=-1) + result = dist < position_tolerance + if result.any(): + _log_episode_end(env, f"SUCCESS(d<{position_tolerance}m)", result) + return result + + +def vln_disabled_success_term(env, position_tolerance: float = 0.0) -> torch.Tensor: + """Disabled proximity-success termination. 
+ + When following the embodied-navigation evaluation protocol from + Anderson et al., episodes should only be deemed successful if the + agent explicitly emits STOP/DONE. We therefore keep the termination + slot but disable auto-success by default. + """ + _ = position_tolerance + return torch.zeros(env.num_envs, dtype=torch.bool, device=env.device) + + +def vln_time_out_term(env) -> torch.Tensor: + """Wraps standard time_out to add EPISODE END logging.""" + result = mdp_isaac_lab.time_out(env) + if result.any(): + _log_episode_end(env, "TIME_OUT", result) + return result + + +@configclass +class VlnTerminationsCfg: + """Termination terms for the VLN task.""" + + time_out: TerminationTermCfg = TerminationTermCfg(func=vln_time_out_term, time_out=True) + success: TerminationTermCfg = MISSING + stop: TerminationTermCfg = TerminationTermCfg(func=vln_stop_term) + stuck: TerminationTermCfg = MISSING + + +# ========================================================================== # +# Event helpers (reset robot pose from dataset) # +# ========================================================================== # + + +def reset_robot_and_goal_from_vln_dataset( + env, + env_ids: torch.Tensor, + robot_cfg: SceneEntityCfg, + vln_cfg: VlnBenchmarkCfg, +): + """Event callback executed on environment reset. + + Supports multi-env: only the environments in ``env_ids`` are reset. + Each reset env gets a new episode sampled sequentially from the dataset. + Episode metadata is stored per-env in ``env.extras``. 
+ + Storage layout in ``env.extras``: + - ``current_goal_pos``: np.ndarray shape ``[N, 3]`` + - ``current_instruction``: list[str] length ``N`` + - ``current_reference_path``: list[np.ndarray] length ``N`` + - ``current_scene_id``: list[str] length ``N`` + - ``current_episode_id``: list[int] length ``N`` + """ + num_envs = env.num_envs + env_ids_list = env_ids.cpu().tolist() + num_reset = len(env_ids_list) + + # Initialize per-env storage on first call + if "current_goal_pos" not in env.extras: + env.extras["current_goal_pos"] = np.zeros((num_envs, 3), dtype=np.float32) + env.extras["current_instruction"] = [""] * num_envs + env.extras["current_reference_path"] = [np.zeros((1, 3), dtype=np.float32)] * num_envs + env.extras["current_scene_id"] = [""] * num_envs + env.extras["current_episode_id"] = [0] * num_envs + env.extras["vln_stop_called"] = torch.zeros(num_envs, dtype=torch.bool, device=env.device) + env.extras["_vln_stuck_counter"] = torch.zeros(num_envs, dtype=torch.long, device=env.device) + + # Clear stop/stuck flags for reset envs + if "vln_stop_called" in env.extras: + env.extras["vln_stop_called"][env_ids] = False + if "_vln_stuck_counter" in env.extras: + env.extras["_vln_stuck_counter"][env_ids] = 0 + + # Sample one episode per reset env + episodes = vln_cfg.sample_episodes(num_reset) + + # Build batched start poses for all reset envs + start_poses = torch.zeros(num_reset, 7, dtype=torch.float32, device=env.device) + for i, ep in enumerate(episodes): + start_poses[i, :3] = torch.tensor(ep.start_pos) + start_poses[i, 2] += vln_cfg.robot_root_height_offset + start_poses[i, 3:7] = torch.tensor(ep.start_quat_wxyz) + + # Teleport all reset robots and zero velocities in one batched call + robot = env.scene[robot_cfg.name] + robot.write_root_pose_to_sim(start_poses, env_ids=env_ids) + zero_vel = torch.zeros(num_reset, 6, dtype=torch.float32, device=env.device) + robot.write_root_velocity_to_sim(zero_vel, env_ids=env_ids) + + # Update per-env metadata + for i, 
env_id in enumerate(env_ids_list): + ep = episodes[i] + env.extras["current_goal_pos"][env_id] = np.array(ep.goal_pos, dtype=np.float32) + env.extras["current_instruction"][env_id] = ep.instruction_text + env.extras["current_reference_path"][env_id] = np.array(ep.reference_path, dtype=np.float32) + env.extras["current_scene_id"][env_id] = ep.scene_id + env.extras["current_episode_id"][env_id] = ep.episode_id + print( + f"[VlnNavTask] NEW EPISODE env={env_id} ep={ep.episode_id} scene={ep.scene_id}" + f" start=[{ep.start_pos[0]:.1f},{ep.start_pos[1]:.1f},{ep.start_pos[2]:.2f}]" + f" root_z_offset={vln_cfg.robot_root_height_offset:.2f}" + f" goal=[{ep.goal_pos[0]:.1f},{ep.goal_pos[1]:.1f},{ep.goal_pos[2]:.2f}]" + f" instruction={ep.instruction_text[:80]}..." + ) + + +@configclass +class VlnEventsCfg: + """Events configuration for the VLN task.""" + + reset_robot_and_goal: EventTermCfg = MISSING + + +# ========================================================================== # +# VLN Navigation Task # +# ========================================================================== # + + +class VlnR2rMatterportTask(TaskBase): + """VLN task using R2R (Room-to-Room) episodes in Matterport scenes. + + This task: + - Reads episodes from a gzipped VLN-CE R2R dataset. + - Provides termination (time-out, goal-reached, VLM stop, stuck). + - Registers VLN metrics (Oracle Success, SPL, Success, PathLength, DistanceToGoal). + - On reset, teleports the robot to the episode start pose and stores + the instruction / goal in ``env.extras``. + + For other VLN datasets (RxR, REVERIE) or scene types (Gibson), + create a new task class following this pattern. 
+ """ + + def __init__( + self, + robot: Asset, + r2r_dataset_path: str, + episode_indices: Optional[Sequence[int]] = None, + episode_length_s: float = 60.0, + success_radius: float = 3.0, + scene_filter: Optional[str] = None, + robot_root_height_offset: float = 0.0, + require_stop_for_success: bool = True, + ): + """ + Args: + robot: The robot embodiment asset. + r2r_dataset_path: Path to the gzipped R2R dataset JSON file. + episode_indices: Optional subset of episode indices to evaluate. + episode_length_s: Maximum episode duration in seconds. + success_radius: Fallback distance threshold (meters) for goal + success when the dataset episode does not define a goal radius. + scene_filter: If set, only load episodes whose scene_id contains + this string (e.g. ``"zsNo4HB9uLZ"``). This is critical + because the dataset contains episodes from many scenes but + only one scene USD is loaded at a time. + robot_root_height_offset: Added to dataset start_position.z when + teleporting the robot root on reset. Useful when dataset + positions are floor-level but the robot root is pelvis-level. + require_stop_for_success: If True, only STOP/DONE can end a + successful episode. If False, the benchmark falls back to + proximity-based success termination. + """ + super().__init__( + episode_length_s=episode_length_s, + task_description="Navigate to the target location following the instruction.", + ) + self.robot = robot + self.success_radius = success_radius + self.require_stop_for_success = require_stop_for_success + + # Load episodes: filter by scene FIRST, then slice by index range. + # This ensures --episode_start/end refer to indices within the + # filtered (scene-specific) episode list, not the global dataset. 
+ raw_episodes = read_episodes(r2r_dataset_path) + + if scene_filter is not None: + before = len(raw_episodes) + raw_episodes = [ep for ep in raw_episodes if scene_filter in ep.get("scene_id", "")] + print( + f"[VlnR2rMatterportTask] Scene filter '{scene_filter}': {before} -> {len(raw_episodes)} episodes" + ) + if not raw_episodes: + raise ValueError( + f"No episodes match scene_filter='{scene_filter}'. " + f"Check that --usd_path matches the dataset scenes." + ) + + if episode_indices is not None: + raw_episodes = [raw_episodes[i] for i in episode_indices if i < len(raw_episodes)] + print(f"[VlnR2rMatterportTask] Episode indices: {len(raw_episodes)} episodes selected") + + vln_episodes = [build_vln_episode_from_raw(ep) for ep in raw_episodes] + self.vln_benchmark_cfg = VlnBenchmarkCfg( + episodes=vln_episodes, + robot_root_height_offset=robot_root_height_offset, + ) + + # Pre-compute data for metrics + self._gt_waypoints_per_episode: list[list[list[float]]] = [ + [list(p) for p in ep.reference_path] for ep in vln_episodes + ] + self._success_radius_per_episode: list[float] = [ + float(ep.goal_radius) if ep.goal_radius > 0.0 else success_radius + for ep in vln_episodes + ] + self._shortest_path_distance_per_episode: list[float] | None = None + if all(ep.geodesic_distance > 0.0 for ep in vln_episodes): + self._shortest_path_distance_per_episode = [ + float(ep.geodesic_distance) for ep in vln_episodes + ] + + # ------------------------------------------------------------------ # + # TaskBase interface # + # ------------------------------------------------------------------ # + + def get_scene_cfg(self): + """VLN scene assets (Matterport background) are added by the Environment, not the Task.""" + return None + + def get_termination_cfg(self) -> VlnTerminationsCfg: + success_term_func = vln_disabled_success_term if self.require_stop_for_success else vln_success_term + return VlnTerminationsCfg( + success=TerminationTermCfg( + func=success_term_func, + 
params={"position_tolerance": self.success_radius}, + ), + stuck=TerminationTermCfg( + func=vln_stuck_term, + params={"velocity_threshold": 0.1, "patience": 1000}, + ), + ) + + def get_events_cfg(self) -> VlnEventsCfg: + cfg = VlnEventsCfg( + reset_robot_and_goal=EventTermCfg( + func=reset_robot_and_goal_from_vln_dataset, + mode="reset", + params={ + "robot_cfg": SceneEntityCfg("robot"), + "vln_cfg": self.vln_benchmark_cfg, + }, + ), + ) + return cfg + + def get_mimic_env_cfg(self, arm_mode): + # VLN does not use mimic / imitation learning datagen + return None + + def get_metrics(self) -> list[MetricBase]: + return [ + PathLengthMetric(), + DistanceToGoalMetric(self._gt_waypoints_per_episode), + OracleSuccessMetric( + self._gt_waypoints_per_episode, + self._success_radius_per_episode, + ), + SuccessMetric( + self._gt_waypoints_per_episode, + self._success_radius_per_episode, + require_stop_signal=self.require_stop_for_success, + ), + SPLMetric( + self._gt_waypoints_per_episode, + self._success_radius_per_episode, + shortest_path_distance_per_episode=self._shortest_path_distance_per_episode, + require_stop_signal=self.require_stop_for_success, + ), + ] + + def get_task_description(self) -> str | None: + """Return a static task description for the VLN benchmark. + + This returns a generic description set at init time. It does NOT + return the per-episode navigation instruction, because: + + 1. Arena's ``policy_runner`` calls ``get_task_description()`` once + at startup via ``task.get_task_description()``, before any + episode is loaded. + 2. VLN instructions change every episode and are per-env. + + Per-episode instructions are stored in + ``env.extras["current_instruction"]`` (a list of length ``num_envs``) + and are automatically picked up by ``VlnPolicy`` on each step via + ``_check_instruction_update()``. + """ + return self.task_description + + def get_viewer_cfg(self) -> ViewerCfg: + # Third-person camera behind and above the robot, looking slightly ahead. 
+ # Kept close to avoid clipping through Matterport walls. + return ViewerCfg( + eye=(-1.5, 0.0, 1.5), + lookat=(1.0, 0.0, 0.8), + origin_type="asset_root", + asset_name="robot", + env_index=0, + ) + + def modify_env_cfg(self, env_cfg: IsaacLabArenaManagerBasedRLEnvCfg) -> IsaacLabArenaManagerBasedRLEnvCfg: + return env_cfg diff --git a/isaaclab_arena/tests/test_vln_metrics_protocol.py b/isaaclab_arena/tests/test_vln_metrics_protocol.py new file mode 100644 index 000000000..4208f6094 --- /dev/null +++ b/isaaclab_arena/tests/test_vln_metrics_protocol.py @@ -0,0 +1,260 @@ +import sys +import types + +import numpy as np +import torch + + +def _install_lightweight_stubs() -> None: + """Install minimal module stubs so pure-logic tests can import VLN code.""" + + def _identity_configclass(cls): + return cls + + class _RecorderTerm: + def __init__(self, cfg=None, env=None): + self.cfg = cfg + self._env = env + + class _RecorderTermCfg: + class_type = None + + class _ManagerBasedEnv: + pass + + class _DummyClientSidePolicy: + def __init__(self, *args, **kwargs): + pass + + @staticmethod + def add_remote_args_to_parser(parser): + return parser + + class _DummyProtocol: + default_duration = 1.0 + + class _DummyRemotePolicyConfig: + pass + + class _DummyRslRlVecEnvWrapper: + pass + + class _DummyOnPolicyRunner: + def __init__(self, *args, **kwargs): + pass + + sys.modules.setdefault("isaaclab", types.ModuleType("isaaclab")) + sys.modules.setdefault("isaaclab.envs", types.ModuleType("isaaclab.envs")) + sys.modules.setdefault("isaaclab.managers", types.ModuleType("isaaclab.managers")) + sys.modules.setdefault("isaaclab.utils", types.ModuleType("isaaclab.utils")) + sys.modules.setdefault("isaaclab.utils.io", types.ModuleType("isaaclab.utils.io")) + sys.modules.setdefault("isaaclab_rl", types.ModuleType("isaaclab_rl")) + sys.modules.setdefault("isaaclab_rl.rsl_rl", types.ModuleType("isaaclab_rl.rsl_rl")) + sys.modules.setdefault("rsl_rl", types.ModuleType("rsl_rl")) + 
sys.modules.setdefault("rsl_rl.runners", types.ModuleType("rsl_rl.runners")) + + env_mod = types.ModuleType("isaaclab.envs.manager_based_rl_env") + env_mod.ManagerBasedEnv = _ManagerBasedEnv + sys.modules["isaaclab.envs.manager_based_rl_env"] = env_mod + + recorder_mod = types.ModuleType("isaaclab.managers.recorder_manager") + recorder_mod.RecorderTerm = _RecorderTerm + recorder_mod.RecorderTermCfg = _RecorderTermCfg + sys.modules["isaaclab.managers.recorder_manager"] = recorder_mod + + utils_mod = sys.modules["isaaclab.utils"] + utils_mod.configclass = _identity_configclass + + io_mod = sys.modules["isaaclab.utils.io"] + io_mod.load_yaml = lambda _path: {"device": "cpu"} + + rsl_wrapper_mod = sys.modules["isaaclab_rl.rsl_rl"] + rsl_wrapper_mod.RslRlVecEnvWrapper = _DummyRslRlVecEnvWrapper + + runners_mod = sys.modules["rsl_rl.runners"] + runners_mod.OnPolicyRunner = _DummyOnPolicyRunner + + client_policy_mod = types.ModuleType("isaaclab_arena.policy.client_side_policy") + client_policy_mod.ClientSidePolicy = _DummyClientSidePolicy + sys.modules["isaaclab_arena.policy.client_side_policy"] = client_policy_mod + + protocol_mod = types.ModuleType("isaaclab_arena.remote_policy.action_protocol") + protocol_mod.VlnVelocityActionProtocol = _DummyProtocol + sys.modules["isaaclab_arena.remote_policy.action_protocol"] = protocol_mod + + config_mod = types.ModuleType("isaaclab_arena.remote_policy.remote_policy_config") + config_mod.RemotePolicyConfig = _DummyRemotePolicyConfig + sys.modules["isaaclab_arena.remote_policy.remote_policy_config"] = config_mod + + +_install_lightweight_stubs() + +from isaaclab_arena.metrics.vln_metrics import OracleSuccessMetric, PathLengthMetric, SPLMetric, SuccessMetric +from isaaclab_arena.policy.vln.vln_vlm_locomotion_policy import VlnVlmLocomotionPolicy + + +def test_success_and_spl_require_stop_signal(): + gt_waypoints = [[[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]]] + success_radius = [0.5] + + # Episode reaches the goal region but never emits STOP. 
+ no_stop_episode = [ + np.array( + [ + [0.0, 0.0, 0.0, 0.0], + [0.8, 0.0, 0.0, 0.0], + [1.1, 0.0, 0.0, 0.0], + ], + dtype=np.float32, + ) + ] + success_metric = SuccessMetric(gt_waypoints, success_radius, require_stop_signal=True) + spl_metric = SPLMetric(gt_waypoints, success_radius, require_stop_signal=True) + assert success_metric.compute_metric_from_recording(no_stop_episode) == 0.0 + assert spl_metric.compute_metric_from_recording(no_stop_episode) == 0.0 + + # Episode emits STOP near the goal and should count as successful. + stop_success_episode = [ + np.array( + [ + [0.0, 0.0, 0.0, 0.0], + [0.5, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 1.0], + ], + dtype=np.float32, + ) + ] + assert success_metric.compute_metric_from_recording(stop_success_episode) == 1.0 + + # Same shortest path (1m), but a longer actual path (3m) before STOP. + stop_long_path_episode = [ + np.array( + [ + [0.0, 0.0, 0.0, 0.0], + [0.0, 1.0, 0.0, 0.0], + [1.0, 1.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 1.0], + ], + dtype=np.float32, + ) + ] + assert np.isclose(spl_metric.compute_metric_from_recording(stop_long_path_episode), 1.0 / 3.0) + + +def test_oracle_success_uses_best_trajectory_point_without_stop(): + gt_waypoints = [[[0.0, 0.0, 0.0], [2.0, 0.0, 0.0]]] + success_radius = [0.5] + oracle_metric = OracleSuccessMetric(gt_waypoints, success_radius) + + # The agent reaches the goal area once, then walks away and never emits STOP. + pass_then_leave_episode = [ + np.array( + [ + [0.0, 0.0, 0.0, 0.0], + [2.0, 0.0, 0.0, 0.0], + [4.0, 0.0, 0.0, 0.0], + ], + dtype=np.float32, + ) + ] + assert oracle_metric.compute_metric_from_recording(pass_then_leave_episode) == 1.0 + + # The agent never gets close enough to the goal region. 
+ miss_episode = [ + np.array( + [ + [0.0, 0.0, 0.0, 0.0], + [0.8, 0.0, 0.0, 0.0], + [1.2, 0.0, 0.0, 0.0], + ], + dtype=np.float32, + ) + ] + assert oracle_metric.compute_metric_from_recording(miss_episode) == 0.0 + + +def test_spl_prefers_dataset_geodesic_distance_when_provided(): + gt_waypoints = [[[0.0, 0.0, 0.0], [3.0, 0.0, 0.0]]] + success_radius = [1.1] + spl_metric = SPLMetric( + gt_waypoints, + success_radius, + shortest_path_distance_per_episode=[2.0], + require_stop_signal=True, + ) + + stop_success_episode = [ + np.array( + [ + [0.0, 0.0, 0.0, 0.0], + [2.0, 0.0, 0.0, 1.0], + ], + dtype=np.float32, + ) + ] + assert np.isclose(spl_metric.compute_metric_from_recording(stop_success_episode), 1.0) + + +def test_path_length_ignores_stop_flag_column(): + metric = PathLengthMetric() + episode = [ + np.array( + [ + [0.0, 0.0, 0.0, 0.0], + [3.0, 4.0, 0.0, 0.0], + [6.0, 8.0, 0.0, 1.0], + ], + dtype=np.float32, + ) + ] + assert metric.compute_metric_from_recording(episode) == 10.0 + + +def test_low_level_warmup_does_not_step_environment(monkeypatch): + class DummyVecEnv: + def __init__(self): + self.unwrapped = types.SimpleNamespace(device="cpu") + self.step_calls = 0 + + def get_observations(self): + return {"policy": torch.zeros((1, 69), dtype=torch.float32)} + + def step(self, _actions): + self.step_calls += 1 + raise AssertionError("Warmup should not step the environment") + + class DummyRunner: + def __init__(self, vec_env, _cfg, log_dir=None, device="cpu"): + self._vec_env = vec_env + + def load(self, _checkpoint_path): + return None + + def get_inference_policy(self, device="cpu"): + def _policy(obs_td): + return torch.zeros((1, 19), dtype=torch.float32) + + return _policy + + monkeypatch.setattr("isaaclab_arena.policy.vln.vln_vlm_locomotion_policy.load_yaml", lambda _path: {"device": "cpu"}) + monkeypatch.setattr("isaaclab_arena.policy.vln.vln_vlm_locomotion_policy.OnPolicyRunner", DummyRunner) + 
monkeypatch.setattr("isaaclab_arena.policy.vln.vln_vlm_locomotion_policy.RslRlVecEnvWrapper", DummyVecEnv) + + dummy_vec_env = DummyVecEnv() + dummy_policy = types.SimpleNamespace( + _ll_agent_cfg="unused.yaml", + _ll_checkpoint_path="unused.pt", + _device="cpu", + action_dim=3, + _vel_cmd_indices=(9, 12), + _warmup_steps=5, + _ll_vec_env=None, + _ll_policy=None, + _ll_obs_td=None, + ) + + VlnVlmLocomotionPolicy._load_low_level_policy(dummy_policy, dummy_vec_env) + + assert dummy_vec_env.step_calls == 0 + assert dummy_policy._ll_vec_env is dummy_vec_env + assert dummy_policy._ll_policy is not None diff --git a/isaaclab_arena/utils/configclass.py b/isaaclab_arena/utils/configclass.py index 28c69835d..427912e85 100644 --- a/isaaclab_arena/utils/configclass.py +++ b/isaaclab_arena/utils/configclass.py @@ -127,7 +127,7 @@ def get_field_info(field: dataclasses.Field) -> tuple[str, type, Any]: if field.default is not dataclasses.MISSING: field_info += (field.default,) elif field.default_factory is not dataclasses.MISSING: - field_info += (field.default_factory,) + field_info += (dataclasses.field(default_factory=field.default_factory),) return field_info diff --git a/isaaclab_arena_environments/cli.py b/isaaclab_arena_environments/cli.py index 81df53cd8..7b2b58e20 100644 --- a/isaaclab_arena_environments/cli.py +++ b/isaaclab_arena_environments/cli.py @@ -22,6 +22,7 @@ from isaaclab_arena_environments.lift_object_environment import LiftObjectEnvironment from isaaclab_arena_environments.press_button_environment import PressButtonEnvironment from isaaclab_arena_environments.tabletop_place_upright_environment import TableTopPlaceUprightEnvironment +from isaaclab_arena_environments.vln_environment import VLNBenchmarkEnvironment # NOTE(alexmillane, 2025.09.04): There is an issue with type annotation in this file. 
# We cannot annotate types which require the simulation app to be started in order to @@ -44,6 +45,7 @@ LiftObjectEnvironment.name: LiftObjectEnvironment, TableTopPlaceUprightEnvironment.name: TableTopPlaceUprightEnvironment, Gr1TurnStandMixerKnobEnvironment.name: Gr1TurnStandMixerKnobEnvironment, + VLNBenchmarkEnvironment.name: VLNBenchmarkEnvironment, } diff --git a/isaaclab_arena_environments/vln_environment.py b/isaaclab_arena_environments/vln_environment.py new file mode 100644 index 000000000..51c2d3c55 --- /dev/null +++ b/isaaclab_arena_environments/vln_environment.py @@ -0,0 +1,392 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""VLN benchmark environment builder. + +This module defines the ``VLNBenchmarkEnvironment`` that integrates: + - A Matterport 3D background scene. + - The H1 humanoid embodiment configured for VLN. + - The VLN navigation task with R2R episode management. + +It follows the ``ExampleEnvironmentBase`` pattern used by all IsaacLab Arena +environments, so it plugs into the CLI and ``ArenaEnvBuilder`` seamlessly. + +Usage (CLI):: + + python -m isaaclab_arena.evaluation.policy_runner \\ + --policy_type isaaclab_arena.policy.vln.vln_vlm_locomotion_policy.VlnVlmLocomotionPolicy \\ + --remote_host localhost --remote_port 5555 \\ + --num_episodes 10 \\ + VLN_Benchmark \\ + --usd_path /path/to/matterport.usd \\ + --r2r_dataset_path /path/to/vln_ce_isaac_v1.json.gz +""" + +from __future__ import annotations + +import argparse +import os + +from isaaclab_arena_environments.example_environment_base import ExampleEnvironmentBase + + +def _patch_usd_resolved_path(): + """Monkey-patch omni.usd functions that fail with ``Ar.ResolvedPath``. 
+ + Isaac Sim 5.1.0's ``is_usd_crate_file`` and + ``is_usd_crate_file_version_supported`` internally call + ``Sdf.FileFormat.GetFileExtension`` and ``Usd.CrateInfo.Open`` with + ``Ar.ResolvedPath`` objects, but those C++ APIs only accept ``str``. + Wrapping the outer function does not help because the ``ResolvedPath`` + is created as a *local variable* inside the function body. + + The fix replaces both functions with clean re-implementations that + convert every path to ``str`` before passing to C++ APIs. + """ + try: + import omni.usd + import omni.usd._impl.utils as _usd_utils + from pxr import Ar, Sdf, Usd + + def _fixed_is_usd_crate_file(filepath): + ext = Sdf.FileFormat.GetFileExtension(str(filepath)) + return ext in ("usdc", "usd") + + def _fixed_is_usd_crate_file_version_supported(filepath): + filepath = str(filepath) + if not _fixed_is_usd_crate_file(filepath): + return True + try: + resolved = Ar.GetResolver().Resolve(filepath) + resolved_str = str(resolved) if resolved else filepath + if not resolved_str: + return False + crate_info = Usd.CrateInfo.Open(resolved_str) + return crate_info is not None + except Exception: + return True + + _usd_utils.is_usd_crate_file = _fixed_is_usd_crate_file + _usd_utils.is_usd_crate_file_version_supported = _fixed_is_usd_crate_file_version_supported + omni.usd.is_usd_crate_file = _fixed_is_usd_crate_file + omni.usd.is_usd_crate_file_version_supported = _fixed_is_usd_crate_file_version_supported + print("[VLN] Patched omni.usd ResolvedPath compatibility (full replacement).") + except Exception as e: + print(f"[VLN] Warning: could not patch omni.usd: {e}") + + +class VLNBenchmarkEnvironment(ExampleEnvironmentBase): + """IsaacLab Arena environment for VLN benchmarking.""" + + name: str = "h1_vln_matterport" + + def get_env(self, args_cli: argparse.Namespace): + """Build and return the VLN environment. + + Multi-env note: + When ``num_envs > 1``, each env gets a full copy of the Matterport + scene. 
The ``env_spacing`` should be large enough that scenes don't + overlap (Matterport houses are typically 20-50m wide). The default + is overridden to 100m if the user hasn't set it explicitly. + """ + # Currently only num_envs=1 is supported. Multi-env requires + # per-env instruction tracking in the VLM server and dynamic scene + # switching, which are not yet implemented. + num_envs = getattr(args_cli, "num_envs", 1) + if num_envs != 1: + raise ValueError( + f"VLN benchmark currently only supports num_envs=1 (got {num_envs}). " + f"Multi-env support requires per-env VLM instruction tracking " + f"and is planned for a future release." + ) + + # Fix Isaac Sim 5.1.0 USD ResolvedPath compatibility issue + _patch_usd_resolved_path() + + # Matterport scenes are large — override default env_spacing if user + # hasn't set a custom value (the Arena default is 30m, too small). + if not hasattr(args_cli, "_env_spacing_set_by_user"): + if getattr(args_cli, "num_envs", 1) > 1 and args_cli.env_spacing < 100.0: + print( + f"[VLN] Overriding env_spacing from {args_cli.env_spacing}m to 100m " + f"for Matterport scenes (num_envs={args_cli.num_envs})." 
+ ) + args_cli.env_spacing = 100.0 + + # Delayed imports — require simulation app to be running + import isaaclab.sim as sim_utils + from isaaclab_arena.environments.isaaclab_arena_environment import IsaacLabArenaEnvironment + from isaaclab_arena.scene.scene import Scene + + from isaaclab_arena.assets.matterport_background import MatterportBackground + from isaaclab_arena.embodiments.h1.h1 import _DEFAULT_H1_CAMERA_OFFSET, _DEFAULT_H1_FOLLOW_CAMERA_OFFSET + from isaaclab_arena.embodiments.h1.h1_vln import H1VlnEmbodiment + from isaaclab_arena.tasks.vln_r2r_matterport_task import VlnR2rMatterportTask + from isaaclab_arena.utils.pose import Pose + + # 1) Background: Matterport 3D scene + ground_plane_z = None if getattr(args_cli, "disable_matterport_ground_plane", False) else 0.0 + background = MatterportBackground( + usd_path=args_cli.usd_path, + ground_plane_z=ground_plane_z, + use_global_prim=getattr(args_cli, "use_global_matterport_prim", False), + explicit_mesh_colliders=getattr(args_cli, "enable_matterport_child_mesh_colliders", False), + mesh_collider_approximation=getattr(args_cli, "matterport_mesh_collider_type", "triangle"), + collision_overlay_usd_path=getattr(args_cli, "matterport_collision_usd_path", None), + floor_collision_usd_path=getattr(args_cli, "matterport_floor_collision_usd_path", None), + obstacle_collision_usd_path=getattr(args_cli, "matterport_obstacle_collision_usd_path", None), + ) + + # 2) Embodiment: H1 humanoid with camera + use_tiled = getattr(args_cli, "use_tiled_camera", False) + enable_follow = not getattr(args_cli, "no_follow_camera", False) + cam_res = getattr(args_cli, "camera_resolution", 512) + head_camera_offset = _DEFAULT_H1_CAMERA_OFFSET + follow_camera_offset = _DEFAULT_H1_FOLLOW_CAMERA_OFFSET + if getattr(args_cli, "head_camera_offset_xyz", None) is not None: + head_camera_offset = Pose( + position_xyz=tuple(args_cli.head_camera_offset_xyz), + rotation_wxyz=_DEFAULT_H1_CAMERA_OFFSET.rotation_wxyz, + ) + if getattr(args_cli, 
"follow_camera_offset_xyz", None) is not None: + follow_camera_offset = Pose( + position_xyz=tuple(args_cli.follow_camera_offset_xyz), + rotation_wxyz=_DEFAULT_H1_FOLLOW_CAMERA_OFFSET.rotation_wxyz, + ) + if getattr(args_cli, "enable_height_scanner", False) and not getattr(args_cli, "use_global_matterport_prim", False): + raise ValueError("Height scanner currently requires --use_global_matterport_prim so it can raycast /World/matterport.") + embodiment = H1VlnEmbodiment( + enable_cameras=True, + camera_offset=head_camera_offset, + use_tiled_camera=use_tiled, + enable_follow_camera=enable_follow, + follow_camera_offset=follow_camera_offset, + camera_resolution=cam_res, + enable_head_depth=getattr(args_cli, "enable_head_camera_depth", False), + enable_height_scanner=getattr(args_cli, "enable_height_scanner", False), + height_scanner_debug_vis=getattr(args_cli, "height_scanner_debug_vis", False), + ) + + # 3) Task: VLN navigation with R2R episodes + # Extract scene_id from the USD path to filter episodes. + usd_scene_id = os.path.splitext(os.path.basename(args_cli.usd_path))[0] + + episode_indices = None + if hasattr(args_cli, "episode_start") and args_cli.episode_start is not None: + end = getattr(args_cli, "episode_end", args_cli.episode_start + 1) + episode_indices = list(range(args_cli.episode_start, end)) + + task = VlnR2rMatterportTask( + robot=embodiment, + r2r_dataset_path=args_cli.r2r_dataset_path, + episode_indices=episode_indices, + episode_length_s=getattr(args_cli, "episode_length_s", 60.0), + success_radius=getattr(args_cli, "success_radius", 3.0), + scene_filter=usd_scene_id, + robot_root_height_offset=getattr(args_cli, "robot_root_height_offset", 1.0), + require_stop_for_success=not getattr(args_cli, "allow_success_without_stop", False), + ) + + # 4) Scene: Matterport background + scene = Scene(assets=[background]) + + # 5) Simulation parameters callback + # These MUST match the low-level locomotion policy training config. 
+ # Default values come from NaVILA-Bench: + # h1_matterport_base_cfg.py -> H1MatterportBaseCfg.__post_init__() + sim_dt = getattr(args_cli, "sim_dt", 0.005) + decimation = getattr(args_cli, "sim_decimation", 4) + + def vln_sim_cfg_callback(env_cfg): + env_cfg.sim.dt = sim_dt # 200 Hz physics + env_cfg.decimation = decimation # 50 Hz control (200/4) + env_cfg.sim.render_interval = decimation + env_cfg.sim.disable_contact_processing = True + env_cfg.sim.physics_material = sim_utils.RigidBodyMaterialCfg( + static_friction=1.0, + dynamic_friction=1.0, + friction_combine_mode="max", + restitution_combine_mode="max", + ) + return env_cfg + + # 6) Compose the Arena environment + arena_env = IsaacLabArenaEnvironment( + name=self.name, + scene=scene, + embodiment=embodiment, + task=task, + env_cfg_callback=vln_sim_cfg_callback, + ) + return arena_env + + @staticmethod + def add_cli_args(parser: argparse.ArgumentParser) -> None: + """Add VLN-specific CLI arguments.""" + group = parser.add_argument_group("VLN Benchmark", "VLN benchmark environment arguments") + group.add_argument( + "--usd_path", + type=str, + required=True, + help="Path to the Matterport USD scene file.", + ) + group.add_argument( + "--r2r_dataset_path", + type=str, + required=True, + help="Path to the R2R VLN dataset (e.g. vln_ce_isaac_v1.json.gz).", + ) + group.add_argument( + "--episode_start", + type=int, + default=None, + help="Starting episode index (inclusive). If None, use all episodes.", + ) + group.add_argument( + "--episode_end", + type=int, + default=None, + help="Ending episode index (exclusive). 
Used with --episode_start.", + ) + group.add_argument( + "--episode_length_s", + type=float, + default=60.0, + help="Maximum episode duration in seconds (default: 60).", + ) + group.add_argument( + "--success_radius", + type=float, + default=3.0, + help="Distance threshold for goal success (default: 3.0m).", + ) + group.add_argument( + "--use_tiled_camera", + action="store_true", + default=False, + help="Use TiledCamera for parallel evaluation (default: False).", + ) + group.add_argument( + "--no_follow_camera", + action="store_true", + default=False, + help="Disable the third-person follow camera (default: enabled).", + ) + group.add_argument( + "--camera_resolution", + type=int, + default=512, + help="Camera resolution in pixels (default: 512, use 1024 for high-quality demo).", + ) + group.add_argument( + "--head_camera_offset_xyz", + type=float, + nargs=3, + default=None, + metavar=("X", "Y", "Z"), + help="Override head camera XYZ offset in pelvis frame.", + ) + group.add_argument( + "--follow_camera_offset_xyz", + type=float, + nargs=3, + default=None, + metavar=("X", "Y", "Z"), + help="Override follow camera XYZ offset in pelvis frame.", + ) + group.add_argument( + "--enable_head_camera_depth", + action="store_true", + default=False, + help="Expose head camera depth alongside RGB for debugging; not sent to the VLM by default.", + ) + group.add_argument( + "--enable_height_scanner", + action="store_true", + default=False, + help="Enable a NaVILA-Bench-style height scanner sensor for debugging or future experiments.", + ) + group.add_argument( + "--height_scanner_debug_vis", + action="store_true", + default=False, + help="Visualize height-scanner rays when the height scanner is enabled.", + ) + group.add_argument( + "--disable_matterport_ground_plane", + action="store_true", + default=False, + help="Disable the fallback invisible ground plane and rely on Matterport mesh collision only.", + ) + group.add_argument( + "--use_global_matterport_prim", + 
action="store_true", + default=False, + help="Spawn Matterport at /World/matterport with collision_group=-1 for collision experiments.", + ) + group.add_argument( + "--enable_matterport_child_mesh_colliders", + action="store_true", + default=False, + help="Explicitly apply collision and mesh-collider schemas to descendant Matterport Mesh prims.", + ) + group.add_argument( + "--matterport_mesh_collider_type", + type=str, + default="triangle", + choices=("triangle", "sdf", "convex_decomposition"), + help="Approximation used when explicit Matterport child-mesh colliders are enabled.", + ) + group.add_argument( + "--matterport_collision_usd_path", + type=str, + default=None, + help="Legacy combined collision-only USD layered under the visual Matterport scene.", + ) + group.add_argument( + "--matterport_floor_collision_usd_path", + type=str, + default=None, + help="Optional floor-only collision USD layered under the visual Matterport scene.", + ) + group.add_argument( + "--matterport_obstacle_collision_usd_path", + type=str, + default=None, + help="Optional obstacle-only collision USD layered under the visual Matterport scene.", + ) + group.add_argument( + "--robot_root_height_offset", + type=float, + default=1.0, + help="Added to dataset start_position.z when resetting the robot root (default tuned for H1 pelvis root).", + ) + group.add_argument( + "--allow_success_without_stop", + action="store_true", + default=False, + help=( + "Legacy mode: allow proximity-only success without requiring VLM STOP/DONE. " + "By default the benchmark follows STOP-gated success." 
+ ), + ) + + # Simulation parameters — must match the low-level policy training config + sim_group = parser.add_argument_group( + "Simulation", "Physics simulation parameters (must match low-level policy training)" + ) + sim_group.add_argument( + "--sim_dt", + type=float, + default=0.005, + help="Physics simulation timestep in seconds (default: 0.005 = 200Hz).", + ) + sim_group.add_argument( + "--sim_decimation", + type=int, + default=4, + help="Number of physics steps per policy step (default: 4, giving 50Hz control).", + ) + diff --git a/isaaclab_arena_navila/README.md b/isaaclab_arena_navila/README.md new file mode 100644 index 000000000..df6582186 --- /dev/null +++ b/isaaclab_arena_navila/README.md @@ -0,0 +1,549 @@ +# NaVILA VLN Benchmark for IsaacLab Arena + +**Vision-Language Navigation (VLN)** benchmark using the [NaVILA](https://github.com/AnjieCheng/NaVILA) VLM on the H1 humanoid robot in Matterport 3D indoor scenes. + +## Architecture + +``` + ISAAC SIM CONTAINER NAVILA SERVER CONTAINER + ══════════════════ ═══════════════════════ + + ┌──────────────┐ ┌───────────────────┐ + │ Matterport │ │ VLN-CE R2R Data │ + │ Scene (.usd) │ │ (.json.gz) │ + └──────┬───────┘ └───────┬───────────┘ + │ │ + ▼ ▼ + ┌──────────────┐ ┌───────────────────┐ + │ H1 Robot │ │VlnR2rMatterport │ + │ + Head Cam │ │Task │ + │ + Follow Cam │ │ scene_filter │ + └──────┬───────┘ │ termination │ + │ │ metrics (SPL...) 
│ + │ └───────┬───────────┘ + │ │ + ▼ ▼ + ┌─────────────────────────────────┐ + │ VlnVlmLocomotionPolicy │ + │ │ + │ ┌─ High-level ──────────────┐ │ + │ │ Send 1 RGB frame ────────┼──┼──► ZeroMQ ──►┐ + │ │ Receive vel_cmd ◄────────┼──┼──◄ ZeroMQ ◄──┤ + │ └────────────┬──────────────┘ │ │ + │ │ │ │ + │ ┌────────────▼──────────────┐ │ │ + │ │ Low-level: RSL-RL │ │ │ + │ │ Inject vel_cmd → obs │ │ │ + │ │ NN forward → joint act │ │ │ + │ └────────────┬──────────────┘ │ │ + └───────────────┼─────────────────┘ │ + │ │ + ▼ │ + ┌─────────────────────────────┐ ┌─────────────▼────────────┐ + │ PhysX GPU Simulation │ │ NaVilaServerPolicy │ + │ joint act → robot moves │ │ │ + │ → new camera RGB │ │ 1. Receive 1 RGB frame │ + └─────────────────────────────┘ │ 2. Append to history │ + │ 3. Uniform sample 8 │ + │ from full history │ + │ 4. VLM inference │ + │ 5. Parse → vel_cmd │ + │ 6. Return vel_cmd + dur │ + └──────────────────────────┘ +``` + +## Quick Start + +### 1. Start the NaVILA VLM Server + +```bash +# Option A: Use the launch script (rebuilds Docker if needed) +bash docker/run_vln_server.sh -m ~/models/navila/navila-llama3-8b-8f --port 5555 + +# Option B: Manual start with code mounted (no rebuild) +docker run --rm -d --gpus all --net host \ + --name vln_policy_server_container \ + -v ~/models:/models \ + -v ~/IsaacLab-Arena/isaaclab_arena_navila:/workspace/isaaclab_arena_navila \ + -v ~/IsaacLab-Arena/isaaclab_arena/remote_policy:/workspace/isaaclab_arena/remote_policy \ + vln_policy_server:latest \ + --host 0.0.0.0 --port 5555 --timeout_ms 15000 \ + --policy_type isaaclab_arena_navila.navila_server_policy.NaVilaServerPolicy \ + --model_path /models/navila/navila-llama3-8b-8f +``` + +Wait for: `listening on tcp://0.0.0.0:5555` + +### 2. 
Run VLN Evaluation (Client) + +```bash +# Enter Isaac Sim container +bash docker/run_docker.sh + +# Inside the container: +/isaac-sim/python.sh -u -m isaaclab_arena.evaluation.policy_runner \ + --enable_cameras --num_envs 1 \ + --policy_type isaaclab_arena.policy.vln.vln_vlm_locomotion_policy.VlnVlmLocomotionPolicy \ + --remote_host localhost --remote_port 5555 \ + --ll_checkpoint_path isaaclab_arena/policy/vln/pretrained/h1_navila_locomotion.pt \ + --ll_agent_cfg isaaclab_arena/policy/vln/pretrained/h1_navila_agent.yaml \ + --num_episodes 10 \ + h1_vln_matterport \ + --usd_path /path/to/matterport_usd/zsNo4HB9uLZ/zsNo4HB9uLZ.usd \ + --r2r_dataset_path /path/to/vln_ce_isaac_v1.json.gz +``` + +Add `--headless` for batch evaluation without GUI. + +## Matterport Scene Collision + +The default VLN-CE-Isaac Matterport USD scenes are primarily **visual meshes**. +They render correctly, but they do not provide reliable PhysX collision by +default. The current benchmark therefore uses an **overlay collision** approach: + +- keep the original Matterport visual USD for rendering +- generate one or more collision-only USDA assets +- attach those collision USDA assets under `/World/matterport` +- let PhysX collide with the overlay mesh instead of trusting the visual mesh + +By default, if no collision overlay is provided, an invisible ground plane at +`z=0` provides basic support so the robot can stand and walk. + +### Why the default Matterport USD is not enough + +The common failure mode is that the visual USD is a poor proxy for stable +collision: + +1. raw scan / mesh -> visual USD +2. visual USD -> OBJ +3. OBJ -> collision USDA + +If geometric detail is already lost or distorted in step 1, the later collision +asset inherits that loss. 
In practice this leads to: + +- thin door-frame shells +- floating or near-ground fragments +- furniture-adjacent blue shells that do not match the visible structure +- floor noise that destabilizes humanoid locomotion + +For better collision fidelity, prefer starting from a **cleaner raw Matterport / +MP3D mesh** instead of relying only on the already-converted visual USD. + +### Collision overlay modes + +The runtime currently supports three collision-overlay styles: + +- **legacy combined overlay** + - one collision-only USDA under the visual scene +- **split overlay** + - `floor` collision for locomotion support + - `obstacle` collision for walls, cabinets, sofas, tables, and door frames +- **explicit child-mesh colliders** + - apply collision schemas directly to the visual USD descendant meshes + +The split `floor + obstacle` route is usually the most practical for debugging +and tuning. + +### Recommended asset layout + +For each scene, keep: + +- visual scene: + - `matterport_usd//.usd` +- floor collision: + - `collision/_floor_collision.usda` +- obstacle collision: + - `collision/_obstacle_collision.usda` +- optional combined collision: + - `collision/_combined_collision.usda` + +Put intermediate OBJ files and temporary logs under `claude_tmp/`. + +### Build workflow + +#### 1. Export raw mesh to OBJ + +If you have the original raw MP3D / Matterport mesh, export or convert it to +OBJ first: + +```bash +/isaac-sim/python.sh isaaclab_arena/scripts/assets/export_matterport_collision_proxy.py \ + --input_path /path/to/raw_mp3d_mesh.glb \ + --output_path /tmp/scene_raw.obj +``` + +If you do not have the raw mesh yet, you can still use the current visual USD as +a fallback, but collision quality will usually be worse. + +#### 2. 
Clean / simplify the OBJ externally + +Before building collision assets, the most useful cleanup is: + +- remove tiny floating fragments +- remove ceiling-only details that do not matter to locomotion +- smooth or rebuild the floor if it is noisy or broken +- simplify small decorative clutter +- keep large walls, cabinets, sofas, tables, and door frames + +Try to make the floor cleaner than the obstacle mesh. For humanoid locomotion, +floor quality usually matters more than visual fidelity. + +#### 3. Build split floor / obstacle USDA files + +Use the builder: + +```bash +python3 isaaclab_arena/scripts/assets/build_matterport_collision_layers.py \ + --input_obj /tmp/scene_clean.obj \ + --output_floor_usd /path/to/collision/scene_floor_collision.usda \ + --output_obstacle_usd /path/to/collision/scene_obstacle_collision.usda \ + --output_combined_usd /path/to/collision/scene_combined_collision.usda \ + --floor_min_z -0.25 \ + --floor_max_z 0.35 \ + --floor_normal_min_z 0.9 +``` + +This split is heuristic and scene-dependent. The key idea is: + +- floor-like triangles: low enough and upward-facing +- everything else: obstacle + +#### 4. Filter obstacle fragments + +After the initial split, a collision mesh is often still too noisy. 
The new +`filter_collision_usda.py` script removes connected components that look like: + +- tiny near-ground shards +- small floating fragments +- optionally, medium-sized suspended shells after a second pass + +Example first pass: + +```bash +python3 isaaclab_arena/scripts/assets/filter_collision_usda.py \ + --input_usd /path/to/collision/scene_obstacle_collision.usda \ + --output_usd /path/to/collision/scene_obstacle_collision_clean.usda \ + --ground_max_faces 20 \ + --ground_max_top_z 0.30 \ + --ground_max_diag 0.35 \ + --floating_max_faces 10 \ + --floating_min_bottom_z 0.35 \ + --floating_max_diag 0.25 +``` + +This stage is what removes the obvious “bad collision geometry” that makes the +robot snag, trip, or appear to hit empty space. + +### Runtime configurations + +#### A. Stable baseline + +Keep the invisible ground plane, add only obstacle collision: + +```bash +/isaac-sim/python.sh -u -m isaaclab_arena.evaluation.policy_runner \ + ... \ + h1_vln_matterport \ + --usd_path /path/to/scene.usd \ + --r2r_dataset_path /path/to/vln_ce_isaac_v1.json.gz \ + --use_global_matterport_prim \ + --matterport_obstacle_collision_usd_path /path/to/scene_obstacle_collision.usda +``` + +Use this first when debugging torso clipping, arm clipping, or corridor contact +without destabilizing the feet. + +#### B. Split full collision + +Disable the fallback ground plane and use both floor + obstacle collision: + +```bash +/isaac-sim/python.sh -u -m isaaclab_arena.evaluation.policy_runner \ + ... \ + h1_vln_matterport \ + --usd_path /path/to/scene.usd \ + --r2r_dataset_path /path/to/vln_ce_isaac_v1.json.gz \ + --use_global_matterport_prim \ + --disable_matterport_ground_plane \ + --matterport_floor_collision_usd_path /path/to/scene_floor_collision.usda \ + --matterport_obstacle_collision_usd_path /path/to/scene_obstacle_collision.usda +``` + +Use this after the floor mesh looks smooth enough to replace the fallback ground +plane. + +#### C. 
Legacy combined overlay + +This mode is still supported: + +```bash +/isaac-sim/python.sh -u -m isaaclab_arena.evaluation.policy_runner \ + ... \ + h1_vln_matterport \ + --usd_path /path/to/scene.usd \ + --r2r_dataset_path /path/to/vln_ce_isaac_v1.json.gz \ + --use_global_matterport_prim \ + --disable_matterport_ground_plane \ + --matterport_collision_usd_path /path/to/scene_combined_collision.usda +``` + +Do not mix the legacy combined overlay with the split `floor / obstacle` +overlays in the same run. + +### Visual inspection in Isaac Sim + +The recommended local inspection tool is: + +- `isaaclab_arena/scripts/assets/debug_matterport_collision_stage.py` + +Use it to export a debug stage and reopen it in Isaac Sim. When checking the +result, verify: + +1. Stage paths exist: + - `/World/matterport` + - `/World/matterport/collision` + - `/World/matterport/collisionFloor` + - `/World/matterport/collisionObstacle` + - `/World/GroundPlane` +2. Collision visualization is enabled: + - eye icon -> `Show by type` -> `Physics Mesh` -> `All` + - `Tools` -> `Physics API Editor` + - `Window` -> `Simulation` -> `Debug` +3. Geometry alignment looks reasonable: + - obstacle shells are not drifting away from walls or furniture + - floor meshes are not floating above or below the visible floor + - door frames are not dominated by noisy thin slivers + - the fallback ground plane does not fight a custom floor mesh + +### Validation ladder + +Use this order when debugging a new scene: + +1. Visual USD + ground plane only +2. Ground plane + obstacle collision +3. 
Floor + obstacle collision, no ground plane + +This isolates whether failures come from: + +- obstacle mesh quality +- floor mesh quality +- low-level locomotion mismatch + +### Common failure modes + +#### Robot clips through furniture + +Likely causes: + +- obstacle collision is too coarse +- only the ground plane is active +- upper-body posture is too wide for corridor / furniture clearance + +What to try: + +- enable obstacle-only collision first +- keep large furniture and wall boundaries in the obstacle mesh +- reduce decorative clutter that adds noisy contacts but does not help body clearance + +#### Robot gets stuck and stops moving + +Likely causes: + +- floor mesh is noisy or non-manifold +- floor overlay and ground plane both support the robot in conflicting ways +- `robot_root_height_offset` is wrong +- the low-level locomotion policy was not trained for contact-rich indoor mesh geometry + +What to try: + +- keep the ground plane and add only obstacle collision first +- rebuild a cleaner floor mesh +- tune `--robot_root_height_offset` +- compare against a ground-plane-only baseline + +#### Robot snags on door frames or thin geometry + +Likely causes: + +- obstacle collision is too detailed for the current locomotion controller +- the robot body is wider than the assumed corridor + +What to try: + +- simplify sharp mesh details near doors +- test `triangle` vs `convex_decomposition` +- compare with and without obstacle collision + +### Handoff checklist + +If another machine or another person needs to continue the work, keep these +unchanged: + +- the visual Matterport USD +- the VLN episode file such as `vln_ce_isaac_v1.json.gz` +- the existing Arena / NaVILA evaluation pipeline + +Replace or improve only: + +- the collision-only assets used by PhysX + +For the other machine, the most useful assets are: + +- `matterport_usd//.usd` +- `vln_ce_isaac_v1.json.gz` +- a cleaner raw MP3D / Matterport mesh +- generated collision outputs: + - 
`_floor_collision.usda` + - `_obstacle_collision.usda` + - optional `_combined_collision.usda` + +In this branch, the collision-related code and docs to carry forward are: + +- `isaaclab_arena/assets/matterport_background.py` +- `isaaclab_arena_environments/vln_environment.py` +- `isaaclab_arena/scripts/assets/export_matterport_collision_proxy.py` +- `isaaclab_arena/scripts/assets/build_matterport_collision_layers.py` +- `isaaclab_arena/scripts/assets/filter_collision_usda.py` +- `isaaclab_arena/scripts/assets/debug_matterport_collision_stage.py` +- `isaaclab_arena/scripts/assets/open_usd_stage.py` + +Collision-related CLI flags: + +| Flag | Default | Description | +|------|---------|-------------| +| `--use_global_matterport_prim` | off | Spawn Matterport at `/World/matterport` with `collision_group=-1`. Required for collision overlay and height scanner. | +| `--disable_matterport_ground_plane` | off | Remove the invisible z=0 ground plane. Use when a collision overlay provides floor support. | +| `--matterport_collision_usd_path` | None | Legacy combined collision-only USD layered under the visual scene. | +| `--matterport_floor_collision_usd_path` | None | Floor-only collision USD layered under the visual scene. | +| `--matterport_obstacle_collision_usd_path` | None | Obstacle-only collision USD layered under the visual scene. | +| `--enable_matterport_child_mesh_colliders` | off | Explicitly apply collision schemas to descendant Mesh prims of the visual USD. | +| `--matterport_mesh_collider_type` | `triangle` | Mesh collider approximation: `triangle`, `sdf`, or `convex_decomposition`. | +| `--robot_root_height_offset` | 1.0 | Added to dataset `start_position.z` when resetting the robot (H1 pelvis is ~1m above the floor). | + +## Training Your Own Low-Level Locomotion Policy + +The VLN pipeline uses a pre-trained RSL-RL locomotion policy that converts +velocity commands `[vx, vy, yaw_rate]` into joint-position actions. 
A default +checkpoint is included at `isaaclab_arena/policy/vln/pretrained/`. + +To train a replacement checkpoint with the built-in rough-terrain environment: + +```bash +docker run --rm --gpus all --net host --entrypoint /bin/bash \ + -v "$(pwd):/workspaces/isaaclab_arena" \ + -v "/home/$USER:/home/$USER" \ + isaaclab_arena:latest -lc " + cd /workspaces/isaaclab_arena && \ + /isaac-sim/python.sh -u isaaclab_arena/scripts/reinforcement_learning/train.py \ + --headless \ + --num_envs 4096 \ + --max_iterations 1000 \ + --save_interval 50 \ + --experiment_name h1_base_rough \ + --agent_cfg_path isaaclab_arena/policy/rl_policy/h1_base_rough_policy.json \ + h1_base_rough + " +``` + +Checkpoints are saved under `logs/rsl_rl/h1_base_rough//`. +To use a newly trained checkpoint for VLN evaluation, point to it with: + +```bash +--ll_checkpoint_path logs/rsl_rl/h1_base_rough//model_.pt +--ll_agent_cfg logs/rsl_rl/h1_base_rough//params/agent.yaml +``` + +The training environment preserves the same 69-dim proprioceptive layout as the +VLN evaluation, so the velocity-command injection at `obs[:, 9:12]` remains +compatible without code changes. + +## Sensor Configuration + +### Camera Positions (CLI-configurable) + +| Camera | Default Position (pelvis frame) | FOV | CLI Override | +|--------|--------------------------------|-----|-------------| +| Head camera | `(0.1, 0.0, 0.5)` | 54 deg | `--head_camera_offset_xyz X Y Z` | +| Follow camera | `(-1.0, 0.0, 0.57)` | 100 deg | `--follow_camera_offset_xyz X Y Z` | + +### Optional Sensors + +| Flag | Description | +|------|-------------| +| `--enable_head_camera_depth` | Add depth output to the head camera (not sent to VLM by default). | +| `--enable_height_scanner` | Enable a raycast-based height scanner on the pelvis. Requires `--use_global_matterport_prim`. | +| `--height_scanner_debug_vis` | Visualize the height-scanner rays in the viewport. | +| `--camera_resolution N` | Camera resolution in pixels (default: 512). 
| +| `--no_follow_camera` | Disable the third-person follow camera. | +| `--use_tiled_camera` | Use TiledCamera for parallel multi-env evaluation. | + +### STOP Diagnostic Flags + +| Flag | Description | +|------|-------------| +| `--ignore_vlm_stop` | Never let the VLM STOP command terminate an episode. Useful for diagnosing early-stop behaviour. | +| `--min_vlm_stop_distance D` | Ignore VLM STOP when distance-to-goal exceeds `D` meters. Negative value (default) disables. | +| `--debug_vln` | Enable verbose VLM query logging and debug frame saving. | +| `--save_interval N` | Save camera frames every N steps to `--save_dir`. | +| `--save_dir DIR` | Directory for saved camera frames. | + +## CLI Parameters + +### Client Parameters (before `h1_vln_matterport`) + +| Parameter | Default | Description | +|-----------|---------|-------------| +| `--num_envs` | 1 | Must be 1 (multi-env not yet supported). | +| `--policy_type` | (required) | `isaaclab_arena.policy.vln.vln_vlm_locomotion_policy.VlnVlmLocomotionPolicy` | +| `--remote_host` | localhost | VLM server hostname. | +| `--remote_port` | 5555 | VLM server port. | +| `--ll_checkpoint_path` | (required) | RSL-RL locomotion checkpoint (.pt). | +| `--ll_agent_cfg` | (required) | RSL-RL agent config (.yaml). | +| `--warmup_steps` | 200 | Low-level policy warmup steps. | +| `--num_episodes` | (required) | Number of episodes to evaluate. | + +### Environment Parameters (after `h1_vln_matterport`) + +| Parameter | Default | Description | +|-----------|---------|-------------| +| `--usd_path` | (required) | Path to Matterport USD scene file. | +| `--r2r_dataset_path` | (required) | Path to VLN-CE R2R dataset (.json.gz). | +| `--episode_start` | None | Start index within filtered episodes. | +| `--episode_end` | None | End index (exclusive) within filtered episodes. | +| `--episode_length_s` | 60 | Max episode duration in seconds. | +| `--success_radius` | 3.0 | Success distance threshold (meters). 
| + +### Server Parameters + +| Parameter | Default | Description | +|-----------|---------|-------------| +| `--model_path` | (required) | Path to NaVILA / LLaVA model checkpoint. | +| `--port` | 5555 | Server listening port. | +| `--num_video_frames` | 8 | Number of frames sent to VLM per query. | +| `--conv_mode` | llama_3 | LLaVA conversation template. | +| `--history_padding_mode` | repeat_first | Early-frame padding: `repeat_first` or `black` (NaVILA-Bench style). | +| `--max_history_frames` | 200 | Maximum frames kept in server-side history. | +| `--max_new_tokens` | 80 | Maximum generated tokens per VLM query. | + +## Metrics + +| Metric | Description | +|--------|-------------| +| **Success** | Fraction of episodes where the agent emits `STOP` and the final distance-to-goal is below `success_radius`. | +| **SPL** | Mean over episodes of `S_i * l_i / max(p_i, l_i)`, where `S_i` is STOP-gated success, `l_i` is shortest-path distance, and `p_i` is actual path length. | +| **Distance-to-Goal** | Approximate geodesic distance from the final position to the goal in the XY plane, estimated from the reference path. | +| **Path Length** | Total distance traversed (XY only). | + +The benchmark follows a `DONE/STOP`-style evaluation protocol: without an explicit `STOP`, an episode is counted as unsuccessful even if the agent passes near the goal. + +## Known Limitations + +- **`num_envs` must be 1.** Multi-env requires per-env VLM instruction tracking. +- **Scene switching requires restart.** Matterport USD is loaded at initialization. +- **Some episode start positions are outside the building envelope.** The + Matterport scan may have incomplete or visually degraded geometry at these + locations, which can confuse the VLM and cause the robot to get stuck. + Consider filtering edge-case episodes or adjusting `--episode_start` / `--episode_end`. +- **Collision proxy is experimental.** The exported proxy mesh may not perfectly + match the original building geometry. 
Use the default ground plane for stable + baseline evaluation. diff --git a/isaaclab_arena_navila/__init__.py b/isaaclab_arena_navila/__init__.py new file mode 100644 index 000000000..2ec11e967 --- /dev/null +++ b/isaaclab_arena_navila/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""NaVILA VLM server policy for VLN navigation. + +This package provides the NaVILA-specific server-side policy that wraps +the NaVILA/LLaVA model for generating navigation velocity commands. +Similar in structure to ``isaaclab_arena_gr00t``. +""" + +from isaaclab_arena_navila.navila_server_policy import NaVilaServerPolicy # noqa: F401 diff --git a/isaaclab_arena_navila/navila_server_policy.py b/isaaclab_arena_navila/navila_server_policy.py new file mode 100644 index 000000000..edfd49987 --- /dev/null +++ b/isaaclab_arena_navila/navila_server_policy.py @@ -0,0 +1,358 @@ +# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""NaVILA server-side policy for VLN navigation. + +This policy runs on the **server** side (typically a GPU machine with the +NaVILA / LLaVA model loaded). It receives RGB image observations from the +client, runs VLM inference, parses the text output into a velocity command, +and returns it. + +To add a different VLM backend (e.g. GR00T), create a new server policy +in ``isaaclab_arena_navila/`` following this pattern. 
+
+Launch via the standard Arena remote policy server runner::
+
+    python -m isaaclab_arena.remote_policy.remote_policy_server_runner \\
+        --policy_type isaaclab_arena_navila.navila_server_policy.NaVilaServerPolicy \\
+        --model_path /path/to/navila/checkpoint \\
+        --port 5555
+"""
+
+from __future__ import annotations
+
+import argparse
+from dataclasses import dataclass
+from typing import Any
+
+import numpy as np
+import torch
+from PIL import Image
+
+from llava.constants import IMAGE_TOKEN_INDEX
+from llava.conversation import SeparatorStyle, conv_templates
+from llava.mm_utils import (
+    KeywordsStoppingCriteria,
+    get_model_name_from_path,
+    process_image,
+    tokenizer_image_token,
+)
+from llava.model.builder import load_pretrained_model
+
+from isaaclab_arena.remote_policy.action_protocol import ActionProtocol, VlnVelocityActionProtocol
+from isaaclab_arena.remote_policy.server_side_policy import ServerSidePolicy
+
+
+# ========================================================================== #
+# VLM command parsing                                                        #
+# ========================================================================== #
+
+
+def parse_vlm_output_to_velocity(text: str) -> tuple[np.ndarray, float]:
+    """Parse VLM text output into a velocity command and duration.
+
+    Recognized commands (case-insensitive, matched by substring):
+    - "stop"                    -> [0, 0, 0], duration 0.0
+    - "turn left [15|30|45]"    -> [0, 0, +pi/6], duration 0.5-1.5s
+    - "turn right [15|30|45]"   -> [0, 0, -pi/6], duration 0.5-1.5s
+    - "move forward [25|50|75]" -> [0.5, 0, 0], duration 0.5-1.5s
+
+    "stop" is checked FIRST: outputs such as "stop moving" or
+    "move forward and then stop" must terminate the episode rather than
+    be parsed as motion (the broad bare-"move" fallback below would
+    otherwise swallow them, breaking STOP-gated success).
+
+    Unrecognized text falls back to a short forward step.
+
+    Returns:
+        (vel_cmd, duration) where vel_cmd is [vx, vy, yaw_rate].
+    """
+    t = text.lower().strip()
+
+    # STOP must take priority over every motion keyword (see docstring).
+    if "stop" in t:
+        return np.array([0.0, 0.0, 0.0], dtype=np.float32), 0.0
+
+    if "turn left" in t:
+        vx, vy, wz = 0.0, 0.0, np.pi / 6.0
+        # Larger commanded angle -> hold the yaw rate longer.
+        if "45" in t:
+            dur = 1.5
+        elif "30" in t:
+            dur = 1.0
+        else:
+            dur = 0.5
+        return np.array([vx, vy, wz], dtype=np.float32), dur
+
+    if "turn right" in t:
+        vx, vy, wz = 0.0, 0.0, -np.pi / 6.0
+        if "45" in t:
+            dur = 1.5
+        elif "30" in t:
+            dur = 1.0
+        else:
+            dur = 0.5
+        return np.array([vx, vy, wz], dtype=np.float32), dur
+
+    if "move forward" in t or "move" in t:
+        vx, vy, wz = 0.5, 0.0, 0.0
+        # Larger commanded distance -> hold the forward velocity longer.
+        if "75" in t:
+            dur = 1.5
+        elif "50" in t:
+            dur = 1.0
+        elif "25" in t:
+            dur = 0.5
+        else:
+            dur = 0.5
+        return np.array([vx, vy, wz], dtype=np.float32), dur
+
+    # Unrecognized output: take a conservative short forward step.
+    return np.array([0.5, 0.0, 0.0], dtype=np.float32), 0.5
+
+
+# ========================================================================== #
+# Server-side policy                                                         #
+# ========================================================================== #
+
+
+@dataclass
+class NaVilaServerPolicyConfig:
+    """Configuration for :class:`NaVilaServerPolicy`."""
+
+    # Path to the NaVILA / LLaVA checkpoint directory.
+    model_path: str = ""
+    # Device for VLM inference.
+    device: str = "cuda"
+    # Number of frames sent to the VLM per query (history + current).
+    num_video_frames: int = 8
+    # LLaVA conversation template name.
+    conv_mode: str = "llama_3"
+    # Early-history padding: "repeat_first" or "black" ("black" matches NaVILA-Bench).
+    history_padding_mode: str = "repeat_first"
+    # Maximum number of RGB frames retained in the server-side episode history.
+    max_history_frames: int = 200
+    # Maximum tokens generated per VLM query.
+    max_new_tokens: int = 80
+
+
+class NaVilaServerPolicy(ServerSidePolicy):
+    """NaVILA VLM server policy for VLN navigation.
+
+    Wraps the NaVILA (LLaVA-based) model to generate navigation
+    velocity commands from RGB image observations. Maintains a
+    full episode image history and uses uniform sampling (matching
+    the NaVILA-Bench evaluation protocol) so the VLM can determine
+    task completion and output "stop".
+    """
+
+    config_class = NaVilaServerPolicyConfig
+
+    def __init__(self, config: NaVilaServerPolicyConfig) -> None:
+        """Validate the config and load the VLM checkpoint onto the device.
+
+        Raises:
+            ValueError: if ``history_padding_mode`` is unknown, or
+                ``max_history_frames`` / ``max_new_tokens`` are non-positive.
+        """
+        super().__init__(config)
+        # Updated per-episode via set_task_description().
+        self._instruction: str = "Navigate to the target."
+        # Full episode RGB history (bounded by _max_history_frames).
+        self._image_history: list[Image.Image] = []
+        self._num_video_frames: int = config.num_video_frames
+        self._device = config.device
+        self._conv_mode = config.conv_mode
+        self._model_path = config.model_path
+        self._history_padding_mode = config.history_padding_mode
+        self._max_history_frames = config.max_history_frames
+        self._max_new_tokens = config.max_new_tokens
+        # Fail fast on invalid configuration before paying the model-load cost.
+        if self._history_padding_mode not in {"repeat_first", "black"}:
+            raise ValueError(
+                "history_padding_mode must be one of: repeat_first, black. "
+                f"Got: {self._history_padding_mode!r}"
+            )
+        if self._max_history_frames <= 0:
+            raise ValueError(f"max_history_frames must be > 0. Got: {self._max_history_frames}")
+        if self._max_new_tokens <= 0:
+            raise ValueError(f"max_new_tokens must be > 0. Got: {self._max_new_tokens}")
+
+        print(f"[NaVilaServerPolicy] Loading VLM model from: {self._model_path}")
+        model_name = get_model_name_from_path(self._model_path)
+        tokenizer, model, image_processor, _ = load_pretrained_model(
+            self._model_path, model_name, None
+        )
+        self._tokenizer = tokenizer
+        self._model = model.to(self._device)
+        self._image_processor = image_processor
+        print("[NaVilaServerPolicy] VLM model loaded successfully.")
+
+    # ------------------------------------------------------------------ #
+    # Protocol                                                           #
+    # ------------------------------------------------------------------ #
+
+    def _build_protocol(self) -> ActionProtocol:
+        """Declare the wire format: a 3-DOF velocity action from one RGB camera."""
+        return VlnVelocityActionProtocol(
+            action_dim=3,
+            observation_keys=["camera_obs.robot_head_cam_rgb"],
+            default_duration=0.5,
+        )
+
+    # ------------------------------------------------------------------ #
+    # RPC methods                                                        #
+    # ------------------------------------------------------------------ #
+
+    def set_task_description(self, task_description: str | None) -> dict[str, Any]:
+        """Set the navigation instruction for the current episode.
+
+        Falls back to a generic instruction when the client sends None/empty.
+        """
+        self._instruction = task_description or "Navigate to the target."
+        print(f"[NaVilaServerPolicy] Instruction: {self._instruction[:80]}...")
+        return {"task_description": self._instruction}
+
+    def get_action(
+        self, observation: dict[str, Any], options: dict[str, Any] | None = None
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
+        """Append the latest RGB frame to history, query the VLM, return a velocity.
+
+        Returns:
+            (action, info): ``action`` holds the [vx, vy, yaw_rate] command and
+            its hold duration; ``info`` carries the raw VLM text for logging.
+        """
+        nested = self.unpack_observation(observation)
+        rgb_np = None
+        if "camera_obs" in nested:
+            cam_obs = nested["camera_obs"]
+            # Only the first camera entry is consumed (single-camera protocol).
+            for cam_key in cam_obs:
+                cam_data = cam_obs[cam_key]
+                if isinstance(cam_data, dict) and "rgb" in cam_data:
+                    rgb_np = cam_data["rgb"]
+                elif isinstance(cam_data, np.ndarray):
+                    rgb_np = cam_data
+                break
+
+        if rgb_np is not None:
+            # Squeeze a leading batch dim (num_envs == 1) if present.
+            if rgb_np.ndim == 4:
+                rgb_np = rgb_np[0]
+            # Drop any alpha channel; PIL expects uint8 HWC.
+            img = Image.fromarray(rgb_np[:, :, :3].astype(np.uint8))
+            self._image_history.append(img)
+            # Bound memory: keep only the most recent frames.
+            if len(self._image_history) > self._max_history_frames:
+                self._image_history = self._image_history[-self._max_history_frames:]
+
+        vlm_text = self._run_vlm_inference()
+        vel_cmd, duration = parse_vlm_output_to_velocity(vlm_text)
+
+        action = {"action": vel_cmd, "duration": duration}
+        info = {"vlm_text": vlm_text}
+        return action, info
+
+    def reset(self, env_ids=None, reset_options=None) -> dict[str, Any]:
+        """Clear the episode frame history (instruction is set separately)."""
+        self._image_history.clear()
+        return {"status": "reset_success"}
+
+    # ------------------------------------------------------------------ #
+    # VLM inference                                                      #
+    # ------------------------------------------------------------------ #
+
+    def _run_vlm_inference(self) -> str:
+        """Run VLM forward pass on the image history + instruction.
+
+        Matches NaVILA-Bench: uniformly sample ``num_video_frames - 1``
+        frames from the *full* episode history, plus the latest frame.
+        This gives the VLM global temporal context so it can determine
+        when the navigation task is complete and output "stop".
+        """
+        if not self._image_history:
+            # No observation received yet; emit a harmless default command.
+            return "move forward 50"
+
+        n = self._num_video_frames
+        num_images = len(self._image_history)
+
+        if num_images < n:
+            if self._history_padding_mode == "black":
+                # Match NaVILA-Bench exactly for apples-to-apples diagnosis.
+                pad_img = Image.new("RGB", self._image_history[-1].size, (0, 0, 0))
+            else:
+                # Repeating the first real image often gives a steadier early
+                # rollout than feeding synthetic black frames.
+                pad_img = self._image_history[0]
+            frames = [pad_img] * (n - num_images) + list(self._image_history)
+        else:
+            # Uniform sample n-1 from full history + latest (matching NaVILA).
+            n_hist = n - 1
+            indices = [int(i * (num_images - 1) / n_hist) for i in range(n_hist)]
+            frames = [self._image_history[i] for i in indices]
+            frames.append(self._image_history[-1])
+
+        # process_image() reads the processor off the model config.
+        self._model.config.image_processor = self._image_processor
+        processed = []
+        for img in frames:
+            p = process_image(img, self._model.config, None)
+            processed.append(p)
+        if all(x.shape == processed[0].shape for x in processed):
+            image_tensor = torch.stack(processed, dim=0).to(self._device, dtype=torch.float16)
+        else:
+            # Mixed shapes cannot be stacked; degrade to the first frame only.
+            image_tensor = processed[0].unsqueeze(0).to(self._device, dtype=torch.float16)
+
+        conv = conv_templates[self._conv_mode].copy()
+        # NOTE(review): the placeholder must be LLaVA's literal "<image>" token.
+        # tokenizer_image_token() splits the prompt on "<image>" and inserts
+        # IMAGE_TOKEN_INDEX there; a bare "\n" placeholder would encode zero
+        # image tokens while `images=` is still passed to generate().
+        image_token = "<image>\n"
+        n_hist = self._num_video_frames - 1
+        qs = (
+            f"Imagine you are a robot programmed for navigation tasks. You have been given a video "
+            f'of historical observations {image_token * n_hist}, and current observation <image>\n. '
+            f'Your assigned task is: "{self._instruction}" '
+            f"Analyze this series of images to decide your next action, which could be turning left "
+            f"or right by a specific degree, moving forward a certain distance, or stop if the task "
+            f"is completed."
+        )
+        conv.append_message(conv.roles[0], qs)
+        conv.append_message(conv.roles[1], None)
+        prompt = conv.get_prompt()
+
+        input_ids = tokenizer_image_token(
+            prompt, self._tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
+        ).unsqueeze(0).to(self._device)
+
+        stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
+        stopping_criteria = KeywordsStoppingCriteria([stop_str], self._tokenizer, input_ids)
+
+        with torch.inference_mode():
+            output_ids = self._model.generate(
+                input_ids,
+                images=[image_tensor],
+                do_sample=False,
+                temperature=0,
+                top_p=None,
+                num_beams=1,
+                max_new_tokens=self._max_new_tokens,
+                use_cache=True,
+                stopping_criteria=[stopping_criteria],
+            )
+
+        output_text = self._tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0].strip()
+        # Trim a trailing stop string if the tokenizer did not treat it as a
+        # special token (standard LLaVA eval post-processing).
+        if stop_str and output_text.endswith(stop_str):
+            output_text = output_text[: -len(stop_str)].strip()
+        print(f"[VLM] history={len(self._image_history)} output=\"{output_text[-120:]}\"")
+        return output_text
+
+    # ------------------------------------------------------------------ #
+    # CLI helpers                                                        #
+    # ------------------------------------------------------------------ #
+
+    @staticmethod
+    def add_args_to_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParser:
+        """Register this policy's CLI options on the server runner's parser."""
+        group = parser.add_argument_group("VLN Server-Side Policy")
+        group.add_argument(
+            "--model_path", type=str, required=True,
+            help="Path to the NaVILA / LLaVA model checkpoint.",
+        )
+        group.add_argument(
+            "--policy_device", type=str, default="cuda",
+            help="Device for VLM inference (default: cuda).",
+        )
+        group.add_argument(
+            "--num_video_frames", type=int, default=8,
+            help="Number of video frames to send to the VLM (default: 8).",
+        )
+        group.add_argument(
+            "--conv_mode", type=str, default="llama_3",
+            help="Conversation template mode for LLaVA (default: llama_3).",
+        )
+        group.add_argument(
+            "--history_padding_mode",
+            type=str,
+            default="repeat_first",
+            choices=("repeat_first", "black"),
+            help="How to pad early frame history before enough frames are collected.",
+        )
+        group.add_argument(
+            "--max_history_frames",
+            type=int,
+            default=200,
+            help="Maximum number of RGB frames kept in server-side history.",
+        )
+        group.add_argument(
+            "--max_new_tokens",
+            type=int,
+            default=80,
+            help="Maximum generated tokens per VLM query.",
+        )
+        return parser
+
+    @staticmethod
+    def from_args(args: argparse.Namespace) -> NaVilaServerPolicy:
+        """Build a policy instance from parsed CLI args (getattr guards optional flags)."""
+        config = NaVilaServerPolicyConfig(
+            model_path=args.model_path,
+            device=getattr(args, "policy_device", "cuda"),
+            num_video_frames=getattr(args, "num_video_frames", 8),
+            conv_mode=getattr(args, "conv_mode", "llama_3"),
+            history_padding_mode=getattr(args, "history_padding_mode", "repeat_first"),
+            max_history_frames=getattr(args, "max_history_frames", 200),
+            max_new_tokens=getattr(args, "max_new_tokens", 80),
+        )
+        return NaVilaServerPolicy(config)
diff --git a/setup.py b/setup.py
index b05b9afed..b903cfcf0 100644
--- a/setup.py
+++ b/setup.py
@@ -19,6 +19,7 @@
             "isaaclab_arena_examples*",
             "isaaclab_arena_g1*",
             "isaaclab_arena_gr00t*",
+            "isaaclab_arena_navila*",
         ]
     ),
     python_requires=">=3.10",