pv-nvidia
diff --git a/‎.github/workflows/docs.yaml‎
Lines changed: 0 additions & 1 deletion b/‎.github/workflows/docs.yaml‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎.github/workflows/test-multi-gpu.yaml‎
Lines changed: 153 additions & 0 deletions b/‎.github/workflows/test-multi-gpu.yaml‎
Lines changed: 153 additions & 0 deletions
diff --git a/‎docs/source/experimental-features/newton-physics-integration/solver-transitioning.rst‎
Lines changed: 24 additions & 9 deletions b/‎docs/source/experimental-features/newton-physics-integration/solver-transitioning.rst‎
Lines changed: 24 additions & 9 deletions
diff --git a/‎docs/source/features/hydra.rst‎
Lines changed: 68 additions & 0 deletions b/‎docs/source/features/hydra.rst‎
Lines changed: 68 additions & 0 deletions
diff --git a/‎docs/source/setup/installation/include/pip_extras_note.rst‎
Lines changed: 8 additions & 0 deletions b/‎docs/source/setup/installation/include/pip_extras_note.rst‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎docs/source/setup/installation/isaaclab_pip_installation.rst‎
Lines changed: 4 additions & 0 deletions b/‎docs/source/setup/installation/isaaclab_pip_installation.rst‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎docs/source/tutorials/01_assets/run_deformable_object.rst‎
Lines changed: 1 addition & 1 deletion b/‎docs/source/tutorials/01_assets/run_deformable_object.rst‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎scripts/benchmarks/benchmark_non_rl.py‎
Lines changed: 7 additions & 2 deletions b/‎scripts/benchmarks/benchmark_non_rl.py‎
Lines changed: 7 additions & 2 deletions
@@ -131,4 +131,3 @@ jobs:
         github_token: ${{ secrets.GITHUB_TOKEN }}
         publish_dir: ./docs/_build
         keep_files: false
-        force_orphan: true
@@ -0,0 +1,153 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+# Multi-GPU distributed training validation
+#
+# This workflow validates that multi-GPU training works correctly across:
+# - Physics backends: PhysX, Newton
+# - Rendering backends: none (physics-only), Isaac RTX, Newton Warp
+#
+# Runs on a dedicated multi-GPU runner (separate from standard CI) to minimize costs.
+# Triggered only by PRs that touch distributed training code paths, or manually via workflow_dispatch.
+
+name: Multi-GPU Training Tests
+
+on:
+  pull_request:
+    paths:
+      - "source/isaaclab/isaaclab/app/app_launcher.py"
+      - "source/isaaclab_tasks/isaaclab_tasks/utils/sim_launcher.py"
+      - "scripts/reinforcement_learning/**/train.py"
+      - ".github/workflows/test-multi-gpu.yaml"
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  test-multi-gpu:
+    name: Multi-GPU (${{ matrix.physics }}, ${{ matrix.renderer }})
+    # Use dedicated multi-GPU runner to avoid blocking standard CI resources
+    # The "multi-gpu" label must be assigned to a runner with 2+ GPUs (e.g., g5.12xlarge with 4x A10G)
+    runs-on: [self-hosted, linux, x64, gpu, multi-gpu]
+    timeout-minutes: 30
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          # PhysX physics-only
+          - physics: physx
+            renderer: none
+            task: Isaac-Cartpole-Direct-v0
+            extra_args: ""
+
+          # PhysX + Isaac RTX renderer
+          - physics: physx
+            renderer: isaac-rtx
+            task: Isaac-Cartpole-Camera-Presets-Direct-v0
+            extra_args: ""
+            trainer: skrl
+
+          # PhysX + Newton Warp renderer (hybrid)
+          - physics: physx
+            renderer: newton-warp
+            task: Isaac-Cartpole-Camera-Presets-Direct-v0
+            extra_args: "env.tiled_camera.renderer_cfg=newton_renderer"
+            trainer: skrl
+
+          # Newton physics-only
+          - physics: newton
+            renderer: none
+            task: Isaac-Cartpole-Direct-v0
+            extra_args: "+sim=newton"
+
+          # Newton + Newton Warp renderer
+          - physics: newton
+            renderer: newton-warp
+            task: Isaac-Cartpole-Camera-Presets-Direct-v0
+            extra_args: "+sim=newton env.tiled_camera.renderer_cfg=newton_renderer"
+            trainer: skrl
+
+          # Newton + Isaac RTX renderer (hybrid)
+          - physics: newton
+            renderer: isaac-rtx
+            task: Isaac-Cartpole-Camera-Presets-Direct-v0
+            extra_args: "+sim=newton"
+            trainer: skrl
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+
+      - name: Install Isaac Lab
+        run: |
+          ./isaaclab.sh --install
+
+      - name: Verify multi-GPU availability
+        run: |
+          echo "=== GPU Info ==="
+          nvidia-smi --query-gpu=index,name,memory.total --format=csv
+
+          GPU_COUNT=$(python -c "import torch; print(torch.cuda.device_count())")
+          echo "Detected $GPU_COUNT GPU(s)"
+
+          if [ "$GPU_COUNT" -lt 2 ]; then
+            echo "::error::At least 2 GPUs required for multi-GPU tests, found $GPU_COUNT"
+            exit 1
+          fi
+
+      - name: Run multi-GPU training (${{ matrix.physics }}, ${{ matrix.renderer }})
+        env:
+          NCCL_DEBUG: WARN
+        run: |
+          TRAINER="${{ matrix.trainer || 'rsl_rl' }}"
+
+          echo "=========================================="
+          echo "Physics: ${{ matrix.physics }}"
+          echo "Renderer: ${{ matrix.renderer }}"
+          echo "Task: ${{ matrix.task }}"
+          echo "Trainer: $TRAINER"
+          echo "Extra args: ${{ matrix.extra_args }}"
+          echo "=========================================="
+
+          # Run 2-GPU distributed training for 3 iterations
+          ./isaaclab.sh -p -m torch.distributed.run --nproc_per_node=2 \
+            scripts/reinforcement_learning/${TRAINER}/train.py \
+            --task=${{ matrix.task }} \
+            --headless \
+            --distributed \
+            --max_iterations=3 \
+            --num_envs=16 \
+            ${{ matrix.extra_args }}
+
+      - name: Verify training completed
+        run: |
+          # Find the most recently modified directory three levels below logs/
+          LATEST_LOG=$(ls -td logs/*/*/*/ 2>/dev/null | head -1)
+
+          if [ -z "$LATEST_LOG" ]; then
+            echo "::error::No training log directory found"
+            exit 1
+          fi
+
+          echo "Log directory: $LATEST_LOG"
+          ls -la "$LATEST_LOG"
+
+          # Check for model checkpoints
+          MODELS=$(find "$LATEST_LOG" -name "*.pt" | wc -l)
+          echo "Model checkpoints found: $MODELS"
+
+          if [ "$MODELS" -lt 1 ]; then
+            echo "::error::No model checkpoints found - training may have failed"
+            exit 1
+          fi
+
+          echo "✅ Multi-GPU training completed successfully (${{ matrix.physics }}, ${{ matrix.renderer }})"
@@ -2,7 +2,16 @@ Solver Transitioning
 ====================
 
 Transitioning to the Newton physics engine introduces new physics solvers that handle simulation using different numerical approaches.
-While Newton supports several different solvers, our initial focus for Isaac Lab is on using the MuJoCo-Warp solver from Google DeepMind.
+While Newton supports several different solvers, our initial focus for Isaac Lab is on using the
+MuJoCo-Warp solver from Google DeepMind. Isaac Lab also includes beta support for the Kamino
+solver on selected classic tasks. Kamino is selected through a physics preset rather than as a
+separate backend; see :ref:`hydra-backend-solver-presets`.
+
+.. note::
+
+    Kamino support is experimental and currently depends on assets being structured
+    in a way that Kamino can consume. Assets that work with MuJoCo-Warp or PhysX
+    may still require model-structure updates before they work with Kamino.
 
 The way the physics scene itself is defined does not change - we continue to use USD as the primary way to set basic parameters of objects and robots in the scene,
 and for current environments, the exact same USD files used for the PhysX-based Isaac Lab are used.
@@ -12,15 +21,18 @@ What does require change is the way that some solver-specific settings are confi
 Tuning these parameters can have a significant impact on both simulation performance and behaviour.
 
 For now, we will show an example of setting these parameters to help provide a feel for these changes.
-Note that the :class:`~isaaclab.sim.NewtonCfg` replaces the :class:`~isaaclab.sim.PhysxCfg` and is used to set everything related to the physical simulation parameters except for the ``dt``:
+Note that the :class:`~isaaclab_newton.physics.NewtonCfg` replaces
+:class:`~isaaclab_physx.physics.PhysxCfg` and is used to set everything related to the physical
+simulation parameters except for the ``dt``:
 
 .. code-block:: python
 
-    from isaaclab.sim._impl.newton_manager_cfg import NewtonCfg
-    from isaaclab.sim._impl.solvers_cfg import MJWarpSolverCfg
+    from isaaclab.sim import SimulationCfg
+    from isaaclab_newton.physics import MJWarpSolverCfg, NewtonCfg
 
     solver_cfg = MJWarpSolverCfg(
-        nefc_per_env=35,
+        njmax=35,
+        nconmax=20,
         ls_iterations=10,
         cone="pyramidal",
         ls_parallel=True,
@@ -31,14 +43,17 @@ Note that the :class:`~isaaclab.sim.NewtonCfg` replaces the :class:`~isaaclab.si
         num_substeps=1,
         debug_mode=False,
     )
-    sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation, newton_cfg=newton_cfg)
+    sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation, physics=newton_cfg)
 
 
 Here is a very brief explanation of some of the key parameters above:
 
-* ``nefc_per_env``: This is the size of the buffer constraints we want MuJoCo warp to
-  pre-allocate for a given environment. A large value will slow down the simulation,
-  while a too small value may lead to some contacts being missed.
+* ``njmax``: This is the number of constraint rows MuJoCo-Warp pre-allocates for a
+  given environment. A large value will slow down the simulation, while a value that
+  is too small may cause some constraints to be missed.
+
+* ``nconmax``: This is the maximum number of contact points MuJoCo-Warp pre-allocates
+  for a given environment. Set it high enough for the expected contact count.
 
 * ``ls_iterations``: The number of line searches performed by the MuJoCo Warp solver.
   Line searches are used to find an optimal step size, and for each solver step,
 
@@ -242,6 +242,74 @@ disabled unless explicitly selected:
     python train.py --task=Isaac-Reach-Franka-v0 env.scene.camera=large
 
 
+.. _hydra-backend-solver-presets:
+
+Backend and Solver Presets
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Physics backend selection uses the same preset system. A task can define a
+``PresetCfg`` whose entries replace the complete physics config:
+
+.. code-block:: python
+
+    from isaaclab.utils import configclass
+    from isaaclab_newton.physics import KaminoSolverCfg, MJWarpSolverCfg, NewtonCfg
+    from isaaclab_physx.physics import PhysxCfg
+    from isaaclab_tasks.utils import PresetCfg
+
+    @configclass
+    class CartpolePhysicsCfg(PresetCfg):
+        default: PhysxCfg = PhysxCfg()
+        physx: PhysxCfg = PhysxCfg()
+        newton: NewtonCfg = NewtonCfg(
+            solver_cfg=MJWarpSolverCfg(njmax=5, nconmax=3),
+            num_substeps=1,
+        )
+        kamino: NewtonCfg = NewtonCfg(
+            solver_cfg=KaminoSolverCfg(
+                integrator="moreau",
+                use_collision_detector=True,
+                sparse_jacobian=True,
+                padmm_max_iterations=100,
+            ),
+            num_substeps=1,
+            debug_mode=False,
+            use_cuda_graph=True,
+        )
+
+The ``newton`` and ``kamino`` entries both select the Newton physics backend because
+both entries are :class:`~isaaclab_newton.physics.NewtonCfg` objects. The difference
+is the solver configuration: ``newton`` uses
+:class:`~isaaclab_newton.physics.MJWarpSolverCfg`, while ``kamino`` uses
+:class:`~isaaclab_newton.physics.KaminoSolverCfg`.
+
+Kamino is therefore a solver preset, not a separate Isaac Lab backend. The same
+Newton assets, sensors, renderers, and visualizers are used after the preset is
+resolved. Kamino is a solver for constrained rigid multi-body dynamics based on the
+Proximal Alternating Direction Method of Multipliers (P-ADMM), and its Isaac Lab
+support is currently in beta.
+
+.. note::
+
+    Kamino support is experimental and currently depends on the asset being
+    structured in a way that Kamino can consume. Assets that work with the
+    MuJoCo-Warp or PhysX presets may still require model-structure updates before
+    they work with ``presets=kamino``.
+
+.. code-block:: bash
+
+    # Select the Kamino solver preset everywhere it is defined
+    python train.py --task=Isaac-Cartpole-v0 presets=kamino
+
+    # Select the Kamino solver preset for a specific physics config path
+    python train.py --task=Isaac-Cartpole-v0 env.sim.physics=kamino
+
+The ``kamino`` preset is currently defined for ``Isaac-Cartpole-Direct-v0``,
+``Isaac-Ant-Direct-v0``, ``Isaac-Cartpole-v0``, and ``Isaac-Ant-v0``. Passing
+``presets=kamino`` to a task without a ``kamino`` preset does not enable Kamino; to use
+Kamino with such a task, first add and validate a task-specific ``kamino`` preset.
+
+
 Inline Presets with preset()
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 
@@ -0,0 +1,8 @@
+.. note::
+
+   The bare ``isaaclab`` install ships only the core extension. To run
+   the bundled training scripts under ``scripts/reinforcement_learning/``
+   you must install with the ``[all]`` extras (or the per-framework
+   extras ``[skrl]`` / ``[sb3]`` / ``[rsl-rl]``); otherwise commands such
+   as ``python scripts/reinforcement_learning/skrl/train.py ...`` fail
+   at import time with ``ModuleNotFoundError: No module named 'skrl'``.
@@ -71,6 +71,8 @@ Isaac Lab sub-packages:
          # Isaac Lab + Isaac Sim + all sub-packages
          uv pip install "isaaclab[isaacsim,all]" --extra-index-url https://pypi.nvidia.com --index-strategy unsafe-best-match --prerelease=allow
 
+      .. include:: include/pip_extras_note.rst
+
    .. tab-item:: pip
 
       .. code-block:: bash
@@ -90,6 +92,8 @@ Isaac Lab sub-packages:
          # Isaac Lab + Isaac Sim + all Isaac Lab sub-packages
          pip install "isaaclab[isaacsim,all]" --extra-index-url https://pypi.nvidia.com --pre
 
+      .. include:: include/pip_extras_note.rst
+
 Installing dependencies
 ~~~~~~~~~~~~~~~~~~~~~~~
 
 
@@ -163,7 +163,7 @@ Now that we have gone through the code, let's run the script and see the result:
    ./isaaclab.sh -p scripts/tutorials/01_assets/run_deformable_object.py --visualizer kit
 
 
-This should open a stage with a ground plane, lights, and several green cubes. Two of the four cubes must be dropping
+This should open a stage with a ground plane, lights, and several cubes. Two of the four cubes must be dropping
 from a height and settling on to the ground. Meanwhile the other two cubes must be moving along the z-axis. You
 should see a marker showing the kinematic target position for the nodes at the bottom-left corner of the cubes.
 To stop the simulation, you can either close the window, or press ``Ctrl+C`` in the terminal
 
@@ -120,7 +120,12 @@ def main(
 
     # override configurations with non-hydra CLI arguments
     env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
-    env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
+    # For distributed training, launch_simulation() already resolved the
+    # correct per-rank device; only apply a CLI --device override for
+    # non-distributed runs (the default "cuda:0" would clobber the
+    # per-rank device otherwise).
+    if not args_cli.distributed:
+        env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
     env_cfg.seed = args_cli.seed
 
     # check for invalid combination of CPU device with distributed training
@@ -131,10 +136,10 @@ def main(
         )
 
     # process distributed
+    # Only world size/rank are read here; launch_simulation() already resolved the per-rank env_cfg.sim.device.
     world_size = 1
     world_rank = 0
     if args_cli.distributed:
-        env_cfg.sim.device = f"cuda:{int(os.getenv('LOCAL_RANK', '0'))}"
         world_size = int(os.getenv("WORLD_SIZE", 1))
         world_rank = int(os.getenv("RANK", "0"))