Add Newton backend support for Shadow-Hand-Over (MAPPO)

hujc7 · hujc7 · commit 9f1c465dedbf · 2026-05-06T04:19:26.000Z
Port ``Isaac-Shadow-Hand-Over-Direct-v0`` to run on both the PhysX and
Newton backends, mirroring the single-agent Shadow Hand Newton port pattern.
Selectable via ``--preset newton`` / Hydra preset resolution; PhysX
behavior unchanged.

The Newton variant of the Shadow Hand articulation is built as a
delta of the single-agent ``ShadowHandRobotCfg.newton`` (cross-task
import), parameterized per-robot ``prim_path``/init pose, with two
``ImplicitActuatorCfg`` overrides:

* ``fingers`` (wrist + per-finger joints): ``stiffness=20.0`` /
  ``damping=2.0``. PhysX uses ``5.0`` / ``0.5`` on wrists and
  ``1.0`` / ``0.1`` on fingers; it also layers
  ``fixed_tendons_props(limit_stiffness=30, damping=0.1)`` on top of the
  implicit drive and runs ``solver_position_iteration_count=8`` per
  substep — both amplify the effective torque per unit nominal gain.
  Newton's MJWarp implicit-PD path has neither, so larger nominal gains
  are needed for comparable joint authority.
* ``distal_passive`` (the four ``robot0_(FF|MF|RF|LF)J0`` joints):
  ``stiffness=10.0`` / ``damping=0.1``. The Newton USD bakes
  ``stiffness=286`` / ``damping=57`` on these joints from the MJCF→USD
  translation, which fights the ``MjcTendon`` coupling and bounces
  the ball. ``stiffness=10`` keeps the joints near-passive while the
  tendon constraint dominates.

Object, scene, and physics presets follow the established
``PresetCfg(physx=..., newton=..., default=physx)`` pattern. Newton's
``ObjectCfg`` drops PhysX-only knobs: the ``rigid_props`` solver
iteration counts, sleep/stabilization thresholds, and max depenetration
velocity, plus the custom ``physics_material``. Newton's scene cloning sets
``clone_in_fabric=False``. The Newton physics preset is
``NewtonCfg(MJWarpSolverCfg(...))`` mirroring the single-agent
configuration.

Bumps ``isaaclab_tasks`` 1.5.33 → 1.5.34 with one CHANGELOG entry.
diff --git a/source/isaaclab_tasks/changelog.d/jichuanh-shadow-hand-newton-parity.minor.rst b/source/isaaclab_tasks/changelog.d/jichuanh-shadow-hand-newton-parity.minor.rst
@@ -0,0 +1,43 @@
+Added
+^^^^^
+
+* Added Newton backend support for the multi-agent
+  ``Isaac-Shadow-Hand-Over-Direct-v0`` (MAPPO/IPPO) env. Mirrors the
+  single-agent Shadow Hand Newton port: per-hand
+  :class:`~isaaclab.actuators.ImplicitActuatorCfg`, ``shadow_hand_instanceable_newton.usd``,
+  per-backend :class:`~isaaclab_tasks.utils.PresetCfg` wrappers for sim
+  physics, scene cloning (``clone_in_fabric=False`` on Newton), the
+  hand-over object (``RigidObjectCfg`` on both backends, dropping
+  PhysX-only knobs on Newton), and the two robot configs. Selectable via
+  ``--preset newton`` / Hydra preset resolution; PhysX behavior unchanged.
+
+Fixed
+^^^^^
+
+* Fixed Newton training failing to learn the catch in
+  ``Isaac-Shadow-Hand-Over-Direct-v0`` MAPPO. Two Newton-side
+  :class:`~isaaclab.actuators.ImplicitActuatorCfg` overrides are added:
+
+  * ``fingers`` (wrist + per-finger joints): ``stiffness=20.0`` /
+    ``damping=2.0``, vs PhysX's ``5.0`` / ``0.5`` on wrists and
+    ``1.0`` / ``0.1`` on fingers. PhysX layers
+    ``fixed_tendons_props(limit_stiffness=30, damping=0.1)`` on top of
+    the implicit drive and runs ``solver_position_iteration_count=8``
+    per substep — both amplify the effective torque per unit nominal
+    gain. Newton's MJWarp implicit-PD path has neither, so larger
+    nominal gains are needed for comparable joint authority.
+  * ``distal_passive`` (the four ``robot0_(FF|MF|RF|LF)J0`` joints):
+    ``stiffness=10.0`` / ``damping=0.1``. The Newton USD bakes
+    ``stiffness=286`` / ``damping=57`` on these joints from the
+    MJCF→USD translation, which fights the ``MjcTendon`` coupling and
+    bounces the ball. ``stiffness=10`` (1/3 of PhysX
+    ``limit_stiffness=30``) keeps the joints near-passive while the
+    tendon constraint dominates. PhysX uses tendon coupling on these
+    joints directly and does not need an analogous override.
+
+  At iter 200 / 2048 envs, MAPPO ``Reward / Total reward (mean)``:
+  PhysX baseline **246.7**, Newton at ``stiffness=1.0`` / ``damping=0.1``
+  (no catch learned) **23.4**, Newton at the new gains **777.1**.
+  Newton learns the catch reliably; longer runs and behavior-level
+  comparison (catch / drop rate, ball trajectory) are follow-ups.
+  PhysX path is unchanged.
diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/shadow_hand_over_env.py b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/shadow_hand_over_env.py
@@ -64,7 +64,7 @@ def __init__(self, cfg: ShadowHandOverEnvCfg, render_mode: str | None = None, **
         self.num_fingertips = len(self.finger_bodies)
 
         # joint limits
-        joint_pos_limits = wp.to_torch(self.right_hand.root_view.get_dof_limits()).to(self.device)
+        joint_pos_limits = wp.to_torch(self.right_hand.data.joint_limits).to(self.device)
         self.hand_dof_lower_limits = joint_pos_limits[..., 0]
         self.hand_dof_upper_limits = joint_pos_limits[..., 1]
 
diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/shadow_hand_over_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/shadow_hand_over_env_cfg.py
@@ -3,10 +3,12 @@
 #
 # SPDX-License-Identifier: BSD-3-Clause
 
+from isaaclab_newton.physics import MJWarpSolverCfg, NewtonCfg
 from isaaclab_physx.physics import PhysxCfg
 
 import isaaclab.envs.mdp as mdp
 import isaaclab.sim as sim_utils
+from isaaclab.actuators import ImplicitActuatorCfg
 from isaaclab.assets import ArticulationCfg, RigidObjectCfg
 from isaaclab.envs import DirectMARLEnvCfg
 from isaaclab.managers import EventTermCfg as EventTerm
@@ -17,12 +19,21 @@
 from isaaclab.sim.spawners.materials.physics_materials_cfg import RigidBodyMaterialCfg
 from isaaclab.utils import configclass
 
+from isaaclab_tasks.direct.shadow_hand.shadow_hand_env_cfg import ShadowHandRobotCfg
+from isaaclab_tasks.utils import PresetCfg
+
 from isaaclab_assets.robots.shadow_hand import SHADOW_HAND_CFG
 
 
 @configclass
 class EventCfg:
-    """Configuration for randomization."""
+    """Configuration for randomization (PhysX path).
+
+    Note: this config is currently not wired into ``ShadowHandOverEnvCfg.events`` -
+    it is kept as a reference for future event-randomization work. The event
+    terms here use PhysX-only APIs (rigid-body materials, fixed tendons), so
+    they would need a Newton variant before being enabled in the env.
+    """
 
     # -- robot
     robot_physics_material = EventTerm(
@@ -113,6 +124,182 @@ class EventCfg:
     )
 
 
+# Reuse the single-agent Shadow Hand Newton port (USD path, ``rot`` reapplication
+# workaround, effort limits, joint regex). The multi-agent variant only diverges
+# in actuator gains (stiffness/damping bumped for the catch task) and adds a
+# ``distal_passive`` override for the J0 USD-baked values.
+_SHADOW_HAND_NEWTON_CFG = ShadowHandRobotCfg().newton
+
+
+def _newton_shadow_hand_cfg(
+    prim_path: str, init_pos: tuple[float, float, float], init_rot: tuple[float, float, float, float]
+) -> ArticulationCfg:
+    """Newton Shadow Hand cfg parameterized by per-robot ``prim_path`` and init pose.
+
+    Two overrides versus the single-agent Newton port:
+
+    * ``fingers`` actuator: ``stiffness=20.0`` / ``damping=2.0`` (vs PhysX's
+      ``5.0`` / ``0.5`` on wrists and ``1.0`` / ``0.1`` on fingers). PhysX layers
+      ``fixed_tendons_props(limit_stiffness=30, damping=0.1)`` and runs
+      ``solver_position_iteration_count=8`` per substep — both amplify the
+      effective torque per unit nominal gain. Newton's MJWarp implicit-PD path
+      has neither, so a larger nominal gain is needed for comparable joint
+      authority. ``20.0`` / ``2.0`` is the smallest tested setting at which
+      MAPPO learns the catch (mean reward at iter 200 / 2048 envs goes from
+      ~27 at PhysX-mirrored gains to ~777).
+    * ``distal_passive`` actuator on the four ``robot0_(FF|MF|RF|LF)J0`` joints
+      with ``stiffness=10.0`` / ``damping=0.1``. The Newton USD bakes
+      ``stiffness=286 / damping=57`` on these joints from the MJCF→USD
+      translation, which fights the ``MjcTendon`` coupling and bounces the
+      ball. ``stiffness=10`` (1/3 of PhysX ``limit_stiffness=30``) keeps the
+      joints near-passive while the tendon constraint dominates.
+    """
+    return _SHADOW_HAND_NEWTON_CFG.replace(
+        prim_path=prim_path,
+        init_state=_SHADOW_HAND_NEWTON_CFG.init_state.replace(pos=init_pos, rot=init_rot),
+        actuators={
+            "fingers": _SHADOW_HAND_NEWTON_CFG.actuators["fingers"].replace(stiffness=20.0, damping=2.0),
+            "distal_passive": ImplicitActuatorCfg(
+                joint_names_expr=["robot0_(FF|MF|RF|LF)J0"],
+                stiffness=10.0,
+                damping=0.1,
+                friction=1e-2,
+                armature=2e-3,
+            ),
+        },
+    )
+
+
+@configclass
+class RightRobotCfg(PresetCfg):
+    physx = SHADOW_HAND_CFG.replace(prim_path="/World/envs/env_.*/RightRobot").replace(
+        init_state=ArticulationCfg.InitialStateCfg(
+            pos=(0.0, 0.0, 0.5),
+            rot=(0.0, 0.0, 0.0, 1.0),
+            joint_pos={".*": 0.0},
+        )
+    )
+    newton = _newton_shadow_hand_cfg(
+        prim_path="/World/envs/env_.*/RightRobot",
+        init_pos=(0.0, 0.0, 0.5),
+        init_rot=(0.0, 0.0, 0.0, 1.0),
+    )
+    default = physx
+
+
+@configclass
+class LeftRobotCfg(PresetCfg):
+    physx = SHADOW_HAND_CFG.replace(prim_path="/World/envs/env_.*/LeftRobot").replace(
+        init_state=ArticulationCfg.InitialStateCfg(
+            pos=(0.0, -1.0, 0.5),
+            rot=(0.0, 0.0, 1.0, 0.0),
+            joint_pos={".*": 0.0},
+        )
+    )
+    newton = _newton_shadow_hand_cfg(
+        prim_path="/World/envs/env_.*/LeftRobot",
+        init_pos=(0.0, -1.0, 0.5),
+        init_rot=(0.0, 0.0, 1.0, 0.0),
+    )
+    default = physx
+
+
+@configclass
+class ObjectCfg(PresetCfg):
+    """Hand-over object preset.
+
+    Both backends spawn the same procedural sphere as a free rigid body:
+    Newton's :class:`~isaaclab_newton.assets.RigidObject` resolves the
+    asset via the ``UsdPhysics.RigidBodyAPI`` that
+    :class:`~isaaclab.sim.RigidBodyPropertiesCfg` applies. The Newton
+    variant drops PhysX-only knobs (per-body solver iterations, sleep and
+    stabilization thresholds, max depenetration velocity, custom physics material).
+    """
+
+    physx = RigidObjectCfg(
+        prim_path="/World/envs/env_.*/object",
+        spawn=sim_utils.SphereCfg(
+            radius=0.0335,
+            visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(0.8, 1.0, 0.0)),
+            physics_material=sim_utils.RigidBodyMaterialCfg(static_friction=0.7),
+            rigid_props=sim_utils.RigidBodyPropertiesCfg(
+                kinematic_enabled=False,
+                disable_gravity=False,
+                enable_gyroscopic_forces=True,
+                solver_position_iteration_count=8,
+                solver_velocity_iteration_count=0,
+                sleep_threshold=0.005,
+                stabilization_threshold=0.0025,
+                max_depenetration_velocity=1000.0,
+            ),
+            collision_props=sim_utils.CollisionPropertiesCfg(),
+            mass_props=sim_utils.MassPropertiesCfg(density=500.0),
+        ),
+        init_state=RigidObjectCfg.InitialStateCfg(pos=(0.0, -0.39, 0.54), rot=(0.0, 0.0, 0.0, 1.0)),
+    )
+    newton = RigidObjectCfg(
+        prim_path="/World/envs/env_.*/object",
+        spawn=sim_utils.SphereCfg(
+            radius=0.0335,
+            visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(0.8, 1.0, 0.0)),
+            rigid_props=sim_utils.RigidBodyPropertiesCfg(
+                kinematic_enabled=False,
+                disable_gravity=False,
+                enable_gyroscopic_forces=True,
+            ),
+            collision_props=sim_utils.CollisionPropertiesCfg(),
+            mass_props=sim_utils.MassPropertiesCfg(density=500.0),
+        ),
+        init_state=RigidObjectCfg.InitialStateCfg(pos=(0.0, -0.39, 0.54), rot=(0.0, 0.0, 0.0, 1.0)),
+    )
+    default = physx
+
+
+@configclass
+class ShadowHandOverSceneCfg(PresetCfg):
+    """Scene preset.
+
+    PhysX supports ``clone_in_fabric=True`` for faster cloning. Newton's
+    cloning path does not, so the Newton variant disables Fabric cloning.
+    """
+
+    physx: InteractiveSceneCfg = InteractiveSceneCfg(
+        num_envs=2048, env_spacing=1.5, replicate_physics=True, clone_in_fabric=True
+    )
+    newton: InteractiveSceneCfg = InteractiveSceneCfg(
+        num_envs=2048, env_spacing=1.5, replicate_physics=True, clone_in_fabric=False
+    )
+    default: InteractiveSceneCfg = physx
+
+
+@configclass
+class PhysicsCfg(PresetCfg):
+    """Physics-backend preset (PhysX vs Newton/MJWarp).
+
+    Newton settings mirror the single-agent ShadowHand Newton port: elliptic
+    cone, ``impratio=10`` (friction constraints stiffer than normal, resists slip), 100 solver
+    iterations, 2 substeps. Empirically converges on the single-agent ShadowHand
+    tasks; tuning may be needed for handover-specific contact dynamics.
+    """
+
+    physx = PhysxCfg(bounce_threshold_velocity=0.2)
+    newton = NewtonCfg(
+        solver_cfg=MJWarpSolverCfg(
+            solver="newton",
+            integrator="implicitfast",
+            njmax=200,
+            nconmax=70,
+            impratio=10.0,
+            cone="elliptic",
+            update_data_interval=2,
+            iterations=100,
+        ),
+        num_substeps=2,
+        debug_mode=False,
+    )
+    default = physx
+
+
 @configclass
 class ShadowHandOverEnvCfg(DirectMARLEnvCfg):
     # env
@@ -131,25 +318,11 @@ class ShadowHandOverEnvCfg(DirectMARLEnvCfg):
             static_friction=1.0,
             dynamic_friction=1.0,
         ),
-        physics=PhysxCfg(
-            bounce_threshold_velocity=0.2,
-        ),
+        physics=PhysicsCfg(),
     )
     # robot
-    right_robot_cfg: ArticulationCfg = SHADOW_HAND_CFG.replace(prim_path="/World/envs/env_.*/RightRobot").replace(
-        init_state=ArticulationCfg.InitialStateCfg(
-            pos=(0.0, 0.0, 0.5),
-            rot=(0.0, 0.0, 0.0, 1.0),
-            joint_pos={".*": 0.0},
-        )
-    )
-    left_robot_cfg: ArticulationCfg = SHADOW_HAND_CFG.replace(prim_path="/World/envs/env_.*/LeftRobot").replace(
-        init_state=ArticulationCfg.InitialStateCfg(
-            pos=(0.0, -1.0, 0.5),
-            rot=(0.0, 0.0, 1.0, 0.0),
-            joint_pos={".*": 0.0},
-        )
-    )
+    right_robot_cfg: RightRobotCfg = RightRobotCfg()
+    left_robot_cfg: LeftRobotCfg = LeftRobotCfg()
     actuated_joint_names = [
         "robot0_WRJ1",
         "robot0_WRJ0",
@@ -181,27 +354,7 @@ class ShadowHandOverEnvCfg(DirectMARLEnvCfg):
     ]
 
     # in-hand object
-    object_cfg: RigidObjectCfg = RigidObjectCfg(
-        prim_path="/World/envs/env_.*/object",
-        spawn=sim_utils.SphereCfg(
-            radius=0.0335,
-            visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(0.8, 1.0, 0.0)),
-            physics_material=sim_utils.RigidBodyMaterialCfg(static_friction=0.7),
-            rigid_props=sim_utils.RigidBodyPropertiesCfg(
-                kinematic_enabled=False,
-                disable_gravity=False,
-                enable_gyroscopic_forces=True,
-                solver_position_iteration_count=8,
-                solver_velocity_iteration_count=0,
-                sleep_threshold=0.005,
-                stabilization_threshold=0.0025,
-                max_depenetration_velocity=1000.0,
-            ),
-            collision_props=sim_utils.CollisionPropertiesCfg(),
-            mass_props=sim_utils.MassPropertiesCfg(density=500.0),
-        ),
-        init_state=RigidObjectCfg.InitialStateCfg(pos=(0.0, -0.39, 0.54), rot=(0.0, 0.0, 0.0, 1.0)),
-    )
+    object_cfg: ObjectCfg = ObjectCfg()
     # goal object
     goal_object_cfg: VisualizationMarkersCfg = VisualizationMarkersCfg(
         prim_path="/Visuals/goal_marker",
@@ -212,8 +365,8 @@ class ShadowHandOverEnvCfg(DirectMARLEnvCfg):
             ),
         },
     )
-    # scene
-    scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=2048, env_spacing=1.5, replicate_physics=True)
+    # scene - use ShadowHandOverSceneCfg so that --preset newton disables clone_in_fabric automatically
+    scene: ShadowHandOverSceneCfg = ShadowHandOverSceneCfg()
 
     # reset
     reset_position_noise = 0.01  # range of position at reset