Skip to content

Commit 151b054

Browse files
ooctipusisaaclab-review-bot[bot]AntoineRichard
authored
Fixes NaN after env reset by syncing body_q before collision detection (#5176)
# Description This is an important fix for locomotion rough terrain in newton to work - Fixed NaN explosions during Newton training caused by stale body_q after env resets. Articulation write methods (write_root_pose_to_sim_*, write_joint_position_to_sim_*) updated joint_q but never recomputed body_q via forward kinematics. The collision pipeline read the old body_q, generating contacts for the wrong position. The MuJoCo Warp solver then computed constraint Jacobians from new qpos but old contacts — a geometry mismatch that overflowed the solver gradient, producing NaN. - Added NewtonManager.invalidate_fk() flag set by articulation write methods, checked in step() before collision detection to conditionally run eval_fk. - Added regression test that patches _simulate_physics_only to capture body_q at collide() time and asserts consistency with joint_q. Fails with 10m mismatch without the fix. Fixes # (issue) <!-- As a practice, it is recommended to open an issue to have discussions on the proposed pull request. This makes it easier for the community to keep track of what is being developed or added, and if a given feature is demanded by more than one party. --> ## Type of change <!-- As you go through the list, delete the ones that are not applicable. --> - Bug fix (non-breaking change which fixes an issue) - New feature (non-breaking change which adds functionality) - Breaking change (existing functionality will not work without user modification) - Documentation update ## Screenshots Please attach before and after screenshots of the change if applicable. <!-- Example: | Before | After | | ------ | ----- | | _gif/png before_ | _gif/png after_ | To upload images to a PR -- simply drag and drop an image while in edit mode and it should upload the image directly. You can then paste that source into the above before/after sections. --> ## Checklist - [x] I have read and understood the [contribution guidelines](https://isaac-sim.github.io/IsaacLab/main/source/refs/contributing.html) - [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./isaaclab.sh --format` - [ ] I have made corresponding changes to the documentation - [x] My changes generate no new warnings - [x] I have added tests that prove my fix is effective or that my feature works - [x] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file - [x] I have added my name to the `CONTRIBUTORS.md` or my name already exists there <!-- As you go through the checklist above, you can mark something as done by putting an x character in it For example, - [x] I have done this task - [ ] I have not done this task --> --------- Co-authored-by: isaaclab-review-bot[bot] <isaaclab-review-bot[bot]@users.noreply.github.com> Co-authored-by: Antoine RICHARD <antoiner@nvidia.com>
1 parent 6f57c14 commit 151b054

5 files changed

Lines changed: 120 additions & 1 deletion

File tree

source/isaaclab_newton/config/extension.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22

33
# Note: Semantic Versioning is used: https://semver.org/
4-
version = "0.5.9"
4+
version = "0.5.10"
55

66
# Description
77
title = "Newton simulation interfaces for IsaacLab core package"

source/isaaclab_newton/docs/CHANGELOG.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,18 @@
11
Changelog
22
---------
33

4+
0.5.10 (2026-04-05)
5+
~~~~~~~~~~~~~~~~~~~
6+
7+
Fixed
8+
^^^^^
9+
10+
* Fixed NaN after env reset caused by stale ``body_q`` in the collision
11+
pipeline. Added :meth:`~isaaclab_newton.physics.NewtonManager.invalidate_fk`
12+
so articulation write methods trigger ``eval_fk`` before the next
13+
``collide()``.
14+
15+
416
0.5.9 (2026-03-16)
517
~~~~~~~~~~~~~~~~~~
618

source/isaaclab_newton/isaaclab_newton/assets/articulation/articulation.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,7 @@ def write_root_link_pose_to_sim_index(
495495
if self.data._root_state_w is not None:
496496
self.data._root_state_w.timestamp = -1.0
497497
self.data._fk_timestamp = -1.0 # Forces a kinematic update to get the latest body link poses.
498+
SimulationManager.invalidate_fk()
498499
if self.data._body_com_pose_w is not None:
499500
self.data._body_com_pose_w.timestamp = -1.0
500501
if self.data._body_state_w is not None:
@@ -550,6 +551,7 @@ def write_root_link_pose_to_sim_mask(
550551
if self.data._root_state_w is not None:
551552
self.data._root_state_w.timestamp = -1.0
552553
self.data._fk_timestamp = -1.0 # Forces a kinematic update to get the latest body link poses.
554+
SimulationManager.invalidate_fk()
553555
if self.data._body_com_pose_w is not None:
554556
self.data._body_com_pose_w.timestamp = -1.0
555557
if self.data._body_state_w is not None:
@@ -614,6 +616,7 @@ def write_root_com_pose_to_sim_index(
614616
if self.data._root_state_w is not None:
615617
self.data._root_state_w.timestamp = -1.0
616618
self.data._fk_timestamp = -1.0 # Forces a kinematic update to get the latest body link poses.
619+
SimulationManager.invalidate_fk()
617620
if self.data._body_com_pose_w is not None:
618621
self.data._body_com_pose_w.timestamp = -1.0
619622
if self.data._body_state_w is not None:
@@ -674,6 +677,7 @@ def write_root_com_pose_to_sim_mask(
674677
if self.data._root_state_w is not None:
675678
self.data._root_state_w.timestamp = -1.0
676679
self.data._fk_timestamp = -1.0 # Forces a kinematic update to get the latest body link poses.
680+
SimulationManager.invalidate_fk()
677681
if self.data._body_com_pose_w is not None:
678682
self.data._body_com_pose_w.timestamp = -1.0
679683
if self.data._body_state_w is not None:
@@ -1012,6 +1016,7 @@ def write_joint_position_to_sim_index(
10121016
)
10131017
# Invalidate FK timestamp so body poses are recomputed on next access.
10141018
self.data._fk_timestamp = -1.0
1019+
SimulationManager.invalidate_fk()
10151020
# Need to invalidate the buffer to trigger the update with the new root pose.
10161021
# Only invalidate if the buffer has been accessed (not None).
10171022
if self.data._body_link_vel_w is not None:
@@ -1066,6 +1071,7 @@ def write_joint_position_to_sim_mask(
10661071
)
10671072
# Invalidate FK timestamp so body poses are recomputed on next access.
10681073
self.data._fk_timestamp = -1.0
1074+
SimulationManager.invalidate_fk()
10691075
# Need to invalidate the buffer to trigger the update with the new root pose.
10701076
# Only invalidate if the buffer has been accessed (not None).
10711077
if self.data._body_link_vel_w is not None:

source/isaaclab_newton/isaaclab_newton/physics/newton_manager.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ class NewtonManager(PhysicsManager):
9393
_collision_pipeline = None
9494
_newton_contact_sensors: dict = {} # Maps sensor_key to NewtonContactSensor
9595
_report_contacts: bool = False
96+
_fk_dirty: bool = False
9697

9798
# CUDA graphing
9899
_graph = None
@@ -158,6 +159,7 @@ def reset(cls, soft: bool = False) -> None:
158159
def forward(cls) -> None:
159160
"""Update articulation kinematics without stepping physics."""
160161
eval_fk(cls._model, cls._state_0.joint_q, cls._state_0.joint_qd, cls._state_0, None)
162+
cls._fk_dirty = False
161163

162164
@classmethod
163165
def pre_render(cls) -> None:
@@ -298,6 +300,13 @@ def step(cls) -> None:
298300
else:
299301
logger.warning("Newton deferred CUDA graph capture failed; using eager execution")
300302

303+
# Ensure body_q is up-to-date before collision detection.
304+
# After env resets, joint_q is written but body_q (used by
305+
# broadphase/narrowphase) is stale until FK runs.
306+
if cls._fk_dirty and cls._needs_collision_pipeline:
307+
eval_fk(cls._model, cls._state_0.joint_q, cls._state_0.joint_qd, cls._state_0, None)
308+
cls._fk_dirty = False
309+
301310
# Step simulation (graphed or not; _graph is None when capture is disabled or failed)
302311
if cfg is not None and cfg.use_cuda_graph and cls._graph is not None and "cuda" in device: # type: ignore[union-attr]
303312
wp.capture_launch(cls._graph)
@@ -358,6 +367,7 @@ def clear(cls):
358367
cls._collision_pipeline = None
359368
cls._newton_contact_sensors = {}
360369
cls._report_contacts = False
370+
cls._fk_dirty = False
361371
cls._graph = None
362372
cls._graph_capture_pending = False
363373
cls._newton_stage_path = None
@@ -377,6 +387,16 @@ def add_model_change(cls, change: SolverNotifyFlags) -> None:
377387
"""Register a model change to notify the solver."""
378388
cls._model_changes.add(change)
379389

390+
@classmethod
391+
def invalidate_fk(cls) -> None:
392+
"""Mark forward kinematics as needing recomputation.
393+
394+
Called by articulation write methods that modify ``joint_q`` or root
395+
transforms. The flag is checked in :meth:`step` before collision
396+
detection to ensure ``body_q`` is up-to-date.
397+
"""
398+
cls._fk_dirty = True
399+
380400
@classmethod
381401
def start_simulation(cls) -> None:
382402
"""Start simulation by finalizing model and initializing state.

source/isaaclab_newton/test/assets/test_articulation.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2406,5 +2406,86 @@ def test_write_joint_frictions_to_sim(sim, num_articulations, device, add_ground
24062406
assert torch.allclose(joint_friction_coeff_sim_2, friction_2)
24072407

24082408

2409+
@pytest.mark.parametrize("num_articulations", [2])
2410+
@pytest.mark.parametrize("device", ["cuda:0"])
2411+
@pytest.mark.parametrize("articulation_type", ["anymal"])
2412+
def test_body_q_consistent_after_root_write(num_articulations, device, articulation_type):
2413+
"""Test that body_q is fresh when collide() runs after a root pose write.
2414+
2415+
Regression test for a NaN bug where collide() used stale body_q after env
2416+
reset because eval_fk was not called between write_root_pose and collide.
2417+
2418+
Uses ``use_mujoco_contacts=False`` so the Newton collision pipeline is
2419+
active, then patches ``_simulate_physics_only`` to capture body_q at
2420+
the moment collide() is called and asserts it matches joint_q.
2421+
"""
2422+
from unittest.mock import patch
2423+
2424+
sim_cfg = SimulationCfg(
2425+
dt=1 / 200,
2426+
physics=NewtonCfg(
2427+
solver_cfg=MJWarpSolverCfg(
2428+
njmax=70,
2429+
nconmax=70,
2430+
integrator="implicitfast",
2431+
use_mujoco_contacts=False,
2432+
),
2433+
num_substeps=1,
2434+
use_cuda_graph=False,
2435+
),
2436+
)
2437+
with build_simulation_context(sim_cfg=sim_cfg, device=device) as sim:
2438+
sim._app_control_on_stop_handle = None
2439+
articulation_cfg = generate_articulation_cfg(articulation_type=articulation_type)
2440+
articulation, env_pos = generate_articulation(articulation_cfg, num_articulations, device)
2441+
2442+
sim.reset()
2443+
2444+
model = SimulationManager.get_model()
2445+
jc_starts = model.joint_coord_world_start.numpy()
2446+
body_starts = model.body_world_start.numpy()
2447+
2448+
for _ in range(5):
2449+
sim.step()
2450+
articulation.update(sim.cfg.dt)
2451+
2452+
# Teleport env 0 by 10m (simulating a reset)
2453+
new_pose = wp.to_torch(articulation.data.default_root_pose).clone()
2454+
new_pose[0, 0] += 10.0
2455+
new_pose[0, 1] += 5.0
2456+
articulation.write_root_pose_to_sim_index(
2457+
root_pose=new_pose[0:1],
2458+
env_ids=torch.tensor([0], device=device, dtype=torch.int32),
2459+
)
2460+
2461+
# Patch _simulate_physics_only to capture body_q before collide runs
2462+
captured = {}
2463+
original_simulate = SimulationManager._simulate_physics_only.__func__
2464+
2465+
@classmethod # type: ignore[misc]
2466+
def _patched_simulate(cls):
2467+
if cls._needs_collision_pipeline:
2468+
bq = wp.to_torch(cls._state_0.body_q)
2469+
jq = wp.to_torch(cls._state_0.joint_q)
2470+
b0 = int(body_starts[0])
2471+
jc0 = int(jc_starts[0])
2472+
captured["bq_root"] = bq[b0, :3].clone()
2473+
captured["jq_root"] = jq[jc0 : jc0 + 3].clone()
2474+
original_simulate(cls)
2475+
2476+
with patch.object(SimulationManager, "_simulate_physics_only", _patched_simulate):
2477+
sim.step()
2478+
articulation.update(sim.cfg.dt)
2479+
2480+
assert captured, "collision pipeline did not run — _needs_collision_pipeline is False"
2481+
2482+
bq_root = captured["bq_root"]
2483+
jq_root = captured["jq_root"]
2484+
diff = (jq_root - bq_root).abs().max().item()
2485+
assert diff < 0.01, (
2486+
f"body_q was stale when collide() ran: diff={diff:.4f}m, jq={jq_root.tolist()}, bq={bq_root.tolist()}"
2487+
)
2488+
2489+
24092490
if __name__ == "__main__":
24102491
pytest.main([__file__, "-v", "--maxfail=1"])

0 commit comments

Comments
 (0)