Skip to content

Commit a101fc8

Browse files
lgulichclaude
andauthored
Add WBC-AGILE end-to-end velocity policy for G1 (#489)
## Summary The G1 whole-body controller currently only supports the Homie V2 lower-body policy. This MR adds the WBC-AGILE end-to-end velocity policy as an alternative lower-body controller for the G1 robot, and wires it into the environment so it can be used via a new `g1_wbc_agile_joint` embodiment. ## Changes ### AGILE policy implementation - Add `G1AgilePolicy` ONNX-based end-to-end velocity policy (`g1_agile_policy.py`) - Add `AgileConfig` dataclass and `g1_agile.yaml` joint ordering / model I/O config - Register `"agile"` variant in `wbc_policy_factory.py` ### Model download - Add `docker/setup/download_wbc_models.sh` to download and verify the AGILE ONNX model at Docker build time (SHA256 checked), removing the need for runtime download ### Environment integration - Add `AgileConfig` branch in `G1DecoupledWBCJointAction` so `wbc_version="agile"` is accepted by the action term - Register `G1WBCAgileJointEmbodiment` (`g1_wbc_agile_joint`) — mirrors `g1_wbc_joint` but uses the AGILE lower-body policy ### Tests - Add unit tests and stability tests for `G1AgilePolicy` in `test_g1_agile_policy.py` ## Results Run the AGILE policy with the G1 robot: ```bash /isaac-sim/python.sh isaaclab_arena/evaluation/policy_runner.py \ --policy_type zero_action \ --num_envs 1 \ --num_steps 1000 \ --enable_cameras \ --visualizer kit \ galileo_g1_locomanip_pick_and_place \ --embodiment g1_wbc_agile_joint ``` ## Test plan - [x] `pytest isaaclab_arena_g1/g1_whole_body_controller/wbc_policy/tests/test_g1_agile_policy.py` — unit and stability tests pass - [x] Run `policy_runner.py` with `--embodiment g1_wbc_agile_joint` — environment loads and steps without errors - [x] Run `policy_runner.py` with `--embodiment g1_wbc_joint` — existing Homie V2 path is unaffected --------- Signed-off-by: Lionel Gulich <lgulich@nvidia.com> Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 4c111d1 commit a101fc8

10 files changed

Lines changed: 414 additions & 27 deletions

File tree

isaaclab_arena/embodiments/g1/g1.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,32 @@ def __init__(
136136
self.camera_config._camera_offset = camera_offset
137137

138138

139+
@register_asset
140+
class G1WBCAgileJointEmbodiment(G1EmbodimentBase):
141+
"""Embodiment for the G1 robot with AGILE WBC policy and direct joint upperbody control.
142+
143+
By default uses tiled camera for efficient parallel evaluation.
144+
"""
145+
146+
name = "g1_wbc_agile_joint"
147+
148+
def __init__(
149+
self,
150+
enable_cameras: bool = False,
151+
initial_pose: Pose | None = None,
152+
camera_offset: Pose | None = _DEFAULT_G1_CAMERA_OFFSET,
153+
use_tiled_camera: bool = True,
154+
):
155+
super().__init__(enable_cameras, initial_pose)
156+
self.action_config = G1WBCAgileJointActionCfg()
157+
self.observation_config = G1WBCJointObservationsCfg()
158+
self.observation_config.policy.concatenate_terms = self.concatenate_observation_terms
159+
self.observation_config.wbc.concatenate_terms = self.concatenate_observation_terms
160+
self.event_config = G1WBCJointEventCfg()
161+
self.camera_config._is_tiled_camera = use_tiled_camera
162+
self.camera_config._camera_offset = camera_offset
163+
164+
139165
@configclass
140166
class G1SceneCfg:
141167

@@ -599,6 +625,13 @@ class G1WBCJointActionCfg:
599625
g1_action: ActionTermCfg = G1DecoupledWBCJointActionCfg(asset_name="robot", joint_names=[".*"])
600626

601627

628+
@configclass
629+
class G1WBCAgileJointActionCfg:
630+
"""Action specifications for the MDP, for G1 AGILE WBC action."""
631+
632+
g1_action: ActionTermCfg = G1DecoupledWBCJointActionCfg(asset_name="robot", joint_names=[".*"], wbc_version="agile")
633+
634+
602635
@configclass
603636
class G1WBCPinkActionCfg:
604637
"""Action specifications for the MDP, for G1 WBC action."""

isaaclab_arena_g1/g1_env/mdp/actions/g1_decoupled_wbc_joint_action.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from isaaclab.assets.articulation import Articulation
1616
from isaaclab.managers.action_manager import ActionTerm
1717

18-
from isaaclab_arena_g1.g1_whole_body_controller.wbc_policy.config.configs import HomieV2Config
18+
from isaaclab_arena_g1.g1_whole_body_controller.wbc_policy.config.configs import AgileConfig, HomieV2Config
1919
from isaaclab_arena_g1.g1_whole_body_controller.wbc_policy.policy.policy_constants import (
2020
G1_NUM_JOINTS,
2121
NUM_BASE_HEIGHT_CMD,
@@ -70,6 +70,8 @@ def __init__(self, cfg: G1DecoupledWBCJointActionCfg, env: ManagerBasedEnv):
7070

7171
if self._wbc_version == "homie_v2":
7272
wbc_config = HomieV2Config()
73+
elif self._wbc_version == "agile":
74+
wbc_config = AgileConfig()
7375
else:
7476
raise ValueError(f"Invalid WBC version: {self._wbc_version}")
7577

isaaclab_arena_g1/g1_whole_body_controller/wbc_policy/config/configs.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,3 +71,24 @@ class HomieV2Config(BaseConfig):
7171

7272
policy_config_path: str = "config/g1_homie_v2.yaml"
7373
"""Policy related configuration to specify inputs/outputs dim"""
74+
75+
76+
@dataclass
77+
class AgileConfig(BaseConfig):
78+
"""Config for the WBC-AGILE end-to-end velocity policy for G1."""
79+
80+
# WBC Configuration
81+
wbc_version: Literal["agile"] = "agile"
82+
"""Version of the whole body controller."""
83+
84+
wbc_model_path: str = (
85+
"https://github.com/nvidia-isaac/WBC-AGILE/raw/v1.2/agile/data/policy/velocity_g1/unitree_g1_velocity_e2e.onnx"
86+
)
87+
"""Path to WBC model file (GitHub URL, resolved via retrieve_file_path)"""
88+
89+
# Robot Configuration
90+
enable_waist: bool = False
91+
"""Whether to include waist joints in IK."""
92+
93+
policy_config_path: str = "config/g1_agile.yaml"
94+
"""Policy related configuration to specify inputs/outputs dim"""
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
2+
# All rights reserved.
3+
#
4+
# SPDX-License-Identifier: Apache-2.0
5+
6+
# Joint ordering for ONNX model inputs (29 body joints in agile order).
7+
# Must match the element_names from unitree_g1_velocity_e2e.yaml.
8+
onnx_input_joint_names:
9+
- left_hip_pitch_joint
10+
- right_hip_pitch_joint
11+
- waist_yaw_joint
12+
- left_hip_roll_joint
13+
- right_hip_roll_joint
14+
- waist_roll_joint
15+
- left_hip_yaw_joint
16+
- right_hip_yaw_joint
17+
- waist_pitch_joint
18+
- left_knee_joint
19+
- right_knee_joint
20+
- left_shoulder_pitch_joint
21+
- right_shoulder_pitch_joint
22+
- left_ankle_pitch_joint
23+
- right_ankle_pitch_joint
24+
- left_shoulder_roll_joint
25+
- right_shoulder_roll_joint
26+
- left_ankle_roll_joint
27+
- right_ankle_roll_joint
28+
- left_shoulder_yaw_joint
29+
- right_shoulder_yaw_joint
30+
- left_elbow_joint
31+
- right_elbow_joint
32+
- left_wrist_roll_joint
33+
- right_wrist_roll_joint
34+
- left_wrist_pitch_joint
35+
- right_wrist_pitch_joint
36+
- left_wrist_yaw_joint
37+
- right_wrist_yaw_joint
38+
39+
# Joint ordering for ONNX model outputs (14 controlled joints in agile output order).
40+
# Must match the action_joint_pos element_names from unitree_g1_velocity_e2e.yaml.
41+
controlled_joint_names:
42+
- left_hip_pitch_joint
43+
- right_hip_pitch_joint
44+
- left_hip_roll_joint
45+
- right_hip_roll_joint
46+
- waist_roll_joint
47+
- left_hip_yaw_joint
48+
- right_hip_yaw_joint
49+
- waist_pitch_joint
50+
- left_knee_joint
51+
- right_knee_joint
52+
- left_ankle_pitch_joint
53+
- right_ankle_pitch_joint
54+
- left_ankle_roll_joint
55+
- right_ankle_roll_joint
56+
57+
num_actions: 14
58+
num_body_joints: 29
59+
60+
# Initial commands
61+
cmd_init: [0.0, 0.0, 0.0]
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
# Copyright (c) 2025-2026, The Isaac Lab Arena Project Developers (https://github.com/isaac-sim/IsaacLab-Arena/blob/main/CONTRIBUTORS.md).
2+
# All rights reserved.
3+
#
4+
# SPDX-License-Identifier: Apache-2.0
5+
6+
import numpy as np
7+
import pathlib
8+
import torch
9+
from typing import Any
10+
11+
from isaaclab.utils.assets import retrieve_file_path
12+
13+
from isaaclab_arena_g1.g1_env.robot_model import RobotModel
14+
from isaaclab_arena_g1.g1_whole_body_controller.wbc_policy.policy.base import WBCPolicy
15+
from isaaclab_arena_g1.g1_whole_body_controller.wbc_policy.utils.homie_utils import load_config
16+
from isaaclab_arena_g1.g1_whole_body_controller.wbc_policy.utils.onnx_utils import OnnxInferenceSession
17+
18+
# ONNX feedback state keys and their per-environment shapes (excluding batch dim).
19+
_STATE_KEYS_AND_SHAPES: dict[str, tuple[int, ...]] = {
20+
"last_actions": (14,),
21+
"base_ang_vel_history": (5, 3),
22+
"projected_gravity_history": (5, 3),
23+
"velocity_commands_history": (5, 3),
24+
"controlled_joint_pos_history": (5, 14),
25+
"controlled_joint_vel_history": (5, 14),
26+
"actions_history": (5, 14),
27+
}
28+
29+
30+
class G1AgilePolicy(WBCPolicy):
31+
"""G1 robot policy using the WBC-AGILE end-to-end neural network.
32+
33+
This policy uses a single ONNX model that takes raw sensor inputs and
34+
manages observation history internally via feedback connections. The model
35+
outputs target joint positions along with per-joint Kp/Kd gains for 14
36+
controlled joints (legs + waist_roll + waist_pitch).
37+
"""
38+
39+
def __init__(self, robot_model: RobotModel, config_path: str, model_path: str, num_envs: int = 1):
40+
"""Initialize G1AgilePolicy.
41+
42+
Args:
43+
robot_model: Robot model containing joint ordering info.
44+
config_path: Path to policy YAML configuration file (relative to wbc_policy dir).
45+
model_path: Path to the ONNX model file. Can be an S3/Nucleus URL
46+
(resolved and cached by retrieve_file_path) or a local path.
47+
num_envs: Number of environments.
48+
"""
49+
parent_dir = pathlib.Path(__file__).parent.parent
50+
self.config = load_config(str(parent_dir / config_path))
51+
self.robot_model = robot_model
52+
self.num_envs = num_envs
53+
54+
# Resolve model path via OV asset API (handles S3 download + local caching).
55+
# Same pattern as G1HomiePolicyV2: retrieve_file_path returns a local path
56+
# (absolute for S3 cache, relative for local files), then join with parent_dir.
57+
model_local_path = retrieve_file_path(model_path, force_download=True)
58+
model_full_path = parent_dir / model_local_path
59+
self.session = OnnxInferenceSession(str(model_full_path))
60+
61+
# Build joint index mappings between WBC order and agile ONNX order
62+
self._build_joint_mappings()
63+
64+
# Initialize state
65+
self._init_state()
66+
67+
def _build_joint_mappings(self):
68+
"""Build index mappings between WBC joint order and agile ONNX joint order."""
69+
wbc_order = self.robot_model.wbc_g1_joints_order # {joint_name: wbc_index}
70+
71+
# Mapping for ONNX input: indices into the WBC-ordered observation to select
72+
# the 29 body joints in the order the ONNX model expects.
73+
onnx_input_names = self.config["onnx_input_joint_names"]
74+
self.wbc_to_agile_input = np.array([wbc_order[name] for name in onnx_input_names])
75+
76+
# Mapping for ONNX output: for each of the 14 agile output joints, the
77+
# position in the 15-element lower_body array to write to.
78+
controlled_names = self.config["controlled_joint_names"]
79+
lower_body_indices = self.robot_model.get_joint_group_indices("lower_body")
80+
self.agile_idx_to_lower_body_idx = np.array(
81+
[lower_body_indices.index(wbc_order[name]) for name in controlled_names]
82+
)
83+
84+
self.num_lower_body = len(lower_body_indices)
85+
86+
def _init_state(self):
87+
"""Initialize all per-environment state variables."""
88+
self.observation = None
89+
self.cmd = np.tile(self.config["cmd_init"], (self.num_envs, 1))
90+
91+
# Batched ONNX feedback state: each array has shape (num_envs, ...).
92+
self.state = {
93+
key: np.zeros((self.num_envs, *shape), dtype=np.float32) for key, shape in _STATE_KEYS_AND_SHAPES.items()
94+
}
95+
96+
def reset(self, env_ids: torch.Tensor):
97+
"""Reset the policy state for the given environment ids.
98+
99+
Args:
100+
env_ids: The environment ids to reset.
101+
"""
102+
idx = env_ids.cpu().numpy()
103+
for key, shape in _STATE_KEYS_AND_SHAPES.items():
104+
self.state[key][idx] = np.zeros(shape, dtype=np.float32)
105+
self.cmd[idx] = self.config["cmd_init"]
106+
107+
def set_observation(self, observation: dict[str, Any]):
108+
"""Store the current observation for the next get_action call.
109+
110+
Args:
111+
observation: Dictionary containing robot state from prepare_observations().
112+
"""
113+
self.observation = observation
114+
115+
def set_goal(self, goal: dict[str, Any]):
116+
"""Set the goal for the policy.
117+
118+
Args:
119+
goal: Dictionary containing goals. Supported keys:
120+
- "navigate_cmd": velocity command array of shape (num_envs, 3)
121+
"""
122+
if "navigate_cmd" in goal:
123+
self.cmd = goal["navigate_cmd"]
124+
125+
def get_action(self, time: float | None = None) -> dict[str, Any]:
126+
"""Compute and return the next action based on current observation.
127+
128+
Returns:
129+
Dictionary with "body_action" key containing joint position targets
130+
of shape (num_envs, num_lower_body_joints).
131+
"""
132+
if self.observation is None:
133+
raise ValueError("No observation set. Call set_observation() first.")
134+
135+
obs = self.observation
136+
137+
# Build batched ONNX inputs (all envs at once)
138+
ort_inputs = {
139+
"root_link_quat_w": obs["floating_base_pose"][:, 3:7].astype(np.float32),
140+
"root_ang_vel_b": obs["floating_base_vel"][:, 3:6].astype(np.float32),
141+
"velocity_commands": self.cmd.astype(np.float32),
142+
"joint_pos": obs["q"][:, self.wbc_to_agile_input].astype(np.float32),
143+
"joint_vel": obs["dq"][:, self.wbc_to_agile_input].astype(np.float32),
144+
**{key: self.state[key] for key in _STATE_KEYS_AND_SHAPES},
145+
}
146+
147+
# Run batched inference
148+
result = self.session.run(ort_inputs)
149+
150+
# Update feedback state for next step
151+
for key in _STATE_KEYS_AND_SHAPES:
152+
self.state[key] = result[f"{key}_out"]
153+
154+
# Map 14 agile output joints to the 15-joint lower_body array.
155+
# waist_yaw (not controlled by agile) stays at 0.0.
156+
body_action = np.zeros((self.num_envs, self.num_lower_body), dtype=np.float32)
157+
body_action[:, self.agile_idx_to_lower_body_idx] = result["action_joint_pos"]
158+
159+
return {"body_action": body_action}

isaaclab_arena_g1/g1_whole_body_controller/wbc_policy/policy/g1_homie_policy.py

Lines changed: 6 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,13 @@
77
import numpy as np
88
import pathlib
99
import torch
10-
from collections.abc import Callable
1110
from typing import Any
1211

13-
import onnxruntime as ort
1412
from isaaclab.utils.assets import retrieve_file_path
1513

1614
from isaaclab_arena_g1.g1_whole_body_controller.wbc_policy.policy.base import WBCPolicy
1715
from isaaclab_arena_g1.g1_whole_body_controller.wbc_policy.utils.homie_utils import get_gravity_orientation, load_config
16+
from isaaclab_arena_g1.g1_whole_body_controller.wbc_policy.utils.onnx_utils import OnnxInferenceSession
1817

1918

2019
class G1HomiePolicyV2(WBCPolicy):
@@ -38,8 +37,8 @@ def __init__(self, robot_model, config_path: str, model_path: str, num_envs: int
3837
model_path_1_local = retrieve_file_path(model_path_1, force_download=True)
3938
model_path_2_local = retrieve_file_path(model_path_2, force_download=True)
4039

41-
self.policy_1 = self.load_onnx_policy(str(parent_dir / model_path_1_local))
42-
self.policy_2 = self.load_onnx_policy(str(parent_dir / model_path_2_local))
40+
self.policy_1 = OnnxInferenceSession(str(parent_dir / model_path_1_local))
41+
self.policy_2 = OnnxInferenceSession(str(parent_dir / model_path_2_local))
4342

4443
# Initialize observation history buffer
4544
self.observation = None
@@ -81,26 +80,6 @@ def reset(self, env_ids: torch.Tensor):
8180
self.pitch_cmd = self.config["rpy_cmd"][1]
8281
self.yaw_cmd = self.config["rpy_cmd"][2]
8382

84-
def load_onnx_policy(self, model_path: str) -> Callable[[torch.Tensor], torch.Tensor]:
85-
"""Load the ONNX policy from the model path.
86-
87-
Args:
88-
model_path: The path to the ONNX policy model
89-
90-
Returns:
91-
The ONNX policy model runnable for forward pass.
92-
"""
93-
model = ort.InferenceSession(model_path)
94-
95-
def run_inference(input_tensor):
96-
ort_inputs = {model.get_inputs()[0].name: input_tensor.cpu().numpy()}
97-
ort_outs = model.run(None, ort_inputs)
98-
return torch.tensor(ort_outs[0], device="cpu")
99-
100-
print(f"Successfully loaded ONNX policy from {model_path}")
101-
102-
return run_inference
103-
10483
def compute_observation(self, observation: dict[str, Any]) -> tuple[np.ndarray, int]:
10584
"""Compute the observation vector from current state"""
10685
# Get body joint indices (excluding waist roll and pitch)
@@ -245,7 +224,9 @@ def get_action(self) -> dict[str, Any]:
245224
# Use walking policy for movement commands
246225
policy = self.policy_2
247226

248-
self.action = policy(self.obs_tensor).detach().numpy()
227+
ort_inputs = {policy.input_names[0]: self.obs_tensor.cpu().numpy()}
228+
result = policy.run(ort_inputs)
229+
self.action = list(result.values())[0]
249230

250231
# Transform action to target_dof_pos
251232
assert self.use_policy_action

0 commit comments

Comments
 (0)