diff --git a/source/isaaclab_contrib/isaaclab_contrib/rl/rlinf/extension.py b/source/isaaclab_contrib/isaaclab_contrib/rl/rlinf/extension.py index 89368c532109..2c3b70428b13 100644 --- a/source/isaaclab_contrib/isaaclab_contrib/rl/rlinf/extension.py +++ b/source/isaaclab_contrib/isaaclab_contrib/rl/rlinf/extension.py @@ -468,6 +468,26 @@ def make_env_isaaclab() -> tuple: isaac_env_cfg.scene.num_envs = self.cfg.init_params.num_envs env = gym.make(self.isaaclab_env_id, cfg=isaac_env_cfg, render_mode="rgb_array").unwrapped + + _original_reset = env.reset + + import omni.kit.app + + _app = omni.kit.app.get_app() + + def _patched_reset(*args, **kwargs): + obs, extras = _original_reset(*args, **kwargs) + env.sim.set_setting("/app/player/playSimulations", False) + _app.update() + env.sim.set_setting("/app/player/playSimulations", True) + for sensor in env.scene.sensors.values(): + sensor.update(dt=0.0, force_recompute=True) + obs = env.observation_manager.compute(update_history=True) + env.obs_buf = obs + return obs, extras + + env.reset = _patched_reset + return env, sim_app return make_env_isaaclab @@ -481,7 +501,6 @@ def _wrap_obs(self, obs: dict) -> dict: - ``"extra_view_images"``: ``(B, N, H, W, C)`` — stacked extra cameras. - ``"states"``: ``(B, D)`` — concatenated state vector. - ``"task_descriptions"``: ``list[str]`` — task descriptions. - Config is read from the YAML file via :func:`_get_isaaclab_cfg`. Args: diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/__init__.py new file mode 100644 index 000000000000..5c1f1f496b15 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/__init__.py @@ -0,0 +1,35 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +"""Configuration for Install Trocar task with G129 + Dex3 robot. + +This module registers the Install Trocar task in IsaacLab's gymnasium registry, +allowing it to be discovered and used through IsaacLab's standard task interfaces. +""" + +import gymnasium as gym + +## +# Register Gym environments. +## + + +gym.register( + id="Isaac-Assemble-Trocar-G129-Dex3-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + kwargs={ + "env_cfg_entry_point": f"{__name__}.g129_dex3_env_cfg:G1AssembleTrocarEnvCfg", + }, + disable_env_checker=True, +) + +gym.register( + id="Isaac-Assemble-Trocar-G129-Dex3-Eval-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + kwargs={ + "env_cfg_entry_point": f"{__name__}.g129_dex3_env_cfg:G1AssembleTrocarEvalEnvCfg", + }, + disable_env_checker=True, +) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/config/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/config/__init__.py new file mode 100644 index 000000000000..43eb229b697d --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/config/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +from .camera_config import CameraBaseCfg, CameraPresets +from .robot_config import G1RobotPresets + +__all__ = ["G1RobotPresets", "CameraBaseCfg", "CameraPresets"] diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/config/camera_config.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/config/camera_config.py new file mode 100644 index 000000000000..a93379945944 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/config/camera_config.py @@ -0,0 +1,131 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +""" +public camera configuration +include the basic configuration for different types of cameras, support scene-specific parameter customization +""" + +from collections.abc import Sequence + +import isaaclab.sim as sim_utils +from isaaclab.sensors import CameraCfg, TiledCameraCfg +from isaaclab.utils import configclass + + +@configclass +class CameraBaseCfg: + """camera base configuration class + + provide the default configuration for different types of cameras, support scene-specific parameter customization + """ + + @classmethod + def get_camera_config( + cls, + prim_path: str = "/World/envs/env_.*/Robot/d435_link/front_cam", + update_period: float = 0.02, + height: int = 480, + width: int = 640, + focal_length: float = 7.6, + focus_distance: float = 400.0, + horizontal_aperture: float = 20.0, + clipping_range: tuple[float, float] = (0.1, 1.0e5), + pos_offset: tuple[float, float, float] = (0.0, 0.0, 0.0), + rot_offset: tuple[float, float, float, float] = (0.5, -0.5, 0.5, -0.5), + data_types: Sequence[str] | None = None, + ) -> CameraCfg: + """Get a pinhole camera configuration. 
+ + Args: + prim_path: the path of the camera in the scene + update_period: update period (seconds) + height: image height (pixels) + width: image width (pixels) + focal_length: focal length + focus_distance: focus distance + horizontal_aperture: horizontal aperture + clipping_range: clipping range (near clipping plane, far clipping plane) + pos_offset: position offset (x, y, z) + rot_offset: rotation offset quaternion + data_types: data type list + + Returns: + CameraCfg: camera configuration + """ + if data_types is None: + data_types = ("rgb",) + + return TiledCameraCfg( + prim_path=prim_path, + update_period=update_period, + height=height, + width=width, + data_types=list(data_types), + spawn=sim_utils.PinholeCameraCfg( + focal_length=focal_length, + focus_distance=focus_distance, + horizontal_aperture=horizontal_aperture, + clipping_range=clipping_range, + ), + offset=TiledCameraCfg.OffsetCfg(pos=pos_offset, rot=rot_offset, convention="ros"), + ) + + +@configclass +class CameraPresets: + """camera preset configuration collection + + include the common camera configuration preset for different scenes + """ + + @classmethod + def g1_front_camera(cls, **overrides) -> CameraCfg: + params = { + "height": 224, + "width": 224, + "focal_length": 10.5, + "horizontal_aperture": 14.25, # Match original vertical FOV after crop + } + params.update(overrides) + return CameraBaseCfg.get_camera_config(**params) + + @classmethod + def left_dex3_wrist_camera(cls, **overrides) -> CameraCfg: + """left wrist camera configuration""" + params = { + "prim_path": "/World/envs/env_.*/Robot/left_hand_camera_base_link/left_wrist_camera", + "height": 224, + "width": 224, + "update_period": 0.02, + "data_types": ["rgb"], + "focal_length": 12.0, + "focus_distance": 400.0, + "horizontal_aperture": 14.25, # Match original vertical FOV after crop + "clipping_range": (0.1, 1.0e5), + "pos_offset": (-0.04012, -0.07441, 0.15711), + "rot_offset": (0.00539, 0.86024, 0.0424, 0.50809), + } + 
params.update(overrides) + return CameraBaseCfg.get_camera_config(**params) + + @classmethod + def right_dex3_wrist_camera(cls, **overrides) -> CameraCfg: + """right wrist camera configuration""" + params = { + "prim_path": "/World/envs/env_.*/Robot/right_hand_camera_base_link/right_wrist_camera", + "height": 224, + "width": 224, + "update_period": 0.02, + "data_types": ["rgb"], + "focal_length": 12.0, + "focus_distance": 400.0, + "horizontal_aperture": 14.25, # Match original vertical FOV after crop + "clipping_range": (0.1, 1.0e5), + "pos_offset": (-0.04012, 0.07441, 0.15711), + "rot_offset": (0.00539, 0.86024, 0.0424, 0.50809), + } + params.update(overrides) + return CameraBaseCfg.get_camera_config(**params) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/config/gr00t_config.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/config/gr00t_config.py new file mode 100644 index 000000000000..540b0edbc3a0 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/config/gr00t_config.py @@ -0,0 +1,144 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""GR00T data configuration for IsaacLab tasks. + +This module defines customizable GR00T data configurations for different +embodiments. Users can create their own data config classes by subclassing +BaseDataConfig or copying/modifying the examples here. 
+ +Example usage in run.sh: + export RLINF_DATA_CONFIG="policy.gr00t_config" + export RLINF_DATA_CONFIG_CLASS="policy.gr00t_config:IsaacLabDataConfig" +""" + +from gr00t.data.dataset import ModalityConfig +from gr00t.data.transform.base import ComposedModalityTransform +from gr00t.data.transform.concat import ConcatTransform +from gr00t.data.transform.state_action import StateActionSinCosTransform, StateActionToTensor, StateActionTransform +from gr00t.data.transform.video import VideoColorJitter, VideoToNumpy, VideoToTensor +from gr00t.experiment.data_config import DATA_CONFIG_MAP, BaseDataConfig +from gr00t.model.transforms import GR00TTransform + + +class IsaacLabDataConfig(BaseDataConfig): + """Generic GR00T data config for IsaacLab tasks with G1 + Dex3.""" + + # Video modality keys (from gr00t_mapping.video in RLINF_OBS_MAP_JSON) + video_keys = [ + "video.left_wrist_view", + "video.right_wrist_view", + "video.room_view", + ] + + # State modality keys (from gr00t_mapping.state in RLINF_OBS_MAP_JSON) + state_keys = [ + "state.left_arm", + "state.right_arm", + "state.left_hand", + "state.right_hand", + ] + + # Action modality keys (output from GR00T model) + action_keys = [ + "action.left_arm", + "action.right_arm", + "action.left_hand", + "action.right_hand", + ] + + # Language annotation key + language_keys = ["annotation.human.task_description"] + + # Observation and action indices + observation_indices = [0] + action_indices = list(range(16)) + + def modality_config(self) -> dict[str, ModalityConfig]: + """Define modality configurations for video, state, action, and language.""" + video_modality = ModalityConfig( + delta_indices=self.observation_indices, + modality_keys=self.video_keys, + ) + + state_modality = ModalityConfig( + delta_indices=self.observation_indices, + modality_keys=self.state_keys, + ) + + action_modality = ModalityConfig( + delta_indices=self.action_indices, + modality_keys=self.action_keys, + ) + + language_modality = ModalityConfig( + 
delta_indices=self.observation_indices, + modality_keys=self.language_keys, + ) + + return { + "video": video_modality, + "state": state_modality, + "action": action_modality, + "language": language_modality, + } + + def transform(self): + """Define the transform pipeline for processing observations and actions.""" + transforms = [ + # Video transforms + VideoToTensor(apply_to=self.video_keys), + # Disabled: camera already outputs 224×224 via TiledCameraCfg. + # To avoid VideoToTensor size-check errors, either: + # 1. Disable input size validation in VideoToTensor, OR + # 2. Set modality meta height/width to 224 to match actual input. + # Re-enable VideoCrop/VideoResize if camera resolution changes. + # VideoCrop(apply_to=self.video_keys, scale=0.95), + # VideoResize( + # apply_to=self.video_keys, + # height=224, + # width=224, + # interpolation="linear", + # ), + VideoColorJitter( + apply_to=self.video_keys, + brightness=0.3, + contrast=0.4, + saturation=0.5, + hue=0.08, + ), + VideoToNumpy(apply_to=self.video_keys), + # State transforms + StateActionToTensor(apply_to=self.state_keys), + StateActionSinCosTransform(apply_to=self.state_keys), + # Action transforms + StateActionToTensor(apply_to=self.action_keys), + StateActionTransform( + apply_to=self.action_keys, + normalization_modes={key: "min_max" for key in self.action_keys}, + ), + # Concat transforms + ConcatTransform( + video_concat_order=self.video_keys, + state_concat_order=self.state_keys, + action_concat_order=self.action_keys, + ), + # Model-specific transform + GR00TTransform( + state_horizon=len(self.observation_indices), + action_horizon=len(self.action_indices), + max_state_dim=64, + max_action_dim=32, + ), + ] + return ComposedModalityTransform(transforms=transforms) + + +# -------------------------------------------------------------------------- +# Register data configs into GR00T's DATA_CONFIG_MAP +# -------------------------------------------------------------------------- + +# This allows 
load_data_config("policy.gr00t_config:IsaacLabDataConfig") to work +DATA_CONFIG_MAP["isaaclab_g1_dex3"] = IsaacLabDataConfig() diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/config/isaaclab_ppo_gr00t_assemble_trocar.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/config/isaaclab_ppo_gr00t_assemble_trocar.yaml new file mode 100644 index 000000000000..b130a12a8a53 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/config/isaaclab_ppo_gr00t_assemble_trocar.yaml @@ -0,0 +1,298 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +defaults: + - override hydra/job_logging: stdout + +hydra: + run: + dir: . + output_subdir: null + +cluster: + num_nodes: 1 + component_placement: + actor,env,rollout: all + +runner: + task_type: embodied + logger: + log_path: "../results" + project_name: rlinf + experiment_name: "test_gr00t" + logger_backends: ["tensorboard"] # wandb, swanlab + + max_epochs: 1000 + max_steps: -1 + + only_eval: False + eval_policy_path: null # Optional: .pt file or None, if None, will use the checkpoint in rollout.model.model_path + val_check_interval: -1 + save_interval: 2 + seq_length: 4096 + max_prompt_length: 30 + + resume_dir: null + +algorithm: + normalize_advantages: True + kl_penalty: kl # how to estimate kl divergence: kl or kl_penalty + group_size: 1 + reward_coef: 1.0 + rollout_epoch: 2 + eval_rollout_epoch: 1 # set eval_rollout_epoch > 0 when enable runner.only_eval or runner.val_check_interval > 0 + + reward_type: chunk_level + logprob_type: chunk_level + entropy_type: chunk_level + + update_epoch: 4 + adv_type: gae + loss_type: actor_critic + loss_agg_func: "token-mean" + kl_beta: 0.0 + entropy_bonus: 0 + clip_ratio_high: 0.2 + clip_ratio_low: 0.2 + clip_ratio_c: 3.0 + 
value_clip: 0.2 + huber_delta: 10.0 + + gamma: 0.99 + gae_lambda: 0.95 + + filter_rewards: False + rewards_lower_bound: 0.1 + rewards_upper_bound: 0.9 + # params for generation + sampling_params: + do_sample: True + temperature_train: 1.0 + temperature_eval: 0.6 + top_k: 50 + top_p: 1.0 + repetition_penalty: 1.0 + add_BOS: False + + # length argument for autoregressive sampling + # max length means max amount of tokens to generate + length_params: + max_new_token: null + max_length: 1024 + min_length: 1 + +# --------------------------------------------------------------------------- +# Environment +# --------------------------------------------------------------------------- +env: + group_name: "EnvGroup" + channel: + name: "env_buffer_list" + queue_name: "obs_buffer" + queue_size: 0 + enable_offload: False + + train: + env_type: isaaclab + total_num_envs: 4 + auto_reset: False + ignore_terminations: False + use_rel_reward: True + seed: 0 + group_size: 1 + reward_coef: 1.0 + use_fixed_reset_state_ids: True + max_steps_per_rollout_epoch: 256 + max_episode_steps: 256 + video_cfg: + save_video: False + info_on_video: True + video_base_dir: ${runner.logger.log_path}/video/train + init_params: + id: "Isaac-Assemble-Trocar-G129-Dex3-v0" + num_envs: null + max_episode_steps: ${env.train.max_episode_steps} + task_description: "assemble trocar from tray" + + # ======================================================================== + # IsaacLab -> RLinf -> GR00T observation/action mapping configuration + # This section defines how IsaacLab observations are converted to GR00T format + # ======================================================================== + isaaclab: &isaaclab_config # YAML anchor for reuse in eval + # Task description for language conditioning + task_description: "assemble trocar from tray" + + # --- IsaacLab -> RLinf observation mapping --- + # main_images: single camera key for main view + main_images: "front_camera" + # extra_view_images: list of 
camera keys to stack as (B, N, H, W, C) + extra_view_images: + - "left_wrist_camera" + - "right_wrist_camera" + # states: list of state specs with optional slicing + # Each entry can be a string (use full tensor) or dict with "key" and "slice" + states: + - key: "robot_joint_state" + slice: [15, 29] # G129 shoulder joints + - key: "robot_dex3_joint_state" + # slice: null # Use full tensor + + # --- RLinf -> GR00T format conversion --- + gr00t_mapping: + video: + main_images: "video.room_view" + extra_view_images: + - "video.left_wrist_view" + - "video.right_wrist_view" + state: + # Slice concatenated states into GR00T state keys + # Total states: 14 (shoulder) + 14 (dex3) = 28 dims + - gr00t_key: "state.left_arm" + slice: [0, 7] + - gr00t_key: "state.right_arm" + slice: [7, 14] + - gr00t_key: "state.left_hand" + slice: [14, 21] + - gr00t_key: "state.right_hand" + slice: [21, 28] + + # --- GR00T -> IsaacLab action conversion --- + action_mapping: + prefix_pad: 15 # Pad zeros at front for G129 body joints (not controlled) + suffix_pad: 0 + + # --- GR00T model configuration (single source of truth) --- + # actor.model.embodiment_tag and obs_converter_type reference these values via ${} + obs_converter_type: "dex3" + embodiment_tag: "new_embodiment" + embodiment_tag_id: 31 + data_config_class: "gr00t_config:IsaacLabDataConfig" + + eval: + env_type: isaaclab + total_num_envs: 4 + auto_reset: True + ignore_terminations: True + use_rel_reward: True + seed: 0 + group_size: 1 + reward_coef: 1.0 + use_fixed_reset_state_ids: True + max_steps_per_rollout_epoch: 256 + max_episode_steps: 256 + video_cfg: + save_video: True + info_on_video: True + video_base_dir: ${runner.logger.log_path}/video/eval + init_params: + id: "Isaac-Assemble-Trocar-G129-Dex3-Eval-v0" + num_envs: null + max_episode_steps: ${env.eval.max_episode_steps} + task_description: "install trocar from box" + # Reuse IsaacLab config from train section via YAML anchor + isaaclab: *isaaclab_config + +# 
--------------------------------------------------------------------------- +# Rollout +# --------------------------------------------------------------------------- +rollout: + group_name: "RolloutGroup" + channel: + name: ${env.channel.name} + queue_name: "action_buffer" + queue_size: 0 + mode: "colocate" + backend: "huggingface" + enable_offload: True + pipeline_stage_num: 1 + + model: + model_path: "/mnt/ckpt/g1_install_trocar_sim_box_v3_60_train_bs32_1_gpus_cos_30k_tune_visual/" + precision: ${actor.model.precision} + obs_converter_type: ${env.train.isaaclab.obs_converter_type} + embodiment_tag: ${env.train.isaaclab.embodiment_tag} + +# --------------------------------------------------------------------------- +# Actor +# --------------------------------------------------------------------------- +actor: + group_name: "ActorGroup" + channel: + name: ${env.channel.name} + queue_name: "replay_buffer" + queue_size: 0 + training_backend: "fsdp" + micro_batch_size: 2 + global_batch_size: 4 + seed: 1234 + enable_offload: False + + model: + model_type: "gr00t" + model_path: "/mnt/ckpt/g1_install_trocar_sim_box_v3_60_train_bs32_1_gpus_cos_30k_tune_visual/" + precision: "bf16" + trust_remote_code: True + is_lora: false + action_dim: 28 + num_action_chunks: 1 + denoising_steps: 4 + policy_setup: "widowx_bridge" + obs_converter_type: ${env.train.isaaclab.obs_converter_type} + embodiment_tag: ${env.train.isaaclab.embodiment_tag} + add_value_head: True + rl_head_config: + joint_logprob: False + noise_method: "flow_sde" + ignore_last: False + safe_get_logprob: False + noise_anneal: False + noise_params: [0.7, 0.3, 400] + noise_level: 0.3 + add_value_head: ${actor.model.add_value_head} + chunk_critic_input: False + detach_critic_input: True + disable_dropout: True + use_vlm_value: False + value_vlm_mode: "mean_token" + padding_value: 850 + + optim: + lr: 5e-6 + value_lr: 1e-4 + adam_beta1: 0.9 + adam_beta2: 0.95 + adam_eps: 1.0e-08 + clip_grad: 1.0 + weight_decay: 0.01 + 
critic_warmup_steps: 0 + + fsdp_config: + strategy: "fsdp" + sharding_strategy: "full_shard" + gradient_checkpointing: False + cpu_offload: False + offload_pin_memory: False + reshard_after_forward: True + enable_gradient_accumulation: True + forward_prefetch: False + limit_all_gathers: False + backward_prefetch: null + use_orig_params: False + use_liger_kernel: False + fsdp_size: -1 + mixed_precision: + param_dtype: ${actor.model.precision} + reduce_dtype: ${actor.model.precision} + buffer_dtype: ${actor.model.precision} + amp: + enabled: False + precision: "bf16" + use_grad_scaler: False + +reward: + use_reward_model: False + +critic: + use_critic_model: False diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/config/robot_config.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/config/robot_config.py new file mode 100644 index 000000000000..dd820a675980 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/config/robot_config.py @@ -0,0 +1,310 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Robot configuration for the `install_trocar` task. + +This file is intentionally **minimal**: +- Supported robot: **Unitree G1 (29 DOF body)** +- Supported hands: **Dex3** + +The only public entry point expected by the task is +`G1RobotPresets.g1_29dof_dex3_base_fix(...)`. +""" + +import numpy as np + +import isaaclab.sim as sim_utils +from isaaclab.actuators import IdealPDActuatorCfg, ImplicitActuatorCfg +from isaaclab.assets import ArticulationCfg +from isaaclab.utils import configclass + +HEALTHCARE_S3 = "https://omniverse-content-production.s3-us-west-2.amazonaws.com/Assets/Isaac/Healthcare/0.5.0/132c82d" + +# Default joint positions for the supported setup (G1 29DOF + Dex3). 
+DEFAULT_JOINT_POS: dict[str, float] = { + # legs + "left_hip_pitch_joint": 0.0, + "left_hip_roll_joint": 0.0, + "left_hip_yaw_joint": 0.0, + "left_knee_joint": 0.0, + "left_ankle_pitch_joint": 0.0, + "left_ankle_roll_joint": 0.0, + "right_hip_pitch_joint": 0.0, + "right_hip_roll_joint": 0.0, + "right_hip_yaw_joint": 0.0, + "right_knee_joint": 0.0, + "right_ankle_pitch_joint": 0.0, + "right_ankle_roll_joint": 0.0, + # waist + "waist_yaw_joint": 0.0, + "waist_roll_joint": 0.0, + "waist_pitch_joint": 0.0, + # arms + "left_shoulder_pitch_joint": -0.754599, + "left_shoulder_roll_joint": 0.550010, + "left_shoulder_yaw_joint": -0.399298, + "left_elbow_joint": 0.278886, + "left_wrist_roll_joint": 0.320559, + "left_wrist_pitch_joint": -0.203525, + "left_wrist_yaw_joint": -0.387435, + "right_shoulder_pitch_joint": -0.340858, + "right_shoulder_roll_joint": -0.186152, + "right_shoulder_yaw_joint": 0.015023, + "right_elbow_joint": -0.777159, + "right_wrist_roll_joint": 0.019805, + "right_wrist_pitch_joint": 1.182285, + "right_wrist_yaw_joint": -0.022848, + # dex3 hands (left) + "left_hand_index_0_joint": -60.0 * np.pi / 180.0, + "left_hand_middle_0_joint": -60.0 * np.pi / 180.0, + "left_hand_thumb_0_joint": 0.0, + "left_hand_index_1_joint": -40.0 * np.pi / 180.0, + "left_hand_middle_1_joint": -40.0 * np.pi / 180.0, + "left_hand_thumb_1_joint": 0.0, + "left_hand_thumb_2_joint": 0.0, + # dexterous hand joint - right hand + "right_hand_index_0_joint": 60.0 * np.pi / 180.0, + "right_hand_middle_0_joint": 60.0 * np.pi / 180.0, + "right_hand_thumb_0_joint": 0.0, + "right_hand_index_1_joint": 40.0 * np.pi / 180.0, + "right_hand_middle_1_joint": 40.0 * np.pi / 180.0, + "right_hand_thumb_1_joint": 0.0, + "right_hand_thumb_2_joint": 0.0, +} + + +G129_CFG_WITH_DEX3_BASE_FIX = ArticulationCfg( + spawn=sim_utils.UsdFileCfg( + usd_path=f"{HEALTHCARE_S3}/Robots/UnitreeG1/g1_29dof_with_dex3_base_fix/g1_29dof_with_dex3_base_fix.usd", + activate_contact_sensors=True, + 
rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + retain_accelerations=False, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=1000.0, + max_depenetration_velocity=1.0, + solver_position_iteration_count=4, + solver_velocity_iteration_count=0, + ), + articulation_props=sim_utils.ArticulationRootPropertiesCfg( + enabled_self_collisions=False, solver_position_iteration_count=4, solver_velocity_iteration_count=0 + ), + ), + prim_path="/World/envs/env_.*/Robot", + init_state=ArticulationCfg.InitialStateCfg( + pos=(0.0, 0.0, 0.75), + joint_pos={ + # legs joints + "left_hip_yaw_joint": 0.0, + "left_hip_roll_joint": 0.0, + "left_hip_pitch_joint": -0.05, + "left_knee_joint": 0.2, + "left_ankle_pitch_joint": -0.15, + "left_ankle_roll_joint": 0.0, + "right_hip_yaw_joint": 0.0, + "right_hip_roll_joint": 0.0, + "right_hip_pitch_joint": -0.05, + "right_knee_joint": 0.2, + "right_ankle_pitch_joint": -0.15, + "right_ankle_roll_joint": 0.0, + # waist joints + "waist_yaw_joint": 0.0, + "waist_roll_joint": 0.0, + "waist_pitch_joint": 0.0, + "left_shoulder_pitch_joint": 0.0, + "left_shoulder_roll_joint": 0.0, + "left_shoulder_yaw_joint": 0.0, + "left_elbow_joint": -0.3, + "left_wrist_roll_joint": 0.0, + "left_wrist_pitch_joint": 0.0, + "left_wrist_yaw_joint": 0.0, + "right_shoulder_pitch_joint": 0.0, + "right_shoulder_roll_joint": 0.0, + "right_shoulder_yaw_joint": 0.0, + "right_elbow_joint": -0.3, + "right_wrist_roll_joint": 0.0, + "right_wrist_pitch_joint": 0.0, + "right_wrist_yaw_joint": 0.0, + # fingers joints + "left_hand_index_0_joint": 0.0, + "left_hand_middle_0_joint": 0.0, + "left_hand_thumb_0_joint": 0.0, + "left_hand_index_1_joint": 0.0, + "left_hand_middle_1_joint": 0.0, + "left_hand_thumb_1_joint": 0.0, + "left_hand_thumb_2_joint": 0.0, + "right_hand_index_0_joint": 0.0, + "right_hand_middle_0_joint": 0.0, + "right_hand_thumb_0_joint": 0.0, + "right_hand_index_1_joint": 0.0, + 
"right_hand_middle_1_joint": 0.0, + "right_hand_thumb_1_joint": 0.0, + "right_hand_thumb_2_joint": 0.0, + }, + joint_vel={".*": 0.0}, + ), + soft_joint_pos_limit_factor=0.9, + actuators={ + "legs": IdealPDActuatorCfg( + joint_names_expr=[ + ".*_hip_yaw_joint", + ".*_hip_roll_joint", + ".*_hip_pitch_joint", + ".*_knee_joint", + ], + effort_limit={ + ".*_hip_yaw_joint": 88.0, + ".*_hip_roll_joint": 88.0, + ".*_hip_pitch_joint": 88.0, + ".*_knee_joint": 139.0, + }, + velocity_limit={ + ".*_hip_yaw_joint": 32.0, + ".*_hip_roll_joint": 32.0, + ".*_hip_pitch_joint": 32.0, + ".*_knee_joint": 20.0, + }, + stiffness={ + ".*_hip_yaw_joint": 150.0, + ".*_hip_roll_joint": 150.0, + ".*_hip_pitch_joint": 150.0, + ".*_knee_joint": 300.0, + }, + damping={ + ".*_hip_yaw_joint": 2.0, + ".*_hip_roll_joint": 2.0, + ".*_hip_pitch_joint": 2.0, + ".*_knee_joint": 4.0, + }, + armature={ + ".*_hip_.*": 0.03, + ".*_knee_joint": 0.03, + }, + ), + "feet": IdealPDActuatorCfg( + joint_names_expr=[".*_ankle_pitch_joint", ".*_ankle_roll_joint"], + stiffness={ + ".*_ankle_pitch_joint": 40.0, + ".*_ankle_roll_joint": 40.0, + }, + damping={ + ".*_ankle_pitch_joint": 2, + ".*_ankle_roll_joint": 2, + }, + effort_limit={ + ".*_ankle_pitch_joint": 50.0, + ".*_ankle_roll_joint": 50.0, + }, + velocity_limit={ + ".*_ankle_pitch_joint": 37.0, + ".*_ankle_roll_joint": 37.0, + }, + armature=0.03, + friction=0.03, + ), + "waist": ImplicitActuatorCfg( + joint_names_expr=["waist_yaw_joint", "waist_roll_joint", "waist_pitch_joint"], + effort_limit=1000.0, # set a large torque limit + velocity_limit=0.0, # set the velocity limit to 0 + stiffness={"waist_yaw_joint": 10000.0, "waist_roll_joint": 10000.0, "waist_pitch_joint": 10000.0}, + damping={"waist_yaw_joint": 10000.0, "waist_roll_joint": 10000.0, "waist_pitch_joint": 10000.0}, + armature=None, + ), + "arms": IdealPDActuatorCfg( + joint_names_expr=[ + ".*_shoulder_pitch_joint", + ".*_shoulder_roll_joint", + ".*_shoulder_yaw_joint", + ".*_elbow_joint", + 
".*_wrist_.*_joint", + ], + effort_limit={ + ".*_shoulder_pitch_joint": 25.0, + ".*_shoulder_roll_joint": 25.0, + ".*_shoulder_yaw_joint": 25.0, + ".*_elbow_joint": 25.0, + ".*_wrist_roll_joint": 25.0, + ".*_wrist_pitch_joint": 5.0, + ".*_wrist_yaw_joint": 5.0, + }, + velocity_limit={ + ".*_shoulder_pitch_joint": 37.0, + ".*_shoulder_roll_joint": 37.0, + ".*_shoulder_yaw_joint": 37.0, + ".*_elbow_joint": 37.0, + ".*_wrist_roll_joint": 37.0, + ".*_wrist_pitch_joint": 22.0, + ".*_wrist_yaw_joint": 22.0, + }, + stiffness={ + ".*_shoulder_pitch_joint": 100.0, + ".*_shoulder_roll_joint": 100.0, + ".*_shoulder_yaw_joint": 40.0, + ".*_elbow_joint": 40.0, + ".*_wrist_.*_joint": 20.0, + }, + damping={ + ".*_shoulder_pitch_joint": 15.0, + ".*_shoulder_roll_joint": 15.0, + ".*_shoulder_yaw_joint": 8.0, + ".*_elbow_joint": 8.0, + ".*_wrist_.*_joint": 4.0, + }, + armature={".*_shoulder_.*": 0.03, ".*_elbow_.*": 0.03, ".*_wrist_.*_joint": 0.03}, + friction=0.03, + ), + # NOTE(peterd, 9/25/2025): The follow hand joint values are tested and working with Leapmotion and Mimic + "hands": IdealPDActuatorCfg( + joint_names_expr=[ + ".*_hand_.*", + ], + effort_limit=5.0, + velocity_limit=10.0, + stiffness=8.0, + damping=1.5, + armature=0.03, + friction=0.5, + ), + }, +) + + +def make_g1_29dof_dex3_cfg( + *, + prim_path: str = "/World/envs/env_.*/Robot", + init_pos: tuple[float, float, float] = (-0.15, 0.0, 0.744), + init_rot: tuple[float, float, float, float] = (0, 0, 0.7071, 0.7071), + custom_joint_pos: dict[str, float] | None = None, + base_config: ArticulationCfg = G129_CFG_WITH_DEX3_BASE_FIX, +) -> ArticulationCfg: + """Create the only supported robot articulation cfg for this task.""" + joint_pos = DEFAULT_JOINT_POS.copy() + if custom_joint_pos: + joint_pos.update(custom_joint_pos) + return base_config.replace( + prim_path=prim_path, + init_state=ArticulationCfg.InitialStateCfg( + pos=init_pos, + rot=init_rot, + joint_pos=joint_pos, + joint_vel={".*": 0.0}, + ), + ) + + 
+@configclass +class G1RobotPresets: + """G1 robot preset configuration collection""" + + @classmethod + def g1_29dof_dex3_base_fix( + cls, + init_pos: tuple[float, float, float] = (-0.15, 0.0, 0.76), + init_rot: tuple[float, float, float, float] = (0, 0, 0.7071, 0.7071), + ) -> ArticulationCfg: + """pick-place task configuration - dex3 hand""" + return make_g1_29dof_dex3_cfg(init_pos=init_pos, init_rot=init_rot) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/g129_dex3_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/g129_dex3_env_cfg.py new file mode 100644 index 000000000000..c77bea7f2e94 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/g129_dex3_env_cfg.py @@ -0,0 +1,427 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab_physx.physics import PhysxCfg + +import isaaclab.envs.mdp as base_mdp +import isaaclab.sim as sim_utils +from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg +from isaaclab.envs import ManagerBasedRLEnvCfg, ViewerCfg +from isaaclab.managers import EventTermCfg, SceneEntityCfg +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.managers import ObservationTermCfg as ObsTerm +from isaaclab.managers import RewardTermCfg as RewTerm +from isaaclab.managers import TerminationTermCfg as DoneTerm +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg +from isaaclab.utils import configclass + +from isaaclab_tasks.manager_based.manipulation.assemble_trocar import mdp + +from isaaclab_tasks.manager_based.manipulation.assemble_trocar.config import ( # isort: skip + CameraPresets, + G1RobotPresets, +) + +joint_names = [ + "left_hip_pitch_joint", + 
"right_hip_pitch_joint", + "left_hip_roll_joint", + "right_hip_roll_joint", + "left_hip_yaw_joint", + "right_hip_yaw_joint", + "left_knee_joint", + "right_knee_joint", + "left_ankle_pitch_joint", + "right_ankle_pitch_joint", + "left_ankle_roll_joint", + "right_ankle_roll_joint", + "waist_yaw_joint", + "waist_roll_joint", + "waist_pitch_joint", + "left_shoulder_pitch_joint", + "left_shoulder_roll_joint", + "left_shoulder_yaw_joint", + "left_elbow_joint", + "left_wrist_roll_joint", + "left_wrist_pitch_joint", + "left_wrist_yaw_joint", + "right_shoulder_pitch_joint", + "right_shoulder_roll_joint", + "right_shoulder_yaw_joint", + "right_elbow_joint", + "right_wrist_roll_joint", + "right_wrist_pitch_joint", + "right_wrist_yaw_joint", + "left_hand_thumb_0_joint", + "left_hand_thumb_1_joint", + "left_hand_thumb_2_joint", + "left_hand_middle_0_joint", + "left_hand_middle_1_joint", + "left_hand_index_0_joint", + "left_hand_index_1_joint", + "right_hand_thumb_0_joint", + "right_hand_thumb_1_joint", + "right_hand_thumb_2_joint", + "right_hand_middle_0_joint", + "right_hand_middle_1_joint", + "right_hand_index_0_joint", + "right_hand_index_1_joint", +] +offset_dict = { + "left_elbow_joint": -0.3, + "right_elbow_joint": -0.3, +} + +HEALTHCARE_S3 = "https://omniverse-content-production.s3-us-west-2.amazonaws.com/Assets/Isaac/Healthcare/0.5.0/132c82d" +USD_ROOT = f"{HEALTHCARE_S3}/Props/LightWheel" + + +@configclass +class AssembleTrocarSceneCfg(InteractiveSceneCfg): + """Scene configuration for the assemble_trocar task (robot + objects + lights).""" + + # humanoid robot configuration + robot: ArticulationCfg = G1RobotPresets.g1_29dof_dex3_base_fix( + init_pos=(-1.84919, 1.94, 0.81168), init_rot=(0.0, 0.0, 0.0, 1.0) + ) + # add camera configuration + front_camera = CameraPresets.g1_front_camera() + left_wrist_camera = CameraPresets.left_dex3_wrist_camera() + right_wrist_camera = CameraPresets.right_dex3_wrist_camera() + + scene = AssetBaseCfg( + 
prim_path="/World/envs/env_.*/Scene", + spawn=UsdFileCfg( + usd_path=f"{USD_ROOT}/scene03.usd", + ), + ) + + trocar_1 = RigidObjectCfg( + prim_path="/World/envs/env_.*/trocar_1", + spawn=UsdFileCfg( + usd_path=f"{USD_ROOT}/Assets/Trocar002/Trocar002-xform-wo.usd", + collision_props=sim_utils.CollisionPropertiesCfg( + collision_enabled=True, + contact_offset=0.001, + rest_offset=-0.001, + ), + ), + init_state=RigidObjectCfg.InitialStateCfg( + pos=[-1.60202, 1.91362, 0.87183], + rot=[-0.0, 0.70711, 0.70711, 0.0], + ), + ) + + trocar_2 = RigidObjectCfg( + prim_path="/World/envs/env_.*/trocar_2", + spawn=UsdFileCfg( + usd_path=( + f"{USD_ROOT}/Assets/" + "DisposableLaparoscopicPunctureDevice001/" + "DisposableLaparoscopicPunctureDevice005-xform.usd" + ), + rigid_props=sim_utils.RigidBodyPropertiesCfg( + rigid_body_enabled=True, + disable_gravity=False, + ), + ), + init_state=RigidObjectCfg.InitialStateCfg( + rot=[-0.71475, -0.000243, 0.05853, 0.69692], pos=[-1.50635, 1.90997, 0.8631] + ), + ) + tray = ArticulationCfg( + prim_path="/World/envs/env_.*/surgical_tray", + spawn=UsdFileCfg( + usd_path=f"{USD_ROOT}/Assets/SurgicalTray001/SurgicalTray001.usd", + ), + init_state=ArticulationCfg.InitialStateCfg(pos=[-1.54919, 2.03365, 0.84554], rot=[0.0, 0.0, -0.70711, 0.70711]), + actuators={}, # Empty dict for passive articulation (no motors) + ) + + # Lights + light = AssetBaseCfg( + prim_path="/World/light", + spawn=sim_utils.DomeLightCfg( + color=(0.75, 0.75, 0.75), + intensity=1000.0, + ), + ) + + +## +# MDP settings +## +@configclass +class ActionsCfg: + """defines the action configuration related to robot control, using direct joint angle control""" + + joint_pos = mdp.JointPositionActionCfg( + asset_name="robot", + joint_names=joint_names, + scale=1.0, + use_default_offset=False, + offset=offset_dict, + preserve_order=True, + ) + + +@configclass +class ObservationsCfg: + """defines all available observation information""" + + @configclass + class PolicyCfg(ObsGroup): + 
"""policy group observation configuration class + defines all state observation values for policy decision + inherit from ObsGroup base class + """ + + # robot joint state observation + robot_joint_state = ObsTerm(func=mdp.get_robot_body_joint_states) + # dex3 hand joint state observation + robot_dex3_joint_state = ObsTerm(func=mdp.get_robot_dex3_joint_states) + + def __post_init__(self): + """post initialization function + set the basic attributes of the observation group + """ + self.enable_corruption = False # disable observation value corruption + self.concatenate_terms = False # disable observation item connection + + @configclass + class CameraImagesCfg(ObsGroup): + """Observations from the robot's cameras.""" + + front_camera = ObsTerm( + func=base_mdp.image, + params={"sensor_cfg": SceneEntityCfg("front_camera"), "data_type": "rgb", "normalize": False}, + ) + left_wrist_camera = ObsTerm( + func=base_mdp.image, + params={"sensor_cfg": SceneEntityCfg("left_wrist_camera"), "data_type": "rgb", "normalize": False}, + ) + right_wrist_camera = ObsTerm( + func=base_mdp.image, + params={"sensor_cfg": SceneEntityCfg("right_wrist_camera"), "data_type": "rgb", "normalize": False}, + ) + + def __post_init__(self): + self.concatenate_terms = False + + # observation groups + # create policy observation group instance + policy: PolicyCfg = PolicyCfg() + camera_images: CameraImagesCfg = CameraImagesCfg() + + +@configclass +class TerminationsCfg: + """Termination conditions for the environment.""" + + # Time out termination + time_out = DoneTerm(func=mdp.time_out, time_out=True) + + # Task success termination (all stages completed) + task_success = DoneTerm( + func=mdp.task_success_termination, + time_out=False, # This is a success termination, not a failure + params={ + "print_log": False, + "success_stage": 4, + }, + ) + object_drop = DoneTerm( + func=mdp.object_drop_termination, + time_out=True, # Treat as timeout/failure + params={ + "drop_height_threshold": 0.5, # 
Objects below this Z height are considered dropped + "asset_cfg1": SceneEntityCfg("trocar_1"), + "asset_cfg2": SceneEntityCfg("trocar_2"), + }, + ) + + +@configclass +class RewardsCfg: + """Reward configuration for sparse reward mode. + + Each stage gives 1.0 reward on completion → Total reward for full task = 4.0 + This ensures clear reward signal for each stage transition. + """ + + # Stage 0: Lift trocars + lift_trocars = RewTerm( + func=mdp.lift_trocars_reward, + weight=1.0, # Give 1.0 reward when stage 0->1 completes + params={ + "table_height": 0.85483, + "lift_threshold": 0.15, + "asset_cfg1": SceneEntityCfg("trocar_1"), + "asset_cfg2": SceneEntityCfg("trocar_2"), + # Stage transition thresholds + "tip_align_threshold": 0.015, # Threshold for tip alignment (m) + "insertion_dist_threshold": 0.05, + "insertion_angle_threshold": 0.15, + "placement_x_min": -1.8, + "placement_x_max": -1.4, + "placement_y_min": 1.5, + "placement_y_max": 1.8, + "use_sparse_reward": True, + "print_log": False, + }, + ) + + # Stage 1: Tip alignment (find hole) + tip_alignment = RewTerm( + func=mdp.trocar_tip_alignment_reward, + weight=1.0, # Give 1.0 reward when stage 1->2 completes + params={ + "tip_dist_std": 0.02, # Std for tip distance reward shaping + "asset_cfg1": SceneEntityCfg("trocar_1"), + "asset_cfg2": SceneEntityCfg("trocar_2"), + "use_sparse_reward": True, + "print_log": False, + }, + ) + + # Stage 2: Insertion (push in) + insert_trocars = RewTerm( + func=mdp.trocar_insertion_reward, + weight=1.0, # Give 1.0 reward when stage 2->3 completes + params={ + "angle_std": 0.2, # Std for angle alignment reward + "angle_threshold": 0.10, # ~5.7 degrees tolerance for parallelism + "center_dist_std": 0.05, # Std for center distance reward + "asset_cfg1": SceneEntityCfg("trocar_1"), + "asset_cfg2": SceneEntityCfg("trocar_2"), + "use_sparse_reward": True, + "print_log": False, + }, + ) + + # Stage 3: Placement (place in tray) + placement_trocars = RewTerm( + 
func=mdp.trocar_placement_reward, + weight=1.0, # Give 1.0 reward when stage 3->4 completes + params={ + "x_min": -1.8, + "x_max": -1.4, + "y_min": 1.5, + "y_max": 1.8, + "asset_cfg1": SceneEntityCfg("trocar_1"), + "asset_cfg2": SceneEntityCfg("trocar_2"), + "use_sparse_reward": True, + "print_log": False, + }, + ) + + +@configclass +class EventCfg: + """Event configuration for scene reset.""" + + # Reset scene when episode terminates (timeout or success) + reset_scene = EventTermCfg(func=base_mdp.reset_scene_to_default, mode="reset") + + # Reset task stage tracker when environment resets + reset_task_stage = EventTermCfg(func=mdp.reset_task_stage, mode="reset") + + # Random rotation for tray and trocars + reset_tray_random_rotation = EventTermCfg( + func=mdp.reset_tray_with_random_rotation, + mode="reset", + params={ + "tray_cfg": SceneEntityCfg("tray"), + "trocar_1_cfg": SceneEntityCfg("trocar_1"), + "trocar_2_cfg": SceneEntityCfg("trocar_2"), + "rotation_range": [0, 10], + }, + ) + + +@configclass +class G1AssembleTrocarEnvCfg(ManagerBasedRLEnvCfg): + """Unitree G1 robot assemble trocar environment configuration class + inherits from ManagerBasedRLEnvCfg, defines all configuration parameters for the entire environment + """ + + # scene settings + scene: AssembleTrocarSceneCfg = AssembleTrocarSceneCfg( + num_envs=1, + env_spacing=6.0, + replicate_physics=True, + ) + # viewer settings + viewer: ViewerCfg = ViewerCfg( + eye=(-0.5, 2.4, 1.6), + lookat=(-5.4, 0.2, -1.2), + cam_prim_path="/OmniverseKit_Persp", + ) + # basic settings + observations: ObservationsCfg = ObservationsCfg() + actions: ActionsCfg = ActionsCfg() + # MDP settings + terminations: TerminationsCfg = TerminationsCfg() + events: EventCfg = EventCfg() + commands = None + rewards: RewardsCfg = RewardsCfg() + curriculum = None + + def __post_init__(self): + """Post initialization.""" + # general settings + self.decimation = 4 + self.episode_length_s = 20.0 + # simulation settings + self.sim.dt = 1 / 
200 + self.sim.render_interval = self.decimation + self.sim.physics = PhysxCfg(bounce_threshold_velocity=0.01) + self.sim.render.enable_translucency = True + self.sim.render.carb_settings = { + "rtx.raytracing.fractionalCutoutOpacity": True, + } + self.sim.render.rendering_mode = "quality" + self.sim.render.antialiasing_mode = "DLAA" + + +@configclass +class EventCfgFixTrayRotation(EventCfg): + """Event configuration with a deterministic-but-different yaw per env index. + + This is useful for eval with many parallel envs: + - env 0..N-1 get different yaw angles, + - for a fixed global seed, the set of N angles is reproducible across runs/resets. + + Notes: + - Determinism is tied to torch's global seed (set by env reset seed in IsaacLab). + - Angle unit is degrees. + """ + + reset_tray_random_rotation = EventTermCfg( + func=mdp.reset_tray_with_random_rotation, + mode="reset", + params={ + "tray_cfg": SceneEntityCfg("tray"), + "trocar_1_cfg": SceneEntityCfg("trocar_1"), + "trocar_2_cfg": SceneEntityCfg("trocar_2"), + "rotation_range": [0, 10], + "deterministic_per_env": True, + # Use torch.initial_seed() by default to follow the env reset seed. + "deterministic_seed": None, + }, + ) + + +@configclass +class G1AssembleTrocarEvalEnvCfg(G1AssembleTrocarEnvCfg): + """Eval-friendly env cfg. + + This is currently an alias of `G1AssembleTrocarEnvCfg`, but registered under a + separate Gym id for compatibility with RLinf configs. + """ + + # Override events to enforce deterministic per-env tray yaw on every reset. 
+ events: EventCfgFixTrayRotation = EventCfgFixTrayRotation() diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/mdp/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/mdp/__init__.py new file mode 100644 index 000000000000..4de5284dfe23 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/mdp/__init__.py @@ -0,0 +1,36 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""MDP utilities for the assemble_trocar task.""" + +from isaaclab.envs.mdp import JointPositionActionCfg, time_out + +from .events import reset_robot_to_default_joint_positions, reset_task_stage, reset_tray_with_random_rotation +from .observations import get_robot_body_joint_states, get_robot_dex3_joint_states +from .rewards import ( + lift_trocars_reward, + trocar_insertion_reward, + trocar_placement_reward, + trocar_tip_alignment_reward, + update_task_stage, +) +from .terminations import object_drop_termination, task_success_termination + +__all__ = [ + "JointPositionActionCfg", + "time_out", + "get_robot_body_joint_states", + "get_robot_dex3_joint_states", + "reset_tray_with_random_rotation", + "reset_robot_to_default_joint_positions", + "reset_task_stage", + "update_task_stage", + "lift_trocars_reward", + "trocar_tip_alignment_reward", + "trocar_insertion_reward", + "trocar_placement_reward", + "task_success_termination", + "object_drop_termination", +] diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/mdp/events.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/mdp/events.py new file mode 100644 index 000000000000..1e66e69b4c99 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/mdp/events.py 
@@ -0,0 +1,259 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Custom event functions for the assemble_trocar surgical environment.""" + +from __future__ import annotations + +import math +from typing import TYPE_CHECKING + +import torch +import warp as wp + +from isaaclab.managers import SceneEntityCfg +from isaaclab.utils.math import quat_apply, quat_mul + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + +__all__ = [ + "reset_tray_with_random_rotation", + "reset_robot_to_default_joint_positions", + "reset_task_stage", +] + + +def reset_task_stage( + env: ManagerBasedRLEnv, + env_ids: torch.Tensor, + print_log: bool = False, +) -> None: + """Reset task stage to 0 for specified environments. + + This should be called during environment reset events. + Also resets all locked reward caches to maintain continuity. + + Args: + env: The environment instance + env_ids: Indices of environments to reset + print_log: If True, print debug information. 
+ """ + if hasattr(env, "_task_stage"): + env._task_stage[env_ids] = 0 + + # Reset all locked reward caches (for dense rewards) + if hasattr(env, "_lift_reward_locked"): + env._lift_reward_locked[env_ids] = 0 + if hasattr(env, "_tip_reward_locked"): + env._tip_reward_locked[env_ids] = 0 + if hasattr(env, "_insertion_reward_locked"): + env._insertion_reward_locked[env_ids] = 0 + if hasattr(env, "_placement_reward_locked"): + env._placement_reward_locked[env_ids] = 0 + + # Reset all previous stage trackers (for sparse rewards) + if hasattr(env, "_prev_stage_lift"): + env._prev_stage_lift[env_ids] = 0 + if hasattr(env, "_prev_stage_tip"): + env._prev_stage_tip[env_ids] = 0 + if hasattr(env, "_prev_stage_insert"): + env._prev_stage_insert[env_ids] = 0 + if hasattr(env, "_prev_stage_place"): + env._prev_stage_place[env_ids] = 0 + + # Reset debug print tracker + if hasattr(env, "_last_debug_print_step"): + env._last_debug_print_step = -1 + + if print_log: + print(f"Reset task stage for {len(env_ids)} environment(s)") + + +def reset_tray_with_random_rotation( + env: ManagerBasedRLEnv, + env_ids: torch.Tensor, + tray_cfg: SceneEntityCfg, + trocar_1_cfg: SceneEntityCfg, + trocar_2_cfg: SceneEntityCfg, + rotation_range: tuple[float, float] | float = (-5.0, 5.0), # (min, max) degrees or ±value + deterministic_per_env: bool = False, + deterministic_seed: int | None = None, +): + """Reset tray with random rotation while keeping relative positions of trocars. + + This function: + 1. Applies a random yaw rotation within rotation_range to the tray + 2. Rotates trocar_1 and trocar_2 around the tray center to maintain relative positions + 3. Uses separate pose/velocity writes to ensure instant teleportation (no interpolation) + + Args: + env: The environment instance. + env_ids: The environment indices to reset. + tray_cfg: Scene entity config for the tray. + trocar_1_cfg: Scene entity config for trocar_1. + trocar_2_cfg: Scene entity config for trocar_2. 
+ rotation_range: Rotation angle range in degrees. Can be: + - tuple (min, max): Random rotation between min and max degrees + - float value: Random rotation between -value and +value degrees + Examples: (0, 10), (-5, 15), 5.0 (equivalent to (-5, 5)) + """ + if len(env_ids) == 0: + return + + # Parse rotation_range parameter + if isinstance(rotation_range, (tuple, list)): + # User provided (min, max) range + min_angle_deg, max_angle_deg = rotation_range[0], rotation_range[1] + else: + # User provided single value (symmetric range ±value) + min_angle_deg, max_angle_deg = -rotation_range, rotation_range + + # Get assets + tray = env.scene[tray_cfg.name] + trocar_1 = env.scene[trocar_1_cfg.name] + trocar_2 = env.scene[trocar_2_cfg.name] + + # Get default states (initial positions from config) + # note: default_root_state is the local coordinate relative to the environment origin + tray_default_state = wp.to_torch(tray.data.default_root_state)[env_ids].clone() + trocar_1_default_state = wp.to_torch(trocar_1.data.default_root_state)[env_ids].clone() + trocar_2_default_state = wp.to_torch(trocar_2.data.default_root_state)[env_ids].clone() + + # get the world coordinate offset for each environment (multiple environment support) + env_origins = env.scene.env_origins[env_ids] # (num_envs, 3) + + # convert local coordinate to world coordinate + tray_default_state[:, :3] += env_origins + trocar_1_default_state[:, :3] += env_origins + trocar_2_default_state[:, :3] += env_origins + + # Tray center position (pivot point for rotation) - now is world coordinate + tray_center = tray_default_state[:, :3] # (num_envs, 3) + + # Generate yaw angles (in radians) + # Convert degrees to radians + min_angle_rad = min_angle_deg * math.pi / 180.0 + max_angle_rad = max_angle_deg * math.pi / 180.0 + + # Generate angles uniformly distributed in [min_angle, max_angle] + if deterministic_per_env: + # Derive a stable "random" number per env id, so each env gets a distinct yaw, + # but it is 
repeatable across resets/runs given the same seed + env_id. + # + # If deterministic_seed is not provided, we tie it to torch's global seed. + # IsaacLab typically seeds torch during env reset with the provided seed. + if deterministic_seed is None: + deterministic_seed = int(torch.initial_seed()) + u = _deterministic_uniform_0_1_from_ids(env, env_ids, deterministic_seed) # (num_envs,) + else: + u = torch.rand(len(env_ids), device=env.device) + random_yaw = u * (max_angle_rad - min_angle_rad) + min_angle_rad # (num_envs,) + + # Create rotation quaternion for yaw (rotation around Z-axis) + # XYZW: quat = [x, y, z, w] = [0, 0, sin(θ/2), cos(θ/2)] + half_angle = random_yaw / 2.0 + delta_quat = torch.zeros(len(env_ids), 4, device=env.device) + delta_quat[:, 2] = torch.sin(half_angle) # z + delta_quat[:, 3] = torch.cos(half_angle) # w + + # Apply rotation to tray quaternion + tray_new_quat = quat_mul(delta_quat, tray_default_state[:, 3:7]) + + # Update tray state + tray_new_state = tray_default_state.clone() + tray_new_state[:, 3:7] = tray_new_quat + + # Rotate trocar positions around tray center + trocar_1_relative_pos = trocar_1_default_state[:, :3] - tray_center + trocar_2_relative_pos = trocar_2_default_state[:, :3] - tray_center + + # Rotate relative positions using the delta quaternion + trocar_1_new_relative_pos = quat_apply(delta_quat, trocar_1_relative_pos) + trocar_2_new_relative_pos = quat_apply(delta_quat, trocar_2_relative_pos) + + # New absolute positions + trocar_1_new_state = trocar_1_default_state.clone() + trocar_2_new_state = trocar_2_default_state.clone() + + trocar_1_new_state[:, :3] = tray_center + trocar_1_new_relative_pos + trocar_2_new_state[:, :3] = tray_center + trocar_2_new_relative_pos + + # Also rotate trocar orientations + trocar_1_new_state[:, 3:7] = quat_mul(delta_quat, trocar_1_default_state[:, 3:7]) + trocar_2_new_state[:, 3:7] = quat_mul(delta_quat, trocar_2_default_state[:, 3:7]) + + zero_velocity = torch.zeros(len(env_ids), 6, 
device=env.device) # [lin_vel(3), ang_vel(3)] + + tray.write_root_pose_to_sim_index(root_pose=tray_new_state[:, :7], env_ids=env_ids) + trocar_1.write_root_pose_to_sim_index(root_pose=trocar_1_new_state[:, :7], env_ids=env_ids) + trocar_2.write_root_pose_to_sim_index(root_pose=trocar_2_new_state[:, :7], env_ids=env_ids) + + tray.write_root_velocity_to_sim_index(root_velocity=zero_velocity, env_ids=env_ids) + trocar_1.write_root_velocity_to_sim_index(root_velocity=zero_velocity, env_ids=env_ids) + trocar_2.write_root_velocity_to_sim_index(root_velocity=zero_velocity, env_ids=env_ids) + + +def _deterministic_uniform_0_1_from_ids( + env: ManagerBasedRLEnv, + ids: torch.Tensor, + seed: int, +) -> torch.Tensor: + """Deterministically map env ids -> floats in [0, 1) via a seeded lookup table. + + We generate a length-(env.num_envs) random table with a local torch.Generator + seeded by `seed`, then return table[ids]. This is deterministic and avoids + uint64 bitwise ops (which may not be supported on CPU). + """ + device = env.device + num_envs = int(env.num_envs) + seed = int(seed) + + cache = getattr(env, "_deterministic_u_table_cache", None) + cache_key = (seed, num_envs, str(device)) + if cache is None or cache.get("key") != cache_key: + gen = torch.Generator(device=device) + gen.manual_seed(seed & 0xFFFFFFFFFFFFFFFF) + u_table = torch.rand((num_envs,), generator=gen, device=device, dtype=torch.float32) + cache = {"key": cache_key, "u_table": u_table} + setattr(env, "_deterministic_u_table_cache", cache) + + return cache["u_table"][ids] + + +def reset_robot_to_default_joint_positions( + env: ManagerBasedRLEnv, + env_ids: torch.Tensor, + robot_cfg: SceneEntityCfg, +): + """Reset robot joint positions directly to default values. + + This function directly writes joint positions and velocities to the simulation, + bypassing the PD controller. This prevents the "drive to target" behavior + that causes arms to swing from 0 position to the target position. 
+ + Args: + env: The environment instance. + env_ids: The environment indices to reset. + robot_cfg: Scene entity config for the robot. + """ + if len(env_ids) == 0: + return + + # Get robot asset + robot = env.scene[robot_cfg.name] + + # Get default joint positions and velocities + default_joint_pos = wp.to_torch(robot.data.default_joint_pos)[env_ids].clone() + default_joint_vel = wp.to_torch(robot.data.default_joint_vel)[env_ids].clone() + + # Directly write joint state to simulation (bypasses PD controller) + robot.write_joint_position_to_sim_index(position=default_joint_pos, env_ids=env_ids) + robot.write_joint_velocity_to_sim_index(velocity=default_joint_vel, env_ids=env_ids) + + # Also reset root state + default_root_state = wp.to_torch(robot.data.default_root_state)[env_ids].clone() + robot.write_root_pose_to_sim_index(root_pose=default_root_state[:, :7], env_ids=env_ids) + robot.write_root_velocity_to_sim_index(root_velocity=default_root_state[:, 7:13], env_ids=env_ids) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/mdp/observations.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/mdp/observations.py new file mode 100644 index 000000000000..c58a4553a44d --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/mdp/observations.py @@ -0,0 +1,159 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +""" +G1 29DOF (body) + Dex3 joint state helpers for the assemble_trocar task. + +Notes: +- DDS has been removed (simulation-only observations). +- These functions are designed to be used as Isaac Lab observation terms. 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import torch +import warp as wp + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + + +# Observation cache: index tensors + preallocated output buffers (body joints) +_body_obs_cache = { + "device": None, + "batch": None, + "idx_t": None, + "idx_batch": None, + "pos_buf": None, + "vel_buf": None, + "torque_buf": None, + "combined_buf": None, +} + + +def get_robot_body_joint_states(env: ManagerBasedRLEnv) -> torch.Tensor: + """Return body joint states as a single tensor: [pos(29) | vel(29) | torque(29)].""" + joint_pos = wp.to_torch(env.scene["robot"].data.joint_pos) + joint_vel = wp.to_torch(env.scene["robot"].data.joint_vel) + joint_torque = wp.to_torch(env.scene["robot"].data.applied_torque) + device = joint_pos.device + batch = joint_pos.shape[0] + + # Precompute and cache column indices + global _body_obs_cache + if _body_obs_cache["device"] != device or _body_obs_cache["idx_t"] is None: + body_joint_indices = [ + 0, + 3, + 6, + 9, + 13, + 17, + 1, + 4, + 7, + 10, + 14, + 18, + 2, + 5, + 8, + 11, + 15, + 19, + 21, + 23, + 25, + 27, + 12, + 16, + 20, + 22, + 24, + 26, + 28, + ] + _body_obs_cache["idx_t"] = torch.tensor(body_joint_indices, dtype=torch.long, device=device) + _body_obs_cache["device"] = device + _body_obs_cache["batch"] = None # force re-init batch-shaped buffers + + idx_t = _body_obs_cache["idx_t"] + n = idx_t.numel() + + # Preallocate/reuse batch-shaped indices and output buffers + if _body_obs_cache["batch"] != batch or _body_obs_cache["idx_batch"] is None: + _body_obs_cache["idx_batch"] = idx_t.unsqueeze(0).expand(batch, n) + _body_obs_cache["pos_buf"] = torch.empty(batch, n, device=device, dtype=joint_pos.dtype) + _body_obs_cache["vel_buf"] = torch.empty(batch, n, device=device, dtype=joint_pos.dtype) + _body_obs_cache["torque_buf"] = torch.empty(batch, n, device=device, dtype=joint_pos.dtype) + _body_obs_cache["combined_buf"] = 
torch.empty(batch, n * 3, device=device, dtype=joint_pos.dtype) + _body_obs_cache["batch"] = batch + + idx_batch = _body_obs_cache["idx_batch"] + pos_buf = _body_obs_cache["pos_buf"] + vel_buf = _body_obs_cache["vel_buf"] + torque_buf = _body_obs_cache["torque_buf"] + combined_buf = _body_obs_cache["combined_buf"] + + # Fill buffers using gather(out=...) to avoid new tensor allocations + try: + torch.gather(joint_pos, 1, idx_batch, out=pos_buf) + torch.gather(joint_vel, 1, idx_batch, out=vel_buf) + torch.gather(joint_torque, 1, idx_batch, out=torque_buf) + except TypeError: + pos_buf.copy_(torch.gather(joint_pos, 1, idx_batch)) + vel_buf.copy_(torch.gather(joint_vel, 1, idx_batch)) + torque_buf.copy_(torch.gather(joint_torque, 1, idx_batch)) + + # Combine into a single buffer to avoid cat allocations + combined_buf[:, 0:n].copy_(pos_buf) + combined_buf[:, n : 2 * n].copy_(vel_buf) + combined_buf[:, 2 * n : 3 * n].copy_(torque_buf) + return combined_buf + + +# Observation cache: index tensors + preallocated output buffers (Dex3 hand joints) +_dex3_obs_cache = { + "device": None, + "batch": None, + "idx_t": None, + "idx_batch": None, + "pos_buf": None, +} + + +def get_robot_dex3_joint_states(env: ManagerBasedRLEnv) -> torch.Tensor: + """Return Dex3 joint positions [batch, 14].""" + joint_pos = wp.to_torch(env.scene["robot"].data.joint_pos) + device = joint_pos.device + batch = joint_pos.shape[0] + + global _dex3_obs_cache + if _dex3_obs_cache["device"] != device or _dex3_obs_cache["idx_t"] is None: + # Dex3 joint indices in the full robot joint vector (14 DOF) + dex3_joint_indices = [31, 37, 41, 30, 36, 29, 35, 34, 40, 42, 33, 39, 32, 38] + _dex3_obs_cache["idx_t"] = torch.tensor(dex3_joint_indices, dtype=torch.long, device=device) + _dex3_obs_cache["device"] = device + _dex3_obs_cache["batch"] = None + + idx_t = _dex3_obs_cache["idx_t"] + n = idx_t.numel() + + if _dex3_obs_cache["batch"] != batch or _dex3_obs_cache["idx_batch"] is None: + 
_dex3_obs_cache["idx_batch"] = idx_t.unsqueeze(0).expand(batch, n) + _dex3_obs_cache["pos_buf"] = torch.empty(batch, n, device=device, dtype=joint_pos.dtype) + _dex3_obs_cache["batch"] = batch + + idx_batch = _dex3_obs_cache["idx_batch"] + pos_buf = _dex3_obs_cache["pos_buf"] + + try: + torch.gather(joint_pos, 1, idx_batch, out=pos_buf) + except TypeError: + pos_buf.copy_(torch.gather(joint_pos, 1, idx_batch)) + + return pos_buf diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/mdp/rewards.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/mdp/rewards.py new file mode 100644 index 000000000000..f635c74a1309 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/mdp/rewards.py @@ -0,0 +1,721 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import torch +import warp as wp + +from isaaclab.assets import RigidObject +from isaaclab.managers import SceneEntityCfg +from isaaclab.utils.math import quat_apply + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + +__all__ = [ + "update_task_stage", + "lift_trocars_reward", + "trocar_tip_alignment_reward", + "trocar_insertion_reward", + "trocar_placement_reward", +] + + +def get_task_stage(env: ManagerBasedRLEnv) -> torch.Tensor: + """Get or initialize task stage tracker for each environment. 
+ + Stage 0: Initial state (need to lift) + Stage 1: Lifted (need to find hole - tip alignment) + Stage 2: Hole found (need to insert - push in) + Stage 3: Inserted (need to place) + Stage 4: Placed (task complete) + + Returns: + torch.Tensor: Current stage for each environment (num_envs,) + """ + if not hasattr(env, "_task_stage"): + env._task_stage = torch.zeros(env.num_envs, dtype=torch.long, device=env.device) + return env._task_stage + + +def should_print_debug(env: ManagerBasedRLEnv, print_interval: int = 50, print_log: bool = True) -> bool: + """Check if debug info should be printed based on episode step counter. + + Uses the environment's built-in episode_length_buf to track steps, + and ensures each step only prints once (first call). + + Args: + env: Environment instance + print_interval: Print every N steps + + Returns: + bool: True if should print (only on first call per step) + """ + # Hard gate: allow callers to disable all logs from this module. + if not print_log: + return False + + # Use environment's episode step counter (standard in Isaac Lab) + if not hasattr(env, "episode_length_buf"): + return False + + current_step = env.episode_length_buf[0].item() + + # Skip step 0 and non-interval steps + if current_step == 0 or current_step % print_interval != 0: + return False + + # Track last printed step to avoid duplicate prints in same step + if not hasattr(env, "_last_debug_print_step"): + env._last_debug_print_step = -1 + + # Only print once per step (on first function call) + if env._last_debug_print_step == current_step: + return False # Already printed this step + + # Mark this step as printed and return True + env._last_debug_print_step = current_step + return True + + +def update_task_stage( + env: ManagerBasedRLEnv, + asset_cfg1: SceneEntityCfg, + asset_cfg2: SceneEntityCfg, + table_height: float = 0.85483, + lift_threshold: float = 0.05, + tip_align_threshold: float = 0.015, + insertion_dist_threshold: float = 0.03, + 
def update_task_stage(
    env: ManagerBasedRLEnv,
    asset_cfg1: SceneEntityCfg,
    asset_cfg2: SceneEntityCfg,
    table_height: float = 0.85483,
    lift_threshold: float = 0.05,
    tip_align_threshold: float = 0.015,
    insertion_dist_threshold: float = 0.03,
    insertion_angle_threshold: float = 0.15,
    placement_x_min: float = -1.8,
    placement_x_max: float = -1.4,
    placement_y_min: float = 1.5,
    placement_y_max: float = 1.8,
    placement_z_min: float = 0.9,
    print_log: bool = False,
) -> torch.Tensor:
    """Update task stage based on current state.

    This function checks conditions and advances stages automatically.
    Once a stage is completed, it never goes back (each transition is gated
    on ``stage == k``, so a stage can only move forward by one per call).

    Per-environment transitions:
        0 -> 1: both trocar roots above ``table_height + lift_threshold``.
        1 -> 2: tip-to-tip distance below ``tip_align_threshold``.
        2 -> 3: local -Z axes parallel within ``insertion_angle_threshold``
                AND root-to-root distance below ``insertion_dist_threshold``.
        3 -> 4: both roots inside the env-local placement box and below
                ``placement_z_min``.

    Args:
        env: Environment instance (stage buffer is cached on it by
            :func:`get_task_stage` as ``_task_stage``).
        asset_cfg1: Scene entity config for the first trocar.
        asset_cfg2: Scene entity config for the second trocar.
        table_height: Table surface height (world z).
        lift_threshold: Required clearance above the table for stage 0 -> 1.
        tip_align_threshold: Max tip distance for stage 1 -> 2.
        insertion_dist_threshold: Max center distance for stage 2 -> 3.
        insertion_angle_threshold: Max axis angle (radians) for stage 2 -> 3.
        placement_x_min: X bound of the tray zone (relative to env origin).
        placement_x_max: X bound of the tray zone (relative to env origin).
        placement_y_min: Y bound of the tray zone (relative to env origin).
        placement_y_max: Y bound of the tray zone (relative to env origin).
        placement_z_min: Z threshold; objects must be below it to count as placed.
        print_log: If True, print every per-env stage transition.

    Returns:
        torch.Tensor: Updated stage per environment, shape (num_envs,).
    """
    stage = get_task_stage(env)

    obj1: RigidObject = env.scene[asset_cfg1.name]
    obj2: RigidObject = env.scene[asset_cfg2.name]

    # Warp arrays -> torch views; positions (num_envs, 3), quaternions (num_envs, 4).
    pos1 = wp.to_torch(obj1.data.root_pos_w)
    pos2 = wp.to_torch(obj2.data.root_pos_w)
    quat1 = wp.to_torch(obj1.data.root_quat_w)
    quat2 = wp.to_torch(obj2.data.root_quat_w)

    # Store old stage to detect changes (BEFORE any stage transitions)
    old_stage = stage.clone()

    # Stage 0 -> 1: Check if lifted
    target_z = table_height + lift_threshold
    is_lifted_1 = pos1[:, 2] > target_z
    is_lifted_2 = pos2[:, 2] > target_z
    both_lifted = is_lifted_1 & is_lifted_2
    stage = torch.where((stage == 0) & both_lifted, torch.ones_like(stage), stage)

    # Stage 1 -> 2: Check if tips are aligned (hole found)
    # Get tip positions
    tip_pos1 = get_trocar_tip_position(env, asset_cfg1)
    tip_pos2 = get_trocar_tip_position(env, asset_cfg2)
    tip_dist = torch.norm(tip_pos1 - tip_pos2, dim=-1)

    # Tip alignment success
    tip_aligned = tip_dist < tip_align_threshold
    stage = torch.where((stage == 1) & tip_aligned, torch.full_like(stage, 2), stage)

    # Stage 2 -> 3: Check if inserted (parallel + center close)
    # Get center distance
    center_dist = torch.norm(pos1 - pos2, dim=-1)

    # Check alignment: angle between the two trocars' local -Z axes.
    # The abs() below makes the test sign-insensitive (anti-parallel counts as parallel).
    target_axis1 = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1)
    target_axis2 = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1)
    axis1 = quat_apply(quat1, target_axis1)
    axis2 = quat_apply(quat2, target_axis2)
    dot_prod = torch.sum(axis1 * axis2, dim=-1)
    # Clamp guards acos against floating-point overshoot above 1.0.
    abs_dot = torch.clamp(torch.abs(dot_prod), max=1.0)
    angle = torch.acos(abs_dot)

    # Insertion success: parallel + center close
    is_parallel = angle < insertion_angle_threshold
    center_close = center_dist < insertion_dist_threshold
    is_inserted = is_parallel & center_close

    stage = torch.where((stage == 2) & is_inserted, torch.full_like(stage, 3), stage)

    # Stage 3 -> 4: Check if placed in target zone
    # Get environment origins to handle multi-env spatial offsets
    env_origins = env.scene.env_origins  # shape: (num_envs, 3)

    # Adjust target zone relative to each environment's origin.
    # min/max make the box robust to swapped bound arguments.
    curr_x_min = env_origins[:, 0] + min(placement_x_min, placement_x_max)  # (num_envs,)
    curr_x_max = env_origins[:, 0] + max(placement_x_min, placement_x_max)
    curr_y_min = env_origins[:, 1] + min(placement_y_min, placement_y_max)
    curr_y_max = env_origins[:, 1] + max(placement_y_min, placement_y_max)

    in_zone_1 = (
        (pos1[:, 0] >= curr_x_min)
        & (pos1[:, 0] <= curr_x_max)
        & (pos1[:, 1] >= curr_y_min)
        & (pos1[:, 1] <= curr_y_max)
        & (pos1[:, 2] < placement_z_min)
    )
    in_zone_2 = (
        (pos2[:, 0] >= curr_x_min)
        & (pos2[:, 0] <= curr_x_max)
        & (pos2[:, 1] >= curr_y_min)
        & (pos2[:, 1] <= curr_y_max)
        & (pos2[:, 2] < placement_z_min)
    )
    both_in_zone = in_zone_1 & in_zone_2
    stage = torch.where((stage == 3) & both_in_zone, torch.full_like(stage, 4), stage)

    # Print stage transitions (AFTER all stage transitions - always print when stage changes)
    if print_log and (stage != old_stage).any():
        for env_id in range(env.num_envs):
            if stage[env_id] != old_stage[env_id]:
                print(f"Env {env_id}: Stage {old_stage[env_id].item()} → {stage[env_id].item()}")

    env._task_stage = stage
    return stage
def lift_trocars_reward(
    env: ManagerBasedRLEnv,
    table_height: float = 0.85483,
    lift_threshold: float = 0.05,
    asset_cfg1: SceneEntityCfg = SceneEntityCfg("trocar_1"),
    asset_cfg2: SceneEntityCfg = SceneEntityCfg("trocar_2"),
    tip_align_threshold: float = 0.015,
    insertion_dist_threshold: float = 0.035,
    insertion_angle_threshold: float = 0.17,
    placement_x_min: float = -1.8,
    placement_x_max: float = -1.4,
    placement_y_min: float = 1.5,
    placement_y_max: float = 1.8,
    placement_z_min: float = 0.9,
    use_sparse_reward: bool = True,
    print_log: bool = False,
) -> torch.Tensor:
    """Reward for lifting both trocars above the table.

    Only active in Stage 0. Once completed, this reward is locked at the achieved value.

    This term also doubles as the single per-step driver of the stage machine:
    it is the only reward that calls :func:`update_task_stage`, so the other
    stage-gated terms rely on it running first in the reward manager.

    Args:
        env: Environment instance.
        table_height: Table surface height (world z).
        lift_threshold: Required clearance above the table.
        asset_cfg1: Configuration for trocar 1.
        asset_cfg2: Configuration for trocar 2.
        tip_align_threshold: Forwarded to :func:`update_task_stage`.
        insertion_dist_threshold: Forwarded to :func:`update_task_stage`.
            NOTE(review): default here (0.035) differs from update_task_stage's
            own default (0.03) — confirm which value is intended.
        insertion_angle_threshold: Forwarded to :func:`update_task_stage`.
            NOTE(review): default here (0.17) differs from update_task_stage's
            own default (0.15) — confirm which value is intended.
        placement_x_min: Tray-zone X bound, forwarded to :func:`update_task_stage`.
        placement_x_max: Tray-zone X bound, forwarded to :func:`update_task_stage`.
        placement_y_min: Tray-zone Y bound, forwarded to :func:`update_task_stage`.
        placement_y_max: Tray-zone Y bound, forwarded to :func:`update_task_stage`.
        placement_z_min: Tray-zone Z threshold, forwarded to :func:`update_task_stage`.
        use_sparse_reward: If True, only give reward (1.0) when stage transitions from 0->1.
            If False, give continuous reward based on current state.
        print_log: If True, print debug information.

    Returns:
        torch.Tensor: Reward tensor (num_envs,).
    """
    # Update task stage first - check ALL stage transitions once per step
    stage = update_task_stage(
        env,
        asset_cfg1,
        asset_cfg2,
        table_height,
        lift_threshold,
        tip_align_threshold,
        insertion_dist_threshold,
        insertion_angle_threshold,
        placement_x_min,
        placement_x_max,
        placement_y_min,
        placement_y_max,
        placement_z_min,
        print_log=print_log,
    )

    # Get the rigid objects from the scene
    obj1: RigidObject = env.scene[asset_cfg1.name]
    obj2: RigidObject = env.scene[asset_cfg2.name]

    # Get positions (num_envs, 3)
    pos1 = wp.to_torch(obj1.data.root_pos_w)
    pos2 = wp.to_torch(obj2.data.root_pos_w)

    target_z = table_height + lift_threshold

    # Check if lifted
    is_lifted_1 = pos1[:, 2] > target_z
    is_lifted_2 = pos2[:, 2] > target_z
    both_lifted = is_lifted_1 & is_lifted_2

    if use_sparse_reward:
        # Sparse reward mode: give 1.0 ONLY when stage transitions from 0 to 1
        # Track previous stage
        # NOTE(review): _prev_stage_lift (and the locked cache below) are never
        # reset on episode reset — verify stale state cannot leak across episodes.
        if not hasattr(env, "_prev_stage_lift"):
            # Initialize prev_stage to current stage to avoid false positives on first call
            env._prev_stage_lift = stage.clone()

        # Reward = 1.0 only on transition step (prev_stage=0, curr_stage=1)
        # Dividing by step_dt presumably compensates a dt multiplication in the
        # reward manager so the one-shot bonus integrates to 1.0 — TODO confirm.
        stage_just_completed = (env._prev_stage_lift == 0) & (stage >= 1)
        reward = torch.where(
            stage_just_completed,
            torch.ones(env.num_envs, device=env.device) / env.step_dt,
            torch.zeros(env.num_envs, device=env.device),
        )

        # Update previous stage for next step
        env._prev_stage_lift = stage.clone()
    else:
        # Dense reward mode: continuous reward with locking for continuity
        # Initialize locked reward cache
        if not hasattr(env, "_lift_reward_locked"):
            env._lift_reward_locked = torch.zeros(env.num_envs, device=env.device)

        # Current reward value
        current_reward = both_lifted.float()

        # Lock the reward when transitioning to stage 1
        env._lift_reward_locked = torch.where(
            (stage >= 1) & (env._lift_reward_locked == 0),
            current_reward,  # Lock at current value when stage changes
            env._lift_reward_locked,
        )

        # Stage 0: give reward based on current state
        # Stage >= 1: return locked value (preserves continuity)
        reward = torch.where(stage == 0, current_reward, env._lift_reward_locked)

    # Print debug info periodically (every 50 steps)
    if should_print_debug(env, print_log=print_log):
        mode_str = "Sparse" if use_sparse_reward else "Dense"
        print(
            f" Stage: {stage[0].item()}"
            f" | Lift ({mode_str}): {reward[0].item():.2f}"
            f" | z1: {pos1[0, 2]:.3f}"
            f" | z2: {pos2[0, 2]:.3f}"
        )

    return reward
def get_trocar_tip_position(
    env: ManagerBasedRLEnv,
    asset_cfg: SceneEntityCfg = SceneEntityCfg("trocar_1"),
) -> torch.Tensor:
    """Get trocar tip position (White_pos or Red_pos) in world coordinates.

    Calculates tip world position using trocar root's dynamic position and rotation,
    plus the tip's relative offset. The root-relative offset is computed once from
    the USD stage (env_0, default timecode) and cached on ``env``; being a
    root-local quantity it is pose-independent, so computing it once is valid
    as long as every cloned environment shares the same asset structure.

    Args:
        env: Environment instance
        asset_cfg: Trocar asset configuration (trocar_1 or trocar_2)

    Returns:
        torch.Tensor: Shape (num_envs, 3) - Position in world coordinates

    Raises:
        ValueError: If ``asset_cfg.name`` is neither "trocar_1" nor "trocar_2".
    """
    # Local imports keep USD/pxr out of module import time.
    from pxr import Gf, Usd, UsdGeom

    import isaaclab.utils.math as math_utils

    # Cache the tip offset to avoid recalculating every step
    cache_key = f"_tip_offset_{asset_cfg.name}"
    if not hasattr(env, cache_key):
        # Get tip's local offset relative to root (only calculate once, from USD)
        # Note: Local offset is the same in all environments (same asset structure), so get from env_0
        stage = env.scene.stage

        # Prim paths are hard-coded to the cloned env_0 hierarchy of the trocar USD assets.
        if asset_cfg.name == "trocar_1":
            tip_path = "/World/envs/env_0/trocar_1/Trocar002/White_pos"
            root_path = "/World/envs/env_0/trocar_1"
        elif asset_cfg.name == "trocar_2":
            tip_path = "/World/envs/env_0/trocar_2/DisposableLaparoscopicPunctureDevice001/Red_pos"
            root_path = "/World/envs/env_0/trocar_2"
        else:
            raise ValueError(f"Invalid asset configuration: {asset_cfg.name}")

        tip_prim = stage.GetPrimAtPath(tip_path)
        root_prim = stage.GetPrimAtPath(root_path)

        if not tip_prim.IsValid():
            # Best-effort fallback: treat the root as the tip rather than crashing.
            print(f"Warning: Tip prim not found at {tip_path}, using zero offset")
            tip_offset_local = torch.zeros(3, dtype=torch.float32, device=env.device)
        else:
            tip_xform = UsdGeom.Xformable(tip_prim)
            root_xform = UsdGeom.Xformable(root_prim)

            # Default-timecode transforms: the authored (rest) pose of the asset.
            tip_world_transform = tip_xform.ComputeLocalToWorldTransform(Usd.TimeCode.Default())
            root_world_transform = root_xform.ComputeLocalToWorldTransform(Usd.TimeCode.Default())

            tip_world_pos = tip_world_transform.ExtractTranslation()
            root_world_pos = root_world_transform.ExtractTranslation()

            root_rotation_mat = root_world_transform.ExtractRotationMatrix()
            root_rotation_quat = root_rotation_mat.ExtractRotation().GetQuat()

            tip_offset_world = Gf.Vec3d(
                tip_world_pos[0] - root_world_pos[0],
                tip_world_pos[1] - root_world_pos[1],
                tip_world_pos[2] - root_world_pos[2],
            )

            # Convert world coordinate offset to root's local coordinate system
            # Using inverse of root rotation: local_offset = root_quat^{-1} * world_offset
            root_quat_inv = root_rotation_quat.GetInverse()
            tip_offset_local_gf = root_quat_inv.Transform(tip_offset_world)

            tip_offset_local = torch.tensor(
                [tip_offset_local_gf[0], tip_offset_local_gf[1], tip_offset_local_gf[2]],
                dtype=torch.float32,
                device=env.device,
            )

            print(f"Cached tip offset for {asset_cfg.name}: {tip_offset_local}")

        # Cache the offset
        setattr(env, cache_key, tip_offset_local)

    tip_offset_local = getattr(env, cache_key)

    obj: RigidObject = env.scene[asset_cfg.name]
    root_pos_w = wp.to_torch(obj.data.root_pos_w)  # Shape: (num_envs, 3)
    root_quat_w = wp.to_torch(obj.data.root_quat_w)  # Shape: (num_envs, 4) XYZW
    # NOTE(review): the XYZW comment above came with the code, but
    # isaaclab.utils.math.quat_apply expects (w, x, y, z) — confirm the layout
    # that wp.to_torch yields here actually matches what quat_apply consumes.

    tip_offset_local_batch = tip_offset_local.unsqueeze(0).repeat(env.num_envs, 1)

    # Rotate the cached local offset into the world frame and add the root position.
    tip_offset_world = math_utils.quat_apply(root_quat_w, tip_offset_local_batch)
    tip_pos_world = root_pos_w + tip_offset_world

    return tip_pos_world  # Shape: (num_envs, 3)
def trocar_tip_alignment_reward(
    env: ManagerBasedRLEnv,
    tip_dist_std: float = 0.02,  # Std for tip distance reward
    asset_cfg1: SceneEntityCfg = SceneEntityCfg("trocar_1"),
    asset_cfg2: SceneEntityCfg = SceneEntityCfg("trocar_2"),
    use_sparse_reward: bool = True,
    print_log: bool = False,
) -> torch.Tensor:
    """Reward for bringing the trocar tips together (Stage 1: finding the hole).

    Sparse mode pays a one-shot bonus (1/step_dt) on the 1 -> 2 transition.
    Dense mode shapes the reward with a Gaussian of the tip-to-tip distance
    and freezes the achieved value once stage 2 is reached, so the signal
    stays continuous for later stages.

    Args:
        env: Environment instance.
        tip_dist_std: Standard deviation of the Gaussian shaping term.
        asset_cfg1: Configuration for trocar 1.
        asset_cfg2: Configuration for trocar 2.
        use_sparse_reward: Sparse transition bonus vs. dense shaped reward.
        print_log: If True, print debug information.

    Returns:
        torch.Tensor: Reward tensor (num_envs,)
    """
    stage = get_task_stage(env)

    # Tip positions and their separation.
    tip_pos1 = get_trocar_tip_position(env, asset_cfg1)
    tip_pos2 = get_trocar_tip_position(env, asset_cfg2)
    tip_dist = torch.norm(tip_pos1 - tip_pos2, dim=-1)

    zeros = torch.zeros(env.num_envs, device=env.device)

    if use_sparse_reward:
        # Seed the previous-stage tracker on first use so no bonus fires spuriously.
        if not hasattr(env, "_prev_stage_tip"):
            env._prev_stage_tip = stage.clone()

        just_completed = (env._prev_stage_tip == 1) & (stage >= 2)
        reward = torch.where(
            just_completed,
            torch.ones(env.num_envs, device=env.device) / env.step_dt,
            zeros,
        )
        env._prev_stage_tip = stage.clone()
    else:
        # Gaussian of the tip distance: 1 at contact, decaying with tip_dist_std.
        shaped = torch.exp(-torch.square(tip_dist) / (2 * tip_dist_std**2))

        if not hasattr(env, "_tip_reward_locked"):
            env._tip_reward_locked = zeros.clone()

        # Freeze the shaped value at the moment stage 2 is first reached.
        env._tip_reward_locked = torch.where(
            (stage >= 2) & (env._tip_reward_locked == 0),
            shaped,
            env._tip_reward_locked,
        )

        # stage < 1: inactive; stage == 1: live shaping; stage >= 2: frozen value.
        reward = torch.where(
            stage < 1,
            zeros,
            torch.where(stage == 1, shaped, env._tip_reward_locked),
        )

    if should_print_debug(env, print_log=print_log) and stage[0].item() == 1:
        mode_str = "Sparse" if use_sparse_reward else "Dense"
        print(
            f" └─ Stage 1 (Find Hole, {mode_str}):"
            f" tip_pos_1=({tip_pos1[0, 0]:.3f}, {tip_pos1[0, 1]:.3f}, {tip_pos1[0, 2]:.3f})"
            f" | tip_pos_2=({tip_pos2[0, 0]:.3f}, {tip_pos2[0, 1]:.3f}, {tip_pos2[0, 2]:.3f})"
            f" | tip_d={tip_dist[0].item():.4f}"
            f" | reward={reward[0].item():.3f}"
        )

    return reward
def trocar_insertion_reward(
    env: ManagerBasedRLEnv,
    angle_std: float = 0.2,  # Std for angle alignment reward
    angle_threshold: float = 0.15,  # Tolerance for parallelism (radians)
    center_dist_std: float = 0.05,  # Std for center distance reward
    asset_cfg1: SceneEntityCfg = SceneEntityCfg("trocar_1"),
    asset_cfg2: SceneEntityCfg = SceneEntityCfg("trocar_2"),
    use_sparse_reward: bool = True,
    print_log: bool = False,
) -> torch.Tensor:
    """Reward for inserting trocar_2 into trocar_1 (Stage 2: Pushing in).

    Reward based on:
    1. Orientation alignment (parallelism)
    2. Center distance (pushing in)

    Only active in Stage 2. Once completed (stage >= 3), this reward is locked at the achieved value.

    Args:
        env: Environment instance
        angle_std: Standard deviation for angle reward shaping
        angle_threshold: Angle threshold for parallelism (radians)
        center_dist_std: Standard deviation for center distance reward shaping
        asset_cfg1: Configuration for trocar 1
        asset_cfg2: Configuration for trocar 2
        use_sparse_reward: If True (default), only give reward (1/step_dt) on the
            stage 2 -> 3 transition. If False, give continuous reward based on
            alignment and distance.
        print_log: If True, print debug information.

    Returns:
        torch.Tensor: Reward tensor (num_envs,)
    """
    # Read current stage (updated once per step by lift_trocars_reward)
    stage = get_task_stage(env)

    obj1: RigidObject = env.scene[asset_cfg1.name]
    obj2: RigidObject = env.scene[asset_cfg2.name]

    # Positions and Rotations
    pos1 = wp.to_torch(obj1.data.root_pos_w)
    quat1 = wp.to_torch(obj1.data.root_quat_w)
    pos2 = wp.to_torch(obj2.data.root_pos_w)
    quat2 = wp.to_torch(obj2.data.root_quat_w)

    # Calculate center distance
    center_dist = torch.norm(pos1 - pos2, dim=-1)  # (num_envs,)

    # Calculate alignment (parallelism): angle between the two local -Z axes.
    target_axis1 = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1)
    target_axis2 = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1)

    axis1 = quat_apply(quat1, target_axis1)
    axis2 = quat_apply(quat2, target_axis2)

    dot_prod = torch.sum(axis1 * axis2, dim=-1)
    # abs makes the test sign-insensitive; clamp guards acos from fp overshoot.
    abs_dot = torch.clamp(torch.abs(dot_prod), max=1.0)
    angle = torch.acos(abs_dot)

    is_parallel = angle < angle_threshold

    if use_sparse_reward:
        # Sparse reward mode: give 1.0 ONLY when stage transitions from 2 to 3
        # Track previous stage
        if not hasattr(env, "_prev_stage_insert"):
            # Initialize prev_stage to current stage to avoid false positives on first call
            env._prev_stage_insert = stage.clone()

        # Reward only on transition step (prev_stage=2, curr_stage=3); 1/step_dt
        # compensates the dt scaling applied by the reward manager.
        stage_just_completed = (env._prev_stage_insert == 2) & (stage >= 3)
        reward = torch.where(
            stage_just_completed,
            torch.ones(env.num_envs, device=env.device) / env.step_dt,
            torch.zeros(env.num_envs, device=env.device),
        )

        # Update previous stage for next step
        env._prev_stage_insert = stage.clone()
    else:
        # Dense reward mode: continuous reward with locking for continuity
        # Reward component 1: Alignment (parallelism) — only the excess over
        # the tolerance is penalized, so "parallel enough" scores 1.0.
        excess_angle = torch.clamp(angle - angle_threshold, min=0.0)
        align_reward = torch.exp(-torch.square(excess_angle) / (2 * angle_std**2))

        # Reward component 2: Center distance (pushing in)
        # Only reward center distance if already parallel
        center_reward = torch.exp(-torch.square(center_dist) / (2 * center_dist_std**2))
        center_reward = torch.where(is_parallel, center_reward, torch.zeros_like(center_reward))

        # Combined reward: alignment * center_distance
        insertion_reward = align_reward * center_reward

        # Initialize locked reward cache
        if not hasattr(env, "_insertion_reward_locked"):
            env._insertion_reward_locked = torch.zeros(env.num_envs, device=env.device)

        # Lock the reward when transitioning to stage 3
        env._insertion_reward_locked = torch.where(
            (stage >= 3) & (env._insertion_reward_locked == 0),
            insertion_reward,  # Lock at current value when stage changes
            env._insertion_reward_locked,
        )

        # Stage < 2: no reward (not ready yet)
        # Stage 2: give reward based on current state
        # Stage >= 3: return locked value (preserves continuity)
        reward = torch.where(
            stage < 2,
            torch.zeros(env.num_envs, device=env.device),
            torch.where(stage == 2, insertion_reward, env._insertion_reward_locked),
        )

    # Debug info
    if should_print_debug(env, print_log=print_log) and stage[0].item() == 2:
        mode_str = "Sparse" if use_sparse_reward else "Dense"
        # FIX: is_parallel is a (num_envs,) tensor, so .item() without indexing
        # raises RuntimeError whenever num_envs > 1 — index env 0 like the
        # other fields in this debug line.
        print(
            f" └─ Stage 2 (Push In, {mode_str}): angle={angle[0].item():.3f} | "
            f"center_d={center_dist[0].item():.4f} | "
            f"is_parallel={is_parallel[0].item()} | reward={reward[0].item():.3f}"
        )

    return reward
def trocar_placement_reward(
    env: ManagerBasedRLEnv,
    x_min: float = -1.8,
    x_max: float = -1.4,
    y_min: float = 1.5,
    y_max: float = 1.8,
    z_min: float = 0.9,
    asset_cfg1: SceneEntityCfg = SceneEntityCfg("trocar_1"),
    asset_cfg2: SceneEntityCfg = SceneEntityCfg("trocar_2"),
    use_sparse_reward: bool = True,
    print_log: bool = False,
) -> torch.Tensor:
    """Reward for placing both trocars in the target tray region (Stage 3).

    Sparse mode pays a one-shot bonus (1/step_dt) on the 3 -> 4 transition.
    Dense mode pays 1.0 while both objects are inside the zone and freezes the
    achieved value once stage 4 is reached.

    Args:
        env: Environment instance.
        x_min: X bound of the target zone (relative to env origin).
        x_max: X bound of the target zone (relative to env origin).
        y_min: Y bound of the target zone (relative to env origin).
        y_max: Y bound of the target zone (relative to env origin).
        z_min: Z threshold — below this counts as placed.
        asset_cfg1: Configuration for trocar 1.
        asset_cfg2: Configuration for trocar 2.
        use_sparse_reward: Sparse transition bonus vs. dense in-zone reward.
        print_log: If True, print debug information.

    Returns:
        torch.Tensor: Reward tensor (num_envs,)
    """
    stage = get_task_stage(env)

    pos1 = wp.to_torch(env.scene[asset_cfg1.name].data.root_pos_w)
    pos2 = wp.to_torch(env.scene[asset_cfg2.name].data.root_pos_w)

    # Shift the tray bounds into each environment's frame; min/max keeps the
    # box valid even if the bound arguments arrive swapped.
    origins = env.scene.env_origins  # (num_envs, 3)
    lo_x = origins[:, 0] + min(x_min, x_max)
    hi_x = origins[:, 0] + max(x_min, x_max)
    lo_y = origins[:, 1] + min(y_min, y_max)
    hi_y = origins[:, 1] + max(y_min, y_max)

    def _inside(p: torch.Tensor) -> torch.Tensor:
        # Inside the XY box AND below the placement height.
        return (
            (p[:, 0] >= lo_x)
            & (p[:, 0] <= hi_x)
            & (p[:, 1] >= lo_y)
            & (p[:, 1] <= hi_y)
            & (p[:, 2] < z_min)
        )

    both_in_zone = _inside(pos1) & _inside(pos2)

    zeros = torch.zeros(env.num_envs, device=env.device)

    if use_sparse_reward:
        # Seed the previous-stage tracker on first use so no bonus fires spuriously.
        if not hasattr(env, "_prev_stage_place"):
            env._prev_stage_place = stage.clone()

        just_completed = (env._prev_stage_place == 3) & (stage >= 4)
        reward = torch.where(
            just_completed,
            torch.ones(env.num_envs, device=env.device) / env.step_dt,
            zeros,
        )
        env._prev_stage_place = stage.clone()
    else:
        in_zone_reward = both_in_zone.float()

        if not hasattr(env, "_placement_reward_locked"):
            env._placement_reward_locked = zeros.clone()

        # Freeze the value at the moment stage 4 is first reached.
        env._placement_reward_locked = torch.where(
            (stage >= 4) & (env._placement_reward_locked == 0),
            in_zone_reward,
            env._placement_reward_locked,
        )

        # stage < 3: inactive; stage == 3: live reward; stage >= 4: frozen value.
        reward = torch.where(
            stage < 3,
            zeros,
            torch.where(stage == 3, in_zone_reward, env._placement_reward_locked),
        )

    if should_print_debug(env, print_log=print_log) and stage[0].item() == 3:
        mode_str = "Sparse" if use_sparse_reward else "Dense"
        print(
            f" └─ Stage 3 (Placement, {mode_str}): in_zone={both_in_zone[0].item()} | "
            f"z1={pos1[0, 2]:.3f} | z2={pos2[0, 2]:.3f}"
        )

    return reward
def object_drop_termination(
    env: ManagerBasedRLEnv,
    drop_height_threshold: float = 0.5,
    asset_cfg1: SceneEntityCfg = SceneEntityCfg("trocar_1"),
    asset_cfg2: SceneEntityCfg = SceneEntityCfg("trocar_2"),
    print_log: bool = False,
) -> torch.Tensor:
    """Terminate environments in which either trocar has fallen below a height threshold.

    Marking the episode as terminated lets the training framework perform the
    reset, instead of an in-place auto-reset of the dropped objects.

    Args:
        env: The environment instance.
        drop_height_threshold: World-frame z below which an object counts as dropped.
        asset_cfg1: Configuration for first trocar.
        asset_cfg2: Configuration for second trocar.
        print_log: If True, print debug information.

    Returns:
        Boolean tensor (num_envs,) — True where at least one object has dropped.
    """
    # World-frame z of each trocar root, per environment.
    z1 = wp.to_torch(env.scene[asset_cfg1.name].data.root_pos_w)[:, 2]
    z2 = wp.to_torch(env.scene[asset_cfg2.name].data.root_pos_w)[:, 2]

    dropped = (z1 < drop_height_threshold) | (z2 < drop_height_threshold)

    if print_log and dropped.any():
        print(f"Drop termination triggered for {dropped.sum().item()} environment(s)")

    return dropped
def task_success_termination(
    env: ManagerBasedRLEnv,
    success_stage: int = 4,
    print_log: bool = False,
) -> torch.Tensor:
    """Terminate environments whose task stage has reached ``success_stage``.

    Args:
        env: The environment instance.
        success_stage: Stage value that marks task completion (4 = placed).
        print_log: If True, print debug information.

    Returns:
        torch.Tensor: Boolean tensor indicating which environments should terminate (num_envs,)
    """
    completed = get_task_stage(env) >= success_stage

    if print_log and completed.any():
        print(f"Task completed in {completed.sum().item()} environment(s)!")

    return completed