Refactor Policy args with a dataclass (#332)

xyao-nv · web-flow · commit 1a0441004d13 · 2026-01-15T15:08:15.000-08:00
## Summary
Enables users to specify policy-related configs through a CLI args parser
(policy runner) or a dict (multi-task eval).

## Detailed description
- Before this change, policy configs could only be specified using CLI
args.
- Multi-task eval expects users to submit eval jobs through a JSON dict.
- Instead of JSON dict -> CLI args list -> parser, it now allows two paths,
tried in the following order:
1. JSON dict -> PolicyConfigClass -> Policy.from_dict()
2. JSON dict -> CLI args list -> args parser -> Policy.from_args()
The path taken depends on which of these methods the policy provides.
diff --git a/isaaclab_arena/evaluation/eval_runner.py b/isaaclab_arena/evaluation/eval_runner.py
@@ -42,16 +42,23 @@ def load_env(arena_env_args: list[str], job_name: str):
 
 
 def get_policy_from_job(job: Job) -> "PolicyBase":
-
+    """
+    Create a policy from a job configuration. Two paths are supported:
+    1. JSON → dict → ConfigDataclass → init cls (preferred, if policy has config_class)
+    2. JSON → dict → CLI args list → parser → from_args() (fallback, if policy has no config_class)
+    """
     # Each job can be evaluated with a different policy checkpoint, or even a different policy type
     policy_cls = get_policy_cls(job.policy_type)
 
-    # As jobs may run diff policies, create a new parser for each job avoiding data fields conflicts
-    policy_args_parser = get_isaaclab_arena_cli_parser()
-    policy_added_args_parser = policy_cls.add_args_to_parser(policy_args_parser)
-    # only for policy related arguments
-    policy_args = policy_added_args_parser.parse_args(job.policy_args)
-    policy = policy_cls.from_args(policy_args)
+    # Prefer the inherited PolicyBase.from_dict() when the policy class declares a config_class
+    if getattr(policy_cls, "config_class", None) is not None:
+        policy = policy_cls.from_dict(job.policy_config_dict)
+    else:
+        # argparse needs a list of CLI tokens, but Job now stores the raw dict: convert before parsing
+        cli_args = job.convert_args_dict_to_cli_args_list(job.policy_config_dict)
+        policy_args_parser = get_isaaclab_arena_cli_parser()  # fresh parser per job avoids arg conflicts
+        policy_added_args_parser = policy_cls.add_args_to_parser(policy_args_parser)
+        policy = policy_cls.from_args(policy_added_args_parser.parse_args(cli_args))
     return policy
 
 
diff --git a/isaaclab_arena/evaluation/job_manager.py b/isaaclab_arena/evaluation/job_manager.py
@@ -23,8 +23,8 @@ def __init__(
         arena_env_args: dict,
         policy_type: str,
         num_steps: int = None,
-        policy_args: dict = {},
-        status: Status = Status.PENDING,
+        policy_config_dict: dict = None,
+        status: Status = None,
     ):
         """Initialize a Job instance.
 
@@ -33,15 +33,15 @@ def __init__(
             arena_env_args: Dictionary of arguments for configuring the arena environment
             num_steps: Number of steps to run the policy for
             policy_type: Type of policy to use
-            policy_args: Dictionary of arguments for the policy. These are passed to the policy class's from_args method.
+            policy_config_dict: Dictionary configuration for the policy.
             status: Job status (defaults to PENDING)
         """
         self.name = name
         self.arena_env_args = arena_env_args
         self.num_steps = num_steps
         self.policy_type = policy_type
-        self.policy_args = policy_args
-        self.status = status
+        self.policy_config_dict = policy_config_dict if policy_config_dict is not None else {}
+        self.status = status if status is not None else Status.PENDING
         self.start_time = None
         self.end_time = None
         self.metrics = {}
@@ -83,7 +83,7 @@ def from_dict(cls, data: dict) -> "Job":
             arena_env_args=cls.convert_args_dict_to_cli_args_list(data["arena_env_args"]),
             policy_type=data["policy_type"],
             num_steps=num_steps,
-            policy_args=cls.convert_args_dict_to_cli_args_list(data["policy_args"]),
+            policy_config_dict=data["policy_args"],
             status=status,
         )
 
diff --git a/isaaclab_arena/policy/policy_base.py b/isaaclab_arena/policy/policy_base.py
@@ -8,13 +8,50 @@
 import torch
 from abc import ABC, abstractmethod
 from gymnasium.spaces.dict import Dict as GymSpacesDict
+from typing import Any
 
 
 class PolicyBase(ABC):
-    def __init__(self):
+    """
+    Base class for policies.
+
+    Subclasses should define a `config_class` class variable pointing to their configuration dataclass
+    to enable configuration from dictionaries via the from_dict() method.
+    """
+
+    # Optional: Subclasses can define this to enable from_dict()
+    config_class: type | None = None
+
+    def __init__(self, config: Any):
         """
         Base class for policies.
         """
+        self.config = config
+
+    @classmethod
+    def from_dict(cls, config_dict: dict[str, Any]) -> "PolicyBase":
+        """
+        Create a policy instance from a configuration dictionary.
+
+        This method instantiates the policy's config_class from the dict and then
+        creates the policy from that config.
+
+        Path: dict → ConfigDataclass → Policy instance
+
+        Args:
+            config_dict: Dictionary containing the configuration fields
+
+        Returns:
+            Policy instance
+        """
+        if cls.config_class is None:
+            raise NotImplementedError(f"{cls.__name__} must define 'config_class' to use from_dict()")
+
+        # Create config from dict
+        config = cls.config_class(**config_dict)  # type: ignore[misc]
+
+        # Create policy from config
+        return cls(config)  # type: ignore[call-arg]
 
     @abstractmethod
     def get_action(self, env: gym.Env, observation: GymSpacesDict) -> torch.Tensor:
diff --git a/isaaclab_arena/policy/replay_action_policy.py b/isaaclab_arena/policy/replay_action_policy.py
@@ -6,6 +6,7 @@
 import argparse
 import gymnasium as gym
 import torch
+from dataclasses import dataclass, field
 from gymnasium.spaces.dict import Dict as GymSpacesDict
 
 from isaaclab.utils.datasets import HDF5DatasetFileHandler
@@ -14,6 +15,57 @@
 from isaaclab_arena.policy.policy_base import PolicyBase
 
 
+@dataclass
+class ReplayActionPolicyArgs:
+    """
+    Configuration dataclass for ReplayActionPolicy.
+
+    Instances are built either from a dict (ReplayActionPolicy.config_class, via
+    PolicyBase.from_dict()) or from parsed CLI arguments (via from_cli_args()).
+
+    Field metadata mirrors the help text / required flag of the matching argparse options.
+    NOTE(review): nothing visible here reads this metadata; confirm it is consumed, or keep it in sync by hand.
+    """
+
+    replay_file_path: str = field(
+        metadata={
+            "help": "Path to the HDF5 file containing the episode",
+            "required": True,
+        }
+    )
+
+    device: str = field(
+        default="cuda",
+        metadata={
+            "help": "Device to use for loading the dataset",
+        },
+    )
+
+    episode_name: str | None = field(
+        default=None,
+        metadata={
+            "help": "Name of the episode to replay. If not provided, the first episode will be replayed",
+        },
+    )
+
+    @classmethod
+    def from_cli_args(cls, args: argparse.Namespace) -> "ReplayActionPolicyArgs":
+        """
+        Create configuration from parsed CLI arguments.
+
+        Args:
+            args: Parsed command line arguments; `device` falls back to "cuda" if the attribute is absent
+
+        Returns:
+            ReplayActionPolicyArgs built from replay_file_path, device and episode_name
+        """
+        return cls(
+            replay_file_path=args.replay_file_path,
+            device=getattr(args, "device", "cuda"),
+            episode_name=args.episode_name,
+        )
+
+
 @register_policy
 class ReplayActionPolicy(PolicyBase):
     """
@@ -22,24 +74,32 @@ class ReplayActionPolicy(PolicyBase):
     """
 
     name = "replay"
-
-    def __init__(self, replay_file_path: str, device: str = "cuda", episode_name: str | None = None):
-        super().__init__()
-        self.episode_name = episode_name
+    # enable from_dict() from policy_base.PolicyBase
+    config_class = ReplayActionPolicyArgs
+
+    def __init__(self, config: ReplayActionPolicyArgs):
+        """
+        Initialize ReplayActionPolicy from a configuration dataclass.
+
+        Args:
+            config: ReplayActionPolicyArgs configuration dataclass
+        """
+        super().__init__(config)
+        self.episode_name = config.episode_name
         self.dataset_file_handler = HDF5DatasetFileHandler()
-        self.dataset_file_handler.open(replay_file_path)
+        self.dataset_file_handler.open(config.replay_file_path)
         self.available_episode_names = list(self.dataset_file_handler.get_episode_names())
 
         # Take the first episode if no episode name is provided
         if self.episode_name is None:
             self.episode_name = self.available_episode_names[0]
         else:
             assert self.episode_name in self.available_episode_names, (
-                f"Episode {self.episode_name} not found in {replay_file_path}."
+                f"Episode {self.episode_name} not found in {config.replay_file_path}."
                 f"Available episodes: {self.available_episode_names}"
             )
 
-        self.episode_data = self.dataset_file_handler.load_episode(self.episode_name, device=device)
+        self.episode_data = self.dataset_file_handler.load_episode(self.episode_name, device=config.device)
         self.current_action_index = 0
 
     def __len__(self) -> int:
@@ -84,23 +144,35 @@ def add_args_to_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentPars
         replay_group.add_argument(
             "--replay_file_path",
             type=str,
-            help="Path to the HDF5 file containing the episode (required with --policy_type replay)",
+            required=True,
+            help="Path to the HDF5 file containing the episode",
+        )
+        replay_group.add_argument(
+            "--device",
+            type=str,
+            default="cuda",
+            help="Device to use for loading the dataset (default: cuda)",
         )
         replay_group.add_argument(
             "--episode_name",
             type=str,
             default=None,
-            help=(
-                "Name of the episode to replay. If not provided, the first episode will be"
-                "replayed (only used with --policy_type replay)"
-            ),
+            help="Name of the episode to replay. If not provided, the first episode will be replayed",
         )
         return parser
 
     @staticmethod
     def from_args(args: argparse.Namespace) -> "ReplayActionPolicy":
-        """Create a replay action policy from the arguments."""
-        return ReplayActionPolicy(
-            replay_file_path=args.replay_file_path,
-            episode_name=args.episode_name,
-        )
+        """
+        Create a ReplayActionPolicy instance from parsed CLI arguments.
+
+        Path: CLI args → ConfigDataclass → init cls
+
+        Args:
+            args: Parsed command line arguments
+
+        Returns:
+            ReplayActionPolicy instance
+        """
+        config = ReplayActionPolicyArgs.from_cli_args(args)
+        return ReplayActionPolicy(config)
diff --git a/isaaclab_arena/policy/zero_action_policy.py b/isaaclab_arena/policy/zero_action_policy.py
@@ -6,19 +6,52 @@
 import argparse
 import gymnasium as gym
 import torch
+from dataclasses import dataclass
 from gymnasium.spaces.dict import Dict as GymSpacesDict
 
 from isaaclab_arena.assets.register import register_policy
 from isaaclab_arena.policy.policy_base import PolicyBase
 
 
+@dataclass
+class ZeroActionPolicyArgs:
+    """
+    Configuration dataclass for ZeroActionPolicy.
+
+    This policy has no configuration parameters, but the dataclass is provided
+    for consistency with other policies following the unified configuration pattern.
+    """
+
+    @classmethod
+    def from_cli_args(cls, args: argparse.Namespace) -> "ZeroActionPolicyArgs":
+        """
+        Create configuration from parsed CLI arguments.
+
+        Args:
+            args: Parsed command line arguments
+
+        Returns:
+            ZeroActionPolicyArgs instance
+        """
+        _ = args  # Unused, but kept for API consistency
+        return cls()
+
+
 @register_policy
 class ZeroActionPolicy(PolicyBase):
 
     name = "zero_action"
+    # enable from_dict() from policy_base.PolicyBase
+    config_class = ZeroActionPolicyArgs
 
-    def __init__(self):
-        super().__init__()
+    def __init__(self, config: ZeroActionPolicyArgs):
+        """
+        Initialize ZeroActionPolicy.
+
+        Args:
+            config: ZeroActionPolicyArgs configuration dataclass (optional, not used)
+        """
+        super().__init__(config)
 
     def get_action(self, env: gym.Env, observation: GymSpacesDict) -> torch.Tensor:
         """
@@ -28,11 +61,32 @@ def get_action(self, env: gym.Env, observation: GymSpacesDict) -> torch.Tensor:
 
     @staticmethod
     def add_args_to_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParser:
-        """Add zero action policy specific arguments to the parser."""
+        """
+        Add zero action policy specific arguments to the parser.
+
+        This policy has no configuration parameters, so no arguments are added.
+
+        Args:
+            parser: The argument parser to add arguments to
+
+        Returns:
+            The updated argument parser (unchanged)
+        """
         # No additional command line arguments for zero action policy
         return parser
 
     @staticmethod
     def from_args(args: argparse.Namespace) -> "ZeroActionPolicy":
-        """Create a zero action policy from the arguments."""
-        return ZeroActionPolicy()
+        """
+        Create a ZeroActionPolicy instance from parsed CLI arguments.
+
+        Path: CLI args → ConfigDataclass → init cls
+
+        Args:
+            args: Parsed command line arguments
+
+        Returns:
+            ZeroActionPolicy instance
+        """
+        config = ZeroActionPolicyArgs.from_cli_args(args)
+        return ZeroActionPolicy(config)
diff --git a/isaaclab_arena_environments/eval_jobs_configs/gr00t_jobs_config.json b/isaaclab_arena_environments/eval_jobs_configs/gr00t_jobs_config.json
@@ -4,7 +4,7 @@
             "name": "gr1_open_microwave_cracker_box",
             "arena_env_args": {
                 "enable_cameras": true,
-                "env_name":"gr1_open_microwave",
+                "environment":"gr1_open_microwave",
                 "object":"cracker_box",
                 "embodiment":"gr1_joint"
             },
@@ -19,7 +19,7 @@
             "name": "g1_locomanip_pick_and_place_brown_box",
             "arena_env_args": {
                 "enable_cameras": true,
-                "env_name":"galileo_g1_locomanip_pick_and_place",
+                "environment":"galileo_g1_locomanip_pick_and_place",
                 "object":"brown_box",
                 "embodiment":"g1_wbc_joint"
             },
diff --git a/isaaclab_arena_gr00t/policy/gr00t_closedloop_policy.py b/isaaclab_arena_gr00t/policy/gr00t_closedloop_policy.py
diff --git a/isaaclab_arena_gr00t/policy/replay_lerobot_action_policy.py b/isaaclab_arena_gr00t/policy/replay_lerobot_action_policy.py