rai-opensource
diff --git a/‎.vscode/launch.json‎
Lines changed: 15 additions & 0 deletions b/‎.vscode/launch.json‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎…ples/exporter_scripts/export_isaaclab.py‎ ‎…rter_scripts/isaaclab/export_isaaclab.py‎examples/exporter_scripts/export_isaaclab.py renamed to examples/exporter_scripts/isaaclab/export_isaaclab.py
Lines changed: 14 additions & 7 deletions b/‎…ples/exporter_scripts/export_isaaclab.py‎ ‎…rter_scripts/isaaclab/export_isaaclab.py‎examples/exporter_scripts/export_isaaclab.py renamed to examples/exporter_scripts/isaaclab/export_isaaclab.py
Lines changed: 14 additions & 7 deletions
diff --git a/‎exploy.code-workspace‎
Lines changed: 2 additions & 1 deletion b/‎exploy.code-workspace‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎exploy/exporter/core/actor.py‎
Lines changed: 103 additions & 0 deletions b/‎exploy/exporter/core/actor.py‎
Lines changed: 103 additions & 0 deletions
diff --git a/‎exploy/exporter/core/evaluator.py‎
Lines changed: 20 additions & 9 deletions b/‎exploy/exporter/core/evaluator.py‎
Lines changed: 20 additions & 9 deletions
diff --git a/‎exploy/exporter/core/exporter.py‎
Lines changed: 1 addition & 2 deletions b/‎exploy/exporter/core/exporter.py‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎exploy/exporter/core/session_wrapper.py‎
Lines changed: 8 additions & 8 deletions b/‎exploy/exporter/core/session_wrapper.py‎
Lines changed: 8 additions & 8 deletions
@@ -13,5 +13,20 @@
             "python": "${workspaceFolder}/.pixi/envs/isaaclab/bin/python",
             "justMyCode": false
         },
+        {
+            "name": "[Core] Tests.",
+            "type": "debugpy",
+            "request": "launch",
+            "module": "pytest",
+            "args": [
+                "exploy/exporter/core/tests/test_export_environment.py"
+            ],
+            "console": "integratedTerminal",
+            "python": "${workspaceFolder}/.pixi/envs/core/bin/python",
+            "justMyCode": false,
+            "env": {
+                "PYTHONPATH": "${workspaceFolder}"
+            }
+        },
     ]
 }
@@ -55,12 +55,19 @@ def make_simulation_app() -> tuple[SimulationApp, argparse.Namespace]:
 from isaaclab.sim import SimulationContext
 from isaaclab_rl.rsl_rl import RslRlVecEnvWrapper
 from isaaclab_tasks.utils import parse_env_cfg
+from rsl_rl.algorithms.ppo import PPO
 from rsl_rl.runners import OnPolicyRunner
 
 from exploy.exporter.core.evaluator import evaluate
 from exploy.exporter.core.exporter import export_environment_as_onnx
 from exploy.exporter.core.session_wrapper import SessionWrapper
-from exploy.exporter.frameworks.isaaclab import inputs, memory, outputs
+from exploy.exporter.frameworks.isaaclab import (
+    environments,  # noqa: F401
+    inputs,
+    memory,
+    outputs,
+)
+from exploy.exporter.frameworks.isaaclab.actor import make_exportable_actor
 from exploy.exporter.frameworks.isaaclab.env import IsaacLabExportableEnvironment
 
 
@@ -82,17 +89,17 @@ def export_isaaclab(
     env = RslRlVecEnvWrapper(gym.make(task_name, cfg=env_cfg, render_mode=None))
     runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=test_dir, device=agent_cfg.device)
 
-    # Get the policy and its normalizer.
-    policy = runner.alg.policy.actor.to(env.device)
-    normalizer = runner.alg.policy.actor_obs_normalizer.to(env.device)
-    actor = torch.nn.Sequential(normalizer, policy).eval()
-
     # Export to ONNX.
     onnx_export_dir = test_dir
     onnx_export_file = "test_export.onnx"
 
     exportable_env = IsaacLabExportableEnvironment(env.unwrapped)
 
+    # Get the policy and its normalizer.
+    alg: PPO = runner.alg
+    assert isinstance(alg, PPO), f"Expected PPO algorithm, got: {type(alg).__name__}"
+    actor = make_exportable_actor(exportable_env, alg.policy, device=task_device)
+
     articulations = env.unwrapped.scene.articulations
     context_manager = exportable_env.context_manager()
 
@@ -145,7 +152,7 @@ def export_isaaclab(
     session_wrapper = SessionWrapper(
         onnx_folder=onnx_export_dir,
         onnx_file_name=onnx_export_file,
-        policy=actor,
+        actor=actor,
         optimize=True,
     )
 
 
@@ -10,7 +10,8 @@
 			"${workspaceFolder}/.pixi/envs/isaaclab/lib/python3.11/site-packages/isaaclab/source/isaaclab",
 			"${workspaceFolder}/.pixi/envs/isaaclab/lib/python3.11/site-packages/isaaclab/source/isaaclab_rl",
 			"${workspaceFolder}/.pixi/envs/isaaclab/lib/python3.11/site-packages/isaaclab/source/isaaclab_tasks",
-			"${workspaceFolder}/.pixi/envs/isaaclab/lib/python3.11/site-packages/isaacsim/exts/isaacsim.core.utils"
+			"${workspaceFolder}/.pixi/envs/isaaclab/lib/python3.11/site-packages/isaacsim/exts/isaacsim.core.utils",
+			"${workspaceFolder}/.pixi/envs/isaaclab/lib/python3.11/site-packages",
 		]
 	}
 }
@@ -0,0 +1,103 @@
+# Copyright (c) 2026 Robotics and AI Institute LLC dba RAI Institute. All rights reserved.
+
+import abc
+from collections.abc import Callable
+
+import torch
+
+from exploy.exporter.core.components import Connection, Memory
+from exploy.exporter.core.context_manager import ContextManager
+
+
+class ExportableActor(torch.nn.Module, abc.ABC):
+    """Base interface for actors that can be exported to ONNX.
+
+    Subclasses must implement `forward()`. The default `reset()` and `get_state()`
+    describe an actor without internal state; actors that keep state (e.g., RNN
+    hidden states) override them.
+    """
+
+    @abc.abstractmethod
+    def forward(self, obs: torch.Tensor) -> torch.Tensor:
+        """Compute the actions corresponding to a batch of observations.
+
+        Args:
+            obs: A tensor of shape (batch_size, obs_dim) containing the observations.
+
+        Returns:
+            The actions computed from `obs`.
+
+        Raises:
+            NotImplementedError: Always, when this base implementation is invoked.
+        """
+        raise NotImplementedError("forward() method must be implemented by subclasses.")
+
+    def reset(self, dones: torch.Tensor):
+        """Reset the actor's internal state (e.g., RNN hidden states) for finished environments.
+
+        The base implementation is a no-op.
+
+        Args:
+            dones: A tensor of shape (batch_size,) containing boolean flags indicating which
+                   environments have been reset.
+        """
+        return None
+
+    def get_state(self) -> tuple[torch.Tensor, ...] | None:
+        """Return the actor's internal state.
+
+        Returns:
+            A tuple of state tensors, or None if the actor has no state. The base
+            implementation always returns None.
+        """
+        return None
+
+
+class _ModuleActor(ExportableActor):
+    """Stateless `ExportableActor` that forwards observations to a wrapped `torch.nn.Module`.
+
+    Defined at module level (rather than inside `make_exportable_actor`) so that every
+    wrapped actor shares one class object and instances can be pickled.
+    """
+
+    def __init__(self, actor: torch.nn.Module):
+        super().__init__()
+        # Assigning a Module attribute registers it as a submodule of this wrapper.
+        self._actor = actor
+
+    def forward(self, obs: torch.Tensor) -> torch.Tensor:
+        """Return the wrapped module's output for `obs`."""
+        return self._actor(obs)
+
+
+def make_exportable_actor(actor: torch.nn.Module) -> ExportableActor:
+    """Convert a torch.nn.Module actor to an ExportableActor.
+
+    Args:
+        actor: The actor to convert.
+
+    Returns:
+        `actor` itself when it already is an `ExportableActor`; otherwise a stateless
+        wrapper whose `forward()` delegates to `actor`.
+    """
+    # The wrapper does not forward `reset()`/`get_state()`, so wrapping an actor that
+    # already implements the interface would silently hide its state handling.
+    if isinstance(actor, ExportableActor):
+        return actor
+
+    return _ModuleActor(actor)
+
+
+def add_actor_memory(
+    context_manager: ContextManager,
+    get_hidden_states_func: Callable[[], tuple[torch.Tensor, ...] | None],
+) -> None:
+    """Add memory components and connections for the actor's hidden states.
+
+    For the i-th tensor returned by `get_hidden_states_func`, a `Memory` component named
+    `actor_hidden_state_<i>` and a `Connection` named `connection_actor_hidden_state_<i>`
+    are added to `context_manager`. Nothing is added when the callback returns None.
+
+    Args:
+        context_manager: The context manager to add the components to.
+        get_hidden_states_func: A function that returns a tuple of hidden state tensors,
+            or None if the actor has no hidden state. It is called once here to
+            determine the number of states, and again on every later get/set.
+
+    Raises:
+        TypeError: If the callback returns something other than a tuple or None.
+    """
+    actor_state = get_hidden_states_func()
+    if actor_state is None:
+        # No hidden state: there is nothing to register.
+        return
+
+    # Raise explicitly instead of asserting so the check is not stripped under `python -O`.
+    if not isinstance(actor_state, tuple):
+        raise TypeError(
+            f"Expected actor hidden states to be a tuple of tensors, got: {type(actor_state).__name__}"
+        )
+
+    for i_hs in range(len(actor_state)):
+        # The index and the callback are bound as default arguments so that each closure
+        # keeps its own values instead of the ones from the last loop iteration.
+
+        def get_hidden_state(
+            _i_hs: int = i_hs,
+            _get_cb: Callable = get_hidden_states_func,
+        ) -> torch.Tensor:
+            return _get_cb()[_i_hs]
+
+        def set_hidden_state(
+            value: torch.Tensor,
+            _i_hs: int = i_hs,
+            _get_cb: Callable = get_hidden_states_func,
+        ):
+            # Write in place into the tensor returned by the callback.
+            _get_cb()[_i_hs][:] = value
+
+        component_name = f"actor_hidden_state_{i_hs}"
+        memory_comp = Memory(
+            name=component_name,
+            get_from_env_cb=get_hidden_state,
+        )
+        context_manager.add_component(memory_comp)
+        context_manager.add_component(
+            Connection(
+                name=f"connection_{component_name}",
+                getter=memory_comp.get_from_env_cb,
+                setter=set_hidden_state,
+            )
+        )
@@ -184,25 +184,23 @@ def evaluate(
     context_manager: ContextManager,
     session_wrapper: SessionWrapper,
     num_steps: int,
-    observations: torch.Tensor | None = None,
     verbose: bool = True,
     reset_from_onnx_counter_steps: int = 50,
     atol: float = 1.0e-5,
     rtol: float = 1.0e-5,
     pause_on_failure: bool = True,
 ) -> tuple[bool, torch.Tensor]:
-    """Evaluate an ONNX exported model against the original IsaacLab environment and torch policy.
+    """Evaluate an ONNX exported model against an `ExportableEnvironment` stepped through a `SessionWrapper`.
 
     This function runs the simulation for a specified number of steps and compares the
-    outputs of the ONNX model with the environment's state and the original torch model's
-    outputs at each step. This is useful for verifying the correctness of the ONNX export.
+    outputs of the ONNX model with the environment's state and actor's actions at each step.
+    This is useful for verifying the correctness of the ONNX export.
 
     Args:
         env: The environment to run the evaluation in.
         context_manager: The context manager handling inputs and outputs.
         session_wrapper: An ONNX session wrapper.
         num_steps: The number of steps to run the evaluation for.
-        observations: The initial observations. If None, the environment is reset. Defaults to None.
         verbose: Whether to print verbose output during evaluation. Defaults to True.
         reset_from_onnx_counter_steps: Set after how many steps we should set memory inputs from ONNX instead of using
             the environment's state.
@@ -220,7 +218,15 @@ def evaluate(
         the final observations tensor.
     """
 
-    obs = observations.clone() if observations is not None else env.observations_reset()
+    # Reset both the environment and the actor.
+    obs = env.observations_reset()
+
+    actor = session_wrapper.get_actor()
+    if actor is None:
+        raise ValueError(
+            "Session wrapper has no actor. Cannot evaluate ONNX model without access to original actor for comparison."
+        )
+    actor.reset(torch.tensor([True], device=obs.device))
 
     # Print ONNX graph structure if verbose
     if verbose:
@@ -259,9 +265,10 @@ def reset():
     )
 
     # Compute actions for the initial observations.
-    env_actions: torch.Tensor = session_wrapper.get_torch_model()(obs)
+    env_actions: torch.Tensor = actor(obs)
 
     reset_memory_from_env = False
+    env.context_manager().read_inputs()
 
     while step_ctr < num_steps:
         reset_memory_from_env = (
@@ -270,15 +277,16 @@ def reset():
         next_obs, is_reset_step = env.step(env_actions)
         # Use the environment's observations for the next step.
         obs[:] = next_obs
-        # Compute actions from the new observations.
-        env_actions = session_wrapper.get_torch_model()(obs)
 
         # Check if the environment was reset.
         if is_reset_step:
             # Re-read the ONNX inputs from the environment after a reset to avoid mismatch between
             # ONNX inputs and environment state after reset.
             env.context_manager().read_inputs()
 
+            # Reset the actor state.
+            actor.reset(torch.tensor([is_reset_step], device=env_actions.device))
+
             # We need to reset the memory inputs from the environment after a reset.
             reset_memory_from_env = True
 
@@ -328,6 +336,9 @@ def reset():
             for component in context_manager.get_output_components()
         }
 
+        # Compute actions from the new observations.
+        env_actions = actor(obs)
+
         # Compare outputs from environment and ONNX model.
         step_export_ok, msg = _compare_step_outputs(
             env_obs=obs,
 
@@ -1,6 +1,5 @@
 # Copyright (c) 2026 Robotics and AI Institute LLC dba RAI Institute. All rights reserved.
 
-import copy
 import datetime
 import json
 import os
@@ -80,7 +79,7 @@ def __init__(
         self._env: ExportableEnvironment = env
 
         self.verbose = verbose
-        self.actor = copy.deepcopy(actor)
+        self.actor = actor
 
         self.export_mode = ExportMode.Default
 
 
@@ -5,9 +5,9 @@
 import numpy as np
 import onnx
 import onnxruntime as ort
-import torch
 from onnx import helper
 
+from exploy.exporter.core.actor import ExportableActor
 from exploy.exporter.core.utils.paths import prepare_onnx_paths
 
 
@@ -18,15 +18,15 @@ def __init__(
         self,
         onnx_folder: pathlib.Path,
         onnx_file_name: str,
-        policy: torch.nn.Module | None = None,
+        actor: ExportableActor | None = None,
         optimize: bool = True,
     ):
         """Construct a `SessionWrapper` to use it for policy inference.
 
         Args:
             onnx_folder: The folder containing an ONNX file to load.
             onnx_file_name: The name of the ONNX file contained in `ONNX_folder`.
-            policy: A `torch.nn.Module` representing the actor.
+            actor: An `ExportableActor` representing the actor.
             optimize: If true, optimize the ONNX graph, save it to file, and use it for inference.
         """
         # Prepare file paths
@@ -59,7 +59,7 @@ def __init__(
         self.session = session
         self.input_names = [inp.name for inp in session.get_inputs()]
         self.output_names = [val.name for val in session.get_outputs()]
-        self._policy = policy
+        self._actor = actor
         self.metadata = session.get_modelmeta()
 
         self._results = None
@@ -81,13 +81,13 @@ def __call__(self, **kwargs):
         self._results = self.session.run(self.output_names, in_kwargs)
         return self._results
 
-    def get_torch_model(self) -> torch.nn.Module:
-        """Get the original torch policy model.
+    def get_actor(self) -> ExportableActor | None:
+        """Get the original `ExportableActor` object used by this session wrapper.
 
         Returns:
-            The torch.nn.Module representing the policy, or None if not provided.
+            The `ExportableActor` representing the actor, or None if not provided.
         """
-        return self._policy
+        return self._actor
 
     def get_output_value(self, output_name: str):
         """Get a specific output value from the last inference run.
Original file line number	Diff line number	Diff line change
`@@ -10,7 +10,8 @@`
`10`	`10`	`"${workspaceFolder}/.pixi/envs/isaaclab/lib/python3.11/site-packages/isaaclab/source/isaaclab",`
`11`	`11`	`"${workspaceFolder}/.pixi/envs/isaaclab/lib/python3.11/site-packages/isaaclab/source/isaaclab_rl",`
`12`	`12`	`"${workspaceFolder}/.pixi/envs/isaaclab/lib/python3.11/site-packages/isaaclab/source/isaaclab_tasks",`
`13`		`- "${workspaceFolder}/.pixi/envs/isaaclab/lib/python3.11/site-packages/isaacsim/exts/isaacsim.core.utils"`
	`13`	`+ "${workspaceFolder}/.pixi/envs/isaaclab/lib/python3.11/site-packages/isaacsim/exts/isaacsim.core.utils",`
	`14`	`+ "${workspaceFolder}/.pixi/envs/isaaclab/lib/python3.11/site-packages",`
`14`	`15`	`]`
`15`	`16`	`}`
`16`	`17`	`}`