OpenDriveLab
diff --git a/src/openpi/models/model.py b/src/openpi/models/model.py
Lines changed: 2 additions & 0 deletions
diff --git a/src/openpi/models/pi0_config.py b/src/openpi/models/pi0_config.py
Lines changed: 28 additions & 0 deletions
diff --git a/src/openpi/models/pi0_rtc.py b/src/openpi/models/pi0_rtc.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/openpi/policies/agilex_policy.py b/src/openpi/policies/agilex_policy.py
Lines changed: 2 additions & 2 deletions
diff --git a/src/openpi/policies/arx_policy.py b/src/openpi/policies/arx_policy.py
Lines changed: 2 additions & 2 deletions
diff --git a/src/openpi/policies/droid_policy.py b/src/openpi/policies/droid_policy.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/openpi/training/config.py b/src/openpi/training/config.py
Lines changed: 20 additions & 3 deletions
diff --git a/stage_advantage/README.md b/stage_advantage/README.md
Lines changed: 4 additions & 0 deletions
diff --git a/stage_advantage/awbc/README.md b/stage_advantage/awbc/README.md
Lines changed: 5 additions & 1 deletion
diff --git a/train_deploy_alignment/README.md b/train_deploy_alignment/README.md
Lines changed: 11 additions & 0 deletions
@@ -33,6 +33,8 @@ class ModelType(enum.Enum):
     PI0 = "pi0"
     PI0_FAST = "pi0_fast"
     PI05 = "pi05"
+    PI0_RTC = "pi0_rtc"
+    PI05_RTC = "pi05_rtc"
 
 
 # The model always expects these images
 
@@ -13,6 +13,7 @@
 
 if TYPE_CHECKING:
     from openpi.models.pi0 import Pi0
+    from openpi.models.pi0_rtc import Pi0RTC
 
 
 @dataclasses.dataclass(frozen=True)
@@ -107,6 +108,33 @@ def get_freeze_filter(self) -> nnx.filterlib.Filter:
             return nnx.Nothing
         return nnx.All(*filters)
 
+
+@dataclasses.dataclass(frozen=True)
+class Pi0RTCConfig(Pi0Config):
+    """Config for the Pi0RTC (real-time control) model. Same architecture as Pi0/Pi05, but sample_actions additionally
+    supports prev_action_chunk, inference_delay and execute_horizon for RTC guidance. Use this config when serving
+    for RTC inference (e.g. agilex_inference_openpi_rtc.py). Set pi05=True for Pi05-based RTC (model_type PI05_RTC)."""
+
+    @property
+    @override
+    def model_type(self) -> _model.ModelType:
+        return _model.ModelType.PI05_RTC if self.pi05 else _model.ModelType.PI0_RTC
+
+    @override
+    def create(self, rng: at.KeyArrayLike) -> "Pi0RTC":
+        from openpi.models.pi0_rtc import Pi0RTC  # Runtime import; the module-level one is TYPE_CHECKING-only.
+
+        return Pi0RTC(self, rngs=nnx.Rngs(rng))
+
+    @override
+    def load_pytorch(self, train_config, weight_path: str):
+        """Always raises NotImplementedError: Pi0RTC is JAX-only, so serve it from a JAX checkpoint with Pi0RTCConfig."""
+        raise NotImplementedError(
+            "Pi0RTC is only supported with JAX checkpoints. Use a checkpoint saved from OpenPi JAX training "
+            "(params directory, not model.safetensors) and serve with --policy.config=pi05_rtc_flatten_fold_inference (or your RTC config name)."
+        )
+
+
 @dataclasses.dataclass(frozen=True)
 class AdvantageEstimatorConfig(Pi0Config):
     # * Custom
 
@@ -322,7 +322,7 @@ def rtc_step(carry):
             x_t_for_denoise = x_t
             if mask_prefix_delay and provided_dim > 0:
                 mask_time = (jnp.arange(self.action_horizon) < d).astype(bool)[None, :, None]
-                # 仅覆盖提供的维度，其余保持 x_t 原值
+                # Overwrite only the provided dims in the delay prefix; leave the rest as x_t.
                 overwrite = jnp.where(mask_time, prev_chunk[..., :provided_dim], x_t_for_denoise[..., :provided_dim])
                 x_t_for_denoise = x_t_for_denoise.at[..., :provided_dim].set(overwrite)
 
 
@@ -52,8 +52,8 @@ class AgilexInputs(transforms.DataTransformFn):
     mask_state: bool = False
 
     def __call__(self, data: dict) -> dict:
-        # We only mask padding for pi0 model, not pi0-FAST
-        mask_padding = self.model_type == _model.ModelType.PI0
+        # We only mask padding for pi0/pi0_rtc model, not pi05/pi05_rtc or pi0-FAST
+        mask_padding = self.model_type in (_model.ModelType.PI0, _model.ModelType.PI0_RTC)
 
         in_images = data["images"]
 
 
@@ -52,8 +52,8 @@ class ARXInputs(transforms.DataTransformFn):
     mask_state: bool = False
 
     def __call__(self, data: dict) -> dict:
-        # We only mask padding for pi0 model, not pi0-FAST
-        mask_padding = self.model_type == _model.ModelType.PI0
+        # We only mask padding for pi0/pi0_rtc model, not pi05/pi05_rtc or pi0-FAST
+        mask_padding = self.model_type in (_model.ModelType.PI0, _model.ModelType.PI0_RTC)
 
         in_images = data["images"]
 
 
@@ -45,7 +45,7 @@ def __call__(self, data: dict) -> dict:
         wrist_image = _parse_image(data["observation/wrist_image_left"])
 
         match self.model_type:
-            case _model.ModelType.PI0 | _model.ModelType.PI05:
+            case _model.ModelType.PI0 | _model.ModelType.PI05 | _model.ModelType.PI0_RTC | _model.ModelType.PI05_RTC:
                 names = ("base_0_rgb", "left_wrist_0_rgb", "right_wrist_0_rgb")
                 images = (base_image, wrist_image, np.zeros_like(base_image))
                 image_masks = (np.True_, np.True_, np.False_)
 
@@ -115,7 +115,7 @@ class ModelTransformFactory(GroupFactory):
 
     def __call__(self, model_config: _model.BaseModelConfig) -> _transforms.Group:
         match model_config.model_type:
-            case _model.ModelType.PI0:
+            case _model.ModelType.PI0 | _model.ModelType.PI0_RTC:
                 return _transforms.Group(
                     inputs=[
                         _transforms.InjectDefaultPrompt(self.default_prompt),
@@ -126,7 +126,7 @@ def __call__(self, model_config: _model.BaseModelConfig) -> _transforms.Group:
                         _transforms.PadStatesAndActions(model_config.action_dim),
                     ],
                 )
-            case _model.ModelType.PI05:
+            case _model.ModelType.PI05 | _model.ModelType.PI05_RTC:
                 assert isinstance(model_config, pi0_config.Pi0Config)
                 return _transforms.Group(
                     inputs=[
@@ -187,7 +187,7 @@ def create_base_config(self, assets_dirs: pathlib.Path, model_config: _model.Bas
             repo_id=repo_id,
             asset_id=asset_id,
             norm_stats=self._load_norm_stats(epath.Path(self.assets.assets_dir or assets_dirs), asset_id),
-            use_quantile_norm=model_config.model_type != ModelType.PI0,
+            use_quantile_norm=model_config.model_type not in (ModelType.PI0, ModelType.PI0_RTC),
         )
 
     def _load_norm_stats(self, assets_dir: epath.Path, asset_id: str | None) -> dict[str, _transforms.NormStats] | None:
@@ -1371,6 +1371,23 @@ def __post_init__(self) -> None:
         num_workers=8, 
         batch_size=256, 
     ),
+
+    #**************************FlattenFold RTC Inference*******************************
+    # Use this config when serving the policy for agilex_inference_openpi_rtc.py (JAX checkpoints only).
+    TrainConfig(
+        name="pi05_rtc_flatten_fold_inference",
+        model=pi0_config.Pi0RTCConfig(pi05=True),
+        data=LerobotAgilexDataConfig(
+            repo_id="<path_to_repo_root>/data/FlattenFold/base",
+            default_prompt="Flatten and fold the cloth.",
+            use_delta_joint_actions=False,
+        ),
+        weight_loader=weight_loaders.CheckpointWeightLoader("<path_to/pi05_base/checkpoint>"),
+        num_train_steps=100_000,
+        keep_period=5000,
+        num_workers=8,
+        batch_size=256,
+    ),
     # RoboArena & PolaRiS configs.
     *roboarena_config.get_roboarena_configs(),
     *polaris_config.get_polaris_configs(),
 
@@ -22,6 +22,8 @@ This module implements a pipeline for training an **Advantage Estimator** and us
 
 **End-to-end order for AWBC:** (1) Stage 0 on data with `progress` → optional for Stage 1. (2) Stage 1 → train estimator. (3) Stage 2 → run eval on your dataset so it gets `data_PI06_100000/` or `data_KAI0_100000/` with advantage columns. (4) Run Stage 0 again with `--advantage-source absolute_advantage` on that dataset (e.g. via `gt_labeling.sh` with `DATA_PATH` = the repo you ran eval on, and source subdirs `data_PI06_100000` / `data_KAI0_100000`). (5) Point AWBC config `repo_id` at the resulting advantage-labeled directory and run Stage 3 training.
 
+**Pre-annotated data:** The downloaded dataset includes **`data/Task_A/advantage`**, a fully annotated advantage dataset that can be used **directly for AWBC training** (Stage 3) without running Stage 0–2. Set the AWBC config `repo_id` to that path and run training.
+
 ---
 
 ## Stage 0: GT Data Labeling
@@ -287,6 +289,8 @@ So during AWBC training the model is conditioned on prompts that explicitly incl
 
 At **inference** time you must use the **same prompt format** as in training. To run the policy in the high-advantage regime, pass the **positive**-advantage prompt, e.g. `"<task>, Advantage: positive"` (with the same `<task>` wording as in your `tasks.jsonl`). Using a different format or omitting the advantage part can hurt performance, since the model was trained to condition on this exact style of prompt.
 
+**Where to set the prompt when deploying:** The language prompt is set in the **inference code** (e.g. the `lang_embeddings` variable in the Agilex inference scripts). See the [train_deploy_alignment/inference README](../train_deploy_alignment/inference/README.md) and [Agilex README — Prompt and AWBC](../train_deploy_alignment/inference/agilex/README.md#prompt-and-awbc-important) for how to configure it so that it matches the prompt used in training and, for AWBC, uses the positive-advantage format above.
+
 ### How it works (data flow)
 
 1. **Data**: The advantage dataset must contain `task_index` in each parquet and `meta/tasks.jsonl` mapping `task_index` → prompt string. This is produced by running Stage 2 (eval) to get advantage columns, then Stage 0 (`gt_label.py --advantage-source absolute_advantage`) to discretize into `task_index` and write `tasks.jsonl`.
 
@@ -17,7 +17,11 @@ Each uses `base_config=DataConfig(prompt_from_task=True)` so that the dataset’
 ## Prerequisites
 
 1. **Advantage dataset**  
-   The data must have `task_index` in each parquet and `meta/tasks.jsonl` (prompt strings per `task_index`). To build it:
+   The data must have `task_index` in each parquet and `meta/tasks.jsonl` (prompt strings per `task_index`).
+
+   **Pre-annotated data:** The downloaded dataset includes **`data/Task_A/advantage`**, a fully annotated advantage dataset that can be used **directly for AWBC training** (no need to run Stage 0–2 first). Set the AWBC config `repo_id` to that path and run the training commands below.
+
+   To build your own advantage dataset instead:
    - Run **Stage 2** (eval) on your dataset → get `data_PI06_100000/` or `data_KAI0_100000/` with advantage columns.
    - Run **Stage 0** on that output: `gt_label.py --advantage-source absolute_advantage` (or `gt_labeling.sh` with `DATA_PATH` = the eval repo). The resulting directory (with `data/`, `meta/tasks.jsonl`, `videos/`) is your advantage dataset.
    - Place or link it at e.g. `./data/FlattenFold/advantage` and set `repo_id` in config to that path.
 
@@ -0,0 +1,11 @@
+# Train–Deploy Alignment
+
+This directory contains three modules for keeping training data and deployment/inference aligned:
+
+| Module | Description |
+|--------|-------------|
+| **dagger** | DAgger-style data collection (policy-in-the-loop, intervention, save). See [dagger/README.md](dagger/README.md) for ARX and Agilex. |
+| **inference** | Deployment and inference code for ARX and Agilex. |
+| **data_augment** | Data augmentation and format conversion (time scaling, space mirroring, HDF5 → LeRobot). See [data_augment/README.md](data_augment/README.md). |
+
+See each module’s README for setup and usage.