Add descriptive param names to skill factory options for LLM planner

yichao-liang · yichao-liang · commit ea06eb3dd403 · 2026-03-07T19:04:11.000Z
Update continuous parameter descriptions in pick, place, push, and pour
skill factories so the LLM planner sees informative names like
"approach_distance (dist behind target along facing dir to start push)"
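instead of terse names like "offset_x". Also update docstrings and
test comments to match, revise the skill_factories package docstring
to state that each factory builds its params_space internally (only
create_move_to_skill takes it explicitly), and apply formatter-only
line reflows with no behavior change.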
diff --git a/predicators/approaches/agent_option_learning_approach.py b/predicators/approaches/agent_option_learning_approach.py
@@ -40,8 +40,8 @@ class AgentOptionLearningApproach(AgentPlannerApproach):
 
     def __init__(self, initial_predicates: Set[Predicate],
                  initial_options: Set[ParameterizedOption], types: Set[Type],
-                 action_space: Box, train_tasks: List[Task],
-                 *args: Any, **kwargs: Any) -> None:
+                 action_space: Box, train_tasks: List[Task], *args: Any,
+                 **kwargs: Any) -> None:
         # Agent-specific state (before super().__init__)
         self._agent_proposed_options: Set[ParameterizedOption] = set()
 
@@ -82,13 +82,13 @@ def _get_agent_system_prompt(self) -> str:
 Continuous params: `(grasp_z_offset,)`.
 - `create_place_skill(name, types, config)` — place a held object \
 (move above, descend, release, retreat). No get_target_pose_fn; \
-target comes from continuous params: `(x, y, yaw, drop_z)`.
+target comes from continuous params: `(target_x, target_y, target_yaw, release_z)`.
 - `create_push_skill(name, types, config, get_target_pose_fn)` — \
 push with standard 4-waypoint trajectory. Requires \
 `config.robot_home_pos` to be set. Facing direction is \
 `(sin(yaw), cos(yaw))` from `get_target_pose_fn`. \
-Continuous params: `(offset_x, offset_z, offset_rot, \
-push_through_frac)`.
+Continuous params: `(approach_distance, contact_z_offset, \
+ee_yaw_offset, push_through_frac)`.
 - `create_pour_skill(name, types, config, get_target_pose_fn, \
 tilt_terminal_fn=None)` — pour from a held container \
 (move above, descend, tilt). Continuous params: `(pour_tilt,)`.
@@ -135,21 +135,21 @@ def _get_agent_tool_names(self) -> Optional[List[str]]:
     def _get_sandbox_reference_files(self) -> Dict[str, str]:
         return {
             "skill_factories/base.py":
-                "predicators/ground_truth_models/skill_factories/base.py",
+            "predicators/ground_truth_models/skill_factories/base.py",
             "skill_factories/__init__.py":
-                "predicators/ground_truth_models/skill_factories/__init__.py",
+            "predicators/ground_truth_models/skill_factories/__init__.py",
             "skill_factories/pick.py":
-                "predicators/ground_truth_models/skill_factories/pick.py",
+            "predicators/ground_truth_models/skill_factories/pick.py",
             "skill_factories/move_to.py":
-                "predicators/ground_truth_models/skill_factories/move_to.py",
+            "predicators/ground_truth_models/skill_factories/move_to.py",
             "skill_factories/place.py":
-                "predicators/ground_truth_models/skill_factories/place.py",
+            "predicators/ground_truth_models/skill_factories/place.py",
             "skill_factories/push.py":
-                "predicators/ground_truth_models/skill_factories/push.py",
+            "predicators/ground_truth_models/skill_factories/push.py",
             "skill_factories/pour.py":
-                "predicators/ground_truth_models/skill_factories/pour.py",
+            "predicators/ground_truth_models/skill_factories/pour.py",
             "skill_factories/wait.py":
-                "predicators/ground_truth_models/skill_factories/wait.py",
+            "predicators/ground_truth_models/skill_factories/wait.py",
         }
 
     # ------------------------------------------------------------------ #
@@ -163,8 +163,8 @@ def _get_all_options(self) -> Set[ParameterizedOption]:
         # Also include iteration_proposals.proposed_options as a fallback
         # in case the Docker sync to tool_context.options was incomplete.
         proposal_opts = self._tool_context.iteration_proposals.proposed_options
-        result = (self._initial_options | self._agent_proposed_options |
-                  self._tool_context.options | proposal_opts)
+        result = (self._initial_options | self._agent_proposed_options
+                  | self._tool_context.options | proposal_opts)
         if not result:
             logging.warning(
                 "_get_all_options() returning empty set. "
@@ -195,10 +195,10 @@ def _sync_tool_context(self) -> None:
     def _build_skill_factory_context(self) -> Dict[str, Any]:
         """Build exec context with skill factory functions for
         propose_options."""
-        from predicators.ground_truth_models.skill_factories import (
-            Phase, PhaseAction, PhaseSkill, SkillConfig, create_move_to_skill,
-            create_pick_skill, create_place_skill, create_pour_skill,
-            create_push_skill, create_wait_option, make_move_to_phase)
+        from predicators.ground_truth_models.skill_factories import Phase, \
+            PhaseAction, PhaseSkill, SkillConfig, create_move_to_skill, \
+            create_pick_skill, create_place_skill, create_pour_skill, \
+            create_push_skill, create_wait_option, make_move_to_phase
 
         context: Dict[str, Any] = {
             # Skill factory functions
@@ -296,8 +296,8 @@ def _solve(self, task: Task, timeout: int) -> Callable[[State], Action]:
         policy = super()._solve(task, timeout)
 
         # Snapshot agent-proposed options (everything beyond initial)
-        self._agent_proposed_options = (
-            self._tool_context.options - self._initial_options)
+        self._agent_proposed_options = (self._tool_context.options -
+                                        self._initial_options)
 
         # Record iteration summary (options only)
         proposals = self._tool_context.iteration_proposals
diff --git a/predicators/ground_truth_models/skill_factories/__init__.py b/predicators/ground_truth_models/skill_factories/__init__.py
@@ -16,17 +16,19 @@
 
 Shared signature pattern
 ------------------------
-All factory functions (except ``create_wait_option``) share the same first
-four arguments::
+Most factory functions share the same first three arguments::
 
     create_<X>_skill(
         name: str,            # Option name for logging/matching
         types: Sequence[Type],# Object types (robot first)
-        params_space: Box,    # Continuous parameter space
         config: SkillConfig,  # Shared environment configuration
         ...                   # Skill-specific arguments
     )
 
+Each factory builds its ``params_space`` internally from canonical parameter
+definitions (e.g. ``_PICK_PARAMS``, ``_PLACE_PARAMS``).  The exception is
+``create_move_to_skill``, which takes an explicit ``params_space`` argument.
+
 ``create_wait_option`` uses ``(name, config, robot_type)`` since it always
 operates on a single robot type with no parameters.
 
diff --git a/predicators/ground_truth_models/skill_factories/pick.py b/predicators/ground_truth_models/skill_factories/pick.py
@@ -54,7 +54,7 @@ def _get_jug_pose(state, objects, params, config):
 
 # Canonical continuous parameters for Pick.
 _PICK_PARAMS = [
-    ("grasp_z_offset", 0.0, 0.1),
+    ("grasp_z_offset (height above object origin to close gripper)", 0.0, 0.1),
 ]
 
 
@@ -140,5 +140,9 @@ def _descend_pose(
         make_move_to_phase("Lift", _above_pose, "closed"),
     ]
 
-    return PhaseSkill(name, types, params_space, config, phases,
+    return PhaseSkill(name,
+                      types,
+                      params_space,
+                      config,
+                      phases,
                       params_description=params_description).build()
diff --git a/predicators/ground_truth_models/skill_factories/place.py b/predicators/ground_truth_models/skill_factories/place.py
@@ -4,14 +4,15 @@
 ``ParameterizedOption`` that places a held object by:
 
   1. Moving above the placement target at ``config.transport_z``.
-  2. Descending to ``drop_z`` (from params).
+  2. Descending to ``release_z`` (from params).
   3. Opening the gripper to release.
   4. Retreating back up to ``config.transport_z``.
 
-The placement target ``(x, y, yaw)`` and ``drop_z`` are all provided as
-continuous parameters -- no callback is needed.
+The placement target ``(target_x, target_y, target_yaw)`` and
+``release_z`` are all provided as continuous parameters -- no callback
+is needed.
 
-Continuous parameters: ``(x, y, yaw, drop_z)``
+Continuous parameters: ``(target_x, target_y, target_yaw, release_z)``
 
 Example::
 
@@ -28,20 +29,20 @@
 
 from typing import Sequence, Tuple
 
+import numpy as np
+
 from predicators.ground_truth_models.skill_factories.base import Phase, \
     PhaseAction, PhaseSkill, SkillConfig, build_params_space
 from predicators.ground_truth_models.skill_factories.move_to import \
     make_move_to_phase
 from predicators.structs import Array, Object, ParameterizedOption, State, Type
 
-import numpy as np
-
 # Canonical continuous parameters for Place.
 _PLACE_PARAMS = [
-    ("x", 0.4, 1.1),
-    ("y", 1.1, 1.6),
-    ("yaw", -np.pi, np.pi),
-    ("drop_z", 0.4, 0.6),
+    ("target_x (world x position for placement)", 0.4, 1.1),
+    ("target_y (world y position for placement)", 1.1, 1.6),
+    ("target_yaw (placement orientation in radians)", -np.pi, np.pi),
+    ("release_z (world z height to open gripper)", 0.4, 0.6),
 ]
 
 
@@ -55,15 +56,15 @@ def create_place_skill(
     Phases:
         0. **MoveAbove** -- Move end-effector above the placement at
            ``config.transport_z``, with fingers closed.
-        1. **Descend** -- Lower to ``drop_z`` (from params), with fingers
-           closed.
+        1. **Descend** -- Lower to ``release_z`` (from params), with
+           fingers closed.
         2. **OpenFingers** -- Open the gripper to release the object.
         3. **Retreat** -- Rise back to ``config.transport_z``, with fingers
            open.
 
     Continuous parameters:
-        ``(x, y, yaw, drop_z)`` -- placement position, orientation, and
-        release height.
+        ``(target_x, target_y, target_yaw, release_z)`` -- placement
+        position, orientation, and release height.
 
     Args:
         name: Option name used for logging and matching.
@@ -110,7 +111,7 @@ def _drop_pose(
     phases = [
         # Phase 0: Move above placement
         make_move_to_phase("MoveAbove", _above_pose, "closed"),
-        # Phase 1: Descend to drop height
+        # Phase 1: Descend to release height
         make_move_to_phase("Descend", _drop_pose, "closed"),
         # Phase 2: Open fingers to release
         Phase(
@@ -122,5 +123,9 @@ def _drop_pose(
         make_move_to_phase("Retreat", _above_pose, "open"),
     ]
 
-    return PhaseSkill(name, types, params_space, config, phases,
+    return PhaseSkill(name,
+                      types,
+                      params_space,
+                      config,
+                      phases,
                       params_description=params_description).build()
diff --git a/predicators/ground_truth_models/skill_factories/pour.py b/predicators/ground_truth_models/skill_factories/pour.py
@@ -47,7 +47,7 @@ def _get_pour_pose(state, objects, params, config):
 
 # Canonical continuous parameters for Pour.
 _POUR_PARAMS = [
-    ("pour_tilt", 0.5, 1.0),
+    ("pour_tilt (EE tilt angle for pouring, radians)", 0.5, 1.0),
 ]
 
 
@@ -110,8 +110,8 @@ def _tilt_target(
     ) -> Tuple[Pose, Pose, str]:
         pour_tilt = float(params[0])
         robot_obj = objects[0]
-        current_position = (state.get(robot_obj, "x"),
-                            state.get(robot_obj, "y"),
+        current_position = (state.get(robot_obj,
+                                      "x"), state.get(robot_obj, "y"),
                             state.get(robot_obj, "z"))
         current_orn = p.getQuaternionFromEuler(
             [0, state.get(robot_obj, "tilt"),
@@ -137,5 +137,9 @@ def _tilt_target(
         ),
     ]
 
-    return PhaseSkill(name, types, params_space, config, phases,
+    return PhaseSkill(name,
+                      types,
+                      params_space,
+                      config,
+                      phases,
                       params_description=params_description).build()
diff --git a/predicators/ground_truth_models/skill_factories/push.py b/predicators/ground_truth_models/skill_factories/push.py
@@ -6,8 +6,8 @@
 
   1. Closing the gripper.
   2. Moving above & behind the target at ``config.transport_z``.
-  3. Descending to contact height (target z + ``offset_z``).
-  4. Pushing through the target (``push_through_frac * offset_x``
+  3. Descending to contact height (target z + ``contact_z_offset``).
+  4. Pushing through the target (``push_through_frac * approach_distance``
      past the target along its facing direction).
   5. Retreating to ``config.robot_home_pos``.
   6. Opening the gripper.
@@ -18,7 +18,7 @@
 
 ``config.robot_home_pos`` **must** be set.
 
-Continuous parameters: ``(offset_x, offset_z, offset_rot, push_through_frac)``
+Continuous parameters: ``(approach_distance, contact_z_offset, ee_yaw_offset, push_through_frac)``
 
 Example::
 
@@ -61,10 +61,13 @@ def _get_domino_pose(state, objects, params, config):
 
 # Canonical continuous parameters for Push.
 _PUSH_PARAMS = [
-    ("offset_x", 0.03, 0.08),
-    ("offset_z", 0.0, 0.12),
-    ("offset_rot", -np.pi, np.pi),
-    ("push_through_frac", 0.0, 0.3),
+    ("approach_distance (dist behind target along facing dir to start push)",
+     0.03, 0.08),
+    ("contact_z_offset (height above target z for contact)", 0.0, 0.12),
+    ("ee_yaw_offset (EE rotation offset from target yaw, radians)", -np.pi,
+     np.pi),
+    ("push_through_frac (fraction of approach_distance to push past target)",
+     0.0, 0.3),
 ]
 
 
@@ -79,17 +82,18 @@ def create_push_skill(
     Phases:
         0. **CloseFingers** -- Close the gripper before approaching.
         1. **Waypoint_0** -- Move above & behind the target at
-           ``config.transport_z``, offset by ``offset_x`` opposite the
-           facing direction.
+           ``config.transport_z``, offset by ``approach_distance``
+           opposite the facing direction.
         2. **Waypoint_1** -- Descend to contact height
-           (target z + ``offset_z``) at the same behind position.
+           (target z + ``contact_z_offset``) at the same behind position.
         3. **Waypoint_2** -- Push forward through the target by
-           ``offset_x * push_through_frac`` along the facing direction.
+           ``approach_distance * push_through_frac`` along the facing
+           direction.
         4. **Waypoint_3** -- Retreat to ``config.robot_home_pos``.
         5. **OpenFingers** -- Open the gripper.
 
     Continuous parameters:
-        ``(offset_x, offset_z, offset_rot, push_through_frac)``
+        ``(approach_distance, contact_z_offset, ee_yaw_offset, push_through_frac)``
 
     Args:
         name: Option name used for logging and matching.
@@ -112,8 +116,14 @@ def create_push_skill(
     # -- Standard 4-waypoint trajectory ----------------------------------
 
     def _waypoints(
-        ox: float, oy: float, oz: float, oyaw: float, cfg: SkillConfig,
-        s_offset_x: float, s_offset_z: float, s_offset_rot: float,
+        ox: float,
+        oy: float,
+        oz: float,
+        oyaw: float,
+        cfg: SkillConfig,
+        s_offset_x: float,
+        s_offset_z: float,
+        s_offset_rot: float,
         s_push_frac: float,
     ) -> List[Tuple[float, float, float, float, str]]:
         assert cfg.robot_home_pos is not None
@@ -199,5 +209,9 @@ def _get_target(
               action_type=PhaseAction.CHANGE_FINGERS,
               target_fn=_open_fingers_target))
 
-    return PhaseSkill(name, types, params_space, config, phases,
+    return PhaseSkill(name,
+                      types,
+                      params_space,
+                      config,
+                      phases,
                       params_description=params_description).build()
diff --git a/tests/test_skill_factories.py b/tests/test_skill_factories.py