Skip to content

Commit ac49cb1

Browse files
committed
Move system prompt from agent_sdk/ to abstraction learning approach
The system prompt and iteration message builder were only used by AgentAbstractionLearningApproach, so move them into that file and delete the standalone module.
1 parent 66625f6 commit ac49cb1

2 files changed

Lines changed: 172 additions & 181 deletions

File tree

predicators/agent_sdk/system_prompt.py

Lines changed: 0 additions & 175 deletions
This file was deleted.

predicators/approaches/agent_abstraction_learning_approach.py

Lines changed: 172 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,6 @@
1515
from predicators import utils
1616
from predicators.agent_sdk.proposal_parser import ProposalBundle, \
1717
build_exec_context, exec_code_safely
18-
from predicators.agent_sdk.system_prompt import build_iteration_message, \
19-
build_system_prompt
2018
from predicators.approaches.agent_planner_approach import AgentPlannerApproach
2119
from predicators.approaches.agent_session_mixin import AgentSessionMixin
2220
from predicators.approaches.pp_online_process_learning_approach import \
@@ -87,11 +85,8 @@ def _get_log_dir(self) -> str:
8785
"""Use the mixin's simple log dir (no run_id subdirectory)."""
8886
return AgentSessionMixin._get_log_dir(self)
8987

90-
def _get_agent_model_name(self) -> str:
91-
return CFG.agent_sdk_model_name
92-
9388
def _get_agent_system_prompt(self) -> str:
94-
return build_system_prompt()
89+
return _SYSTEM_PROMPT
9590

9691
# ------------------------------------------------------------------ #
9792
# Overridable helpers (from AgentPlannerApproach)
@@ -666,3 +661,174 @@ def load(self, online_learning_cycle: Optional[int] = None) -> None:
666661
f"{len(self._learned_predicates)} learned predicates, "
667662
f"{len(self._offline_dataset.trajectories)} offline trajectories, "
668663
f"{len(self._online_dataset.trajectories)} online trajectories")
664+
665+
666+
# ------------------------------------------------------------------ #
667+
# Prompt helpers (abstraction-learning specific)
668+
# ------------------------------------------------------------------ #
669+
670+
# System prompt handed to the abstraction-learning agent. It is returned
# verbatim by `_get_agent_system_prompt`, so every character below is runtime
# text: a trailing backslash joins a line with the next one, and blank lines
# separate the markdown sections.
# NOTE(review): the diff view this was recovered from drops leading
# whitespace, so any indentation inside the fenced code examples could not be
# restored -- confirm against the repository copy.
_SYSTEM_PROMPT = """\
You are an abstraction inventor for a bilevel process planning system. Your \
role is to propose types, predicates, helper objects, processes, and options \
that help a task planner solve planning problems.

## What You Observe

You observe the world ONLY through:
- **Trajectory data**: sequences of states (feature vectors per object) and \
actions
- **Task goals**: symbolic goal descriptions
- **Planning metrics**: success rate, nodes expanded, failure reasons
- **Current abstractions**: the types, predicates, processes, and options \
currently in use

You do NOT have access to environment source code, simulator internals, or \
ground-truth models. You must infer useful abstractions from observed data.

## What You Can Propose

1. **Types**: New object types with named features
2. **Predicates**: Boolean classifiers over states and objects
3. **Helper Objects / Task Augmentation**: Functions that add helper objects \
to tasks (e.g., grid locations, reference frames)
4. **Processes**: Causal processes (exogenous events triggered by conditions)
5. **Options**: Parameterized actions

## Code Conventions

When writing proposal code, the following variables are available in the exec \
context:

### Imports (already available — no need to import)
- `np`, `numpy`, `torch`
- `Box` (from gym.spaces)
- `Type`, `Predicate`, `DerivedPredicate`, `NSPredicate`
- `Object`, `Variable`, `LiftedAtom`, `GroundAtom`
- `ExogenousProcess`, `EndogenousProcess`, `CausalProcess`
- `ParameterizedOption`, `State`, `Task`
- `ConstantDelay`, `DiscreteGaussianDelay`
- `List`, `Set`, `Sequence` (from typing)

### Current abstractions
- Each type `T` is available as `T_type` (e.g., `domino_type`, `robot_type`)
- Each predicate `P` is available by name (e.g., `Fallen`, `Standing`)
- Each predicate classifier is available as `_P_holds` \
(e.g., `_Fallen_holds`)
- Each option `O` is available by name (e.g., `Push`)

### Expected output variables per proposal tool
- `propose_types`: must define `proposed_types` (a list of Type objects)
- `propose_predicates`: must define `proposed_predicates` \
(a list of Predicate objects)
- `propose_object_augmentor`: must define `augment_task(task) -> Task`
- `propose_processes`: must define `proposed_processes` \
(a list of CausalProcess objects)
- `propose_options`: must define `proposed_options` \
(a list of ParameterizedOption objects)

## Key API Reference

### State
```python
state.get(obj, "feature_name") # get a feature value
state.set(obj, "feature_name", value) # set a feature value
state.get_objects(some_type) # get all objects of a type
list(state) # iterate over all objects
state.copy() # copy the state
```

### Predicate
```python
pred = Predicate("MyPred", [type1_type, type2_type],
lambda state, objects: state.get(objects[0], "feat") > 0.5)
pred.holds(state, [obj1, obj2]) # evaluate
```

### Process (ExogenousProcess)
```python
v1 = Variable("?x", some_type)
v2 = Variable("?y", other_type)
proc = ExogenousProcess(
name="MyProcess",
parameters=[v1, v2],
condition_at_start={LiftedAtom(SomePred, [v1, v2])},
condition_overall={LiftedAtom(SomePred, [v1, v2])},
condition_at_end=set(),
add_effects={LiftedAtom(ResultPred, [v1])},
delete_effects=set(),
delay_distribution=ConstantDelay(1),
strength=torch.tensor([1.0]),
)
```

### Type
```python
my_type = Type("my_type", ["feature1", "feature2"])
```

## Iteration Protocol

At each learning iteration:
1. **Inspect** the trajectory data and planning results using inspection tools
2. **Form hypotheses** about what abstractions are missing or insufficient
3. **Propose** new abstractions using proposal tools
4. **Test** your proposals using testing tools
5. **Refine** based on test results - fix errors and retry

Focus on proposing abstractions that will help the planner solve more tasks. \
Pay attention to:
- States where planning fails - what conditions are missing?
- Patterns in trajectory data that aren't captured by current predicates
- Whether helper objects (like grid positions) could simplify the problem
"""
784+
785+
786+
def build_iteration_message(cycle: int,
                            num_new_trajs: int,
                            num_total_trajs: int,
                            task_success_rate: float,
                            type_names_with_features: str,
                            predicate_signatures: str,
                            num_predicates: int,
                            process_summaries: str,
                            num_processes: int,
                            option_names: str,
                            num_options: int,
                            planning_success: str,
                            avg_nodes: str,
                            failure_summaries: str,
                            previous_iteration_outcomes: str,
                            available_tools: Optional[List[Any]] = None) -> str:
    """Build the message sent to the agent at each iteration.

    The result is a plain-text report made of a header line followed by the
    sections TRAJECTORY SUMMARY, CURRENT ABSTRACTIONS, PLANNING PERFORMANCE,
    PREVIOUS ITERATION OUTCOMES, an optional AVAILABLE TOOLS list, and a
    closing YOUR TASK instruction.

    Args:
        cycle: Iteration number shown in the header line.
        num_new_trajs: Count reported as collected during this cycle.
        num_total_trajs: Count reported as the offline + online total.
        task_success_rate: Fraction rendered as a percentage with one
            decimal place (e.g. 0.5 -> "50.0%").
        type_names_with_features: Pre-formatted text for the "Types" bullet.
        predicate_signatures: Pre-formatted text for the "Predicates" bullet.
        num_predicates: Number shown in parentheses on that bullet.
        process_summaries: Pre-formatted text for the "Processes" bullet.
        num_processes: Number shown in parentheses on that bullet.
        option_names: Pre-formatted text for the "Options" bullet.
        num_options: Number shown in parentheses on that bullet.
        planning_success: Inserted verbatim as the first line of the
            planning section; no bullet prefix is added here.
        avg_nodes: Text for the "Avg nodes expanded" bullet.
        failure_summaries: Text for the "Failures" bullet.
        previous_iteration_outcomes: Inserted verbatim under its heading.
        available_tools: Optional items listed one per line (each rendered
            with `str()` formatting). When None or empty, the AVAILABLE
            TOOLS section is left out entirely.

    Returns:
        The fully formatted message, ending with a newline.
    """
    # The section carries its own leading/trailing newline so that the
    # template below yields exactly one blank line before "YOUR TASK:" both
    # when the tool list is present and when it is omitted.
    tools_section = ""
    if available_tools:
        tool_list = "\n".join(f" - {t}" for t in available_tools)
        tools_section = f"\nAVAILABLE TOOLS:\n{tool_list}\n"

    return f"""\
== Online Learning Iteration {cycle} ==

TRAJECTORY SUMMARY:
- {num_new_trajs} new trajectories collected this cycle
- {num_total_trajs} total trajectories (offline + online)
- Task success rate: {task_success_rate:.1%}

CURRENT ABSTRACTIONS:
- Types: {type_names_with_features}
- Predicates ({num_predicates}): {predicate_signatures}
- Processes ({num_processes}): {process_summaries}
- Options ({num_options}): {option_names}

PLANNING PERFORMANCE:
{planning_success}
- Avg nodes expanded: {avg_nodes}
- Failures: {failure_summaries}

PREVIOUS ITERATION OUTCOMES:
{previous_iteration_outcomes}
{tools_section}
YOUR TASK:
Inspect the trajectory data and planning results. Propose new or improved \
abstractions that will help the planner solve more tasks. Use the proposal \
tools to register your proposals and the testing tools to validate them.
"""

0 commit comments

Comments
 (0)