|
15 | 15 | from predicators import utils |
16 | 16 | from predicators.agent_sdk.proposal_parser import ProposalBundle, \ |
17 | 17 | build_exec_context, exec_code_safely |
18 | | -from predicators.agent_sdk.system_prompt import build_iteration_message, \ |
19 | | - build_system_prompt |
20 | 18 | from predicators.approaches.agent_planner_approach import AgentPlannerApproach |
21 | 19 | from predicators.approaches.agent_session_mixin import AgentSessionMixin |
22 | 20 | from predicators.approaches.pp_online_process_learning_approach import \ |
@@ -87,11 +85,8 @@ def _get_log_dir(self) -> str: |
87 | 85 | """Use the mixin's simple log dir (no run_id subdirectory).""" |
88 | 86 | return AgentSessionMixin._get_log_dir(self) |
89 | 87 |
|
90 | | - def _get_agent_model_name(self) -> str: |
91 | | - return CFG.agent_sdk_model_name |
92 | | - |
93 | 88 | def _get_agent_system_prompt(self) -> str: |
94 | | - return build_system_prompt() |
| 89 | + return _SYSTEM_PROMPT |
95 | 90 |
|
96 | 91 | # ------------------------------------------------------------------ # |
97 | 92 | # Overridable helpers (from AgentPlannerApproach) |
@@ -666,3 +661,174 @@ def load(self, online_learning_cycle: Optional[int] = None) -> None: |
666 | 661 | f"{len(self._learned_predicates)} learned predicates, " |
667 | 662 | f"{len(self._offline_dataset.trajectories)} offline trajectories, " |
668 | 663 | f"{len(self._online_dataset.trajectories)} online trajectories") |
| 664 | + |
| 665 | + |
| 666 | +# ------------------------------------------------------------------ # |
| 667 | +# Prompt helpers (abstraction-learning specific) |
| 668 | +# ------------------------------------------------------------------ # |
| 669 | + |
| 670 | +_SYSTEM_PROMPT = """\ |
| 671 | +You are an abstraction inventor for a bilevel process planning system. Your \ |
| 672 | +role is to propose types, predicates, helper objects, processes, and options \ |
| 673 | +that help a task planner solve planning problems. |
| 674 | +
|
| 675 | +## What You Observe |
| 676 | +
|
| 677 | +You observe the world ONLY through: |
| 678 | +- **Trajectory data**: sequences of states (feature vectors per object) and \ |
| 679 | +actions |
| 680 | +- **Task goals**: symbolic goal descriptions |
| 681 | +- **Planning metrics**: success rate, nodes expanded, failure reasons |
| 682 | +- **Current abstractions**: the types, predicates, processes, and options \ |
| 683 | +currently in use |
| 684 | +
|
| 685 | +You do NOT have access to environment source code, simulator internals, or \ |
| 686 | +ground-truth models. You must infer useful abstractions from observed data. |
| 687 | +
|
| 688 | +## What You Can Propose |
| 689 | +
|
| 690 | +1. **Types**: New object types with named features |
| 691 | +2. **Predicates**: Boolean classifiers over states and objects |
| 692 | +3. **Helper Objects / Task Augmentation**: Functions that add helper objects \ |
| 693 | +to tasks (e.g., grid locations, reference frames) |
| 694 | +4. **Processes**: Causal processes (exogenous events triggered by conditions) |
| 695 | +5. **Options**: Parameterized actions |
| 696 | +
|
| 697 | +## Code Conventions |
| 698 | +
|
| 699 | +When writing proposal code, the following variables are available in the exec \ |
| 700 | +context: |
| 701 | +
|
| 702 | +### Imports (already available — no need to import) |
| 703 | +- `np`, `numpy`, `torch` |
| 704 | +- `Box` (from gym.spaces) |
| 705 | +- `Type`, `Predicate`, `DerivedPredicate`, `NSPredicate` |
| 706 | +- `Object`, `Variable`, `LiftedAtom`, `GroundAtom` |
| 707 | +- `ExogenousProcess`, `EndogenousProcess`, `CausalProcess` |
| 708 | +- `ParameterizedOption`, `State`, `Task` |
| 709 | +- `ConstantDelay`, `DiscreteGaussianDelay` |
| 710 | +- `List`, `Set`, `Sequence` (from typing) |
| 711 | +
|
| 712 | +### Current abstractions |
| 713 | +- Each type `T` is available as `T_type` (e.g., `domino_type`, `robot_type`) |
| 714 | +- Each predicate `P` is available by name (e.g., `Fallen`, `Standing`) |
| 715 | +- Each predicate classifier is available as `_P_holds` \ |
| 716 | +(e.g., `_Fallen_holds`) |
| 717 | +- Each option `O` is available by name (e.g., `Push`) |
| 718 | +
|
| 719 | +### Expected output variables per proposal tool |
| 720 | +- `propose_types`: must define `proposed_types` (a list of Type objects) |
| 721 | +- `propose_predicates`: must define `proposed_predicates` \ |
| 722 | +(a list of Predicate objects) |
| 723 | +- `propose_object_augmentor`: must define `augment_task(task) -> Task` |
| 724 | +- `propose_processes`: must define `proposed_processes` \ |
| 725 | +(a list of CausalProcess objects) |
| 726 | +- `propose_options`: must define `proposed_options` \ |
| 727 | +(a list of ParameterizedOption objects) |
| 728 | +
|
| 729 | +## Key API Reference |
| 730 | +
|
| 731 | +### State |
| 732 | +```python |
| 733 | +state.get(obj, "feature_name") # get a feature value |
| 734 | +state.set(obj, "feature_name", value) # set a feature value |
| 735 | +state.get_objects(some_type) # get all objects of a type |
| 736 | +list(state) # iterate over all objects |
| 737 | +state.copy() # copy the state |
| 738 | +``` |
| 739 | +
|
| 740 | +### Predicate |
| 741 | +```python |
| 742 | +pred = Predicate("MyPred", [type1_type, type2_type], |
| 743 | + lambda state, objects: state.get(objects[0], "feat") > 0.5) |
| 744 | +pred.holds(state, [obj1, obj2]) # evaluate |
| 745 | +``` |
| 746 | +
|
| 747 | +### Process (ExogenousProcess) |
| 748 | +```python |
| 749 | +v1 = Variable("?x", some_type) |
| 750 | +v2 = Variable("?y", other_type) |
| 751 | +proc = ExogenousProcess( |
| 752 | + name="MyProcess", |
| 753 | + parameters=[v1, v2], |
| 754 | + condition_at_start={LiftedAtom(SomePred, [v1, v2])}, |
| 755 | + condition_overall={LiftedAtom(SomePred, [v1, v2])}, |
| 756 | + condition_at_end=set(), |
| 757 | + add_effects={LiftedAtom(ResultPred, [v1])}, |
| 758 | + delete_effects=set(), |
| 759 | + delay_distribution=ConstantDelay(1), |
| 760 | + strength=torch.tensor([1.0]), |
| 761 | +) |
| 762 | +``` |
| 763 | +
|
| 764 | +### Type |
| 765 | +```python |
| 766 | +my_type = Type("my_type", ["feature1", "feature2"]) |
| 767 | +``` |
| 768 | +
|
| 769 | +## Iteration Protocol |
| 770 | +
|
| 771 | +At each learning iteration: |
| 772 | +1. **Inspect** the trajectory data and planning results using inspection tools |
| 773 | +2. **Form hypotheses** about what abstractions are missing or insufficient |
| 774 | +3. **Propose** new abstractions using proposal tools |
| 775 | +4. **Test** your proposals using testing tools |
| 776 | +5. **Refine** based on test results - fix errors and retry |
| 777 | +
|
| 778 | +Focus on proposing abstractions that will help the planner solve more tasks. \ |
| 779 | +Pay attention to: |
| 780 | +- States where planning fails - what conditions are missing? |
| 781 | +- Patterns in trajectory data that aren't captured by current predicates |
| 782 | +- Whether helper objects (like grid positions) could simplify the problem |
| 783 | +""" |
| 784 | + |
| 785 | + |
def build_iteration_message(
        cycle: int,
        num_new_trajs: int,
        num_total_trajs: int,
        task_success_rate: float,
        type_names_with_features: str,
        predicate_signatures: str,
        num_predicates: int,
        process_summaries: str,
        num_processes: int,
        option_names: str,
        num_options: int,
        planning_success: str,
        avg_nodes: str,
        failure_summaries: str,
        previous_iteration_outcomes: str,
        available_tools: Optional[List[Any]] = None) -> str:
    """Assemble the plain-text status report sent to the agent each cycle.

    The report consists of titled sections, in order: trajectory summary,
    current abstractions, planning performance, previous iteration
    outcomes, an optional tool listing, and the task instructions.

    Args:
        cycle: Iteration number shown in the header line.
        num_new_trajs: Count reported as newly collected this cycle.
        num_total_trajs: Count reported as total (offline + online).
        task_success_rate: Fraction rendered as a percentage with one
            decimal place (e.g. 0.5 -> "50.0%").
        type_names_with_features: Text placed after "Types:".
        predicate_signatures: Text placed after the predicate count.
        num_predicates: Count shown in parentheses after "Predicates".
        process_summaries: Text placed after the process count.
        num_processes: Count shown in parentheses after "Processes".
        option_names: Text placed after the option count.
        num_options: Count shown in parentheses after "Options".
        planning_success: Inserted verbatim as its own line under
            "PLANNING PERFORMANCE:".
        avg_nodes: Text placed after "Avg nodes expanded:".
        failure_summaries: Text placed after "Failures:".
        previous_iteration_outcomes: Inserted verbatim under
            "PREVIOUS ITERATION OUTCOMES:".
        available_tools: Optional tools; when truthy, each element is
            formatted as one bullet under "AVAILABLE TOOLS:". The section
            is omitted entirely when this is None or empty.

    Returns:
        The formatted message, terminated by a newline.
    """
    report = [
        f"== Online Learning Iteration {cycle} ==",
        "",
        "TRAJECTORY SUMMARY:",
        f"- {num_new_trajs} new trajectories collected this cycle",
        f"- {num_total_trajs} total trajectories (offline + online)",
        f"- Task success rate: {task_success_rate:.1%}",
        "",
        "CURRENT ABSTRACTIONS:",
        f"- Types: {type_names_with_features}",
        f"- Predicates ({num_predicates}): {predicate_signatures}",
        f"- Processes ({num_processes}): {process_summaries}",
        f"- Options ({num_options}): {option_names}",
        "",
        "PLANNING PERFORMANCE:",
        f"{planning_success}",
        f"- Avg nodes expanded: {avg_nodes}",
        f"- Failures: {failure_summaries}",
        "",
        "PREVIOUS ITERATION OUTCOMES:",
        f"{previous_iteration_outcomes}",
    ]

    if available_tools:
        # Join the bullets up front so the section occupies exactly one
        # list slot, keeping the surrounding blank lines unchanged.
        bullets = "\n".join(f" - {tool}" for tool in available_tools)
        report.extend(["", "AVAILABLE TOOLS:", bullets])

    report.extend([
        "",
        "YOUR TASK:",
        "Inspect the trajectory data and planning results. "
        "Propose new or improved "
        "abstractions that will help the planner solve more tasks. "
        "Use the proposal "
        "tools to register your proposals and the testing tools to "
        "validate them.",
        # Trailing empty entry yields the final newline of the message.
        "",
    ])
    return "\n".join(report)
0 commit comments