AgentToolkit · visahak · Apr 3, 2026 · Apr 1, 2026 · Apr 2, 2026 · Apr 2, 2026
diff --git a/evolve/frontend/client/evolve_client.py b/evolve/frontend/client/evolve_client.py
@@ -143,6 +143,7 @@ def consolidate_tips(self, namespace_id: str, threshold: float | None = None) ->
                             "rationale": tip.rationale,
                             "category": tip.category,
                             "trigger": tip.trigger,
+                            "implementation_steps": tip.implementation_steps,
                         },
                     )
                     for tip in consolidated_tips

diff --git a/evolve/frontend/mcp/mcp_server.py b/evolve/frontend/mcp/mcp_server.py
@@ -215,6 +215,7 @@ def save_trajectory(trajectory_data: str, task_id: str | None = None) -> list[Re
                         "category": tip.category,
                         "rationale": tip.rationale,
                         "trigger": tip.trigger,
+                        "implementation_steps": tip.implementation_steps,
                         "task_description": result.task_description,
                         "source_task_id": task_id,
                         "creation_mode": "auto-mcp",

diff --git a/evolve/llm/tips/clustering.py b/evolve/llm/tips/clustering.py
@@ -157,12 +157,22 @@ def combine_cluster(entities: list[RecordedEntity]) -> list[Tip]:
         dict.fromkeys((e.metadata or {}).get("task_description", "") for e in entities if (e.metadata or {}).get("task_description"))
     )
 
+    def _normalize_steps(raw: object) -> list[str]:
+        """Coerce a stored ``implementation_steps`` value into a list of non-blank step strings."""
+        if raw is None:
+            return []
+        # A list/tuple is read item by item; a lone string or any other scalar counts as one step.
+        items = raw if isinstance(raw, (list, tuple)) else [raw]
+        # Skip None/blank items: a result like [""] is truthy and would render as an empty steps entry.
+        return [step for step in (str(x) for x in items if x is not None) if step.strip()]
+
     tips = [
         {
             "content": str(e.content),
             "rationale": (e.metadata or {}).get("rationale", ""),
             "category": (e.metadata or {}).get("category", "strategy"),
             "trigger": (e.metadata or {}).get("trigger", ""),
+            "implementation_steps": _normalize_steps((e.metadata or {}).get("implementation_steps")),
         }
         for e in entities
     ]

diff --git a/evolve/llm/tips/prompts/combine_tips.jinja2 b/evolve/llm/tips/prompts/combine_tips.jinja2
@@ -13,6 +13,7 @@ These guidelines came from tasks like:
 - **Rationale:** {{ tip.rationale }}
 - **Category:** {{ tip.category }}
 - **Trigger:** {{ tip.trigger }}
+{% if tip.implementation_steps %}- **Implementation Steps:** {{ tip.implementation_steps | join('; ') }}{% endif %}
 
 {% endfor %}
 
@@ -35,7 +36,8 @@ Combine the above guidelines into a smaller set of HIGH-QUALITY, CONSOLIDATED, N
             "content": "Clear, actionable tip",
             "rationale": "Why this tip helps",
             "category": "strategy|recovery|optimization",
-            "trigger": "When to apply this tip"
+            "trigger": "When to apply this tip",
+            "implementation_steps": ["step 1", "step 2"]
         }
     ]
 }

diff --git a/evolve/llm/tips/prompts/generate_tips.jinja2 b/evolve/llm/tips/prompts/generate_tips.jinja2
@@ -1,23 +1,33 @@
-You are analyzing an AI agent's execution trajectory to extract actionable tips.
+Extract actionable, relevant tips from this trajectory that would help an AI agent perform similar tasks better in the future.
 
 # Task Information
 **Task:** {{task_instruction}}
-**Status:** UNKNOWN
+**Task Status:** There is no evaluation of the task's trajectory or output against any ground truth. There is also no user feedback to the AI agent. But the trajectory may contain the agent's self-evaluation of whether the task succeeded or failed.
 **Steps Taken:** {{num_steps}}
 
 # Agent Trajectory
 {{trajectory_summary}}
 
-# Your Task
-Extract 3-5 actionable tips from this trajectory that would help AI agents perform similar tasks better.
+**IMPORTANT TO REMEMBER:**
+1. Only generate tips if they are truly relevant and actionable
+2. Tips should be specific to patterns observed in this trajectory
+3. Include both positive patterns (what worked) and negative patterns (what to avoid)
+4. Each tip should have:
+   - A clear, concise description (content)
+   - The purpose/benefit of following it
+   - The category: "strategy", "recovery", or "optimization"
+   - Specific steps to implement the tip
+   - A trigger condition (when to apply this tip)
 
-**Guidelines:**
-1. Focus on patterns that worked or mistakes that were made
-2. Be specific to what you observed in this trajectory
-3. Each tip should have:
-   - Clear description of what to do (or avoid)
-   - Why it matters
-   - When to apply it
+5. If the task seems to have succeeded, focus on the successful strategies used
+6. If the task seems to have failed, focus on what went wrong and how to prevent/recover from it
+7. Do not generate generic tips - be specific to this task execution
+8. Look for patterns in how the agent:
+   - Discovered and used APIs
+   - Handled authentication and credentials
+   - Iterated through results (pagination)
+   - Structured its approach to the problem
+   - Handled errors or unexpected responses
 
 {% if not constrained_decoding_supported %}
 **Output Format (JSON):**
@@ -28,11 +38,15 @@ Extract 3-5 actionable tips from this trajectory that would help AI agents perfo
             "content": "Clear, actionable tip",
             "rationale": "Why this tip helps",
             "category": "strategy|recovery|optimization",
-            "trigger": "When to apply this tip"
+            "trigger": "When to apply this tip",
+            "implementation_steps": ["step 1", "step 2"]
         }
     ]
 }
 ```
 
 Generate tips now. Return ONLY the JSON, no other text.
-{% endif %}
+{% endif %}
+
+
+
diff --git a/evolve/schema/tips.py b/evolve/schema/tips.py
@@ -10,6 +10,7 @@ class Tip(BaseModel):
     rationale: str = Field(description="Why this tip helps")
     category: Literal["strategy", "recovery", "optimization"]
     trigger: str = Field(description="When to apply this tip")
+    implementation_steps: list[str] = Field(default_factory=list, description="Specific steps to implement this tip")
 
 
 class TipGenerationResponse(BaseModel):

diff --git a/evolve/sync/phoenix_sync.py b/evolve/sync/phoenix_sync.py
@@ -480,6 +480,7 @@ def _process_trajectory(self, trajectory: dict) -> int:
                         "category": tip.category,
                         "rationale": tip.rationale,
                         "trigger": tip.trigger,
+                        "implementation_steps": tip.implementation_steps,
                         "source_task_id": trajectory["trace_id"],
                         "source_span_id": trajectory["span_id"],
                         "task_description": result.task_description,