|
| 1 | +/** |
| 2 | + * Flow Generator Agent — uses ToolLoopAgent to generate YAML flows one at a time. |
| 3 | + * |
| 4 | + * Unlike the batch generateFlows() approach (single generateObject call for all flows), |
| 5 | + * this agent generates each flow independently via a save_flow tool, giving the LLM |
| 6 | + * full context of previously generated flows so each new one is meaningfully different. |
| 7 | + * |
| 8 | + * Used in Phase 3 of the Explorer pipeline (after PRD analysis and optional crawling). |
| 9 | + */ |
| 10 | + |
| 11 | +import { ToolLoopAgent, tool, isLoopFinished, stepCountIs } from 'ai'; |
| 12 | +import { z } from 'zod'; |
| 13 | +import type { PRDAnalysis, ScreenGraph, GeneratedFlow, ScreenshotData } from './types.js'; |
| 14 | + |
/**
 * System instructions for the flow-generation agent.
 *
 * Defines the exact two-document YAML format the agent must emit, the
 * natural-language step vocabulary it may use, and the loop protocol
 * (one save_flow call per flow; stop when the tool reports remaining: 0).
 * This is a runtime prompt string — edits here change model behavior.
 */
const FLOW_AGENT_INSTRUCTIONS = `You are a mobile test automation expert generating YAML flow files for AppClaw.

Each flow MUST follow this EXACT format:

\`\`\`yaml
# One-line comment describing what this flow does.
name: Descriptive flow name
---
- open <app name> app
- Click on Search Button
- Type "Appium 3.0" in the search bar
- Perform Search
- Scroll down 2 times until TestMu AI is visible
- done: "TestMu AI video for Appium 3.0 on YouTube is visible"
\`\`\`

The YAML has two documents separated by \`---\`:
- Document 1: metadata with \`name:\` field
- Document 2: a list of steps as natural language strings

Supported step patterns (use NATURAL LANGUAGE):
- open <app name> app → Opens the app by name
- Click on <element> → Taps a UI element
- Tap <element> → Same as click
- Type "<text>" in the <field> → Types text. Text MUST be in quotes.
- Perform Search / Submit → Presses Enter/Return
- Scroll down/up → Swipe gesture
- Scroll down N times until "X" is visible → Scroll+assert combo
- wait N s → Wait N seconds
- go back → Navigate back
- assert "X" is visible → Verify text is on screen
- done: "message" → Mark flow complete

CRITICAL FORMAT RULES:
- Type steps MUST always quote the text: Type "search term" in the search bar
- Use natural language for ALL steps — never use structured YAML keys like tap:, type:, wait:
- Each flow MUST start with "open <app name> app"
- Each flow MUST end with done: "description of what was achieved"
- Each flow MUST be a complete, standalone user journey (5–15 steps)
- Flows MUST be diverse — do NOT generate similar flows

Your job:
1. Read the PRD analysis, screen data, and any screenshots in the user message
2. For each of the N flows requested, generate a distinct YAML flow and call save_flow
3. Prioritize high-priority journeys first, then medium, then low
4. If screenshots are provided, look at them carefully — use the EXACT button labels, text, and UI element names you can see
5. If screen graph data is available, use REAL element labels from it
6. Call save_flow once per flow. Stop when the tool returns remaining: 0`;
| 63 | + |
/**
 * One part of a multimodal user message: plain text, or a base64-encoded
 * image with its MIME type. NOTE(review): this mirrors the content-part
 * shape the 'ai' SDK accepts for user messages — confirm against the SDK
 * version in use before changing field names.
 */
type ContentPart =
  | { type: 'text'; text: string }
  | { type: 'image'; image: string; mimeType: string };
| 67 | + |
| 68 | +function buildPromptParts( |
| 69 | + analysis: PRDAnalysis, |
| 70 | + numFlows: number, |
| 71 | + screenGraph?: ScreenGraph, |
| 72 | + screenshots?: ScreenshotData[] |
| 73 | +): ContentPart[] { |
| 74 | + const journeyContext = analysis.userJourneys |
| 75 | + .sort((a, b) => { |
| 76 | + const priority = { high: 0, medium: 1, low: 2 }; |
| 77 | + return priority[a.priority] - priority[b.priority]; |
| 78 | + }) |
| 79 | + .map( |
| 80 | + (j, i) => |
| 81 | + `${i + 1}. [${j.priority}] ${j.name}: ${j.description}\n Steps: ${j.steps.join(' → ')}` |
| 82 | + ) |
| 83 | + .join('\n'); |
| 84 | + |
| 85 | + const featureContext = analysis.features |
| 86 | + .map((f) => `- ${f.name}: ${f.description} (elements: ${f.expectedElements.join(', ')})`) |
| 87 | + .join('\n'); |
| 88 | + |
| 89 | + let screenContext = ''; |
| 90 | + if (screenGraph && screenGraph.screens.length > 0) { |
| 91 | + screenContext = '\n\n## Real Device Screen Data\n'; |
| 92 | + screenContext += `Discovered ${screenGraph.screens.length} screens with ${screenGraph.transitions.length} transitions.\n\n`; |
| 93 | + |
| 94 | + for (const screen of screenGraph.screens) { |
| 95 | + screenContext += `### ${screen.id}\n`; |
| 96 | + if (screen.reachedVia) { |
| 97 | + screenContext += `Reached via: ${screen.reachedVia.action} from ${screen.reachedVia.fromScreen}\n`; |
| 98 | + } |
| 99 | + screenContext += `Visible texts: ${screen.visibleTexts.slice(0, 20).join(', ')}\n`; |
| 100 | + screenContext += `Tappable elements: ${screen.tappableElements.map((e) => `"${e.label}" (${e.type})`).join(', ')}\n\n`; |
| 101 | + } |
| 102 | + |
| 103 | + screenContext += '### Navigation Paths\n'; |
| 104 | + for (const t of screenGraph.transitions) { |
| 105 | + screenContext += `- ${t.fromScreen} → tap "${t.element}" → ${t.toScreen}\n`; |
| 106 | + } |
| 107 | + |
| 108 | + screenContext += |
| 109 | + '\nIMPORTANT: Use the REAL element labels from screen data above — they are the actual UI labels on the device.'; |
| 110 | + } |
| 111 | + |
| 112 | + const hasScreenshots = screenshots && screenshots.length > 0; |
| 113 | + |
| 114 | + const textContent = `Generate exactly ${numFlows} YAML test flows for this mobile app. Call save_flow once for each flow. |
| 115 | +
|
| 116 | +## App: ${analysis.appName} |
| 117 | +${analysis.appId ? `Package: ${analysis.appId}` : ''} |
| 118 | +Platform: ${analysis.platform} |
| 119 | +
|
| 120 | +## Features |
| 121 | +${featureContext} |
| 122 | +
|
| 123 | +## User Journeys (prioritized — pick the top ${numFlows}) |
| 124 | +${journeyContext} |
| 125 | +${screenContext} |
| 126 | +${hasScreenshots ? `\n## App Screenshots (${screenshots!.length} provided)\nThe screenshots below show the actual app UI. Use the EXACT button labels, text, and element names visible in them when writing flow steps.` : ''} |
| 127 | +
|
| 128 | +Generate ${numFlows} diverse flows. Include a mix of core happy-path flows, secondary feature flows, and at least one edge case if applicable.`; |
| 129 | + |
| 130 | + const parts: ContentPart[] = [{ type: 'text', text: textContent }]; |
| 131 | + |
| 132 | + if (hasScreenshots) { |
| 133 | + for (const shot of screenshots!) { |
| 134 | + // Label each image so the agent knows which screen it's looking at |
| 135 | + parts.push({ type: 'text', text: `[Screenshot: ${shot.filename}]` }); |
| 136 | + parts.push({ type: 'image', image: shot.base64, mimeType: shot.mimeType }); |
| 137 | + } |
| 138 | + } |
| 139 | + |
| 140 | + return parts; |
| 141 | +} |
| 142 | + |
| 143 | +function buildYaml(name: string, comment: string, steps: string[]): string { |
| 144 | + const lines: string[] = [`# ${comment}`, `name: ${name}`, '---']; |
| 145 | + for (const step of steps) { |
| 146 | + lines.push(`- ${step}`); |
| 147 | + } |
| 148 | + const lastStep = steps[steps.length - 1]; |
| 149 | + if (!lastStep?.toLowerCase().startsWith('done')) { |
| 150 | + lines.push('- done'); |
| 151 | + } |
| 152 | + return lines.join('\n'); |
| 153 | +} |
| 154 | + |
| 155 | +/** |
| 156 | + * Generate YAML flows using a ToolLoopAgent. |
| 157 | + * |
| 158 | + * The agent generates flows one at a time by calling save_flow for each, |
| 159 | + * giving it full context of what it has already generated so each new |
| 160 | + * flow is meaningfully different. |
| 161 | + */ |
| 162 | +export async function generateFlowsWithAgent( |
| 163 | + analysis: PRDAnalysis, |
| 164 | + numFlows: number, |
| 165 | + model: any, |
| 166 | + providerOptions?: Record<string, any>, |
| 167 | + screenGraph?: ScreenGraph, |
| 168 | + screenshots?: ScreenshotData[] |
| 169 | +): Promise<GeneratedFlow[]> { |
| 170 | + const generatedFlows: GeneratedFlow[] = []; |
| 171 | + |
| 172 | + const agent = new ToolLoopAgent({ |
| 173 | + model, |
| 174 | + instructions: FLOW_AGENT_INSTRUCTIONS, |
| 175 | + tools: { |
| 176 | + save_flow: tool({ |
| 177 | + description: |
| 178 | + 'Save a generated YAML test flow. Call this once per flow after composing its name, comment, and steps.', |
| 179 | + inputSchema: z.object({ |
| 180 | + name: z |
| 181 | + .string() |
| 182 | + .describe( |
| 183 | + "Descriptive flow name (e.g. 'YouTube — search Appium 3.0 and verify TestMu AI video')" |
| 184 | + ), |
| 185 | + comment: z.string().describe('One-line comment placed at the top of the YAML file'), |
| 186 | + journey: z.string().describe('Which user journey this flow covers'), |
| 187 | + steps: z |
| 188 | + .array(z.string()) |
| 189 | + .describe('Ordered natural language steps including the final done: step'), |
| 190 | + }), |
| 191 | + execute: async ({ name, comment, journey, steps }) => { |
| 192 | + generatedFlows.push({ |
| 193 | + name, |
| 194 | + description: comment, |
| 195 | + yamlContent: buildYaml(name, comment, steps), |
| 196 | + journey, |
| 197 | + }); |
| 198 | + |
| 199 | + const remaining = numFlows - generatedFlows.length; |
| 200 | + return { |
| 201 | + saved: true, |
| 202 | + flowsGenerated: generatedFlows.length, |
| 203 | + remaining, |
| 204 | + }; |
| 205 | + }, |
| 206 | + }), |
| 207 | + }, |
| 208 | + // Stop when the agent makes a step with no tool calls (it's done) |
| 209 | + // or after a hard cap to prevent runaway loops |
| 210 | + stopWhen: [isLoopFinished(), stepCountIs(numFlows + 3)], |
| 211 | + ...(providerOptions ? { providerOptions } : {}), |
| 212 | + }); |
| 213 | + |
| 214 | + const parts = buildPromptParts(analysis, numFlows, screenGraph, screenshots); |
| 215 | + |
| 216 | + // Use multimodal message format when screenshots are present, plain string otherwise |
| 217 | + const prompt = |
| 218 | + parts.length === 1 && parts[0].type === 'text' |
| 219 | + ? parts[0].text |
| 220 | + : [{ role: 'user' as const, content: parts }]; |
| 221 | + |
| 222 | + await agent.generate({ prompt }); |
| 223 | + |
| 224 | + return generatedFlows; |
| 225 | +} |
0 commit comments