Skip to content

Commit e3fbb21

Browse files
gewenyu99 and claude committed
feat(orchestrator): walking skeleton runner
Seed agent enqueues a parallel-branch task graph (install/init, then identify and plan-capture, then capture); the executor drains it one fresh agent per task as dry-run stubs; the TUI renders the queue. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 4076a2c commit e3fbb21

4 files changed

Lines changed: 234 additions & 18 deletions

File tree

src/lib/agent/agent-interface.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,11 @@ export type AgentConfig = {
265265
getPendingQuestion?: () =>
266266
| import('@lib/wizard-session').PendingQuestion
267267
| null;
268+
/**
269+
* Orchestrator queue context. Present only on the experimental `orchestrator`
270+
* variant; threaded into wizard-tools so the orchestrator tools register.
271+
*/
272+
orchestrator?: import('@lib/programs/orchestrator/queue-tools').OrchestratorToolsContext;
268273
};
269274

270275
/**
@@ -708,6 +713,7 @@ export async function initializeAgent(
708713
skillsBaseUrl: config.skillsBaseUrl,
709714
askBridge: config.askBridge,
710715
askMaxQuestions: config.askMaxQuestions,
716+
orchestrator: config.orchestrator,
711717
});
712718
mcpServers['wizard-tools'] = wizardToolsServer;
713719

@@ -747,8 +753,6 @@ export async function initializeAgent(
747753
});
748754
}
749755

750-
getUI().log.step(`Verbose logs: ${getLogFilePath()}`);
751-
getUI().log.success("Agent initialized. Let's get cooking!");
752756
return agentRunConfig;
753757
} catch (error) {
754758
getUI().log.error(

src/lib/agent/agent-runner.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,12 @@ import {
4545
getBlockingServiceKeys,
4646
SERVICE_LABELS,
4747
} from '@lib/health-checks/readiness';
48-
import { enableDebugLogs, initLogFile, logToFile } from '@utils/debug';
48+
import {
49+
enableDebugLogs,
50+
getLogFilePath,
51+
initLogFile,
52+
logToFile,
53+
} from '@utils/debug';
4954
import { createBenchmarkPipeline } from '@lib/middleware/benchmark';
5055
import { wizardAbort, WizardError, registerCleanup } from '@utils/wizard-abort';
5156
import { formatScanReport, writeScanReport } from '@lib/yara-hooks';
@@ -439,6 +444,8 @@ async function runLinearProgram(
439444
},
440445
sessionToOptions(session),
441446
);
447+
getUI().log.step(`Verbose logs: ${getLogFilePath()}`);
448+
getUI().log.success("Agent initialized. Let's get cooking!");
442449

443450
const middleware = session.benchmark
444451
? createBenchmarkPipeline(spinner, sessionToOptions(session))
Lines changed: 175 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,193 @@
11
/**
22
* Experimental task-queue orchestrator runner.
33
*
4-
* Branches from the linear runner when the `wizard-orchestrator` feature flag is
5-
* on. The shape: an orchestrator agent inspects the repo and seeds an
6-
* in-memory task queue, and an executor drains it one fresh agent per task.
4+
* Branches from the linear runner when the `wizard-orchestrator` flag is on. An
5+
* orchestrator agent inspects the repo and seeds an in-memory task queue; an
6+
* executor drains it, running one fresh agent per task.
77
*
8-
* This is the stub. It logs, emits a start event, and returns. The queue, the
9-
* executor, and the seeding agent land in the following issues.
8+
* This is the walking skeleton. The seed agent plans a task graph, and each task
9+
* is a dry-run stub that only reports a handoff, so the whole pipeline runs
10+
* end-to-end without touching the user's project. Real per-task work, served from
11+
* context-mill as `agents`, replaces the stub prompts later.
1012
*/
11-
import type { WizardSession } from '../../wizard-session';
12-
import type { ProgramConfig } from '../program-step';
13-
import type { BootstrapResult } from '../../agent/agent-runner';
13+
import { randomUUID } from 'crypto';
14+
import {
15+
initializeAgent,
16+
runAgent,
17+
type AgentConfig,
18+
} from '../../agent/agent-interface';
19+
import { OutroKind, type WizardSession } from '../../wizard-session';
20+
import { detectNodePackageManagers } from '../../detection/package-manager';
1421
import { getUI } from '../../../ui';
15-
import { logToFile } from '../../../utils/debug';
1622
import { analytics } from '../../../utils/analytics';
23+
import { logToFile } from '../../../utils/debug';
24+
import type { ProgramConfig } from '../program-step';
25+
import type { BootstrapResult } from '../../agent/agent-runner';
26+
import type { WizardRunOptions } from '../../../utils/types';
27+
import { QueueStore, type TaskStatus } from './queue';
28+
import { drainQueue, type RunTask } from './executor';
29+
import {
30+
SKELETON_TASK_TYPES,
31+
buildSeedPrompt,
32+
buildStubPrompt,
33+
} from './skeleton-prompts';
34+
35+
/** Model for the seed agent: it plans the whole task graph, so it gets the stronger model. */
const SEED_MODEL = 'claude-sonnet-4-6';
/** Model for the dry-run stub tasks; they only report a handoff, so a cheap model is enough. */
const STUB_MODEL = 'claude-haiku-4-5-20251001';
/**
 * Tools denied to every skeleton agent so the dry run never edits the user's
 * project. `NotebookEdit` is listed alongside `Write` and `Edit` because it is
 * a separate built-in tool that also writes to disk; leaving it out would let
 * an agent modify notebook files despite the dry-run promise.
 */
const NO_EDIT_TOOLS = ['Write', 'Edit', 'NotebookEdit', 'Bash'] as const;
1740

18-
export function runOrchestrator(
41+
/**
 * Maps a queue task status onto the three states handed to the TUI todo list.
 *
 * The return type is the literal union rather than `string`, so callers keep
 * the exact set of values this function can produce.
 *
 * @param status - Status of a queued task.
 * @returns `'in_progress'` for a running task, `'completed'` for a task that
 *   reached a terminal state, and `'pending'` for every other status.
 */
function toTodoStatus(
  status: TaskStatus,
): 'pending' | 'in_progress' | 'completed' {
  switch (status) {
    case 'in_progress':
      return 'in_progress';
    // Both terminal states render as completed.
    // NOTE(review): presumably the todo list has no dedicated failed state —
    // confirm, since a failed task is shown the same as a successful one.
    case 'done':
    case 'failed':
      return 'completed';
    default:
      return 'pending';
  }
}
52+
53+
/**
 * Builds the run options passed to `initializeAgent` / `runAgent` from the
 * session. Every field is copied straight from the session except `default`,
 * which is always `false` here.
 *
 * @param session - Wizard session to read the option values from.
 * @returns A new options object; the session itself is not modified.
 */
function sessionRunOptions(session: WizardSession): WizardRunOptions {
  const {
    installDir,
    debug,
    signup,
    localMcp,
    ci,
    benchmark,
    projectId,
    apiKey,
    yaraReport,
  } = session;

  return {
    installDir,
    debug,
    default: false,
    signup,
    localMcp,
    ci,
    benchmark,
    projectId,
    apiKey,
    yaraReport,
  };
}
67+
68+
/**
 * Runs the experimental task-queue orchestrator as a dry run.
 *
 * Steps: create a queue store for a fresh run id, let the seed agent enqueue
 * the task graph, drain the queue with one freshly initialized agent per task,
 * mirror the queue into the TUI todo list along the way, then record the
 * summary, show the outro and shut analytics down. Every agent runs with
 * `NO_EDIT_TOOLS` disallowed.
 *
 * @param session - Wizard session; supplies the install directory and the run
 *   options.
 * @param programConfig - Program being run; its id labels logs, analytics
 *   events and agents.
 * @param boot - Bootstrap result supplying the MCP URL, access token, host,
 *   skills base URL, wizard flags and wizard metadata for each agent config.
 * @returns Resolves once analytics has been shut down.
 */
export async function runOrchestrator(
  session: WizardSession,
  programConfig: ProgramConfig,
  boot: BootstrapResult,
): Promise<void> {
  const runId = randomUUID();
  const store = new QueueStore(session.installDir, runId);

  const options = sessionRunOptions(session);

  logToFile(
    `[orchestrator] START program=${programConfig.id} dir=${session.installDir} run=${runId}`,
  );
  analytics.wizardCapture('orchestrator started', {
    program_id: programConfig.id,
  });
  getUI().startRun();

  // Mirrors the current queue contents into the TUI todo list.
  const renderQueue = () =>
    getUI().syncTodos(
      store.list().map((t) => ({
        content: t.type,
        status: toTodoStatus(t.status),
        activeForm: `Running ${t.type}`,
      })),
    );

  // Each agent gets its own config so its wizard-tools server is bound to the
  // task it runs — independent tasks run in parallel, and attribution of
  // complete_task / enqueue_task must hold per agent. The seed is not a task,
  // so its context has no task id.
  const agentConfigFor = (currentTaskId?: string): AgentConfig => ({
    workingDirectory: session.installDir,
    posthogMcpUrl: boot.mcpUrl,
    posthogApiKey: boot.accessToken,
    posthogApiHost: boot.host,
    detectPackageManager: detectNodePackageManagers,
    skillsBaseUrl: boot.skillsBaseUrl,
    wizardFlags: boot.wizardFlags,
    // Tag agent events as orchestrator so telemetry segments from the baseline.
    wizardMetadata: { ...boot.wizardMetadata, VARIANT: 'orchestrator' },
    integrationLabel: programConfig.id,
    orchestrator: {
      store,
      validTypes: SKELETON_TASK_TYPES,
      currentTaskId,
    },
  });

  // NOTE(review): this single spinner is shared by the seed and by every task,
  // and tasks are described as running in parallel — confirm the spinner
  // tolerates concurrent runAgent calls.
  const spinner = getUI().spinner();

  // 1. Seed the queue with the orchestrator agent.
  const seedAgent = await initializeAgent(agentConfigFor(), options);
  const seedResult = await runAgent(
    { ...seedAgent, model: SEED_MODEL, disallowedTools: [...NO_EDIT_TOOLS] },
    buildSeedPrompt(),
    options,
    spinner,
    {
      spinnerMessage: 'Planning the integration...',
      successMessage: 'Planned the integration',
      additionalFeatureQueue: [],
    },
  );
  if (seedResult.error) {
    const detail = `${seedResult.error} ${seedResult.message ?? ''}`;
    logToFile(`[orchestrator] seed error: ${detail}`);
    // Also tell the user: a failed seed can leave the queue empty or partial,
    // and a log-file-only entry would make the run look like a clean plan.
    getUI().log.error(`Orchestrator seeding failed: ${detail.trim()}`);
  }
  // Snapshot once so the count and the types describe the same queue state.
  const seededTasks = store.list();
  analytics.wizardCapture('orchestrator seeded', {
    task_count: seededTasks.length,
    types: seededTasks.map((t) => t.type),
  });
  renderQueue();

  // 2. Drain the queue, one fresh agent per task. Independent tasks run in
  // parallel — the graph the seed planned is the only schedule.
  const runTask: RunTask = async (task) => {
    renderQueue();
    try {
      // Initialization sits inside the try so the todo list is re-rendered
      // even when the agent fails before it starts running.
      const agent = await initializeAgent(agentConfigFor(task.id), options);
      await runAgent(
        {
          ...agent,
          model: task.model ?? STUB_MODEL,
          disallowedTools: [...NO_EDIT_TOOLS],
        },
        buildStubPrompt(task),
        options,
        spinner,
        {
          spinnerMessage: `Running ${task.type}...`,
          successMessage: `${task.type} done`,
          additionalFeatureQueue: [],
        },
      );
    } finally {
      renderQueue();
    }
  };
  await drainQueue(store, runTask);

  renderQueue();

  const summary = store.summary();
  logToFile(
    `[orchestrator] DONE done=${summary.done} failed=${summary.failed} total=${summary.total}`,
  );
  analytics.wizardCapture('orchestrator run finished', {
    tasks_total: summary.total,
    tasks_done: summary.done,
    tasks_failed: summary.failed,
  });

  // Mention failures explicitly; the done/total ratio alone hides whether the
  // missing tasks failed or never ran.
  const message =
    summary.failed > 0
      ? `Orchestrator dry run finished: ${summary.done}/${summary.total} tasks completed, ${summary.failed} failed.`
      : `Orchestrator dry run finished: ${summary.done}/${summary.total} tasks completed.`;
  // NOTE(review): the outro kind and the analytics shutdown status stay
  // "success" even when tasks failed — confirm whether an error outcome should
  // be reported instead.
  getUI().setOutroData({
    kind: OutroKind.Success,
    message,
    reportFile: store.queuePath,
    docsUrl: 'https://posthog.com/docs/ai-engineering/ai-wizard',
  });
  getUI().outro(message);
  await analytics.shutdown('success');
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/**
2+
* Inline prompts for the walking skeleton. They prove the seed -> drain pipeline
3+
* without touching the user's project: the seed agent plans a task graph, and each
4+
* task is a dry-run stub that only reports a handoff. The real agent prompts live
5+
* in context-mill as the `agents` content type and replace these later.
6+
*/
7+
import type { QueuedTask } from './queue';
8+
9+
/**
 * The task types the walking skeleton seeds, mirroring the real integration
 * graph. Declared `as const` so the literal type names are preserved.
 */
export const SKELETON_TASK_TYPES = [
  'install', 'init', 'identify', 'plan-capture', 'capture',
] as const;
17+
18+
/**
 * Builds the prompt for the seed (orchestrator) agent.
 *
 * The prompt tells the agent to glance at the repository, enqueue the five
 * skeleton tasks with their dependencies via enqueue_task, and never call
 * complete_task. Paragraphs are separated by a blank line; the five numbered
 * steps sit on consecutive lines.
 *
 * @returns The complete seed prompt text.
 */
export function buildSeedPrompt(): string {
  const role =
    'You are the orchestrator for a PostHog integration. Right now your only job is to plan the work and seed a task queue. Do NOT install anything, edit any files, or integrate PostHog yourself.';
  const inspect =
    'Take a quick look at the repository to confirm it is a real project. A brief glance, not a deep analysis.';
  const seedIntro =
    'Then seed the queue by calling enqueue_task five times. Each call returns a JSON object with an "id". Capture those ids so you can wire dependencies:';
  const steps = [
    '1. enqueue_task, type "install", no dependsOn.',
    '2. enqueue_task, type "init", no dependsOn. install and init are independent and can run together.',
    '3. enqueue_task, type "identify", dependsOn = [the install id, the init id].',
    '4. enqueue_task, type "plan-capture", dependsOn = [the install id, the init id]. identify and plan-capture are independent of each other.',
    '5. enqueue_task, type "capture", dependsOn = [the plan-capture id].',
  ].join('\n');
  const closing =
    'After all five are enqueued, you are done. Do NOT call complete_task. You are the orchestrator, not a task.';

  return [role, inspect, seedIntro, steps, closing].join('\n\n');
}
33+
34+
/**
 * Builds the dry-run prompt for a single queued task.
 *
 * The prompt names the task type, forbids any change to the user's project,
 * and asks for exactly one complete_task call with status "done" and a
 * handoff. The handoff fields (goals, did, forNextAgent) are indented beneath
 * "handoff:" so they read as members of the handoff rather than as further
 * top-level arguments next to status.
 *
 * @param task - The queued task; only its `type` is read.
 * @returns The complete stub prompt text.
 */
export function buildStubPrompt(task: QueuedTask): string {
  return `You are a single, isolated task of type "${task.type}" in a PostHog integration. This is a DRY RUN: do not install anything, edit any files, or change the user's project in any way.

Your only job is to report completion. Call complete_task exactly once with:
- status: "done"
- handoff:
  - goals: one line on what a real "${task.type}" task would aim to do
  - did: a one-line note that this was a dry-run stub, no changes made
  - forNextAgent: anything the next task should know

Then stop.`;
}

0 commit comments

Comments
 (0)