feat(orchestrator): walking skeleton runner

gewenyu99 · claude · gewenyu99 · commit fea70a9e34c2 · 2026-06-10T11:56:02.000-04:00
The seed agent enqueues a parallel-branch task graph (install and init, then identify and plan-capture, then capture); the executor drains it with one fresh agent per task, each running as a dry-run stub; the TUI renders the queue.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
diff --git a/src/lib/agent/agent-interface.ts b/src/lib/agent/agent-interface.ts
@@ -265,6 +265,11 @@ export type AgentConfig = {
   getPendingQuestion?: () =>
     | import('@lib/wizard-session').PendingQuestion
     | null;
+  /**
+   * Orchestrator queue context. Present only on the experimental `orchestrator`
+   * variant; threaded into wizard-tools so the orchestrator tools register.
+   */
+  orchestrator?: import('@lib/programs/orchestrator/queue-tools').OrchestratorToolsContext;
 };
 
 /**
@@ -708,6 +713,7 @@ export async function initializeAgent(
       skillsBaseUrl: config.skillsBaseUrl,
       askBridge: config.askBridge,
       askMaxQuestions: config.askMaxQuestions,
+      orchestrator: config.orchestrator,
     });
     mcpServers['wizard-tools'] = wizardToolsServer;
 
@@ -747,8 +753,6 @@ export async function initializeAgent(
       });
     }
 
-    getUI().log.step(`Verbose logs: ${getLogFilePath()}`);
-    getUI().log.success("Agent initialized. Let's get cooking!");
     return agentRunConfig;
   } catch (error) {
     getUI().log.error(
diff --git a/src/lib/agent/agent-runner.ts b/src/lib/agent/agent-runner.ts
@@ -45,7 +45,12 @@ import {
   getBlockingServiceKeys,
   SERVICE_LABELS,
 } from '@lib/health-checks/readiness';
-import { enableDebugLogs, initLogFile, logToFile } from '@utils/debug';
+import {
+  enableDebugLogs,
+  getLogFilePath,
+  initLogFile,
+  logToFile,
+} from '@utils/debug';
 import { createBenchmarkPipeline } from '@lib/middleware/benchmark';
 import { wizardAbort, WizardError, registerCleanup } from '@utils/wizard-abort';
 import { formatScanReport, writeScanReport } from '@lib/yara-hooks';
@@ -439,6 +444,8 @@ async function runLinearProgram(
     },
     sessionToOptions(session),
   );
+  getUI().log.step(`Verbose logs: ${getLogFilePath()}`);
+  getUI().log.success("Agent initialized. Let's get cooking!");
 
   const middleware = session.benchmark
     ? createBenchmarkPipeline(spinner, sessionToOptions(session))
diff --git a/src/lib/programs/orchestrator/orchestrator-runner.ts b/src/lib/programs/orchestrator/orchestrator-runner.ts
@@ -1,33 +1,193 @@
 /**
  * Experimental task-queue orchestrator runner.
  *
- * Branches from the linear runner when the `wizard-orchestrator` feature flag is
- * on. The shape: an orchestrator agent inspects the repo and seeds an
- * in-memory task queue, and an executor drains it one fresh agent per task.
+ * Branches from the linear runner when the `wizard-orchestrator` flag is on. An
+ * orchestrator agent inspects the repo and seeds an in-memory task queue; an
+ * executor drains it, running one fresh agent per task.
  *
- * This is the stub. It logs, emits a start event, and returns. The queue, the
- * executor, and the seeding agent land in the following issues.
+ * This is the walking skeleton. The seed agent plans a task graph, and each task
+ * is a dry-run stub that only reports a handoff, so the whole pipeline runs
+ * end-to-end without touching the user's project. Real per-task work, served from
+ * context-mill as `agents`, replaces the stub prompts later.
  */
-import type { WizardSession } from '../../wizard-session';
-import type { ProgramConfig } from '../program-step';
-import type { BootstrapResult } from '../../agent/agent-runner';
+import { randomUUID } from 'crypto';
+import {
+  initializeAgent,
+  runAgent,
+  type AgentConfig,
+} from '../../agent/agent-interface';
+import { OutroKind, type WizardSession } from '../../wizard-session';
+import { detectNodePackageManagers } from '../../detection/package-manager';
 import { getUI } from '../../../ui';
-import { logToFile } from '../../../utils/debug';
 import { analytics } from '../../../utils/analytics';
+import { logToFile } from '../../../utils/debug';
+import type { ProgramConfig } from '../program-step';
+import type { BootstrapResult } from '../../agent/agent-runner';
+import type { WizardRunOptions } from '../../../utils/types';
+import { QueueStore, type TaskStatus } from './queue';
+import { drainQueue, type RunTask } from './executor';
+import {
+  SKELETON_TASK_TYPES,
+  buildSeedPrompt,
+  buildStubPrompt,
+} from './skeleton-prompts';
+
+/** Model for the seed agent: it plans the task graph, so it gets the stronger model. */
+const SEED_MODEL = 'claude-sonnet-4-6';
+const STUB_MODEL = 'claude-haiku-4-5-20251001'; // fallback for stub tasks that carry no `model` of their own
+/** Passed as `disallowedTools` to the seed and to every stub agent so the skeleton never edits the user's project. */
+const NO_EDIT_TOOLS = ['Write', 'Edit', 'Bash'] as const;
 
-export function runOrchestrator(
+/** Maps a queue status to a TUI todo status; failed tasks show as completed. */
+function toTodoStatus(status: TaskStatus): string {
+  if (status === 'in_progress') {
+    return 'in_progress';
+  }
+  if (status === 'done' || status === 'failed') {
+    return 'completed';
+  }
+  // Every remaining status is rendered as not yet started.
+  return 'pending';
+}
+
+/**
+ * Builds the run options handed to `initializeAgent` and `runAgent` by copying
+ * the matching fields off the session; `default` is always false.
+ *
+ * NOTE(review): agent-runner calls a `sessionToOptions(session)` helper that
+ * looks like it serves the same purpose; confirm whether the two should merge.
+ *
+ * @param session - Wizard session to read the option values from.
+ */
+function sessionRunOptions(session: WizardSession): WizardRunOptions {
+  const { installDir, debug, signup, localMcp, ci } = session;
+  const { benchmark, projectId, apiKey, yaraReport } = session;
+  const base = { installDir, debug, default: false, signup, localMcp, ci };
+  return { ...base, benchmark, projectId, apiKey, yaraReport };
+}
+
+export async function runOrchestrator(
   session: WizardSession,
   programConfig: ProgramConfig,
-  _boot: BootstrapResult,
+  boot: BootstrapResult,
 ): Promise<void> {
+  const runId = randomUUID(); // identifies this run in the START log line and in the QueueStore
+  const store = new QueueStore(session.installDir, runId);
+
+  const options = sessionRunOptions(session);
+
   logToFile(
-    `[orchestrator] START program=${programConfig.id} dir=${session.installDir}`,
+    `[orchestrator] START program=${programConfig.id} dir=${session.installDir} run=${runId}`,
   );
   analytics.wizardCapture('orchestrator started', {
     program_id: programConfig.id,
   });
-  getUI().log.info(
-    'Orchestrator flag is on. This runner is a stub for now; the queue and executor land in the following issues.',
+  getUI().startRun();
+
+  const renderQueue = () =>
+    getUI().syncTodos(
+      store.list().map((t) => ({
+        content: t.type,
+        status: toTodoStatus(t.status), // failed tasks also map to 'completed'
+        activeForm: `Running ${t.type}`,
+      })),
+    );
+
+  // Build a fresh config per agent so each wizard-tools server is bound to the
+  // task that agent runs: independent tasks run in parallel, so complete_task /
+  // enqueue_task calls must be attributable per agent. The seed is not a task,
+  // so it is built without a task id.
+  const agentConfigFor = (currentTaskId?: string): AgentConfig => ({
+    workingDirectory: session.installDir,
+    posthogMcpUrl: boot.mcpUrl,
+    posthogApiKey: boot.accessToken,
+    posthogApiHost: boot.host,
+    detectPackageManager: detectNodePackageManagers,
+    skillsBaseUrl: boot.skillsBaseUrl,
+    wizardFlags: boot.wizardFlags,
+    // Tag agent events as orchestrator so telemetry segments from the baseline.
+    wizardMetadata: { ...boot.wizardMetadata, VARIANT: 'orchestrator' },
+    integrationLabel: programConfig.id,
+    orchestrator: {
+      store,
+      validTypes: SKELETON_TASK_TYPES,
+      currentTaskId,
+    },
+  });
+
+  const spinner = getUI().spinner(); // NOTE(review): one spinner is shared by the seed and every task agent; confirm it copes with parallel tasks
+
+  // 1. Seed the queue with the orchestrator agent (edit tools disallowed).
+  const seedAgent = await initializeAgent(agentConfigFor(), options);
+  const seedResult = await runAgent(
+    { ...seedAgent, model: SEED_MODEL, disallowedTools: [...NO_EDIT_TOOLS] },
+    buildSeedPrompt(),
+    options,
+    spinner,
+    {
+      spinnerMessage: 'Planning the integration...',
+      successMessage: 'Planned the integration',
+      additionalFeatureQueue: [],
+    },
   );
-  return Promise.resolve();
+  if (seedResult.error) { // NOTE(review): a seed error is only logged; the run carries on with whatever was enqueued
+    logToFile(
+      `[orchestrator] seed error: ${seedResult.error} ${
+        seedResult.message ?? ''
+      }`,
+    );
+  }
+  analytics.wizardCapture('orchestrator seeded', {
+    task_count: store.list().length,
+    types: store.list().map((t) => t.type),
+  });
+  renderQueue();
+
+  // 2. Drain the queue with one freshly initialized agent per task. Independent
+  // tasks run in parallel — the graph the seed planned is the only schedule.
+  const runTask: RunTask = async (task) => {
+    renderQueue();
+    const agent = await initializeAgent(agentConfigFor(task.id), options);
+    try {
+      await runAgent( // NOTE(review): the result is ignored; the outcome is presumably recorded via complete_task — confirm
+        {
+          ...agent,
+          model: task.model ?? STUB_MODEL,
+          disallowedTools: [...NO_EDIT_TOOLS],
+        },
+        buildStubPrompt(task),
+        options,
+        spinner,
+        {
+          spinnerMessage: `Running ${task.type}...`,
+          successMessage: `${task.type} done`,
+          additionalFeatureQueue: [],
+        },
+      );
+    } finally { // repaint the queue even when the agent run throws
+      renderQueue();
+    }
+  };
+  await drainQueue(store, runTask);
+
+  renderQueue();
+
+  const summary = store.summary();
+  logToFile(
+    `[orchestrator] DONE done=${summary.done} failed=${summary.failed} total=${summary.total}`,
+  );
+  analytics.wizardCapture('orchestrator run finished', {
+    tasks_total: summary.total,
+    tasks_done: summary.done,
+    tasks_failed: summary.failed,
+  });
+
+  const message = `Orchestrator dry run finished: ${summary.done}/${summary.total} tasks completed.`;
+  getUI().setOutroData({
+    kind: OutroKind.Success, // NOTE(review): reported as success even when summary.failed > 0 or the seed errored
+    message,
+    reportFile: store.queuePath,
+    docsUrl: 'https://posthog.com/docs/ai-engineering/ai-wizard',
+  });
+  getUI().outro(message);
+  await analytics.shutdown('success');
 }
diff --git a/src/lib/programs/orchestrator/skeleton-prompts.ts b/src/lib/programs/orchestrator/skeleton-prompts.ts
@@ -0,0 +1,45 @@
+/**
+ * Inline prompts for the walking skeleton. They prove the seed -> drain pipeline
+ * without touching the user's project: the seed agent plans a task graph, and each
+ * task is a dry-run stub that only reports a handoff. The real agent prompts live
+ * in context-mill as the `agents` content type and replace these later.
+ */
+import type { QueuedTask } from './queue';
+
+/** Task types the skeleton seeds, in the order the seed prompt enqueues them. Mirrors the real integration graph. */
+export const SKELETON_TASK_TYPES = [
+  'install',
+  'init',
+  'identify',
+  'plan-capture',
+  'capture',
+] as const;
+
+/** Prompt for the seed agent: enqueue the five skeleton tasks with their dependencies, nothing else. */
+export function buildSeedPrompt(): string {
+  const steps = [
+    '1. enqueue_task, type "install", no dependsOn.',
+    '2. enqueue_task, type "init", no dependsOn. install and init are independent and can run together.',
+    '3. enqueue_task, type "identify", dependsOn = [the install id, the init id].',
+    '4. enqueue_task, type "plan-capture", dependsOn = [the install id, the init id]. identify and plan-capture are independent of each other.',
+    '5. enqueue_task, type "capture", dependsOn = [the plan-capture id].',
+  ].join('\n');
+  const role = 'You are the orchestrator for a PostHog integration. Right now your only job is to plan the work and seed a task queue. Do NOT install anything, edit any files, or integrate PostHog yourself.';
+  const glance = 'Take a quick look at the repository to confirm it is a real project. A brief glance, not a deep analysis.';
+  const seeding = 'Then seed the queue by calling enqueue_task five times. Each call returns a JSON object with an "id". Capture those ids so you can wire dependencies:';
+  const finish = 'After all five are enqueued, you are done. Do NOT call complete_task. You are the orchestrator, not a task.';
+  return [role, glance, seeding, steps, finish].join('\n\n');
+}
+
+/** Dry-run stub prompt: the task only reports a handoff through complete_task. */
+export function buildStubPrompt(task: QueuedTask): string {
+  const handoff = [
+    `  - goals: one line on what a real "${task.type}" task would aim to do`,
+    '  - did: a one-line note that this was a dry-run stub, no changes made',
+    '  - forNextAgent: anything the next task should know',
+  ];
+  const intro = `You are a single, isolated task of type "${task.type}" in a PostHog integration. This is a DRY RUN: do not install anything, edit any files, or change the user's project in any way.`;
+  const ask = 'Your only job is to report completion. Call complete_task exactly once with:';
+  const report = [ask, '- status: "done"', '- handoff:', ...handoff].join('\n');
+  return [intro, report, 'Then stop.'].join('\n\n');
+}