Skip to content

Commit 1fb8a70

Browse files
gewenyu99 and claude committed
feat(orchestrator): markdown-backed agent loader + full integration flow
The runner: a seed agent plans the task graph and an executor drains it, one fresh agent per task, independent tasks in parallel, the TUI rendering the queue — terminal skipped tasks crossed out, the agent's own task tools gated off so the queue is the sole progress surface. Both the WHAT (agent prompts: model, goal, success criteria, tools) and the HOW (mini-skills) are markdown served from context-mill, fetched once at startup into a per-flow registry: agents declare flow and seed in frontmatter, so audit and migration flows can ship their own sets without touching the wizard. Each task agent gets its own wizard-tools binding so complete_task attribution holds under parallelism.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent fa90573 commit 1fb8a70

11 files changed

Lines changed: 882 additions & 26 deletions

File tree

src/lib/agent/agent-interface.ts

Lines changed: 23 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -265,6 +265,12 @@ export type AgentConfig = {
265265
getPendingQuestion?: () =>
266266
| import('@lib/wizard-session').PendingQuestion
267267
| null;
268+
/**
269+
* Orchestrator queue context. Present only when the `wizard-orchestrator`
270+
* flag routes the run here; threaded into wizard-tools so the orchestrator
271+
* tools register.
272+
*/
273+
orchestrator?: import('@lib/programs/orchestrator/queue-tools').OrchestratorToolsContext;
268274
};
269275

270276
/**
@@ -286,6 +292,7 @@ export type StopHookResult =
286292
export function createStopHook(
287293
featureQueue: readonly AdditionalFeature[],
288294
signals?: AgentOutputSignals,
295+
requestRemark = true,
289296
): (input: { stop_hook_active: boolean }) => StopHookResult {
290297
let featureIndex = 0;
291298
let remarkRequested = false;
@@ -313,8 +320,9 @@ export function createStopHook(
313320
return { decision: 'block', reason: prompt };
314321
}
315322

316-
// Phase 2: collect remark (once)
317-
if (!remarkRequested) {
323+
// Phase 2: collect remark (once). Skipped when the caller opts out — the
324+
// orchestrator suppresses it per task so it does not fire on every agent.
325+
if (requestRemark && !remarkRequested) {
318326
remarkRequested = true;
319327
logToFile('Stop hook: requesting reflection');
320328
return {
@@ -655,8 +663,6 @@ export async function initializeAgent(
655663
logToFile('Agent initialization starting');
656664
logToFile('Install directory:', options.installDir);
657665

658-
getUI().log.step('Initializing Claude agent...');
659-
660666
try {
661667
// Configure LLM gateway environment variables (inherited by SDK subprocess)
662668
const gatewayUrl = getLlmGatewayUrlFromHost(config.posthogApiHost);
@@ -708,6 +714,7 @@ export async function initializeAgent(
708714
skillsBaseUrl: config.skillsBaseUrl,
709715
askBridge: config.askBridge,
710716
askMaxQuestions: config.askMaxQuestions,
717+
orchestrator: config.orchestrator,
711718
});
712719
mcpServers['wizard-tools'] = wizardToolsServer;
713720

@@ -747,8 +754,6 @@ export async function initializeAgent(
747754
});
748755
}
749756

750-
getUI().log.step(`Verbose logs: ${getLogFilePath()}`);
751-
getUI().log.success("Agent initialized. Let's get cooking!");
752757
return agentRunConfig;
753758
} catch (error) {
754759
getUI().log.error(
@@ -794,6 +799,8 @@ export async function runAgent(
794799
errorMessage?: string;
795800
additionalFeatureQueue?: readonly AdditionalFeature[];
796801
abortCases?: readonly AbortCaseMatcher[];
802+
/** Request the end-of-run reflection remark. Defaults to true. */
803+
requestRemark?: boolean;
797804
},
798805
middleware?: {
799806
onMessage(message: any): void;
@@ -1052,7 +1059,11 @@ export async function runAgent(
10521059
Stop: [
10531060
{
10541061
hooks: [
1055-
createStopHook(config?.additionalFeatureQueue ?? [], signals),
1062+
createStopHook(
1063+
config?.additionalFeatureQueue ?? [],
1064+
signals,
1065+
config?.requestRemark ?? true,
1066+
),
10561067
],
10571068
timeout: 30,
10581069
},
@@ -1100,6 +1111,7 @@ export async function runAgent(
11001111
signals,
11011112
receivedSuccessResult,
11021113
tasks,
1114+
isOrchestratorEnabled(agentConfig.wizardFlags ?? {}),
11031115
);
11041116

11051117
// [ABORT] detection: the skill emits "[ABORT] <reason>" when it
@@ -1433,6 +1445,9 @@ function handleSDKMessage(
14331445
signals: AgentOutputSignals,
14341446
receivedSuccessResult = false,
14351447
tasks?: Map<string, TaskEntry>,
1448+
// The orchestrator owns the TUI task panel (it renders its queue). Suppress the
1449+
// agent's own TaskCreate/TaskUpdate rendering so it does not clobber the queue.
1450+
suppressTaskRender = false,
14361451
): void {
14371452
// Map preserves insertion order (the order the agent created the tasks).
14381453
// Within that, group by status: completed first, then in_progress, then
@@ -1444,7 +1459,7 @@ function handleSDKMessage(
14441459
};
14451460
const rank = (status: string): number => STATUS_RANK[status] ?? 2;
14461461
const syncTasks = (): void => {
1447-
if (!tasks) return;
1462+
if (!tasks || suppressTaskRender) return;
14481463
const sorted = Array.from(tasks.values()).sort(
14491464
(a, b) => rank(a.status) - rank(b.status),
14501465
);

src/lib/agent/agent-runner.ts

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,10 +31,12 @@ import {
3131
AgentErrorType,
3232
AgentSignals,
3333
buildWizardMetadata,
34+
isOrchestratorEnabled,
3435
checkAllSettingsConflicts,
3536
backupAndFixClaudeSettings,
3637
restoreClaudeSettings,
3738
} from './agent-interface';
39+
import { runOrchestrator } from '../programs/orchestrator/orchestrator-runner';
3840
import { getCloudUrlFromRegion } from '@utils/urls';
3941
import {
4042
evaluateWizardReadiness,
@@ -43,7 +45,12 @@ import {
4345
getBlockingServiceKeys,
4446
SERVICE_LABELS,
4547
} from '@lib/health-checks/readiness';
46-
import { enableDebugLogs, initLogFile, logToFile } from '@utils/debug';
48+
import {
49+
enableDebugLogs,
50+
getLogFilePath,
51+
initLogFile,
52+
logToFile,
53+
} from '@utils/debug';
4754
import { createBenchmarkPipeline } from '@lib/middleware/benchmark';
4855
import { wizardAbort, WizardError, registerCleanup } from '@utils/wizard-abort';
4956
import { formatScanReport, writeScanReport } from '@lib/yara-hooks';
@@ -200,6 +207,11 @@ export async function runProgram(
200207
): Promise<void> {
201208
const boot = await bootstrapProgram(session, config, programConfig);
202209

210+
if (isOrchestratorEnabled(boot.wizardFlags)) {
211+
getUI().log.info('Task-queue orchestrator enabled.');
212+
return runOrchestrator(session, programConfig, boot);
213+
}
214+
203215
return runLinearProgram(session, config, programConfig, boot);
204216
}
205217

@@ -412,6 +424,7 @@ async function runLinearProgram(
412424
showQuestion: (q) => getUI().requestQuestion(q),
413425
});
414426

427+
getUI().log.step('Initializing Claude agent...');
415428
const agent = await initializeAgent(
416429
{
417430
workingDirectory: session.installDir,
@@ -433,6 +446,8 @@ async function runLinearProgram(
433446
},
434447
sessionToOptions(session),
435448
);
449+
getUI().log.step(`Verbose logs: ${getLogFilePath()}`);
450+
getUI().log.success("Agent initialized. Let's get cooking!");
436451

437452
const middleware = session.benchmark
438453
? createBenchmarkPipeline(spinner, sessionToOptions(session))
Lines changed: 205 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,205 @@
1+
import * as fs from 'fs';
2+
import * as os from 'os';
3+
import * as path from 'path';
4+
import {
5+
agentRunTools,
6+
buildRegistry,
7+
parseAgentPrompt,
8+
resolveTask,
9+
type AgentPrompt,
10+
type AgentRegistry,
11+
} from '../agent-prompt-loader';
12+
import { QueueStore } from '../queue';
13+
14+
function tmpDir(): string {
15+
return fs.mkdtempSync(path.join(os.tmpdir(), 'agent-loader-test-'));
16+
}
17+
18+
function registryOf(prompts: AgentPrompt[]): AgentRegistry {
19+
return buildRegistry(
20+
prompts.map((p) => ({ ...p, flow: 'test-flow' })),
21+
'test-flow',
22+
);
23+
}
24+
25+
describe('parseAgentPrompt', () => {
26+
const sample = `---
27+
type: instrument-events
28+
model: claude-sonnet-4-6 # cheapest model that succeeds
29+
skills: [instrument-events]
30+
allowedTools: [Read, Edit, Grep, Glob, Bash]
31+
disallowedTools: [enqueue_task]
32+
dependsOn: [init]
33+
---
34+
35+
## Goal
36+
Add at least one capture call.
37+
`;
38+
39+
it('parses frontmatter scalars and inline arrays', () => {
40+
const p = parseAgentPrompt(sample, 'fallback');
41+
expect(p.type).toBe('instrument-events');
42+
expect(p.model).toBe('claude-sonnet-4-6');
43+
expect(p.skills).toEqual(['instrument-events']);
44+
expect(p.allowedTools).toEqual(['Read', 'Edit', 'Grep', 'Glob', 'Bash']);
45+
expect(p.disallowedTools).toEqual(['enqueue_task']);
46+
expect(p.dependsOn).toEqual(['init']);
47+
});
48+
49+
it('strips inline comments and keeps the body', () => {
50+
const p = parseAgentPrompt(sample, 'fallback');
51+
expect(p.model).not.toContain('#');
52+
expect(p.body).toContain('## Goal');
53+
expect(p.body).not.toContain('---');
54+
});
55+
56+
it('falls back to the menu id when type is omitted', () => {
57+
const p = parseAgentPrompt('---\nmodel: x\n---\nbody', 'install');
58+
expect(p.type).toBe('install');
59+
});
60+
61+
it('parses the flow from frontmatter', () => {
62+
const p = parseAgentPrompt('---\nflow: audit\n---\nx', 'fix-events');
63+
expect(p.flow).toBe('audit');
64+
});
65+
66+
it('marks the seed from frontmatter; everything else is a task', () => {
67+
expect(parseAgentPrompt('---\nseed: true\n---\nplan', 'planner').seed).toBe(
68+
true,
69+
);
70+
expect(parseAgentPrompt('---\nmodel: x\n---\nbody', 'install').seed).toBe(
71+
false,
72+
);
73+
});
74+
75+
it('defaults missing array fields to empty and model to undefined', () => {
76+
const p = parseAgentPrompt('no frontmatter at all', 'stub');
77+
expect(p.model).toBeUndefined();
78+
expect(p.skills).toEqual([]);
79+
expect(p.dependsOn).toEqual([]);
80+
expect(p.body).toBe('no frontmatter at all');
81+
});
82+
});
83+
84+
describe('agentRunTools', () => {
85+
it('MCP-qualifies orchestrator tools and passes native tools through', () => {
86+
const p = parseAgentPrompt(
87+
'---\nallowedTools: [Read, read_handoffs]\ndisallowedTools: [enqueue_task, complete_task, Bash]\n---\nx',
88+
't',
89+
);
90+
const { allowedTools, disallowedTools } = agentRunTools(p);
91+
expect(allowedTools).toEqual([
92+
'Read',
93+
'mcp__posthog-wizard__read_handoffs',
94+
]);
95+
expect(disallowedTools).toEqual([
96+
'mcp__posthog-wizard__enqueue_task',
97+
'mcp__posthog-wizard__complete_task',
98+
'Bash',
99+
]);
100+
});
101+
});
102+
103+
describe('buildRegistry', () => {
104+
const prompt = (over: Partial<AgentPrompt>): AgentPrompt => ({
105+
type: 'x',
106+
seed: false,
107+
skills: [],
108+
allowedTools: [],
109+
disallowedTools: [],
110+
dependsOn: [],
111+
body: 'b',
112+
...over,
113+
});
114+
115+
it('scopes to one flow and keeps the seed out of the task types', () => {
116+
const registry = buildRegistry(
117+
[
118+
prompt({ type: 'plan-audit', flow: 'audit', seed: true }),
119+
prompt({ type: 'fix-events', flow: 'audit' }),
120+
prompt({ type: 'install', flow: 'posthog-integration' }),
121+
prompt({ type: 'example' }),
122+
],
123+
'audit',
124+
);
125+
expect(registry.types).toEqual(['fix-events']);
126+
expect(registry.seed?.type).toBe('plan-audit');
127+
expect(registry.get('install')).toBeUndefined();
128+
// A flowless prompt (e.g. the documentation example) joins no registry.
129+
expect(registry.get('example')).toBeUndefined();
130+
});
131+
});
132+
133+
describe('resolveTask', () => {
134+
let dir: string;
135+
let store: QueueStore;
136+
137+
beforeEach(() => {
138+
dir = tmpDir();
139+
store = new QueueStore(dir, 'run-1');
140+
});
141+
142+
afterEach(() => {
143+
fs.rmSync(dir, { recursive: true, force: true });
144+
});
145+
146+
const prompt: AgentPrompt = {
147+
type: 'capture',
148+
seed: false,
149+
model: 'claude-haiku-4-5-20251001',
150+
skills: ['instrument-events'],
151+
allowedTools: ['Read', 'Edit'],
152+
disallowedTools: ['enqueue_task'],
153+
dependsOn: ['plan-capture'],
154+
body: '## Goal\nInstrument the planned events.',
155+
};
156+
157+
it('throws when no prompt is registered for the type', () => {
158+
const registry = registryOf([]);
159+
const task = { type: 'capture', dependsOn: [] } as never;
160+
expect(() => resolveTask(registry, task, store)).toThrow(/capture/);
161+
});
162+
163+
it('resolves model, tools, and skills from the prompt', () => {
164+
const registry = registryOf([prompt]);
165+
const task = store.enqueue({ type: 'capture' });
166+
const resolved = resolveTask(registry, task, store);
167+
expect(resolved.model).toBe('claude-haiku-4-5-20251001');
168+
expect(resolved.skills).toEqual(['instrument-events']);
169+
expect(resolved.disallowedTools).toEqual([
170+
'mcp__posthog-wizard__enqueue_task',
171+
]);
172+
});
173+
174+
it('prefers the enqueue model override over the prompt model', () => {
175+
const registry = registryOf([prompt]);
176+
const task = store.enqueue({ type: 'capture', model: 'override-x' });
177+
expect(resolveTask(registry, task, store).model).toBe('override-x');
178+
});
179+
180+
it("appends upstream dependencies' handoffs as context", () => {
181+
const registry = registryOf([prompt]);
182+
const dep = store.enqueue({ type: 'plan-capture' });
183+
store.complete(dep.id, {
184+
goals: 'decide events',
185+
did: 'picked signup and purchase',
186+
forNextAgent: 'instrument those two',
187+
});
188+
const task = store.enqueue({
189+
type: 'capture',
190+
dependsOn: [dep.id],
191+
});
192+
const resolved = resolveTask(registry, task, store);
193+
expect(resolved.prompt).toContain('Context from previous steps');
194+
expect(resolved.prompt).toContain('picked signup and purchase');
195+
expect(resolved.prompt).toContain('instrument those two');
196+
});
197+
198+
it('omits the context section when there are no handoffs', () => {
199+
const registry = registryOf([prompt]);
200+
const task = store.enqueue({ type: 'capture' });
201+
expect(resolveTask(registry, task, store).prompt).not.toContain(
202+
'Context from previous steps',
203+
);
204+
});
205+
});

0 commit comments

Comments
 (0)