Skip to content

Commit 6c2318d

Browse files
gewenyu99claude
andcommitted
feat(orchestrator): markdown-backed agent loader + full integration flow (#619)
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 5679456 commit 6c2318d

11 files changed

Lines changed: 902 additions & 26 deletions

File tree

src/lib/agent/agent-interface.ts

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,12 @@ export type AgentConfig = {
147147
getPendingQuestion?: () =>
148148
| import('@lib/wizard-session').PendingQuestion
149149
| null;
150+
/**
151+
* Orchestrator queue context. Present only when the `wizard-orchestrator`
152+
* flag routes the run here; threaded into wizard-tools so the orchestrator
153+
* tools register.
154+
*/
155+
orchestrator?: import('@lib/programs/orchestrator/queue-tools').OrchestratorToolsContext;
150156
};
151157

152158
/**
@@ -168,6 +174,7 @@ export type StopHookResult =
168174
export function createStopHook(
169175
featureQueue: readonly AdditionalFeature[],
170176
signals?: AgentOutputSignals,
177+
requestRemark = true,
171178
): (input: { stop_hook_active: boolean }) => StopHookResult {
172179
let featureIndex = 0;
173180
let remarkRequested = false;
@@ -195,8 +202,9 @@ export function createStopHook(
195202
return { decision: 'block', reason: prompt };
196203
}
197204

198-
// Phase 2: collect remark (once)
199-
if (!remarkRequested) {
205+
// Phase 2: collect remark (once). Skipped when the caller opts out — the
206+
// orchestrator suppresses it per task so it does not fire on every agent.
207+
if (requestRemark && !remarkRequested) {
200208
remarkRequested = true;
201209
logToFile('Stop hook: requesting reflection');
202210
return {
@@ -537,8 +545,6 @@ export async function initializeAgent(
537545
logToFile('Agent initialization starting');
538546
logToFile('Install directory:', options.installDir);
539547

540-
getUI().log.step('Initializing Claude agent...');
541-
542548
try {
543549
// Configure LLM gateway environment variables (inherited by SDK subprocess)
544550
const gatewayUrl = getLlmGatewayUrlFromHost(config.posthogApiHost);
@@ -590,6 +596,7 @@ export async function initializeAgent(
590596
skillsBaseUrl: config.skillsBaseUrl,
591597
askBridge: config.askBridge,
592598
askMaxQuestions: config.askMaxQuestions,
599+
orchestrator: config.orchestrator,
593600
});
594601
mcpServers['wizard-tools'] = wizardToolsServer;
595602

@@ -624,8 +631,6 @@ export async function initializeAgent(
624631
});
625632
}
626633

627-
getUI().log.step(`Verbose logs: ${getLogFilePath()}`);
628-
getUI().log.success("Agent initialized. Let's get cooking!");
629634
return agentRunConfig;
630635
} catch (error) {
631636
getUI().log.error(
@@ -671,6 +676,8 @@ export async function runAgent(
671676
errorMessage?: string;
672677
additionalFeatureQueue?: readonly AdditionalFeature[];
673678
abortCases?: readonly AbortCaseMatcher[];
679+
/** Request the end-of-run reflection remark. Defaults to true. */
680+
requestRemark?: boolean;
674681
},
675682
middleware?: {
676683
onMessage(message: any): void;
@@ -930,7 +937,11 @@ export async function runAgent(
930937
Stop: [
931938
{
932939
hooks: [
933-
createStopHook(config?.additionalFeatureQueue ?? [], signals),
940+
createStopHook(
941+
config?.additionalFeatureQueue ?? [],
942+
signals,
943+
config?.requestRemark ?? true,
944+
),
934945
],
935946
timeout: 30,
936947
},
@@ -978,6 +989,7 @@ export async function runAgent(
978989
signals,
979990
receivedSuccessResult,
980991
tasks,
992+
isOrchestratorEnabled(agentConfig.wizardFlags ?? {}),
981993
);
982994

983995
// [ABORT] detection: the skill emits "[ABORT] <reason>" when it
@@ -1327,6 +1339,9 @@ function handleSDKMessage(
13271339
signals: AgentOutputSignals,
13281340
receivedSuccessResult = false,
13291341
tasks?: Map<string, TaskEntry>,
1342+
// The orchestrator owns the TUI task panel (it renders its queue). Suppress the
1343+
// agent's own TaskCreate/TaskUpdate rendering so it does not clobber the queue.
1344+
suppressTaskRender = false,
13301345
): void {
13311346
// Map preserves insertion order (the order the agent created the tasks).
13321347
// Within that, group by status: completed first, then in_progress, then
@@ -1338,7 +1353,7 @@ function handleSDKMessage(
13381353
};
13391354
const rank = (status: string): number => STATUS_RANK[status] ?? 2;
13401355
const syncTasks = (): void => {
1341-
if (!tasks) return;
1356+
if (!tasks || suppressTaskRender) return;
13421357
const sorted = Array.from(tasks.values()).sort(
13431358
(a, b) => rank(a.status) - rank(b.status),
13441359
);

src/lib/agent/agent-runner.ts

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,14 @@ import {
3131
AgentErrorType,
3232
AgentSignals,
3333
buildWizardMetadata,
34+
isOrchestratorEnabled,
3435
} from './agent-interface';
3536
import {
3637
checkAllSettingsConflicts,
3738
backupAndFixClaudeSettings,
3839
restoreClaudeSettings,
3940
} from './claude-settings';
41+
import { runOrchestrator } from '../programs/orchestrator/orchestrator-runner';
4042
import { getCloudUrlFromRegion } from '@utils/urls';
4143
import {
4244
evaluateWizardReadiness,
@@ -45,7 +47,12 @@ import {
4547
getBlockingServiceKeys,
4648
SERVICE_LABELS,
4749
} from '@lib/health-checks/readiness';
48-
import { enableDebugLogs, initLogFile, logToFile } from '@utils/debug';
50+
import {
51+
enableDebugLogs,
52+
getLogFilePath,
53+
initLogFile,
54+
logToFile,
55+
} from '@utils/debug';
4956
import { createBenchmarkPipeline } from '@lib/middleware/benchmark';
5057
import { wizardAbort, WizardError, registerCleanup } from '@utils/wizard-abort';
5158
import { formatScanReport, writeScanReport } from '@lib/yara-hooks';
@@ -209,6 +216,11 @@ export async function runProgram(
209216
): Promise<void> {
210217
const boot = await bootstrapProgram(session, config, programConfig);
211218

219+
if (isOrchestratorEnabled(boot.wizardFlags)) {
220+
getUI().log.info('Task-queue orchestrator enabled.');
221+
return runOrchestrator(session, programConfig, boot);
222+
}
223+
212224
return runLinearProgram(session, config, programConfig, boot);
213225
}
214226

@@ -445,6 +457,7 @@ async function runLinearProgram(
445457
timeoutMs: config.askTimeoutMs,
446458
});
447459

460+
getUI().log.step('Initializing Claude agent...');
448461
const agent = await initializeAgent(
449462
{
450463
workingDirectory: session.installDir,
@@ -466,6 +479,8 @@ async function runLinearProgram(
466479
},
467480
sessionToOptions(session),
468481
);
482+
getUI().log.step(`Verbose logs: ${getLogFilePath()}`);
483+
getUI().log.success("Agent initialized. Let's get cooking!");
469484

470485
logToFile('[agent-runner] agent initialized');
471486

Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
import * as fs from 'fs';
2+
import * as os from 'os';
3+
import * as path from 'path';
4+
import {
5+
agentRunTools,
6+
buildRegistry,
7+
parseAgentPrompt,
8+
resolveTask,
9+
type AgentPrompt,
10+
type AgentRegistry,
11+
} from '../agent-prompt-loader';
12+
import { QueueStore } from '../queue';
13+
14+
function tmpDir(): string {
15+
return fs.mkdtempSync(path.join(os.tmpdir(), 'agent-loader-test-'));
16+
}
17+
18+
function registryOf(prompts: AgentPrompt[]): AgentRegistry {
19+
return buildRegistry(
20+
prompts.map((p) => ({ ...p, flow: 'test-flow' })),
21+
'test-flow',
22+
);
23+
}
24+
25+
describe('parseAgentPrompt', () => {
26+
const sample = `---
27+
type: instrument-events
28+
model: claude-sonnet-4-6 # cheapest model that succeeds
29+
skills: [instrument-events]
30+
allowedTools: [Read, Edit, Grep, Glob, Bash]
31+
disallowedTools: [enqueue_task]
32+
dependsOn: [init]
33+
---
34+
35+
## Goal
36+
Add at least one capture call.
37+
`;
38+
39+
it('parses frontmatter scalars and inline arrays', () => {
40+
const p = parseAgentPrompt(sample, 'fallback');
41+
expect(p.type).toBe('instrument-events');
42+
expect(p.model).toBe('claude-sonnet-4-6');
43+
expect(p.skills).toEqual(['instrument-events']);
44+
expect(p.allowedTools).toEqual(['Read', 'Edit', 'Grep', 'Glob', 'Bash']);
45+
expect(p.disallowedTools).toEqual(['enqueue_task']);
46+
expect(p.dependsOn).toEqual(['init']);
47+
});
48+
49+
it('strips inline comments and keeps the body', () => {
50+
const p = parseAgentPrompt(sample, 'fallback');
51+
expect(p.model).not.toContain('#');
52+
expect(p.body).toContain('## Goal');
53+
expect(p.body).not.toContain('---');
54+
});
55+
56+
it('falls back to the menu id when type is omitted', () => {
57+
const p = parseAgentPrompt('---\nmodel: x\n---\nbody', 'install');
58+
expect(p.type).toBe('install');
59+
});
60+
61+
it('parses the flow from frontmatter', () => {
62+
const p = parseAgentPrompt('---\nflow: audit\n---\nx', 'fix-events');
63+
expect(p.flow).toBe('audit');
64+
});
65+
66+
it('marks the seed from frontmatter; everything else is a task', () => {
67+
expect(parseAgentPrompt('---\nseed: true\n---\nplan', 'planner').seed).toBe(
68+
true,
69+
);
70+
expect(parseAgentPrompt('---\nmodel: x\n---\nbody', 'install').seed).toBe(
71+
false,
72+
);
73+
});
74+
75+
it('defaults missing array fields to empty and model to undefined', () => {
76+
const p = parseAgentPrompt('no frontmatter at all', 'stub');
77+
expect(p.model).toBeUndefined();
78+
expect(p.skills).toEqual([]);
79+
expect(p.dependsOn).toEqual([]);
80+
expect(p.body).toBe('no frontmatter at all');
81+
});
82+
});
83+
84+
describe('agentRunTools', () => {
85+
it('MCP-qualifies orchestrator tools and passes native tools through', () => {
86+
const p = parseAgentPrompt(
87+
'---\nallowedTools: [Read, read_handoffs]\ndisallowedTools: [enqueue_task, complete_task, Bash]\n---\nx',
88+
't',
89+
);
90+
const { allowedTools, disallowedTools } = agentRunTools(p);
91+
expect(allowedTools).toEqual([
92+
'Read',
93+
'mcp__posthog-wizard__read_handoffs',
94+
]);
95+
expect(disallowedTools).toEqual([
96+
'mcp__posthog-wizard__enqueue_task',
97+
'mcp__posthog-wizard__complete_task',
98+
'Bash',
99+
]);
100+
});
101+
});
102+
103+
describe('buildRegistry', () => {
104+
const prompt = (over: Partial<AgentPrompt>): AgentPrompt => ({
105+
type: 'x',
106+
seed: false,
107+
skills: [],
108+
allowedTools: [],
109+
disallowedTools: [],
110+
dependsOn: [],
111+
body: 'b',
112+
...over,
113+
});
114+
115+
it('scopes to one flow and keeps the seed out of the task types', () => {
116+
const registry = buildRegistry(
117+
[
118+
prompt({ type: 'plan-audit', flow: 'audit', seed: true }),
119+
prompt({ type: 'fix-events', flow: 'audit' }),
120+
prompt({ type: 'install', flow: 'posthog-integration' }),
121+
prompt({ type: 'example' }),
122+
],
123+
'audit',
124+
);
125+
expect(registry.types).toEqual(['fix-events']);
126+
expect(registry.seed?.type).toBe('plan-audit');
127+
expect(registry.get('install')).toBeUndefined();
128+
// A flowless prompt (e.g. the documentation example) joins no registry.
129+
expect(registry.get('example')).toBeUndefined();
130+
});
131+
});
132+
133+
describe('resolveTask', () => {
134+
let dir: string;
135+
let store: QueueStore;
136+
137+
beforeEach(() => {
138+
dir = tmpDir();
139+
store = new QueueStore(dir, 'run-1');
140+
});
141+
142+
afterEach(() => {
143+
fs.rmSync(dir, { recursive: true, force: true });
144+
});
145+
146+
const prompt: AgentPrompt = {
147+
type: 'capture',
148+
seed: false,
149+
model: 'claude-haiku-4-5-20251001',
150+
skills: ['instrument-events'],
151+
allowedTools: ['Read', 'Edit'],
152+
disallowedTools: ['enqueue_task'],
153+
dependsOn: ['plan-capture'],
154+
body: '## Goal\nInstrument the planned events.',
155+
};
156+
157+
it('throws when no prompt is registered for the type', () => {
158+
const registry = registryOf([]);
159+
const task = { type: 'capture', dependsOn: [] } as never;
160+
expect(() => resolveTask(registry, task, store)).toThrow(/capture/);
161+
});
162+
163+
it('resolves model, tools, and skills from the prompt', () => {
164+
const registry = registryOf([prompt]);
165+
const task = store.enqueue({ type: 'capture' });
166+
const resolved = resolveTask(registry, task, store);
167+
expect(resolved.model).toBe('claude-haiku-4-5-20251001');
168+
expect(resolved.skills).toEqual(['instrument-events']);
169+
expect(resolved.disallowedTools).toEqual([
170+
'mcp__posthog-wizard__enqueue_task',
171+
]);
172+
});
173+
174+
it('prefers the enqueue model override over the prompt model', () => {
175+
const registry = registryOf([prompt]);
176+
const task = store.enqueue({ type: 'capture', model: 'override-x' });
177+
expect(resolveTask(registry, task, store).model).toBe('override-x');
178+
});
179+
180+
it("appends upstream dependencies' handoffs as context", () => {
181+
const registry = registryOf([prompt]);
182+
const dep = store.enqueue({ type: 'plan-capture' });
183+
store.complete(dep.id, {
184+
goals: 'decide events',
185+
did: 'picked signup and purchase',
186+
forNextAgent: 'instrument those two',
187+
});
188+
const task = store.enqueue({
189+
type: 'capture',
190+
dependsOn: [dep.id],
191+
});
192+
const resolved = resolveTask(registry, task, store);
193+
expect(resolved.prompt).toContain('Context from previous steps');
194+
expect(resolved.prompt).toContain('picked signup and purchase');
195+
expect(resolved.prompt).toContain('instrument those two');
196+
});
197+
198+
it('omits the context section when there are no handoffs', () => {
199+
const registry = registryOf([prompt]);
200+
const task = store.enqueue({ type: 'capture' });
201+
expect(resolveTask(registry, task, store).prompt).not.toContain(
202+
'Context from previous steps',
203+
);
204+
});
205+
});

0 commit comments

Comments
 (0)