Skip to content

Commit ab4eb35

Browse files
garrytan and claude committed
test(gbrain): periodic E2E for /setup-gbrain Path 4 + Step 4.5 Yes flow
End-to-end coverage of the new opt-in question via runAgentSdkTest. Stubs the MCP endpoint at /tools/list with a 200 response carrying a fake gbrain v0.32.3.0 serverInfo, and fakes the gbrain + claude CLIs so init writes a PGLite config and mcp add succeeds.

Asserts the model:
1. invokes gstack-gbrain-install (Step 4.5 Yes branch)
2. invokes `gbrain init --pglite --json`
3. writes a working ~/.gbrain/config.json with engine=pglite
4. registers the remote MCP via `claude mcp add --transport http`
5. never leaks the bearer token to CLAUDE.md

Classified as periodic-tier per plan D6 (codex #12 flagged AgentSDK flakiness; gate-tier coverage of the split-engine behavior lives in the deterministic unit tests at gbrain-local-status.test.ts and gbrain-sync-skip.test.ts).

Touchfile fires the test when the skill template, install/verify/init helpers, the local-status classifier, or the agent-sdk-runner harness changes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 7a920ff commit ab4eb35

2 files changed

Lines changed: 269 additions & 0 deletions

File tree

test/helpers/touchfiles.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,11 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
157157
// or the detect script changes.
158158
'setup-gbrain-remote': ['setup-gbrain/SKILL.md.tmpl', 'bin/gstack-gbrain-mcp-verify', 'bin/gstack-artifacts-init', 'bin/gstack-gbrain-detect', 'test/helpers/agent-sdk-runner.ts'],
159159
'setup-gbrain-bad-token': ['setup-gbrain/SKILL.md.tmpl', 'bin/gstack-gbrain-mcp-verify', 'test/helpers/agent-sdk-runner.ts'],
160+
// v1.34.0.0 split-engine Path 4 + Step 4.5 Yes (local PGLite for code).
161+
// Periodic-tier per codex #12 (AgentSDK harness is non-deterministic).
162+
// Fires when the setup-gbrain template, install/verify/init helpers, or
163+
// the agent-sdk-runner harness changes.
164+
'setup-gbrain-path4-local-pglite': ['setup-gbrain/SKILL.md.tmpl', 'bin/gstack-gbrain-mcp-verify', 'bin/gstack-gbrain-install', 'bin/gstack-gbrain-detect', 'lib/gbrain-local-status.ts', 'test/helpers/agent-sdk-runner.ts'],
160165

161166
// AskUserQuestion format regression (RECOMMENDATION + Completeness: N/10)
162167
// Fires when either template OR the two preamble resolvers change.
@@ -471,6 +476,7 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
471476
// model's behavior against a stub MCP server.
472477
'setup-gbrain-remote': 'periodic',
473478
'setup-gbrain-bad-token': 'periodic',
479+
'setup-gbrain-path4-local-pglite': 'periodic',
474480

475481
// AskUserQuestion format regression — periodic (Opus 4.7 non-deterministic benchmark)
476482
'plan-ceo-review-format-mode': 'periodic',
Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
// E2E: /setup-gbrain Path 4 with Step 4.5 "Yes" — local PGLite for code search.
2+
//
3+
// Drives the skill against a stub HTTP MCP server (200 OK on tools/list).
4+
// Auto-answers AskUserQuestion to pick:
5+
// - Path 4 at Step 2 (Remote gbrain MCP)
6+
// - "Yes, set up local PGLite for code" at Step 4.5
7+
//
8+
// Asserts that the model:
9+
// 1. invoked gstack-gbrain-install (Step 4.5 Yes branch)
10+
// 2. invoked `gbrain init --pglite` (also Step 4.5 Yes branch)
11+
// 3. ended up with a ~/.gbrain/config.json whose engine is "pglite"
12+
// 4. registered the remote MCP via `claude mcp add` (--transport http or --header)
13+
// 5. never wrote the bearer token into CLAUDE.md
// The verify helper (Step 4c) and the "Code search ..... OK local-pglite" row of
// the Step 10 verdict are exercised by the flow but are not asserted directly.
14+
//
15+
// Periodic-tier (codex #12: AgentSDK harness is non-deterministic; gate-tier
16+
// coverage of the split-engine behavior lives in the deterministic unit
17+
// tests at gbrain-local-status.test.ts, gbrain-sync-skip.test.ts, etc).
18+
//
19+
// Cost: ~$0.50-$1.00 per run. Periodic-tier (EVALS=1 EVALS_TIER=periodic).
20+
21+
import { describe, test, expect } from 'bun:test';
22+
import * as fs from 'fs';
23+
import * as os from 'os';
24+
import * as path from 'path';
25+
import * as http from 'http';
26+
import {
27+
runAgentSdkTest,
28+
passThroughNonAskUserQuestion,
29+
resolveClaudeBinary,
30+
} from './helpers/agent-sdk-runner';
31+
32+
// Opt-in gate: the suite only runs when EVALS is set to a non-empty value and
// EVALS_TIER selects the periodic tier; otherwise the whole describe is skipped.
const shouldRun =
  Boolean(process.env.EVALS) && process.env.EVALS_TIER === 'periodic';
const describeE2E = shouldRun ? describe : describe.skip;
34+
35+
/**
 * Minimal stub MCP server for the verify helper.
 *
 * Every request is answered with HTTP 200 and a single server-sent event whose
 * data is a JSON-RPC 2.0 response: an `initialize` request gets a fake gbrain
 * serverInfo, anything else (including an unparseable body) gets an empty
 * `tools` list. The response echoes the request's `id` when it has one and
 * falls back to 1 otherwise.
 *
 * @returns The stub's `/mcp` URL on an ephemeral 127.0.0.1 port, plus a
 *   `close()` that resolves once the server has shut down. The promise rejects
 *   if the server fails to start listening.
 */
function startStubMcp(): Promise<{ url: string; close: () => Promise<void> }> {
  return new Promise((resolve, reject) => {
    const server = http.createServer((req, res) => {
      let body = '';
      // Decode as UTF-8 up front so multi-byte characters split across chunks
      // are reassembled correctly.
      req.setEncoding('utf8');
      req.on('data', (chunk: string) => (body += chunk));
      req.on('end', () => {
        let requestId: unknown = 1;
        let method: unknown;
        try {
          const parsed: unknown = JSON.parse(body);
          if (typeof parsed === 'object' && parsed !== null) {
            const rpc = parsed as { id?: unknown; method?: unknown };
            method = rpc.method;
            // JSON-RPC 2.0: a response must carry the id of its request.
            if (rpc.id !== undefined) requestId = rpc.id;
          }
        } catch {
          // Body was not JSON; answer with the default tools/list payload.
        }

        const result =
          method === 'initialize'
            ? {
                protocolVersion: '2024-11-05',
                capabilities: { tools: {} },
                serverInfo: { name: 'gbrain', version: '0.32.3.0' },
              }
            : { tools: [] };
        const payload = { jsonrpc: '2.0', id: requestId, result };

        res.statusCode = 200;
        res.setHeader('Content-Type', 'text/event-stream');
        // One response per connection, so close() is not held up by idle
        // keep-alive sockets.
        res.setHeader('Connection', 'close');
        res.end(`event: message\ndata: ${JSON.stringify(payload)}\n\n`);
      });
    });

    // Surface bind/listen failures through the returned promise.
    server.once('error', reject);
    server.listen(0, '127.0.0.1', () => {
      server.off('error', reject);
      const addr = server.address();
      if (!addr || typeof addr === 'string') {
        server.close();
        reject(new Error('no address'));
        return;
      }
      resolve({
        url: `http://127.0.0.1:${addr.port}/mcp`,
        close: () => new Promise<void>((done) => server.close(() => done())),
      });
    });
  });
}
78+
79+
/**
 * Writes a fake `gbrain` CLI (a bash script, mode 0755) into `binDir`:
 *   - `--version`          → prints "gbrain 0.33.1.0"
 *   - `init --pglite ...`  → writes a pglite config to `gbrainConfigPath`
 *                            (creating its directory if needed), prints a JSON
 *                            status line, exits 0
 *   - everything else      → exits 0 quietly
 *
 * Every invocation is appended to a call log so the test can assert that
 * `init` was called.
 *
 * @param binDir Directory that receives the `gbrain` script and its call log.
 * @param gbrainConfigPath File the fake `init --pglite` writes its config to.
 * @returns Path of the call log (`<binDir>/gbrain-calls.log`).
 */
function makeFakeGbrain(binDir: string, gbrainConfigPath: string): string {
  // Single-quote for bash so `$`, backticks, quotes and backslashes in a path
  // are taken literally instead of being re-expanded by the script.
  const shq = (s: string): string => `'${s.replace(/'/g, "'\\''")}'`;

  const callLog = path.join(binDir, 'gbrain-calls.log');
  const script = `#!/bin/bash
echo "gbrain $@" >> ${shq(callLog)}
case "$1 $2" in
  "--version "*) echo "gbrain 0.33.1.0"; exit 0 ;;
  "init --pglite")
    mkdir -p ${shq(path.dirname(gbrainConfigPath))}
    cat > ${shq(gbrainConfigPath)} <<JSON
{"engine":"pglite","database_url":"pglite:///fake"}
JSON
    echo '{"status":"ok","engine":"pglite"}'
    exit 0 ;;
esac
exit 0
`;
  fs.writeFileSync(path.join(binDir, 'gbrain'), script, { mode: 0o755 });
  return callLog;
}
104+
105+
/**
 * Writes a fake `claude` CLI (a bash script, mode 0755) into `binDir` that
 * answers `mcp add` / `mcp list` / `mcp remove` / `mcp get` with canned output
 * and exits 0 for anything else. Every invocation is appended to a call log so
 * the test can assert that remote MCP registration happened.
 *
 * @param binDir Directory that receives the `claude` script and its call log.
 * @returns Path of the call log (`<binDir>/claude-calls.log`).
 */
function makeFakeClaude(binDir: string): string {
  // Single-quote for bash so `$`, backticks, quotes and backslashes in the
  // path are taken literally instead of being re-expanded by the script.
  const shq = (s: string): string => `'${s.replace(/'/g, "'\\''")}'`;

  const callLog = path.join(binDir, 'claude-calls.log');
  const script = `#!/bin/bash
echo "claude $@" >> ${shq(callLog)}
case "$1 $2" in
  "mcp add") exit 0 ;;
  "mcp list") echo "gbrain: http://stub/mcp (HTTP) — connected" ; exit 0 ;;
  "mcp remove") exit 0 ;;
  "mcp get") echo '{"type":"http","url":"http://stub/mcp"}'; exit 0 ;;
esac
exit 0
`;
  fs.writeFileSync(path.join(binDir, 'claude'), script, { mode: 0o755 });
  return callLog;
}
124+
125+
/**
 * Writes a fake `gstack-gbrain-install` (a bash script, mode 0755) into
 * `binDir` so the test never clones the gbrain repo or runs bun-link. The
 * script only appends its arguments to a call log and exits 0; the test cares
 * solely that the skill INVOKED it on the Yes branch.
 *
 * @param binDir Directory that receives the script and its call log.
 * @returns Path of the call log (`<binDir>/install-calls.log`).
 */
function makeFakeInstall(binDir: string): string {
  // Single-quote for bash so `$`, backticks, quotes and backslashes in the
  // path are taken literally instead of being re-expanded by the script.
  const shq = (s: string): string => `'${s.replace(/'/g, "'\\''")}'`;

  const callLog = path.join(binDir, 'install-calls.log');
  const script = `#!/bin/bash
echo "install $@" >> ${shq(callLog)}
exit 0
`;
  fs.writeFileSync(path.join(binDir, 'gstack-gbrain-install'), script, {
    mode: 0o755,
  });
  return callLog;
}
140+
141+
describeE2E('/setup-gbrain Path 4 + Step 4.5 Yes → local PGLite for code', () => {
  // Runs the skill against a stub MCP server and fake gbrain / claude /
  // gstack-gbrain-install CLIs inside a sandboxed HOME, auto-answering
  // AskUserQuestion, then inspects the fakes' call logs and the sandbox files.
  test('opt-in flow invokes install + gbrain init + remote MCP register', async () => {
    const FAKE_TOKEN = 'gbrain_fake_token_for_test';
    const ORIGINAL_CLAUDE_MD = '# Test project\n';

    // Snapshot the env before anything can fail so `finally` can always put
    // it back, no matter how far setup got.
    const savedEnv: Record<string, string | undefined> = {
      HOME: process.env.HOME,
      PATH: process.env.PATH,
      GBRAIN_MCP_TOKEN: process.env.GBRAIN_MCP_TOKEN,
    };
    const restoreEnv = (): void => {
      for (const [key, value] of Object.entries(savedEnv)) {
        if (value === undefined) delete process.env[key];
        else process.env[key] = value;
      }
    };

    // A fake CLI that was never invoked leaves no log file; treat as empty.
    const readLog = (file: string): string =>
      fs.existsSync(file) ? fs.readFileSync(file, 'utf-8') : '';

    // Everything created below is registered here so cleanup also runs when a
    // later setup step throws.
    const tempDirs: string[] = [];
    const makeTempDir = (prefix: string): string => {
      const dir = fs.mkdtempSync(path.join(os.tmpdir(), prefix));
      tempDirs.push(dir);
      return dir;
    };
    let closeStub: (() => Promise<void>) | undefined;

    try {
      const stubServer = await startStubMcp();
      closeStub = () => stubServer.close();

      const sandboxHome = makeTempDir('path4-pglite-');
      const fakeBinDir = makeTempDir('path4-pglite-bin-');
      const gbrainConfigDir = path.join(sandboxHome, '.gbrain');
      fs.mkdirSync(gbrainConfigDir, { recursive: true });
      const gbrainConfigPath = path.join(gbrainConfigDir, 'config.json');
      const claudeLog = makeFakeClaude(fakeBinDir);
      const gbrainLog = makeFakeGbrain(fakeBinDir, gbrainConfigPath);
      const installLog = makeFakeInstall(fakeBinDir);

      fs.writeFileSync(path.join(sandboxHome, 'CLAUDE.md'), ORIGINAL_CLAUDE_MD);

      const askLog: Array<{ question: string; choice: string }> = [];

      // Resolved BEFORE PATH is rewritten below, where the fake `claude` comes
      // first. NOTE(review): presumably resolveClaudeBinary consults PATH/HOME;
      // confirm against the helper before reordering.
      const binary = resolveClaudeBinary();

      // Fake CLIs first, then the repo's bin/, then the inherited PATH.
      process.env.HOME = sandboxHome;
      process.env.PATH = [
        fakeBinDir,
        path.join(path.resolve(import.meta.dir, '..'), 'bin'),
        process.env.PATH ?? '/usr/bin:/bin:/opt/homebrew/bin',
      ].join(':');
      process.env.GBRAIN_MCP_TOKEN = FAKE_TOKEN;

      const skillPath = path.resolve(
        import.meta.dir,
        '..',
        'setup-gbrain',
        'SKILL.md',
      );
      await runAgentSdkTest({
        systemPrompt: { type: 'preset', preset: 'claude_code' },
        userPrompt:
          `Read the skill file at ${skillPath} and follow Path 4 (Remote MCP). ` +
          `Use this MCP URL: ${stubServer.url}. ` +
          `The bearer token is already in GBRAIN_MCP_TOKEN. ` +
          `At Step 4.5 (the new "Want symbol-aware code search?" question), PICK YES — set up local PGLite for code. ` +
          `Then continue through Step 5a (MCP registration) → Step 10 (verdict). ` +
          `Do not skip Step 4.5; the test depends on the Yes path being taken.`,
        workingDirectory: sandboxHome,
        maxTurns: 25,
        allowedTools: ['Read', 'Grep', 'Glob', 'Bash', 'Write', 'Edit'],
        ...(binary ? { pathToClaudeCodeExecutable: binary } : {}),
        canUseTool: async (toolName, input) => {
          if (toolName !== 'AskUserQuestion') {
            return passThroughNonAskUserQuestion(toolName, input);
          }
          const qs = input.questions as Array<{
            question: string;
            options: Array<{ label: string }>;
          }>;
          const answers: Record<string, string> = {};
          for (const q of qs) {
            // Heuristics, in priority order: an option that screams
            // "yes/PGLite/code search", else the Path 4 / remote MCP option,
            // else the first option.
            // NOTE(review): the first pattern is tried on every question, so a
            // Step 2 option labelled like "Local PGLite" would win over the
            // Path 4 option — confirm against the skill's actual labels.
            const picked =
              q.options.find((o) =>
                /yes.*local|local.*pglite|code search|opt in/i.test(o.label),
              ) ??
              q.options.find((o) => /remote.*mcp|path 4/i.test(o.label)) ??
              q.options[0];
            // A question without options has nothing to choose; leave it
            // unanswered rather than crash the permission callback.
            if (!picked) continue;
            answers[q.question] = picked.label;
            askLog.push({ question: q.question, choice: picked.label });
          }
          return {
            behavior: 'allow',
            updatedInput: { questions: qs, answers },
          };
        },
      });

      // Assertion 1: gstack-gbrain-install was invoked (Step 4.5 Yes branch).
      expect(readLog(installLog).length).toBeGreaterThan(0);

      // Assertion 2: `gbrain init --pglite` was invoked.
      expect(readLog(gbrainLog)).toMatch(/gbrain init --pglite/);

      // Assertion 3: local PGLite config was written.
      expect(fs.existsSync(gbrainConfigPath)).toBe(true);
      const cfg = JSON.parse(fs.readFileSync(gbrainConfigPath, 'utf-8')) as {
        engine: string;
      };
      expect(cfg.engine).toBe('pglite');

      // Assertion 4: claude mcp add --transport http was invoked (remote MCP register).
      expect(readLog(claudeLog)).toMatch(
        /mcp add.*--transport http|mcp add.*--header/,
      );

      // Assertion 5: token never leaked to CLAUDE.md
      const finalClaudeMd = fs.readFileSync(
        path.join(sandboxHome, 'CLAUDE.md'),
        'utf-8',
      );
      expect(finalClaudeMd).not.toContain(FAKE_TOKEN);

      // Soft assertion: AskUserQuestion was actually called (sanity)
      expect(askLog.length).toBeGreaterThan(0);
    } finally {
      restoreEnv();
      try {
        await closeStub?.();
      } finally {
        // Remove the sandbox even if shutting the stub down failed.
        for (const dir of tempDirs) {
          fs.rmSync(dir, { recursive: true, force: true });
        }
      }
    }
  }, 300_000);
});

0 commit comments

Comments
 (0)