|
| 1 | +// E2E: /setup-gbrain Path 4 with Step 4.5 "Yes" — local PGLite for code search. |
| 2 | +// |
| 3 | +// Drives the skill against a stub HTTP MCP server (200 OK on tools/list). |
| 4 | +// Auto-answers AskUserQuestion to pick: |
| 5 | +// - Path 4 at Step 2 (Remote gbrain MCP) |
| 6 | +// - "Yes, set up local PGLite for code" at Step 4.5 |
| 7 | +// |
| 8 | +// Asserts that the model: |
| 9 | +// 1. ran the verify helper successfully (got past Step 4c) |
| 10 | +// 2. invoked gstack-gbrain-install (Step 4.5 Yes branch) |
| 11 | +// 3. invoked `gbrain init --pglite --json` (also Step 4.5 Yes branch) |
| 12 | +// 4. registered the remote MCP via claude mcp add --transport http |
| 13 | +// 5. wrote a "Code search ..... OK local-pglite" row to the Step 10 verdict |
| 14 | +// |
| 15 | +// Periodic-tier (codex #12: AgentSDK harness is non-deterministic; gate-tier |
| 16 | +// coverage of the split-engine behavior lives in the deterministic unit |
| 17 | +// tests at gbrain-local-status.test.ts, gbrain-sync-skip.test.ts, etc). |
| 18 | +// |
| 19 | +// Cost: ~$0.50-$1.00 per run. Periodic-tier (EVALS=1 EVALS_TIER=periodic). |
| 20 | + |
| 21 | +import { describe, test, expect } from 'bun:test'; |
| 22 | +import * as fs from 'fs'; |
| 23 | +import * as os from 'os'; |
| 24 | +import * as path from 'path'; |
| 25 | +import * as http from 'http'; |
| 26 | +import { |
| 27 | + runAgentSdkTest, |
| 28 | + passThroughNonAskUserQuestion, |
| 29 | + resolveClaudeBinary, |
| 30 | +} from './helpers/agent-sdk-runner'; |
| 31 | + |
// Opt-in gate: the suite only runs when EVALS is set to a non-empty value AND
// EVALS_TIER is exactly 'periodic'. Otherwise the whole describe is registered
// as skipped.
const shouldRun =
  Boolean(process.env.EVALS) && process.env.EVALS_TIER === 'periodic';
const describeE2E = shouldRun ? describe : describe.skip;
| 34 | + |
/**
 * Starts a throwaway stub MCP server on an ephemeral 127.0.0.1 port.
 *
 * Every request — regardless of path, HTTP method, or headers — is answered
 * with HTTP 200 and a single `text/event-stream` message carrying a JSON-RPC
 * result:
 *   - `initialize`  → a fake handshake result (protocol 2024-11-05, server "gbrain")
 *   - anything else → an empty `tools` list (also used for unparseable bodies)
 *
 * The response `id` echoes the request `id` so a JSON-RPC client can correlate
 * the reply with its request; it falls back to `1` when the request has none.
 *
 * @returns the `/mcp` URL of the listening server plus a `close` function that
 *   resolves once the server has stopped.
 *   The promise rejects if the server cannot bind or reports no TCP address.
 */
function startStubMcp(): Promise<{ url: string; close: () => Promise<void> }> {
  return new Promise((resolve, reject) => {
    const server = http.createServer((req, res) => {
      let body = '';
      req.on('data', (chunk) => (body += chunk));
      req.on('end', () => {
        // Pull out only the two JSON-RPC fields the stub cares about. Anything
        // that is not a JSON object leaves the defaults in place.
        let rpcMethod: unknown;
        let rpcId: unknown;
        try {
          const parsed: unknown = JSON.parse(body);
          if (parsed !== null && typeof parsed === 'object') {
            const rpc = parsed as { method?: unknown; id?: unknown };
            rpcMethod = rpc.method;
            rpcId = rpc.id;
          }
        } catch {
          // Unparseable body: deliberately answered with the default payload.
        }
        const id = rpcId ?? 1;

        const payload: unknown =
          rpcMethod === 'initialize'
            ? {
                jsonrpc: '2.0',
                id,
                result: {
                  protocolVersion: '2024-11-05',
                  capabilities: { tools: {} },
                  serverInfo: { name: 'gbrain', version: '0.32.3.0' },
                },
              }
            : { jsonrpc: '2.0', id, result: { tools: [] } };

        res.statusCode = 200;
        res.setHeader('Content-Type', 'text/event-stream');
        res.end(`event: message\ndata: ${JSON.stringify(payload)}\n\n`);
      });
    });

    // A bind failure surfaces as an 'error' event; route it into the promise
    // instead of leaving the caller waiting forever.
    server.once('error', reject);
    server.listen(0, '127.0.0.1', () => {
      server.off('error', reject);
      const addr = server.address();
      if (!addr || typeof addr === 'string') {
        // Throwing here would escape the promise as an uncaught exception.
        server.close();
        reject(new Error('no address'));
        return;
      }
      resolve({
        url: `http://127.0.0.1:${addr.port}/mcp`,
        close: () => new Promise<void>((done) => server.close(() => done())),
      });
    });
  });
}
| 78 | + |
/**
 * Installs a stand-in `gbrain` executable (a bash script) into `binDir`.
 *
 * Script behavior:
 *   - every invocation appends `gbrain <args>` to the call log
 *   - `--version` (no further args) prints a fixed version string
 *   - `init --pglite ...` writes a pglite config to `gbrainConfigPath` and
 *     prints a JSON status line
 *   - any other invocation exits 0 without output
 *
 * @param binDir directory that receives the script and its call log
 * @param gbrainConfigPath file the `init --pglite` branch writes the config to
 * @returns path of the call log, so the test can assert on invocations
 */
function makeFakeGbrain(binDir: string, gbrainConfigPath: string): string {
  const callLog = path.join(binDir, 'gbrain-calls.log');
  const scriptLines = [
    '#!/bin/bash',
    `echo "gbrain $@" >> "${callLog}"`,
    'case "$1 $2" in',
    '  "--version "*) echo "gbrain 0.33.1.0"; exit 0 ;;',
    `  "init --pglite") cat > "${gbrainConfigPath}" <<JSON`,
    '{"engine":"pglite","database_url":"pglite:///fake"}',
    // Heredoc terminator: must stay at column 0 of the generated script.
    'JSON',
    `    echo '{"status":"ok","engine":"pglite"}'`,
    '    exit 0 ;;',
    'esac',
    'exit 0',
    '', // trailing newline
  ];
  const executable = path.join(binDir, 'gbrain');
  fs.writeFileSync(executable, scriptLines.join('\n'), { mode: 0o755 });
  return callLog;
}
| 104 | + |
/**
 * Installs a stand-in `claude` executable (a bash script) into `binDir` that
 * covers the `mcp add` / `mcp list` / `mcp remove` / `mcp get` subcommands.
 *
 * Each invocation appends `claude <args>` to the call log. `mcp list` and
 * `mcp get` print canned output describing an HTTP server at
 * `http://stub/mcp`; every invocation exits 0.
 *
 * @param binDir directory that receives the script and its call log
 * @returns path of the call log, so the test can assert the MCP registration
 */
function makeFakeClaude(binDir: string): string {
  const callLog = path.join(binDir, 'claude-calls.log');
  const scriptLines = [
    '#!/bin/bash',
    `echo "claude $@" >> "${callLog}"`,
    'case "$1 $2" in',
    '  "mcp add") exit 0 ;;',
    '  "mcp list") echo "gbrain: http://stub/mcp (HTTP) — connected" ; exit 0 ;;',
    '  "mcp remove") exit 0 ;;',
    `  "mcp get") echo '{"type":"http","url":"http://stub/mcp"}'; exit 0 ;;`,
    'esac',
    'exit 0',
    '', // trailing newline
  ];
  const executable = path.join(binDir, 'claude');
  fs.writeFileSync(executable, scriptLines.join('\n'), { mode: 0o755 });
  return callLog;
}
| 124 | + |
/**
 * Installs a stand-in `gstack-gbrain-install` executable (a bash script) into
 * `binDir`, so the test never clones the gbrain repo or runs bun-link.
 *
 * The script only appends `install <args>` to the call log and exits 0; the
 * test cares solely that the skill invoked it on the Yes branch.
 *
 * @param binDir directory that receives the script and its call log
 * @returns path of the call log
 */
function makeFakeInstall(binDir: string): string {
  const callLog = path.join(binDir, 'install-calls.log');
  const scriptLines = [
    '#!/bin/bash',
    `echo "install $@" >> "${callLog}"`,
    'exit 0',
    '', // trailing newline
  ];
  const executable = path.join(binDir, 'gstack-gbrain-install');
  fs.writeFileSync(executable, scriptLines.join('\n'), { mode: 0o755 });
  return callLog;
}
| 140 | + |
describeE2E('/setup-gbrain Path 4 + Step 4.5 Yes → local PGLite for code', () => {
  /**
   * Drives the skill through Path 4 with the Step 4.5 "Yes" answer and checks
   * the side effects left behind by the fake CLIs:
   *   1. gstack-gbrain-install was invoked
   *   2. `gbrain init --pglite` was invoked
   *   3. the pglite config file was written
   *   4. `claude mcp add` was invoked with --transport http or --header
   *   5. the bearer token did not end up in CLAUDE.md
   * plus a sanity check that AskUserQuestion was called at all.
   *
   * NOTE(review): the value returned by runAgentSdkTest is not inspected, so
   * the Step 10 verdict row is NOT asserted here.
   */
  test('opt-in flow invokes install + gbrain init + remote MCP register', async () => {
    const FAKE_TOKEN = 'gbrain_fake_token_for_test';

    // Option-label heuristics for the AskUserQuestion auto-answer, in priority order.
    // NOTE(review): the "yes" pattern is tried first for EVERY question, so a
    // path-selection option mentioning "local ... PGLite" would win over the
    // remote-MCP option — confirm against the skill's actual option labels.
    const YES_LOCAL_PGLITE = /yes.*local|local.*pglite|code search|opt in/i;
    const REMOTE_MCP_PATH = /remote.*mcp|path 4/i;

    /** Call log of a fake CLI; a missing file means it was never invoked. */
    const readCallLog = (logPath: string): string =>
      fs.existsSync(logPath) ? fs.readFileSync(logPath, 'utf-8') : '';

    /** Puts one environment variable back to its pre-test state. */
    const restoreEnv = (key: string, value: string | undefined): void => {
      if (value === undefined) delete process.env[key];
      else process.env[key] = value;
    };

    const orig = {
      home: process.env.HOME,
      pathEnv: process.env.PATH,
      mcpToken: process.env.GBRAIN_MCP_TOKEN,
    };

    // Teardown steps are registered as each resource is created, so a failure
    // part-way through setup still closes the server and removes temp dirs.
    const cleanups: Array<() => void | Promise<void>> = [];

    try {
      const stubServer = await startStubMcp();
      cleanups.push(() => stubServer.close());

      const sandboxHome = fs.mkdtempSync(path.join(os.tmpdir(), 'path4-pglite-'));
      cleanups.push(() => fs.rmSync(sandboxHome, { recursive: true, force: true }));

      const fakeBinDir = fs.mkdtempSync(path.join(os.tmpdir(), 'path4-pglite-bin-'));
      cleanups.push(() => fs.rmSync(fakeBinDir, { recursive: true, force: true }));

      const gbrainConfigDir = path.join(sandboxHome, '.gbrain');
      fs.mkdirSync(gbrainConfigDir, { recursive: true });
      const gbrainConfigPath = path.join(gbrainConfigDir, 'config.json');
      const claudeLog = makeFakeClaude(fakeBinDir);
      const gbrainLog = makeFakeGbrain(fakeBinDir, gbrainConfigPath);
      const installLog = makeFakeInstall(fakeBinDir);

      const ORIGINAL_CLAUDE_MD = '# Test project\n';
      const claudeMdPath = path.join(sandboxHome, 'CLAUDE.md');
      fs.writeFileSync(claudeMdPath, ORIGINAL_CLAUDE_MD);

      const askLog: Array<{ question: string; choice: string }> = [];

      // Resolved BEFORE PATH is rewritten — presumably so the fake `claude`
      // below is not picked up as the agent binary; confirm against the helper.
      const binary = resolveClaudeBinary();

      // Fake CLIs take precedence, then the repo's own bin/, then the real PATH.
      const repoBinDir = path.join(path.resolve(import.meta.dir, '..'), 'bin');
      process.env.HOME = sandboxHome;
      process.env.PATH = `${fakeBinDir}:${repoBinDir}:${process.env.PATH ?? '/usr/bin:/bin:/opt/homebrew/bin'}`;
      process.env.GBRAIN_MCP_TOKEN = FAKE_TOKEN;

      const skillPath = path.resolve(
        import.meta.dir,
        '..',
        'setup-gbrain',
        'SKILL.md',
      );

      await runAgentSdkTest({
        systemPrompt: { type: 'preset', preset: 'claude_code' },
        userPrompt:
          `Read the skill file at ${skillPath} and follow Path 4 (Remote MCP). ` +
          `Use this MCP URL: ${stubServer.url}. ` +
          `The bearer token is already in GBRAIN_MCP_TOKEN. ` +
          `At Step 4.5 (the new "Want symbol-aware code search?" question), PICK YES — set up local PGLite for code. ` +
          `Then continue through Step 5a (MCP registration) → Step 10 (verdict). ` +
          `Do not skip Step 4.5; the test depends on the Yes path being taken.`,
        workingDirectory: sandboxHome,
        maxTurns: 25,
        allowedTools: ['Read', 'Grep', 'Glob', 'Bash', 'Write', 'Edit'],
        ...(binary ? { pathToClaudeCodeExecutable: binary } : {}),
        canUseTool: async (toolName, input) => {
          if (toolName !== 'AskUserQuestion') {
            return passThroughNonAskUserQuestion(toolName, input);
          }
          const qs = input.questions as Array<{
            question: string;
            options: Array<{ label: string }>;
          }>;
          const answers: Record<string, string> = {};
          for (const q of qs) {
            // Prefer the "yes / local PGLite" option, then the remote-MCP
            // path option, and finally whatever is listed first.
            const picked =
              q.options.find((o) => YES_LOCAL_PGLITE.test(o.label)) ??
              q.options.find((o) => REMOTE_MCP_PATH.test(o.label)) ??
              q.options[0];
            if (!picked) {
              throw new Error(`AskUserQuestion has no options: ${q.question}`);
            }
            answers[q.question] = picked.label;
            askLog.push({ question: q.question, choice: picked.label });
          }
          return {
            behavior: 'allow',
            updatedInput: { questions: qs, answers },
          };
        },
      });

      // Assertion 1: gstack-gbrain-install was invoked (Step 4.5 Yes branch).
      expect(readCallLog(installLog).length).toBeGreaterThan(0);

      // Assertion 2: `gbrain init --pglite` was invoked.
      expect(readCallLog(gbrainLog)).toMatch(/gbrain init --pglite/);

      // Assertion 3: local PGLite config was written.
      expect(fs.existsSync(gbrainConfigPath)).toBe(true);
      const cfg = JSON.parse(fs.readFileSync(gbrainConfigPath, 'utf-8')) as {
        engine: string;
      };
      expect(cfg.engine).toBe('pglite');

      // Assertion 4: claude mcp add --transport http was invoked (remote MCP register).
      expect(readCallLog(claudeLog)).toMatch(
        /mcp add.*--transport http|mcp add.*--header/,
      );

      // Assertion 5: token never leaked to CLAUDE.md
      const finalClaudeMd = fs.readFileSync(claudeMdPath, 'utf-8');
      expect(finalClaudeMd).not.toContain(FAKE_TOKEN);

      // Soft assertion: AskUserQuestion was actually called (sanity)
      expect(askLog.length).toBeGreaterThan(0);
    } finally {
      restoreEnv('HOME', orig.home);
      restoreEnv('PATH', orig.pathEnv);
      restoreEnv('GBRAIN_MCP_TOKEN', orig.mcpToken);
      // Tear down in reverse creation order: temp dirs first, server last.
      for (const cleanup of cleanups.reverse()) {
        await cleanup();
      }
    }
  }, 300_000);
});
0 commit comments