Skip to content

Commit f75be38

Browse files
committed
Fix codebase-memory readiness CI evaluator setup
1 parent ec2baa1 commit f75be38

1 file changed

Lines changed: 12 additions & 5 deletions

File tree

.github/workflows/contextbench-ci-recovery.yml

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,9 @@ jobs:
4646
- name: Install repo dependencies
4747
run: pnpm install --frozen-lockfile
4848

49+
- name: Install official evaluator dependencies
50+
run: python -m pip install tree-sitter tree-sitter-languages
51+
4952
- name: Validate frozen ContextBench fixtures
5053
run: node scripts/contextbench-runner.mjs --validate-fixtures
5154

@@ -156,11 +159,11 @@ jobs:
156159
};
157160
const setup = run(cbmBin, ['--version'], { env, timeoutMs: 60_000 });
158161
const indexRun = run(cbmBin, ['cli', 'index_repository', JSON.stringify({ repo_path: task.repo_checkout_path })], { env, timeoutMs: 45 * 60 * 1000 });
159-
const listProjects = run(cbmBin, ['cli', '--raw', 'list_projects', '{}'], { env, timeoutMs: 120_000 });
160-
const graphSchema = run(cbmBin, ['cli', '--raw', 'get_graph_schema', '{}'], { env, timeoutMs: 120_000 });
161-
const graphSearch = run(cbmBin, ['cli', '--raw', 'search_graph', JSON.stringify({ label: 'Function', limit: 25 })], { env, timeoutMs: 120_000 });
162+
const listProjects = run(cbmBin, ['cli', 'list_projects'], { env, timeoutMs: 120_000 });
163+
const graphSchema = run(cbmBin, ['cli', 'get_graph_schema'], { env, timeoutMs: 120_000 });
164+
const graphSearch = run(cbmBin, ['cli', 'search_graph', JSON.stringify({ label: 'Function', limit: 25 })], { env, timeoutMs: 120_000 });
162165
const query = makeQuery(task.problem_statement);
163-
const codeSearch = run(cbmBin, ['cli', '--raw', 'search_code', JSON.stringify({ query, output: 'compact', limit: 25 })], { env, timeoutMs: 120_000 });
166+
const codeSearch = run(cbmBin, ['cli', 'search_code', JSON.stringify({ query, output: 'compact', limit: 25 })], { env, timeoutMs: 120_000 });
164167
165168
const parsed = [codeSearch.stdout, graphSearch.stdout, graphSchema.stdout, listProjects.stdout]
166169
.map(parseJsonish)
@@ -186,9 +189,13 @@ jobs:
186189
writeFileSync(join(runDir, 'index.stdout.log'), indexRun.stdout);
187190
writeFileSync(join(runDir, 'index.stderr.log'), indexRun.stderr);
188191
writeFileSync(join(runDir, 'list-projects.stdout.log'), listProjects.stdout);
192+
writeFileSync(join(runDir, 'list-projects.stderr.log'), listProjects.stderr);
189193
writeFileSync(join(runDir, 'graph-schema.stdout.log'), graphSchema.stdout);
194+
writeFileSync(join(runDir, 'graph-schema.stderr.log'), graphSchema.stderr);
190195
writeFileSync(join(runDir, 'graph-search.stdout.log'), graphSearch.stdout);
196+
writeFileSync(join(runDir, 'graph-search.stderr.log'), graphSearch.stderr);
191197
writeFileSync(join(runDir, 'code-search.stdout.log'), codeSearch.stdout);
198+
writeFileSync(join(runDir, 'code-search.stderr.log'), codeSearch.stderr);
192199
193200
const goldPath = join(runDir, 'gold.json');
194201
const gold = run('node', ['scripts/contextbench-select-slice.mjs', '--write-gold', '--task-id', task.instance_id, '--out', goldPath, '--payloads', payloadPath], { timeoutMs: 10 * 60 * 1000 });
@@ -204,7 +211,7 @@ jobs:
204211
allowedTool: 'codebase-memory-mcp',
205212
observedCommands: [setup.command, indexRun.command, listProjects.command, graphSchema.command, graphSearch.command, codeSearch.command],
206213
disallowedNativeReadSearchUsedForPrediction: false,
207-
note: 'Prediction spans are derived only from codebase-memory-mcp CLI JSON/stdout outputs.'
214+
note: 'Prediction spans are derived only from codebase-memory-mcp CLI outputs.'
208215
};
209216
const report = {
210217
taskId: task.instance_id,

0 commit comments

Comments (0)