Skip to content

Commit ed46c22

Browse files
committed
Fix codebase-memory readiness project queries
1 parent e924a09 commit ed46c22

1 file changed

Lines changed: 52 additions & 6 deletions

File tree

.github/workflows/contextbench-cbm-readiness-retry.yml

Lines changed: 52 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,7 @@ jobs:
6666
cat > "$ROOT/readiness.mjs" <<'NODE'
6767
import { spawnSync } from 'node:child_process';
6868
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
69-
import { join } from 'node:path';
69+
import { basename, join } from 'node:path';
7070
7171
const root = process.env.ROOT;
7272
const payloads = JSON.parse(readFileSync(process.env.TASK_PAYLOADS, 'utf8'));
@@ -112,6 +112,39 @@ jobs:
112112
return null;
113113
}
114114
115+
/**
 * Collect every payload that `jsonish` can extract from a command result.
 *
 * The result's `stdout` is examined first, then its `stderr`. Each value that
 * `jsonish` returns as truthy is kept. When such a value has an array-valued
 * `content` property, the `text` field of each item is passed through
 * `jsonish` as well and any truthy outcome is appended after its parent.
 *
 * @param {{stdout?: *, stderr?: *} | null | undefined} result - Command result to inspect; may be missing.
 * @returns {Array<*>} Extracted payloads in discovery order (empty when nothing parsed).
 */
function payloadsFrom(result) {
  const found = [];

  // Parse one piece of text, remember it when usable, and hand it back.
  const collect = (text) => {
    const value = jsonish(text);
    if (value) found.push(value);
    return value;
  };

  for (const stream of [result?.stdout, result?.stderr]) {
    const top = collect(stream);
    const parts = top ? top.content : undefined;
    if (!Array.isArray(parts)) continue;
    // Only one level of nesting is unwrapped: content[i].text.
    for (let i = 0; i < parts.length; i += 1) {
      collect(parts[i]?.text);
    }
  }

  return found;
}
131+
132+
/**
 * Return the first project identifier found in any of the given command results.
 *
 * Results are scanned in argument order, and each result's payloads in the
 * order `payloadsFrom` yields them. For every payload, a non-empty string
 * `project` wins immediately; otherwise, if `projects` is an array, its
 * entries are checked in order and the first usable one is returned. An entry
 * is usable when it is a non-empty string, or an object with a non-empty
 * string `project` or `name` (checked in that order).
 *
 * Note that the first match is returned as-is; nothing here checks that it
 * belongs to any particular repository.
 *
 * @param {...*} results - Command results to search.
 * @returns {string} The first project identifier found, or `''` when none is present.
 */
function projectFrom(...results) {
  // A usable identifier is a string with at least one character.
  const isName = (value) => typeof value === 'string' && value.length > 0;

  // Resolve a single `projects` entry to an identifier, or '' when it has none.
  const fromEntry = (entry) => {
    if (isName(entry)) return entry;
    if (isName(entry?.project)) return entry.project;
    if (isName(entry?.name)) return entry.name;
    return '';
  };

  for (const result of results) {
    for (const payload of payloadsFrom(result)) {
      if (isName(payload.project)) return payload.project;
      if (!Array.isArray(payload.projects)) continue;
      for (const entry of payload.projects) {
        const name = fromEntry(entry);
        if (name) return name;
      }
    }
  }

  return '';
}
147+
115148
function add(spans, file, start = 1, end = start) {
116149
if (typeof file !== 'string' || !file) return;
117150
const clean = file.replace(/^\/+/, '');
@@ -138,6 +171,10 @@ jobs:
138171
while ((m = re.exec(String(s || ''))) !== null) add(spans, m[1], m[2] || 1, m[2] || 1);
139172
}
140173
174+
/**
 * Escape a value so it can be embedded in a regular expression as literal text.
 *
 * `null` and `undefined` become the empty string. Every other value is
 * converted with `String()` first, so falsy-but-meaningful inputs such as `0`
 * or `false` are preserved instead of being silently dropped.
 *
 * @param {*} value - Text (or any stringifiable value) to escape.
 * @returns {string} The stringified value with each regex metacharacter
 *   (`. * + ? ^ $ { } ( ) | [ ] \`) prefixed by a backslash.
 */
function regexLiteral(value) {
  // `??` (not `||`) so only nullish inputs fall back to the empty string.
  return String(value ?? '').replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
177+
141178
const reports = [];
142179
let ready = true;
143180
for (const [i, task] of tasks.entries()) {
@@ -148,11 +185,20 @@ jobs:
148185
const query = queryOf(task.problem_statement);
149186
const firstTerm = query.split(/\s+/)[0] || 'import';
150187
const setup = run(cbm, ['--version'], { env, timeoutMs: 60_000 });
151-
const indexRun = run(cbm, ['cli', 'index_repository', JSON.stringify({ repo_path: '.' })], { ...opts, timeoutMs: 45 * 60 * 1000 });
188+
const indexRun = run(cbm, ['cli', 'index_repository', JSON.stringify({ repo_path: task.repo_checkout_path })], { ...opts, timeoutMs: 45 * 60 * 1000 });
152189
const listProjects = firstOk('list_projects', [['cli', 'list_projects'], ['cli', 'list_projects', '{}']], opts);
153-
const graphSchema = firstOk('get_graph_schema', [['cli', 'get_graph_schema'], ['cli', 'get_graph_schema', '{}']], opts);
154-
const graphSearch = firstOk('search_graph', [['cli', 'search_graph', JSON.stringify({ label: 'Function', limit: 25 })], ['cli', 'search_graph', JSON.stringify({ label: 'Class', limit: 25 })]], opts);
155-
const codeSearch = firstOk('search_code', [['cli', 'search_code', JSON.stringify({ pattern: query, limit: 25 })], ['cli', 'search_code', JSON.stringify({ pattern: firstTerm, limit: 25 })], ['cli', 'search_code', JSON.stringify({ pattern: '.', limit: 25 })]], opts);
190+
const project = projectFrom(indexRun, listProjects) || basename(task.repo_checkout_path);
191+
const graphSchema = firstOk('get_graph_schema', [['cli', 'get_graph_schema', JSON.stringify({ project })]], opts);
192+
const graphSearch = firstOk('search_graph', [
193+
['cli', 'search_graph', JSON.stringify({ project, query, limit: 25 })],
194+
['cli', 'search_graph', JSON.stringify({ project, label: 'Function', query: firstTerm, limit: 25 })],
195+
['cli', 'search_graph', JSON.stringify({ project, label: 'Function', name_pattern: `.*${regexLiteral(firstTerm)}.*`, limit: 25 })]
196+
], opts);
197+
const codeSearch = firstOk('search_code', [
198+
['cli', 'search_code', JSON.stringify({ project, pattern: query, mode: 'compact', limit: 25 })],
199+
['cli', 'search_code', JSON.stringify({ project, pattern: firstTerm, mode: 'compact', limit: 25 })],
200+
['cli', 'search_code', JSON.stringify({ project, pattern: '.', mode: 'compact', limit: 25 })]
201+
], opts);
156202
157203
const spans = new Map();
158204
for (const r of [listProjects, graphSchema, graphSearch, codeSearch]) for (const text of [r.stdout, r.stderr]) { const parsed = jsonish(text); if (parsed) walk(parsed, spans); textPaths(text, spans); }
@@ -168,7 +214,7 @@ jobs:
168214
if (!existsSync(join(official, 'contextbench', 'evaluate.py'))) run('git', ['clone', '--depth', '1', 'https://github.com/EuniAI/ContextBench.git', official], { timeoutMs: 10 * 60 * 1000 });
169215
const scorePath = join(dir, 'official-score.jsonl');
170216
const evaluator = run('python', ['-m', 'contextbench.evaluate', '--gold', goldPath, '--pred', predictionPath, '--cache', join(dir, 'repo-cache'), '--out', scorePath], { cwd: official, timeoutMs: 20 * 60 * 1000 });
171-
const report = { taskId: task.instance_id, repo: task.repo, setupStatus: setup.status, indexStatus: indexRun.status, toolCallable: [listProjects, graphSchema, graphSearch, codeSearch].some((r) => r.status === 0), nonEmptyPrediction: predFiles.length > 0 && Object.keys(predSpans).length > 0, officialEvaluatorStatus: evaluator.status, officialEvaluatorScoreable: evaluator.status === 0 && existsSync(scorePath), costs: { setupDurationMs: setup.durationMs, indexDurationMs: indexRun.durationMs, queryDurationMs: listProjects.durationMs + graphSchema.durationMs + graphSearch.durationMs + codeSearch.durationMs, evaluatorDurationMs: evaluator.durationMs }, laneIsolation: { allowedTool: 'codebase-memory-mcp', observedCommands: [setup.command, indexRun.command, listProjects.command, graphSchema.command, graphSearch.command, codeSearch.command], observedCwds: [setup.cwd, indexRun.cwd, listProjects.cwd, graphSchema.cwd, graphSearch.cwd, codeSearch.cwd], disallowedNativeReadSearchUsedForPrediction: false }, query, predFiles, commands: { setup, indexRun, listProjects, graphSchema, graphSearch, codeSearch, gold, evaluator } };
217+
const report = { taskId: task.instance_id, repo: task.repo, project, setupStatus: setup.status, indexStatus: indexRun.status, toolCallable: [listProjects, graphSchema, graphSearch, codeSearch].some((r) => r.status === 0), nonEmptyPrediction: predFiles.length > 0 && Object.keys(predSpans).length > 0, officialEvaluatorStatus: evaluator.status, officialEvaluatorScoreable: evaluator.status === 0 && existsSync(scorePath), costs: { setupDurationMs: setup.durationMs, indexDurationMs: indexRun.durationMs, queryDurationMs: listProjects.durationMs + graphSchema.durationMs + graphSearch.durationMs + codeSearch.durationMs, evaluatorDurationMs: evaluator.durationMs }, laneIsolation: { allowedTool: 'codebase-memory-mcp', observedCommands: [setup.command, indexRun.command, listProjects.command, graphSchema.command, graphSearch.command, codeSearch.command], observedCwds: [setup.cwd, indexRun.cwd, listProjects.cwd, graphSchema.cwd, graphSearch.cwd, codeSearch.cwd], disallowedNativeReadSearchUsedForPrediction: false }, query, predFiles, commands: { setup, indexRun, listProjects, graphSchema, graphSearch, codeSearch, gold, evaluator } };
172218
writeFileSync(join(dir, 'readiness-report.json'), JSON.stringify(report, null, 2));
173219
reports.push(report);
174220
if (!(report.setupStatus === 0 && report.indexStatus === 0 && report.toolCallable && report.nonEmptyPrediction && report.officialEvaluatorScoreable)) ready = false;

0 commit comments

Comments
 (0)