6666 cat > "$ROOT/readiness.mjs" <<'NODE'
6767 import { spawnSync } from 'node:child_process';
6868 import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
69- import { join } from 'node:path';
69+ import { basename, join } from 'node:path';
7070
7171 const root = process.env.ROOT;
7272 const payloads = JSON.parse(readFileSync(process.env.TASK_PAYLOADS, 'utf8'));
@@ -112,6 +112,39 @@ jobs:
112112 return null;
113113 }
114114
/**
 * Gather every JSON payload found in a command result.
 *
 * Both `result.stdout` and `result.stderr` are passed through `jsonish`
 * (defined elsewhere in this file; presumably returns the parsed value or a
 * falsy value when the text is not JSON — confirm against its definition).
 * Each successfully parsed top-level value is collected. When that value has
 * an array `content` property, the `text` field of every item is parsed the
 * same way and any truthy result is collected right after its parent.
 *
 * @param {{stdout?: string, stderr?: string}|null|undefined} result
 *   Command result; a missing result yields an empty array.
 * @returns {Array<*>} Parsed payloads, stdout first, then stderr.
 */
function payloadsFrom(result) {
  const collected = [];
  const streams = [result?.stdout, result?.stderr];
  for (const raw of streams) {
    const top = jsonish(raw);
    if (top) {
      collected.push(top);
      const items = top.content;
      // Only array-shaped `content` is unwrapped; anything else is ignored.
      const inner = Array.isArray(items)
        ? items.map((part) => jsonish(part?.text)).filter(Boolean)
        : [];
      collected.push(...inner);
    }
  }
  return collected;
}
131+
/**
 * Pick the first project name advertised by any of the given command results.
 *
 * Results are scanned in argument order, and each result's payloads in the
 * order `payloadsFrom` returns them. For every payload, a non-empty string
 * `project` wins immediately; otherwise, if `projects` is an array, each
 * entry is checked in order as a bare string, then its `project` field, then
 * its `name` field.
 *
 * @param {...object} results Command results to inspect.
 * @returns {string} The first non-empty project name, or '' when none exists.
 */
function projectFrom(...results) {
  const isName = (candidate) => typeof candidate === 'string' && candidate !== '';
  for (const result of results) {
    for (const payload of payloadsFrom(result)) {
      if (isName(payload.project)) return payload.project;
      if (!Array.isArray(payload.projects)) continue;
      for (const entry of payload.projects) {
        // Same precedence as a chain of ifs: entry, entry.project, entry.name.
        const found = [entry, entry?.project, entry?.name].find(isName);
        if (found !== undefined) return found;
      }
    }
  }
  return '';
}
147+
115148 function add(spans, file, start = 1, end = start) {
116149 if (typeof file !== 'string' || !file) return;
117150 const clean = file.replace(/^\/+/, '');
@@ -138,6 +171,10 @@ jobs:
138171 while ((m = re.exec(String(s || ''))) !== null) add(spans, m[1], m[2] || 1, m[2] || 1);
139172 }
140173
/**
 * Escape a value so it can be embedded in a regular expression and match
 * itself literally.
 *
 * The value is stringified first. Only `null` and `undefined` collapse to the
 * empty string; other falsy inputs such as `0` or `false` keep their string
 * form instead of being silently erased.
 *
 * @param {*} value Text (or any value) to embed in a pattern.
 * @returns {string} The stringified value with every regex metacharacter
 *   (`. * + ? ^ $ { } ( ) | [ ] \`) prefixed by a backslash.
 */
function regexLiteral(value) {
  // `$&` in the replacement re-inserts the matched metacharacter after the backslash.
  return String(value ?? '').replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
177+
141178 const reports = [];
142179 let ready = true;
143180 for (const [i, task] of tasks.entries()) {
@@ -148,11 +185,20 @@ jobs:
148185 const query = queryOf(task.problem_statement);
149186 const firstTerm = query.split(/\s+/)[0] || 'import';
150187 const setup = run(cbm, ['--version'], { env, timeoutMs: 60_000 });
151- const indexRun = run(cbm, ['cli', 'index_repository', JSON.stringify({ repo_path: '.' })], { ...opts, timeoutMs: 45 * 60 * 1000 });
188+ const indexRun = run(cbm, ['cli', 'index_repository', JSON.stringify({ repo_path: task.repo_checkout_path })], { ...opts, timeoutMs: 45 * 60 * 1000 });
152189 const listProjects = firstOk('list_projects', [['cli', 'list_projects'], ['cli', 'list_projects', '{}']], opts);
153- const graphSchema = firstOk('get_graph_schema', [['cli', 'get_graph_schema'], ['cli', 'get_graph_schema', '{}']], opts);
154- const graphSearch = firstOk('search_graph', [['cli', 'search_graph', JSON.stringify({ label: 'Function', limit: 25 })], ['cli', 'search_graph', JSON.stringify({ label: 'Class', limit: 25 })]], opts);
155- const codeSearch = firstOk('search_code', [['cli', 'search_code', JSON.stringify({ pattern: query, limit: 25 })], ['cli', 'search_code', JSON.stringify({ pattern: firstTerm, limit: 25 })], ['cli', 'search_code', JSON.stringify({ pattern: '.', limit: 25 })]], opts);
190+ const project = projectFrom(indexRun, listProjects) || basename(task.repo_checkout_path);
191+ const graphSchema = firstOk('get_graph_schema', [['cli', 'get_graph_schema', JSON.stringify({ project })]], opts);
192+ const graphSearch = firstOk('search_graph', [
193+ ['cli', 'search_graph', JSON.stringify({ project, query, limit: 25 })],
194+ ['cli', 'search_graph', JSON.stringify({ project, label: 'Function', query: firstTerm, limit: 25 })],
195+ ['cli', 'search_graph', JSON.stringify({ project, label: 'Function', name_pattern: `.*${regexLiteral(firstTerm)}.*`, limit: 25 })]
196+ ], opts);
197+ const codeSearch = firstOk('search_code', [
198+ ['cli', 'search_code', JSON.stringify({ project, pattern: query, mode: 'compact', limit: 25 })],
199+ ['cli', 'search_code', JSON.stringify({ project, pattern: firstTerm, mode: 'compact', limit: 25 })],
200+ ['cli', 'search_code', JSON.stringify({ project, pattern: '.', mode: 'compact', limit: 25 })]
201+ ], opts);
156202
157203 const spans = new Map();
158204 for (const r of [listProjects, graphSchema, graphSearch, codeSearch]) for (const text of [r.stdout, r.stderr]) { const parsed = jsonish(text); if (parsed) walk(parsed, spans); textPaths(text, spans); }
@@ -168,7 +214,7 @@ jobs:
168214 if (!existsSync(join(official, 'contextbench', 'evaluate.py'))) run('git', ['clone', '--depth', '1', 'https://github.com/EuniAI/ContextBench.git', official], { timeoutMs: 10 * 60 * 1000 });
169215 const scorePath = join(dir, 'official-score.jsonl');
170216 const evaluator = run('python', ['-m', 'contextbench.evaluate', '--gold', goldPath, '--pred', predictionPath, '--cache', join(dir, 'repo-cache'), '--out', scorePath], { cwd: official, timeoutMs: 20 * 60 * 1000 });
171- const report = { taskId: task.instance_id, repo: task.repo, setupStatus: setup.status, indexStatus: indexRun.status, toolCallable: [listProjects, graphSchema, graphSearch, codeSearch].some((r) => r.status === 0), nonEmptyPrediction: predFiles.length > 0 && Object.keys(predSpans).length > 0, officialEvaluatorStatus: evaluator.status, officialEvaluatorScoreable: evaluator.status === 0 && existsSync(scorePath), costs: { setupDurationMs: setup.durationMs, indexDurationMs: indexRun.durationMs, queryDurationMs: listProjects.durationMs + graphSchema.durationMs + graphSearch.durationMs + codeSearch.durationMs, evaluatorDurationMs: evaluator.durationMs }, laneIsolation: { allowedTool: 'codebase-memory-mcp', observedCommands: [setup.command, indexRun.command, listProjects.command, graphSchema.command, graphSearch.command, codeSearch.command], observedCwds: [setup.cwd, indexRun.cwd, listProjects.cwd, graphSchema.cwd, graphSearch.cwd, codeSearch.cwd], disallowedNativeReadSearchUsedForPrediction: false }, query, predFiles, commands: { setup, indexRun, listProjects, graphSchema, graphSearch, codeSearch, gold, evaluator } };
217+ const report = { taskId: task.instance_id, repo: task.repo, project, setupStatus: setup.status, indexStatus: indexRun.status, toolCallable: [listProjects, graphSchema, graphSearch, codeSearch].some((r) => r.status === 0), nonEmptyPrediction: predFiles.length > 0 && Object.keys(predSpans).length > 0, officialEvaluatorStatus: evaluator.status, officialEvaluatorScoreable: evaluator.status === 0 && existsSync(scorePath), costs: { setupDurationMs: setup.durationMs, indexDurationMs: indexRun.durationMs, queryDurationMs: listProjects.durationMs + graphSchema.durationMs + graphSearch.durationMs + codeSearch.durationMs, evaluatorDurationMs: evaluator.durationMs }, laneIsolation: { allowedTool: 'codebase-memory-mcp', observedCommands: [setup.command, indexRun.command, listProjects.command, graphSchema.command, graphSearch.command, codeSearch.command], observedCwds: [setup.cwd, indexRun.cwd, listProjects.cwd, graphSchema.cwd, graphSearch.cwd, codeSearch.cwd], disallowedNativeReadSearchUsedForPrediction: false }, query, predFiles, commands: { setup, indexRun, listProjects, graphSchema, graphSearch, codeSearch, gold, evaluator } };
172218 writeFileSync(join(dir, 'readiness-report.json'), JSON.stringify(report, null, 2));
173219 reports.push(report);
174220 if (!(report.setupStatus === 0 && report.indexStatus === 0 && report.toolCallable && report.nonEmptyPrediction && report.officialEvaluatorScoreable)) ready = false;
0 commit comments