@@ -211,8 +211,26 @@ jobs:
211211 return results.flatMap((result) => Array.isArray(result?.attempts) && result.attempts.length ? result.attempts : [result]);
212212 }
213213
/**
 * Decide whether one per-task readiness report counts as fully functional.
 *
 * A row qualifies when setup and indexing both exited with status 0 and the
 * tool-callable, non-empty-prediction and evaluator-scoreable flags are all
 * truthy.
 *
 * @param {object} report - Per-task readiness report.
 * @returns {*} `false` when either status is not exactly 0; otherwise the
 *   short-circuit result of the three flags (the first falsy flag, or the
 *   value of `officialEvaluatorScoreable` when all are truthy).
 */
function rowReady(report) {
  const {
    setupStatus,
    indexStatus,
    toolCallable,
    nonEmptyPrediction,
    officialEvaluatorScoreable,
  } = report;

  // Both process stages must have exited cleanly before the flags matter.
  const stagesSucceeded = setupStatus === 0 && indexStatus === 0;
  if (!stagesSucceeded) {
    return false;
  }
  return toolCallable && nonEmptyPrediction && officialEvaluatorScoreable;
}
217+
/**
 * Project a full readiness report down to the fields that explain why a row
 * was not ready.
 *
 * Every listed key is always present on the result, in the order given below;
 * a field missing from `report` is carried over as `undefined`.
 *
 * @param {object} report - Per-task readiness report.
 * @returns {object} A new plain object holding only the diagnostic fields.
 */
function shortFailure(report) {
  // Order matters: it determines the key order of the serialized blocker entry.
  const diagnosticFields = [
    'taskId',
    'repo',
    'setupStatus',
    'indexStatus',
    'indexSignal',
    'indexError',
    'toolCallable',
    'nonEmptyPrediction',
    'officialEvaluatorScoreable',
    'indexStderrExcerpt',
  ];
  return Object.fromEntries(diagnosticFields.map((field) => [field, report[field]]));
}
232+
214233 const reports = [];
215- let ready = true;
216234 for (const [i, task] of tasks.entries()) {
217235 const dir = join(outRoot, `${i + 1}-${task.instance_id}`);
218236 mkdirSync(dir, { recursive: true });
@@ -253,15 +271,16 @@ jobs:
253271 const scorePath = join(dir, 'official-score.jsonl');
254272 const evaluator = run('python', ['-m', 'contextbench.evaluate', '--gold', goldPath, '--pred', predictionPath, '--cache', join(dir, 'repo-cache'), '--out', scorePath], { cwd: official, timeoutMs: 20 * 60 * 1000 });
255273 const scoreText = existsSync(scorePath) ? readFileSync(scorePath, 'utf8').trim() : '';
256- const report = { taskId: task.instance_id, repo: task.repo, project, setupStatus: setup.status, indexStatus: indexRun.status, toolCallable: [graphSearch, codeSearch].some((r) => r.status === 0), nonEmptyPrediction: predFiles.length > 0 && Object.keys(predSpans).length > 0, officialEvaluatorStatus: evaluator.status, officialEvaluatorScoreable: evaluator.status === 0 && scoreText.length > 0, costs: { setupDurationMs: setup.durationMs, indexDurationMs: indexRun.durationMs, queryDurationMs: listProjects.durationMs + graphSchema.durationMs + graphSearch.durationMs + codeSearch.durationMs, evaluatorDurationMs: evaluator.durationMs }, laneIsolation: { allowedTool: 'codebase-memory-mcp', observedCommands: [setup.command, indexRun.command, listProjects.command, graphSchema.command, graphSearch.command, codeSearch.command], observedCwds: [setup.cwd, indexRun.cwd, listProjects.cwd, graphSchema.cwd, graphSearch.cwd, codeSearch.cwd], disallowedNativeReadSearchUsedForPrediction: false }, query, predFiles, selectedBecause: { graphSearch: graphSearch.selectedBecause, codeSearch: codeSearch.selectedBecause }, commands: { setup, indexRun, listProjects, graphSchema, graphSearch, codeSearch, gold, evaluator } };
274+ const report = { taskId: task.instance_id, repo: task.repo, project, setupStatus: setup.status, indexStatus: indexRun.status, indexSignal: indexRun.signal, indexError: indexRun.error, indexStderrExcerpt: indexRun.stderr.slice(0, 1000), toolCallable: [graphSearch, codeSearch].some((r) => r.status === 0), nonEmptyPrediction: predFiles.length > 0 && Object.keys(predSpans).length > 0, officialEvaluatorStatus: evaluator.status, officialEvaluatorScoreable: evaluator.status === 0 && scoreText.length > 0, costs: { setupDurationMs: setup.durationMs, indexDurationMs: indexRun.durationMs, queryDurationMs: listProjects.durationMs + graphSchema.durationMs + graphSearch.durationMs + codeSearch.durationMs, evaluatorDurationMs: evaluator.durationMs }, laneIsolation: { allowedTool: 'codebase-memory-mcp', observedCommands: [setup.command, indexRun.command, listProjects.command, graphSchema.command, graphSearch.command, codeSearch.command], observedCwds: [setup.cwd, indexRun.cwd, listProjects.cwd, graphSchema.cwd, graphSearch.cwd, codeSearch.cwd], disallowedNativeReadSearchUsedForPrediction: false }, query, predFiles, selectedBecause: { graphSearch: graphSearch.selectedBecause, codeSearch: codeSearch.selectedBecause }, commands: { setup, indexRun, listProjects, graphSchema, graphSearch, codeSearch, gold, evaluator } };
257275 writeFileSync(join(dir, 'readiness-report.json'), JSON.stringify(report, null, 2));
258276 reports.push(report);
259- if (!(report.setupStatus === 0 && report.indexStatus === 0 && report.toolCallable && report.nonEmptyPrediction && report.officialEvaluatorScoreable)) ready = false;
260277 }
261- const summary = { createdAt: new Date().toISOString(), lane: 'codebase-memory-mcp', ready, attemptedRows: reports.length, scoreableRows: reports.filter((r) => r.officialEvaluatorScoreable).length, nonEmptyPredictionRows: reports.filter((r) => r.nonEmptyPrediction).length, setupIndexCostReportedSeparately: true, reports };
// Split the per-task reports in one pass: rows that pass rowReady are kept
// whole, every other row is reduced to its diagnostic fields as a blocker.
const functionalReports = [];
const blockers = [];
for (const report of reports) {
  if (rowReady(report)) {
    functionalReports.push(report);
  } else {
    blockers.push(shortFailure(report));
  }
}

// Lane-level summary. `ready` is true as soon as one row is functional;
// rows that are not functional are listed under `blockers`.
const summary = {
  createdAt: new Date().toISOString(),
  lane: 'codebase-memory-mcp',
  ready: functionalReports.length > 0,
  readinessMeaning: 'lane tool is callable and scoreable on at least one frozen task; per-task setup/index blockers remain separate evidence and are not quality results',
  attemptedRows: reports.length,
  functionalRows: functionalReports.length,
  blockerRows: blockers.length,
  scoreableRows: reports.filter((row) => row.officialEvaluatorScoreable).length,
  nonEmptyPredictionRows: reports.filter((row) => row.nonEmptyPrediction).length,
  setupIndexCostReportedSeparately: true,
  blockers,
  reports,
};

// Persist the full summary next to the per-task output directories.
writeFileSync(
  join(outRoot, 'lane-readiness-codebase-memory-first3.json'),
  JSON.stringify(summary, null, 2)
);
263- console.log(JSON.stringify({ ready: summary.ready, attemptedRows: summary.attemptedRows, scoreableRows: summary.scoreableRows, nonEmptyPredictionRows: summary.nonEmptyPredictionRows, reports: summary.reports.map((r) => ({ taskId: r.taskId, setupStatus: r.setupStatus, indexStatus: r.indexStatus, toolCallable: r.toolCallable, nonEmptyPrediction: r.nonEmptyPrediction, officialEvaluatorScoreable: r.officialEvaluatorScoreable, predFiles: r.predFiles.length, selectedBecause: r.selectedBecause, costs: r.costs })) }, null, 2));
264- if (!ready) process.exitCode = 1;
// Print a condensed view of the summary to the job log: lane-level counters,
// the blocker list, and a slimmed-down row per report (prediction files are
// shown as a count, and the raw command records are left out).
console.log(
  JSON.stringify(
    {
      ready: summary.ready,
      readinessMeaning: summary.readinessMeaning,
      attemptedRows: summary.attemptedRows,
      functionalRows: summary.functionalRows,
      blockerRows: summary.blockerRows,
      scoreableRows: summary.scoreableRows,
      nonEmptyPredictionRows: summary.nonEmptyPredictionRows,
      blockers: summary.blockers,
      reports: summary.reports.map(
        ({
          taskId,
          setupStatus,
          indexStatus,
          indexSignal,
          indexError,
          toolCallable,
          nonEmptyPrediction,
          officialEvaluatorScoreable,
          predFiles,
          selectedBecause,
          costs,
        }) => ({
          taskId,
          setupStatus,
          indexStatus,
          indexSignal,
          indexError,
          toolCallable,
          nonEmptyPrediction,
          officialEvaluatorScoreable,
          predFiles: predFiles.length,
          selectedBecause,
          costs,
        })
      ),
    },
    null,
    2
  )
);

// Mark the process as failed when the lane is not ready.
if (!summary.ready) {
  process.exitCode = 1;
}
265284 NODE
266285 node "$ROOT/readiness.mjs"
267286 - name: Upload readiness artifacts
0 commit comments