Skip to content

Commit b1c3f8d

Browse files
committed
Harden five-lane ContextBench scorer gates
1 parent 3ecc4d5 commit b1c3f8d

1 file changed

Lines changed: 19 additions & 3 deletions

File tree

scripts/contextbench-score-five-lane-selections.mjs

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ const targetTaskId = process.env.TARGET_TASK_ID || 'SWE-Bench-Pro__go__maintenan
66
const root = process.env.ROOT || '/tmp/contextbench-five-lane-score';
77
const officialContextBench = process.env.OFFICIAL_CONTEXTBENCH;
88
const selectionsPath = process.env.SELECTIONS_PATH || 'scripts/contextbench-five-lane-selections.json';
9+
const requiredLanes = ['raw-native', 'codebase-context', 'codebase-memory-mcp', 'grepai', 'codegraphcontext'];
910
const payloads = JSON.parse(readFileSync(process.env.TASK_PAYLOADS, 'utf8'));
1011
const task = payloads.tasks.find((candidate) => candidate.instance_id === targetTaskId);
1112
if (!task) throw new Error(`target task ${targetTaskId} missing from payloads`);
@@ -15,6 +16,20 @@ const selections = JSON.parse(readFileSync(selectionsPath, 'utf8'));
1516
const laneSelections = selections.laneSelections || [];
1617
if (laneSelections.length === 0) throw new Error('selection file has no laneSelections');
1718

19+
// Lane-set gate: count how many selections were supplied per lane, then reject
// the selection file unless every lane in `requiredLanes` appears, no lane
// appears more than once, and no lane outside `requiredLanes` appears.
const laneCounts = new Map();
for (const selection of laneSelections) {
  // A selection may identify its lane as `lane_id` or `lane`. Entries carrying
  // neither are counted under a visible placeholder: keyed by `undefined` they
  // would still fail the gate, but `[undefined].join(',')` yields '' and the
  // error message would misreport them as "none".
  const lane = selection.lane_id || selection.lane || '<unnamed>';
  laneCounts.set(lane, (laneCounts.get(lane) || 0) + 1);
}

// Required lanes that no selection mentioned.
const missingLanes = requiredLanes.filter((lane) => !laneCounts.has(lane));
// Lanes that were supplied more than once.
const duplicateLanes = [...laneCounts.entries()]
  .filter(([, count]) => count > 1)
  .map(([lane]) => lane);
// Supplied lanes that are not part of the required set.
const extraLanes = [...laneCounts.keys()].filter((lane) => !requiredLanes.includes(lane));

// Any one of the three problems aborts the run; the message lists all three
// categories so the selection file can be corrected in a single pass.
if (missingLanes.length > 0 || duplicateLanes.length > 0 || extraLanes.length > 0) {
  throw new Error(
    `lane selection set invalid: missing=${missingLanes.join(',') || 'none'} duplicate=${duplicateLanes.join(',') || 'none'} extra=${extraLanes.join(',') || 'none'}`,
  );
}
32+
1833
function run(cmd, args, opts = {}) {
1934
const started = Date.now();
2035
const r = spawnSync(cmd, args, {
@@ -97,7 +112,7 @@ for (const selection of laneSelections) {
97112
for (const span of spans) addSpan(spanMap, span.file, span.start, span.end);
98113
const predFiles = [...new Set([...files, ...spans.map((span) => String(span.file || '').replaceAll('\\', '/').replace(/^\.\//, ''))])].filter(Boolean);
99114
const predSpans = Object.fromEntries(spanMap.entries());
100-
const nonEmptyPrediction = predFiles.length > 0 && spans.length > 0;
115+
const nonEmptyPrediction = predFiles.length > 0 || spans.length > 0;
101116
const readiness = selection.readiness || {};
102117
const rowBase = {
103118
lane_id: lane,
@@ -172,7 +187,8 @@ const summary = {
172187
createdAt: new Date().toISOString(),
173188
attemptedRows: rows.length,
174189
scoreableRows: scoreableRows.length,
175-
requiredCompetitors: 5,
190+
requiredCompetitors: requiredLanes.length,
191+
requiredLanes,
176192
setupIndexCostReportedSeparately: true,
177193
model: selections.model || 'gpt-5.4-mini-high',
178194
predictionSource: selections.predictionSource || 'gpt-5.4-mini-high subagent selections over real lane candidate packs',
@@ -186,4 +202,4 @@ writeFileSync(join(root, 'summary.json'), JSON.stringify(summary, null, 2));
186202
console.log('CONTEXTBENCH_FIVE_LANE_SCORE_JSON_START');
187203
console.log(JSON.stringify(summary, null, 2));
188204
console.log('CONTEXTBENCH_FIVE_LANE_SCORE_JSON_END');
189-
if (scoreableRows.length !== rows.length || scoreableRows.length < 5) process.exitCode = 1;
205+
if (scoreableRows.length !== rows.length || scoreableRows.length < requiredLanes.length) process.exitCode = 1;

0 commit comments

Comments
 (0)