Skip to content

Commit 2048fee

Browse files
committed
Extract CBM ContextBench candidates from script file
1 parent 3d6ac27 commit 2048fee

1 file changed

Lines changed: 188 additions & 0 deletions

File tree

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
import { spawnSync } from 'node:child_process';
2+
import { basename, join, relative } from 'node:path';
3+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
4+
5+
// Output location: everything this script writes goes under "<ROOT>/pack".
const root = process.env.ROOT;
if (!root) {
  throw new Error('ROOT environment variable is not set');
}
const outDir = join(root, 'pack');
mkdirSync(outDir, { recursive: true });

// Task payloads are read from the JSON file named by TASK_PAYLOADS.
const payloadsPath = process.env.TASK_PAYLOADS;
if (!payloadsPath) {
  throw new Error('TASK_PAYLOADS environment variable is not set');
}
const payloads = JSON.parse(readFileSync(payloadsPath, 'utf8'));

// The script works on the entry at index 2 of `payloads.tasks`.
const task = payloads?.tasks?.[2];
if (!task) {
  throw new Error(`No task at index 2 in ${payloadsPath}`);
}

// Absolute checkout path of the repository the task refers to; every later
// path normalisation and CBM call is relative to it.
const repo = task.repo_checkout_path;
if (typeof repo !== 'string' || repo === '') {
  throw new Error('Task at index 2 has no repo_checkout_path');
}
12+
13+
/**
 * Runs a command synchronously and returns a plain record describing the outcome.
 *
 * @param {string} cmd - Executable to spawn.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {{cwd?: string, env?: object, timeoutMs?: number}} [opts] - Optional
 *   working directory (defaults to the current one), environment (defaults to
 *   `process.env`) and timeout in milliseconds (defaults to 600000).
 * @returns {{command: string, cwd: string, status: (number|null), signal: (string|null),
 *   error: (string|null), durationMs: number, stdout: string, stderr: string}}
 *   The space-joined command line, the working directory used, the exit
 *   status/signal reported by `spawnSync`, the spawn error message (or null),
 *   the wall-clock duration, and the captured output (empty strings when none).
 */
function run(cmd, args, opts = {}) {
  const workingDir = opts.cwd || process.cwd();
  const t0 = Date.now();

  const child = spawnSync(cmd, args, {
    cwd: workingDir,
    env: opts.env || process.env,
    encoding: 'utf8',
    timeout: opts.timeoutMs || 600000,
    // Output is buffered in memory; allow up to 128 MiB per stream.
    maxBuffer: 128 * 1024 * 1024,
  });

  const { status, signal, error, stdout, stderr } = child;
  return {
    command: [cmd, ...args].join(' '),
    cwd: workingDir,
    status,
    signal,
    error: error?.message || null,
    durationMs: Date.now() - t0,
    stdout: stdout || '',
    stderr: stderr || '',
  };
}
33+
34+
/**
 * Builds a short keyword query from free-form text.
 *
 * Markdown/punctuation characters are replaced by spaces, the text is split on
 * whitespace, and tokens that are shorter than four characters or that appear
 * (case-insensitively) in the stop list are dropped. The first 18 surviving
 * tokens are joined with single spaces, keeping their original casing.
 *
 * @param {*} text - Source text; falsy values are treated as an empty string.
 * @returns {string} Space-separated keywords, or '' when nothing survives.
 */
function queryOf(text) {
  const ignored = new Set([
    'that', 'this', 'with', 'from', 'when', 'then', 'into', 'should',
    'would', 'could', 'there', 'where', 'which', 'about', 'after',
    'before', 'have', 'will', 'been', 'than', 'also', 'only', 'some',
    'using', 'must', 'method', 'methods', 'function', 'interface',
  ]);

  const tokens = String(text || '')
    .replace(/[`*_#>\[\](){},.;:!?/\\]/g, ' ')
    .split(/\s+/);

  const kept = [];
  for (const token of tokens) {
    if (token.length < 4 || ignored.has(token.toLowerCase())) continue;
    kept.push(token);
    // Cap the query at 18 keywords.
    if (kept.length === 18) break;
  }
  return kept.join(' ');
}
48+
49+
/**
 * Best-effort JSON extraction from text that may be wrapped in extra output.
 *
 * The trimmed text is parsed as-is first. If that fails, the span from the
 * first '{' to the last '}' is tried, and then the span from the first '[' to
 * the last ']'. Parse failures are expected here and deliberately ignored.
 *
 * @param {*} s - Raw text; falsy values are treated as an empty string.
 * @returns {*} The first successfully parsed value, or null when the text is
 *   empty or nothing parses.
 */
function jsonish(s) {
  const text = String(s || '').trim();
  if (text === '') return null;

  // Wraps JSON.parse so a failure is reported as data instead of an exception.
  const attempt = (candidate) => {
    try {
      return { ok: true, value: JSON.parse(candidate) };
    } catch {
      return { ok: false, value: null };
    }
  };

  const whole = attempt(text);
  if (whole.ok) return whole.value;

  const delimiters = [['{', '}'], ['[', ']']];
  for (const [open, close] of delimiters) {
    const from = text.indexOf(open);
    const to = text.lastIndexOf(close);
    if (from < 0 || to <= from) continue;
    const inner = attempt(text.slice(from, to + 1));
    if (inner.ok) return inner.value;
  }
  return null;
}
66+
67+
/**
 * Normalises a reported file reference to a repo-relative, forward-slash path.
 *
 * A leading "file://" is stripped, backslashes become forward slashes, paths
 * that start with the `repo` checkout path are made relative to it, and
 * leading "/" or "./" prefixes are removed.
 *
 * @param {*} file - Path or file URI as reported by a tool; falsy values are
 *   treated as empty.
 * @returns {string} The repo-relative path, or '' when the reference is empty,
 *   still looks like a URL, contains a ".." path segment, points into a
 *   "tmp-contextbench" location, or does not exist under `repo`.
 */
function norm(file) {
  let f = String(file || '').replace(/^file:\/\//, '').replaceAll('\\', '/');
  if (!f) return '';
  if (f.startsWith(repo)) f = relative(repo, f).replaceAll('\\', '/');
  f = f.replace(/^\/+/, '').replace(/^\.\//, '');
  if (!f) return '';

  // Only a whole ".." segment can climb out of the repo. A plain substring
  // test would also throw away legitimate names such as "pages/[...slug].tsx".
  const escapesRepo = f.split('/').includes('..');
  const isScratch = f.startsWith('tmp-contextbench') || f.includes('/tmp-contextbench');
  if (f.includes('://') || escapesRepo || isScratch) return '';

  if (!existsSync(join(repo, f))) return '';
  return f;
}
84+
85+
/**
 * Appends one candidate location to `locs` if its file passes `norm`.
 *
 * @param {Array<object>} locs - Accumulator that receives `{file, start, end, source}`.
 * @param {*} file - Raw file reference; entries that `norm` rejects are skipped.
 * @param {*} [start=1] - First line; non-numeric or zero values become 1 and
 *   the result is never below 1.
 * @param {*} [end=start] - Last line; non-numeric or zero values fall back to
 *   the start line and the result is never below the start line.
 * @param {string} [source='codebase-memory-mcp'] - Label stored on the entry.
 * @returns {void}
 */
function add(locs, file, start = 1, end = start, source = 'codebase-memory-mcp') {
  const path = norm(file);
  if (path) {
    const firstLine = Math.max(1, Number(start) || 1);
    const lastLine = Math.max(firstLine, Number(end) || firstLine);
    locs.push({ file: path, start: firstLine, end: lastLine, source });
  }
}
91+
92+
/**
 * Recursively visits a parsed JSON value and offers every object in it to `add`.
 *
 * Arrays are traversed element by element. For each non-array object the file
 * reference and line range are looked up under several alternative key names,
 * passed to `add` (which ignores entries without a usable file), and then all
 * of the object's values are visited in turn.
 *
 * @param {*} value - Any parsed JSON value; primitives and null are ignored.
 * @param {Array<object>} locs - Accumulator handed through to `add`.
 * @returns {void}
 */
function walk(value, locs) {
  if (value === null || typeof value !== 'object') return;

  if (Array.isArray(value)) {
    value.forEach((entry) => walk(entry, locs));
    return;
  }

  // Returns the first truthy property among `keys`, else `fallback`.
  const firstTruthy = (keys, fallback) => {
    for (const key of keys) {
      if (value[key]) return value[key];
    }
    return fallback;
  };

  const file = firstTruthy(
    ['file', 'path', 'file_path', 'relative_path', 'filename', 'source_path'],
    value.uri,
  );
  const startLine = firstTruthy(['start_line', 'startLine', 'line', 'line_number', 'start'], 1);
  const endLine = firstTruthy(['end_line', 'endLine', 'end', 'line'], 1);
  add(locs, file, startLine, endLine);

  Object.values(value).forEach((child) => walk(child, locs));
}
106+
107+
/**
 * Harvests file locations from one blob of tool output and appends them to `locs`.
 *
 * Two passes run over the same text: if `jsonish` can parse it, the parsed
 * value is handed to `walk`; independently, the raw text is scanned for
 * path-like tokens ending in a known source extension, optionally followed by
 * ":", "#L" or " line " and a line number. Each textual hit is passed to `add`
 * with that line as both start and end (1 when no number is present).
 *
 * @param {*} text - Raw stdout/stderr text; falsy values are treated as empty.
 * @param {Array<object>} locs - Accumulator that receives the locations.
 * @returns {void}
 */
function collect(text, locs) {
  const parsed = jsonish(text);
  if (parsed) walk(parsed, locs);

  // The (?![A-Za-z_]) guard forces the whole extension to be consumed. Without
  // it the alternation settles for the first prefix that fits, so "App.tsx:12"
  // was captured as "App.ts" with no line number (likewise .jsx, .cc, .cpp and
  // .hpp), and unrelated names such as "x.css" matched as "x.c".
  const re = /([A-Za-z0-9_.\/-]+\.(?:go|mod|sum|json|yml|yaml|md|ts|tsx|js|jsx|py|rs|java|c|cc|cpp|h|hpp))(?![A-Za-z_])(?::|#L|\s+line\s+)?(\d+)?/g;
  for (const m of String(text || '').matchAll(re)) {
    add(locs, m[1], m[2] || 1, m[2] || 1);
  }
}
114+
115+
/**
 * Returns the locations from `locs` with duplicates removed, in first-seen order.
 *
 * Two locations are duplicates when their `file`, `start` and `end` all match;
 * other fields (such as `source`) are not compared, and the first occurrence wins.
 *
 * @param {Array<{file: string, start: number, end: number}>} locs - Locations to filter.
 * @param {number} [max=100] - Maximum number of entries to return; zero or a
 *   negative value yields an empty array.
 * @returns {Array<object>} A new array holding at most `max` of the original objects.
 */
function uniq(locs, max = 100) {
  const seen = new Set();
  const out = [];
  for (const loc of locs) {
    // Check the limit before pushing so that a limit of 0 really returns nothing.
    if (out.length >= max) break;
    const key = `${loc.file}:${loc.start}:${loc.end}`;
    if (seen.has(key)) continue;
    seen.add(key);
    out.push(loc);
  }
  return out;
}
128+
129+
// Keyword query derived from the task's problem statement.
const query = queryOf(task.problem_statement);

// Child processes inherit the current environment, plus a cache directory
// inside the pack folder and CBM_DIAGNOSTICS set to '1'.
const env = { ...process.env, CBM_CACHE_DIR: join(outDir, 'cbm-cache'), CBM_DIAGNOSTICS: '1' };

const cbmBin = process.env.CBM_BIN;

// Invokes `<CBM_BIN> cli <tool> <json-params>` from inside the repo checkout.
const cbmCli = (tool, params, timeoutMs) =>
  run(cbmBin, ['cli', tool, JSON.stringify(params)], { cwd: repo, env, timeoutMs });

// 1. Version probe (60 s timeout) and 2. repository indexing (45 min timeout).
const setup = run(cbmBin, ['--version'], { env, timeoutMs: 60000 });
const index = cbmCli('index_repository', { repo_path: repo }, 2700000);

// Use the project name reported by the index step (stdout first, then stderr);
// fall back to the checkout directory's base name when none is reported.
const indexReport = jsonish(index.stdout) || jsonish(index.stderr) || {};
const project = indexReport.project || basename(repo);

// 3. Graph search with the full query, then 4. code search with only the
// first query keyword ('.' when the query is empty). Both have a 2 min timeout.
const graph = cbmCli('search_graph', { project, query, limit: 60 }, 120000);
const [firstKeyword] = query.split(/\s+/);
const code = cbmCli(
  'search_code',
  { project, pattern: firstKeyword || '.', mode: 'compact', limit: 60 },
  120000,
);
148+
149+
// Write one "<name>.json" record per command into the pack directory, with
// stdout and stderr each cut down via slice(0, 120000) to keep the files bounded.
for (const [name, record] of [['setup', setup], ['index', index], ['graph', graph], ['code', code]]) {
  const clipped = {
    ...record,
    stdout: record.stdout.slice(0, 120000),
    stderr: record.stderr.slice(0, 120000),
  };
  writeFileSync(join(outDir, `${name}.json`), JSON.stringify(clipped, null, 2));
}
155+
156+
// Gather locations from both search commands (stdout before stderr, graph
// search before code search), then drop duplicates and cap the list.
const locs = [];
[graph.stdout, graph.stderr, code.stdout, code.stderr].forEach((text) => {
  collect(text, locs);
});
const candidates = uniq(locs);
162+
// Maps a command record to 'completed' on exit status 0, else to the given failure label.
const statusLabel = (record, failureLabel) => (record.status === 0 ? 'completed' : failureLabel);

// Summary of how the codebase-memory-mcp lane behaved for this task.
const lane = {
  lane: 'codebase-memory-mcp',
  setupStatus: statusLabel(setup, 'setup_failed'),
  indexStatus: statusLabel(index, 'index_failed'),
  // The tool counts as callable when at least one of the two searches exited cleanly.
  toolCallable: graph.status === 0 || code.status === 0,
  candidateCount: candidates.length,
  setupIndex: {
    setupDurationMs: setup.durationMs,
    indexDurationMs: index.durationMs,
    queryDurationMs: graph.durationMs + code.durationMs,
  },
  candidates,
};

const pack = {
  task: {
    instance_id: task.instance_id,
    repo: task.repo,
    base_commit: task.base_commit,
    problem_statement: task.problem_statement,
  },
  query,
  lane,
};

// Persist the pack and echo the same JSON to stdout between marker lines.
const packJson = JSON.stringify(pack, null, 2);
writeFileSync(join(outDir, 'cbm-candidate-pack.json'), packJson);
console.log('CBM_CANDIDATE_PACK_JSON_START');
console.log(packJson);
console.log('CBM_CANDIDATE_PACK_JSON_END');

0 commit comments

Comments
 (0)