Skip to content

Commit 345c030

Browse files
committed
Add relevant CodeGraphContext readiness pack
1 parent b8159dc commit 345c030

1 file changed

Lines changed: 199 additions & 0 deletions

File tree

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
import { spawnSync } from 'node:child_process';
import { existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSync } from 'node:fs';
import { basename, join, relative } from 'node:path';

// Benchmark instance to build the pack for; TARGET_TASK_ID overrides the default.
const targetTaskId = process.env.TARGET_TASK_ID || 'SWE-Bench-Pro__go__maintenance__bugfix__4df06349';
// Working root; every artifact this script writes goes under `<root>/pack`.
const root = process.env.ROOT || '/tmp/contextbench-cgc-relevant';
const outDir = join(root, 'pack');
mkdirSync(outDir, { recursive: true });

// Validate the inputs up front so a misconfigured run fails with an actionable
// message instead of a TypeError raised deep inside fs or Array code.
if (!process.env.TASK_PAYLOADS) {
  throw new Error('TASK_PAYLOADS must be set to the path of the task payloads JSON file');
}
const payloads = JSON.parse(readFileSync(process.env.TASK_PAYLOADS, 'utf8'));
if (!Array.isArray(payloads?.tasks)) {
  throw new Error(`${process.env.TASK_PAYLOADS} does not contain a "tasks" array`);
}
const task = payloads.tasks.find((candidate) => candidate?.instance_id === targetTaskId);
if (!task) throw new Error(`target task ${targetTaskId} missing from payloads`);
// Checkout directory of the task's repository; it is later walked and used as cwd.
const repo = task.repo_checkout_path;
if (typeof repo !== 'string' || repo === '') {
  throw new Error(`target task ${targetTaskId} has no repo_checkout_path`);
}

/**
 * Run a command synchronously and summarise its outcome as a plain record.
 *
 * @param {string} cmd - Executable to spawn.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {{cwd?: string, env?: object, timeoutMs?: number}} [opts] - Optional
 *   working directory (defaults to the current one), environment (defaults to
 *   `process.env`) and timeout in milliseconds (defaults to 300000).
 * @returns {{command: string, cwd: string, status: (number|null), signal: *,
 *   error: (string|null), durationMs: number, stdout: string, stderr: string}}
 *   `status` is null when the process produced no numeric exit code; `error`
 *   carries the message of the error reported by `spawnSync`, if any.
 */
function run(cmd, args, opts = {}) {
  // Resolve the working directory once so the spawned process and the
  // returned record are guaranteed to agree.
  const cwd = opts.cwd || process.cwd();
  const started = Date.now();
  const r = spawnSync(cmd, args, {
    cwd,
    env: opts.env || process.env,
    encoding: 'utf8',
    timeout: opts.timeoutMs || 300000,
    maxBuffer: 128 * 1024 * 1024,
  });
  return {
    command: [cmd, ...args].join(' '),
    cwd,
    status: typeof r.status === 'number' ? r.status : null,
    signal: r.signal,
    error: r.error?.message || null,
    durationMs: Date.now() - started,
    // stdout/stderr are not strings when the spawn itself failed; normalise to ''.
    stdout: r.stdout || '',
    stderr: r.stderr || '',
  };
}

/**
 * Recursively list the files under `dir` as forward-slash relative paths.
 *
 * Directories named `.git`, `vendor` or `node_modules` (and files with those
 * exact names) are skipped at every depth.
 *
 * @param {string} dir - Directory to read.
 * @param {string} [prefix] - Relative path prepended to every entry of `dir`.
 * @param {string[]} [files] - Accumulator; found paths are appended to it.
 * @returns {string[]} The same `files` array that was passed in (or created).
 */
function collectRepoFiles(dir, prefix = '', files = []) {
  const ignored = new Set(['.git', 'vendor', 'node_modules']);
  for (const entry of readdirSync(dir, { withFileTypes: true })) {
    if (ignored.has(entry.name)) continue;
    const rel = prefix ? `${prefix}/${entry.name}` : entry.name;
    if (entry.isDirectory()) {
      collectRepoFiles(join(dir, entry.name), rel, files);
    } else {
      files.push(rel.replaceAll('\\', '/'));
    }
  }
  return files;
}

// Every file found under the repository checkout, as forward-slash relative paths.
const repoFiles = collectRepoFiles(repo);
// Same paths as a set, for constant-time membership checks.
const repoFileSet = new Set(repoFiles);
// File name -> all repository paths that end in that name.
const basenameMap = new Map();
for (const file of repoFiles) {
  const name = basename(file);
  const list = basenameMap.get(name);
  if (list) {
    list.push(file);
  } else {
    basenameMap.set(name, [file]);
  }
}

/**
 * Remove ANSI CSI escape sequences from `text`.
 *
 * Besides colour/style sequences (`ESC [ ... m`) this also drops other CSI
 * sequences such as erase-line (`ESC [ 2 K`) or cursor show/hide
 * (`ESC [ ? 25 h`), which would otherwise stay embedded in captured output.
 *
 * @param {*} text - Value to clean; any falsy value is treated as ''.
 * @returns {string} The text without CSI escape sequences.
 */
function stripAnsi(text) {
  // CSI grammar: ESC '[' parameter bytes (0x30-0x3F), intermediate bytes
  // (0x20-0x2F), then exactly one final byte (0x40-0x7E).
  return String(text || '').replace(/\u001b\[[0-?]*[ -/]*[@-~]/g, '');
}

/**
 * Report whether `path` names an existing regular file.
 * Any stat failure (missing entry, a file used as a directory, ...) counts as "no".
 */
function isRegularFile(path) {
  try {
    return statSync(path).isFile();
  } catch {
    // Deliberate: an unreadable or non-existent path is simply not a file here.
    return false;
  }
}

/**
 * Map a path-like string from tool output to a repository-relative file path.
 *
 * The input is stripped of escape sequences, a leading `file://`, backslashes,
 * the repository prefix, leading slashes and a leading `./`. The result is
 * accepted when it is a known repository file, when it is a regular file on
 * disk under the repository, or when exactly one repository file has the same
 * base name.
 *
 * @param {*} file - Candidate path (absolute, relative, or a `file://` URI).
 * @returns {string} The repository-relative path, or '' when nothing matches.
 */
function norm(file) {
  let f = stripAnsi(file).replace(/^file:\/\//, '').replaceAll('\\', '/').trim();
  if (!f) return '';
  const repoNorm = repo.replaceAll('\\', '/');
  if (f.startsWith(repoNorm)) f = relative(repo, f).replaceAll('\\', '/');
  f = f.replace(/^\/+/, '').replace(/^\.\//, '');
  if (!f || f.includes('://')) return '';
  // Reject parent-directory traversal by path segment, so that a file whose
  // name merely contains ".." (e.g. "a..b.go") is still accepted.
  if (f.split('/').includes('..')) return '';
  if (repoFileSet.has(f)) return f;
  // Only regular files qualify: a directory that happens to share the name
  // must not be reported as a file location.
  if (isRegularFile(join(repo, f))) return f;
  const byName = basenameMap.get(basename(f));
  if (byName?.length === 1) return byName[0];
  return '';
}

/**
 * Append a location record to `locs` if `file` resolves via `norm`.
 *
 * @param {object[]} locs - Accumulator that receives `{file, start, end, source}`.
 * @param {*} file - Path-like value; the call is a no-op when `norm` returns ''.
 * @param {*} [start] - First line; non-numeric or values below 1 become 1.
 * @param {*} [end] - Last line; defaults to `start` and is never below the
 *   clamped start line.
 * @param {string} [source] - Label stored on the record.
 * @returns {void}
 */
function add(locs, file, start = 1, end = start, source = 'codegraphcontext') {
  const clean = norm(file);
  if (!clean) return;
  const first = Math.max(1, Number(start) || 1);
  const last = Math.max(first, Number(end) || first);
  locs.push({ file: clean, start: first, end: last, source });
}

/**
 * Parse `candidate` as JSON.
 * @returns {{ok: boolean, value: *}} `ok` is false when the text is not valid JSON.
 */
function tryParseJson(candidate) {
  try {
    return { ok: true, value: JSON.parse(candidate) };
  } catch {
    // Expected for noisy tool output; the caller moves on to the next candidate.
    return { ok: false, value: null };
  }
}

/**
 * Best-effort JSON extraction from command output.
 *
 * Tries, in order: the whole cleaned text, the span from the first `[` to the
 * last `]`, and the span from the first `{` to the last `}`. The bracketed
 * fallbacks recover a payload that is surrounded by log lines.
 *
 * @param {*} text - Raw output; escape sequences are removed via `stripAnsi`.
 * @returns {*} The first successfully parsed value, or null when none parses.
 */
function parseJson(text) {
  const cleaned = stripAnsi(text).trim();
  if (!cleaned) return null;
  const whole = tryParseJson(cleaned);
  if (whole.ok) return whole.value;
  for (const [open, close] of [['[', ']'], ['{', '}']]) {
    const start = cleaned.indexOf(open);
    const end = cleaned.lastIndexOf(close);
    if (start < 0 || end <= start) continue;
    const embedded = tryParseJson(cleaned.slice(start, end + 1));
    if (embedded.ok) return embedded.value;
  }
  return null;
}

/**
 * Recursively visit a parsed JSON value and record a location for every object.
 *
 * For each non-array object the first truthy of `path`, `file`, `file_path`,
 * `name`, `uri` is handed to `add` as the file, together with its line fields;
 * `add` decides whether that value is a usable path. All property values are
 * then visited in turn. Primitives and null are ignored.
 *
 * @param {*} value - Parsed JSON value to traverse.
 * @param {object[]} locs - Accumulator passed through to `add`.
 * @param {string} source - Label passed through to `add`.
 * @returns {void}
 */
function walk(value, locs, source) {
  if (!value || typeof value !== 'object') return;
  if (Array.isArray(value)) {
    for (const item of value) walk(item, locs, source);
    return;
  }
  const file = value.path || value.file || value.file_path || value.name || value.uri;
  const startLine = value.line_number || value.line || 1;
  // Use the same fallbacks as the start line so a record that only carries
  // `line` yields a consistent single-line range.
  const endLine = value.end_line || value.line_number || value.line || 1;
  add(locs, file, startLine, endLine, source);
  for (const item of Object.values(value)) walk(item, locs, source);
}

/**
 * Extract file locations from command output and append them to `locs`.
 *
 * Two passes are made over the text: any JSON payload found by `parseJson` is
 * traversed with `walk`, and the cleaned text is scanned for
 * `path.ext`, `path.ext:LINE`, `path.ext#LLINE` and `path.ext line LINE`
 * references with one of the known source-file extensions.
 *
 * @param {*} text - Raw stdout/stderr of a command.
 * @param {object[]} locs - Accumulator passed through to `add` / `walk`.
 * @param {string} source - Label stored with every location.
 * @returns {void}
 */
function collect(text, locs, source) {
  const parsed = parseJson(text);
  if (parsed) walk(parsed, locs, source);
  const cleaned = stripAnsi(text);
  // The negative lookahead after the extension is essential: alternation picks
  // the first alternative that lets the match succeed, so without a boundary
  // "App.jsx" would be captured as "App.js", "x.cpp" as "x.c" and "style.css"
  // as "style.c". A fresh literal per call keeps the /g state local.
  const fileLine = /([A-Za-z0-9_.\/-]+\.(?:go|mod|sum|json|yml|yaml|md|ts|tsx|js|jsx|py|rs|java|c|cc|cpp|h|hpp|rb|php|cs|kt|swift|vue|svelte))(?![A-Za-z0-9_])(?::|#L|\s+line\s+)?(\d+)?/g;
  for (const [, file, line = 1] of cleaned.matchAll(fileLine)) {
    add(locs, file, line, line, source);
  }
}

// Lower-case substrings that mark a file path as relevant to the target task.
const relevantTerms = ['metrics', 'prometheus', 'insights', 'auth', 'authorization', 'bearer', 'token', 'header', 'subsonic', 'root'];

/**
 * Decide whether a location is worth keeping.
 *
 * @param {{file: string}} loc - Location record.
 * @returns {boolean} True when the lower-cased path contains any relevant term.
 */
function isRelevant(loc) {
  const path = loc.file.toLowerCase();
  for (const term of relevantTerms) {
    if (path.includes(term)) return true;
  }
  return false;
}

/**
 * Keep the relevant locations, de-duplicated, in first-seen order.
 *
 * Two locations are duplicates when file, start and end all match; the first
 * occurrence wins, whatever its `source`.
 *
 * @param {Array<{file: string, start: number, end: number}>} locs - Candidates.
 * @param {number} [max] - Upper bound on the number of results; 0 yields [].
 * @returns {object[]} At most `max` of the original location objects.
 */
function uniq(locs, max = 80) {
  const seen = new Set();
  const out = [];
  for (const loc of locs) {
    // Check the cap before adding so that `max` is a true upper bound.
    if (out.length >= max) break;
    if (!isRelevant(loc)) continue;
    const key = `${loc.file}:${loc.start}:${loc.end}`;
    if (seen.has(key)) continue;
    seen.add(key);
    out.push(loc);
  }
  return out;
}

/**
 * Persist each command record as `codegraphcontext-command-<n>.json` in `outDir`.
 *
 * Files are numbered from 1 in array order and pretty-printed with two-space
 * indentation. Captured output is truncated so one chatty command cannot
 * produce an enormous artifact.
 *
 * @param {Array<{stdout: string, stderr: string}>} commands - Records as
 *   returned by `run`.
 * @returns {void}
 */
function writeCommands(commands) {
  // Maximum number of characters of stdout / stderr kept per command file.
  const maxCapturedChars = 200000;
  commands.forEach((command, i) => {
    const record = {
      ...command,
      stdout: command.stdout.slice(0, maxCapturedChars),
      stderr: command.stderr.slice(0, maxCapturedChars),
    };
    const target = join(outDir, `codegraphcontext-command-${i + 1}.json`);
    writeFileSync(target, JSON.stringify(record, null, 2));
  });
}

/**
 * Build the Python program that lists indexed File nodes whose lower-cased
 * path contains `term` and prints the rows as JSON.
 * `term` is interpolated into the query text, so it must come from trusted,
 * quote-free input (here: the hard-coded `relevantTerms`).
 */
function buildFileQueryScript(term) {
  return [
    'import json',
    'from codegraphcontext.core.database import DatabaseManager',
    'db=DatabaseManager()',
    'with db.get_driver().session() as s:',
    ` rows=s.run("MATCH (f:File) WHERE toLower(f.path) CONTAINS '${term}' RETURN f.path as path LIMIT 100").data()`,
    'print(json.dumps(rows))',
    'db.close_driver()',
  ].join('\n');
}

// Environment for every child process: the parent environment plus two
// variables set to 'kuzudb' (presumably selecting the cgc database backend —
// confirm against the CodeGraphContext documentation).
const env = { ...process.env, DEFAULT_DATABASE: 'kuzudb', CGC_RUNTIME_DB_TYPE: 'kuzudb' };
// Every command that was run, in order; written to disk and summarised later.
const commands = [];
// Raw (unfiltered, possibly duplicated) locations harvested from command output.
const locs = [];

// Step 1: check that the `cgc` CLI can be invoked at all.
const setup = run('cgc', ['--version'], { env, timeoutMs: 60000 });
commands.push(setup);

// Step 2: (re)index the repository checkout; allowed up to 20 minutes.
const index = run('cgc', ['index', '.', '--force'], { cwd: repo, env, timeoutMs: 1200000 });
commands.push(index);

// Step 3: query the index. Only these commands count towards queryDurationMs.
const queryCommands = [];
for (const term of relevantTerms) {
  const apiQuery = run('python', ['-c', buildFileQueryScript(term)], { cwd: repo, env, timeoutMs: 180000 });
  commands.push(apiQuery);
  queryCommands.push(apiQuery);
  // Both streams are scanned: file references may appear in either of them.
  collect(apiQuery.stdout, locs, `codegraphcontext-query-${term}`);
  collect(apiQuery.stderr, locs, `codegraphcontext-query-${term}`);
}
const searchPatterns = ['Metrics', 'Prometheus', 'Authorization', 'Bearer', 'Token', 'Subsonic', 'Header'];
for (const pattern of searchPatterns) {
  const found = run('cgc', ['find', 'pattern', pattern], { cwd: repo, env, timeoutMs: 180000 });
  commands.push(found);
  queryCommands.push(found);
  collect(found.stdout, locs, 'codegraphcontext-pattern');
  collect(found.stderr, locs, 'codegraphcontext-pattern');
}

// Persist the per-command records before summarising them.
writeCommands(commands);

// Relevant, de-duplicated locations (capped by uniq's default limit).
const candidates = uniq(locs);
const setupOk = setup.status === 0;
const indexOk = index.status === 0;
const hasCandidates = candidates.length > 0;

// Readiness summary for the CodeGraphContext lane.
const result = {
  lane: 'codegraphcontext',
  ready: setupOk && indexOk && hasCandidates,
  setupStatus: setupOk ? 'completed' : 'setup_failed',
  // Note: a successful index run that yields no candidates is also reported
  // as 'index_failed'.
  indexStatus: indexOk && hasCandidates ? 'completed' : 'index_failed',
  toolCallable: commands.some((command) => command.status === 0),
  candidateCount: candidates.length,
  setupIndex: {
    setupDurationMs: setup.durationMs,
    indexDurationMs: index.durationMs,
    queryDurationMs: queryCommands.reduce((sum, command) => sum + command.durationMs, 0),
  },
  // Command summaries only; the captured stdout/stderr stay in the per-command files.
  commands: commands.map(({ command, status, signal, error, durationMs }) => ({
    command,
    status,
    signal,
    error,
    durationMs,
  })),
  candidates,
};

// The pack bundles the task description with the readiness summary.
const pack = {
  createdAt: new Date().toISOString(),
  targetTaskId,
  task: {
    instance_id: task.instance_id,
    repo: task.repo,
    base_commit: task.base_commit,
    problem_statement: task.problem_statement,
  },
  readiness: result,
};
writeFileSync(join(outDir, 'codegraphcontext-candidate-pack.json'), JSON.stringify(pack, null, 2));
writeFileSync(join(outDir, 'codegraphcontext-readiness.json'), JSON.stringify(result, null, 2));

// Echo the pack between fixed marker lines so it can be located in the process output.
console.log('CONTEXTBENCH_CGC_RELEVANT_READINESS_JSON_START');
console.log(JSON.stringify(pack, null, 2));
console.log('CONTEXTBENCH_CGC_RELEVANT_READINESS_JSON_END');

// Signal "not ready" through the exit code without cutting off pending output.
if (!result.ready) process.exitCode = 1;

0 commit comments

Comments
 (0)