Skip to content

Commit 8e451c2

Browse files
author
Евгений Балякин
committed
enhance Python MCP context discovery
1 parent d0368bb commit 8e451c2

13 files changed

Lines changed: 365 additions & 41 deletions

src/core/context.ts

Lines changed: 55 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,23 @@ export async function buildContext(root: string, options: ContextOptions) {
3434
const files = (await walkSourceFiles(root, options.path ?? '.', { maxFiles: 1000 })).filter((file) => !options.changedOnly || changedFiles.has(file.relativePath));
3535
const ranked = [] as Array<{ path: string; score: number; reason: string; tokens: number; content: string; symbolId?: string }>;
3636
const fileRecords = [] as Array<{ path: string; source: string; imports: string[]; exported: Array<{ name: string; kind: string }>; symbolText: string; tokens: number; size: number; content: string; symbolId?: string }>;
37+
const omittedFiles = [] as Array<{ path: string; reason: string }>;
3738
for (const file of files) {
38-
const { text: source } = await readTextFileSafe(file.absolutePath, undefined, root);
39-
if (options.includeTests === false && /(^|\/)(test|tests|spec|__tests__)(\/|$)|\.(test|spec)\./.test(file.relativePath)) continue;
40-
const skeleton = await skeletonSourceAsync(root, file.relativePath, source, { budget: Math.min(2000, budget) });
41-
const content = JSON.stringify(skeleton, null, 2);
42-
fileRecords.push({ path: file.relativePath, source, imports: skeleton.symbols.filter((symbol) => symbol.kind === 'import').map((symbol) => symbol.source ?? symbol.signature), exported: skeleton.symbols.filter((symbol) => symbol.exported).map((symbol) => ({ name: symbol.qualifiedName, kind: symbol.kind })), symbolText: skeleton.symbols.map((symbol) => `${symbol.name} ${symbol.signature}`).join('\n'), tokens: skeleton.tokenEstimate, size: file.size, content, symbolId: skeleton.symbols.find((symbol) => symbol.kind !== 'import')?.symbolId });
39+
if (options.includeTests === false && /(^|\/)(test|tests|spec|__tests__)(\/|$)|\.(test|spec)\./.test(file.relativePath)) {
40+
omittedFiles.push({ path: file.relativePath, reason: 'includeTests:false' });
41+
continue;
42+
}
43+
try {
44+
const { text: source } = await readTextFileSafe(file.absolutePath, undefined, root);
45+
const skeleton = await skeletonSourceAsync(root, file.relativePath, source, { budget: Math.min(2000, budget) });
46+
const content = JSON.stringify(skeleton, null, 2);
47+
fileRecords.push({ path: file.relativePath, source, imports: skeleton.symbols.filter((symbol) => symbol.kind === 'import').map((symbol) => symbol.source ?? symbol.signature), exported: skeleton.symbols.filter((symbol) => symbol.exported).map((symbol) => ({ name: symbol.qualifiedName, kind: symbol.kind })), symbolText: skeleton.symbols.map((symbol) => `${symbol.name} ${symbol.signature}`).join('\n'), tokens: skeleton.tokenEstimate, size: file.size, content, symbolId: skeleton.symbols.find((symbol) => symbol.kind !== 'import')?.symbolId });
48+
} catch (error) {
49+
const message = error instanceof Error ? error.message : String(error);
50+
const reason = /File too large/i.test(message) ? 'file_too_large' : /Binary/i.test(message) ? 'unsupported_binary' : `parse_or_read_error:${message}`;
51+
omittedFiles.push({ path: file.relativePath, reason });
52+
warnings.push(`omitted:${file.relativePath}:${reason}`);
53+
}
4354
}
4455
const fileSet = new Set(fileRecords.map((record) => record.path));
4556
const graph = summarizeGraph(
@@ -100,15 +111,20 @@ export async function buildContext(root: string, options: ContextOptions) {
100111
}
101112
for (const item of ranked.filter((rankedItem) => rankedItem.symbolId)) {
102113
if (usedTokens >= budget) break;
103-
const body = await readCode(root, item.path, { symbolId: item.symbolId, maxBytes: Math.min(12000, (budget - usedTokens) * 4) });
104-
if (usedTokens + body.tokenEstimate > budget) continue;
105-
items.push({ type: 'symbol_body', path: item.path, symbolId: item.symbolId, score: Number(item.score.toFixed(2)), reason: 'top ranked symbol body within remaining budget', content: body.content });
106-
usedTokens += body.tokenEstimate;
114+
try {
115+
const body = await readCode(root, item.path, { symbolId: item.symbolId, maxBytes: Math.min(12000, (budget - usedTokens) * 4) });
116+
if (usedTokens + body.tokenEstimate > budget) continue;
117+
items.push({ type: 'symbol_body', path: item.path, symbolId: item.symbolId, score: Number(item.score.toFixed(2)), reason: 'top ranked symbol body within remaining budget', content: body.content });
118+
usedTokens += body.tokenEstimate;
119+
} catch (error) {
120+
warnings.push(`body_read_unavailable:${item.path}:${error instanceof Error ? error.message : String(error)}`);
121+
}
107122
}
108123
const nextReads = ranked.slice(0, 5).map((item) => ({ command: item.symbolId ? 'read' : 'skeleton', path: item.path, symbolId: item.symbolId }));
109124
const included = new Set(items.map((item) => `${item.type}:${item.path}:${item.symbolId ?? ''}`));
110-
const omitted = ranked.filter((item) => !included.has(`skeleton:${item.path}:`)).slice(0, 20).map((item) => ({ path: item.path, reason: 'budget' }));
111-
const data = { schemaVersion: SCHEMA_VERSION, goal: options.goal, budget, usedTokens, items, omitted, nextReads, warnings, truncated: omitted.length > 0, tokenEstimate: estimateTokens(JSON.stringify(items)) };
125+
const omitted = [...ranked.filter((item) => !included.has(`skeleton:${item.path}:`)).slice(0, 20).map((item) => ({ path: item.path, reason: 'budget' })), ...omittedFiles.slice(0, 20)];
126+
const testRelations = inferTestRelations(fileRecords, graph.edges).slice(0, 20);
127+
const data = { schemaVersion: SCHEMA_VERSION, goal: options.goal, budget, usedTokens, items, omitted, nextReads, testRelations, warnings, truncated: omitted.length > 0, tokenEstimate: estimateTokens(JSON.stringify(items)) };
112128
return data;
113129
}
114130

@@ -136,6 +152,33 @@ function isGeneratedOrVendor(filePath: string): boolean {
136152
return /(^|\/)(vendor|vendors|third_party|node_modules|dist|build|coverage)(\/|$)|(^|\/)[^/]+\.(min|generated|gen)\.[^.]+$|(^|\/)[^/]+_(pb|generated)\.[^.]+$/.test(filePath);
137153
}
138154

155+
/**
 * Pairs test files with the source files they most likely exercise.
 *
 * Two signals are evaluated for every test file, in this order:
 *  1. `import` - a graph edge leaving the test whose `resolved` target is a
 *     non-test file present in `fileRecords`;
 *  2. `name_proximity` - the test's base name (with a leading `test_`/`spec_`
 *     and a trailing `_test`/`.test`/`.spec` plus extension removed) equals,
 *     contains, or is contained in a source file's extension-less base name.
 *     The comparison is case-insensitive.
 *
 * @param fileRecords Indexed files; only `path` is read here.
 * @param edges Import edges; edges without a truthy `resolved` are ignored.
 * @returns One relation per (test, source) pair. When both signals match, the
 *          `import` relation is kept because it is recorded first.
 */
function inferTestRelations(fileRecords: Array<{ path: string; imports: string[] }>, edges: Array<{ from: string; resolved?: string }>) {
  const testPaths: string[] = [];
  const sourceFiles = new Set<string>();
  for (const record of fileRecords) {
    if (isTestPath(record.path)) testPaths.push(record.path);
    else sourceFiles.add(record.path);
  }

  // Source base names do not depend on the test being inspected, so derive them once.
  // Sources whose base name is empty can never match and are dropped up front.
  const sourceBases = [...sourceFiles]
    .map((source) => ({ source, base: path.posix.basename(source).replace(/\.[^.]+$/, '').toLowerCase() }))
    .filter((entry) => entry.base.length > 0);

  // Index the usable edges by importing file so each test does a single lookup
  // instead of re-scanning the whole edge list.
  const importTargets = new Map<string, string[]>();
  for (const edge of edges) {
    const target = edge.resolved;
    if (!target || !sourceFiles.has(target)) continue;
    const bucket = importTargets.get(edge.from);
    if (bucket) bucket.push(target);
    else importTargets.set(edge.from, [target]);
  }

  const relations: Array<{ test: string; source: string; reason: string }> = [];
  const seen = new Set<string>();
  const record = (test: string, source: string, reason: string): void => {
    const key = `${test}:${source}`;
    if (seen.has(key)) return;
    seen.add(key);
    relations.push({ test, source, reason });
  };

  for (const test of testPaths) {
    for (const source of importTargets.get(test) ?? []) record(test, source, 'import');

    const testBase = path.posix.basename(test).replace(/^(test_|spec_)/, '').replace(/(_test|\.test|\.spec)?\.[^.]+$/, '').toLowerCase();
    if (!testBase) continue;
    for (const { source, base } of sourceBases) {
      if (testBase === base || testBase.includes(base) || base.includes(testBase)) record(test, source, 'name_proximity');
    }
  }
  return relations;
}
176+
177+
/**
 * Tells whether a path looks like a test file.
 *
 * A path qualifies when it has a `test`, `tests`, `spec` or `__tests__` path
 * segment, contains `.test.` or `.spec.`, or is a Python file named
 * `test_*.py` or `*_test.py`.
 */
function isTestPath(filePath: string): boolean {
  const testPathPattern = /(^|\/)(test|tests|spec|__tests__)(\/|$)|\.(test|spec)\.|(^|\/)test_[^/]+\.py$|(^|\/)[^/]+_test\.py$/;
  const looksLikeTest = testPathPattern.test(filePath);
  return looksLikeTest;
}
180+
139181
/**
 * Renders a context pack as plain text.
 *
 * The output has a header with the token and item counts, a numbered list of
 * included items, a "Next reads" list of suggested commands, and - only when
 * test relations exist - a "Related tests" list capped at 10 entries.
 *
 * @param data Result of `buildContext`.
 * @returns The rendered text.
 */
export function renderContext(data: Awaited<ReturnType<typeof buildContext>>): string {
  const header = `Context pack: ${data.usedTokens} tokens, ${data.items.length} included`;
  const itemLines = data.items.map((item, index) => `${index + 1}. ${item.path} ${item.type} (${item.reason})`);
  const nextReadLines = data.nextReads.map((item) => ` codebone ${item.command} ${item.path}`);

  let rendered = `${header}\n\n${itemLines.join('\n')}\n\nNext reads:\n${nextReadLines.join('\n')}`;
  if (data.testRelations.length) {
    const relationLines = data.testRelations.slice(0, 10).map((item) => ` ${item.test} -> ${item.source} (${item.reason})`);
    rendered += `\n\nRelated tests:\n${relationLines.join('\n')}`;
  }
  return rendered;
}

src/core/directory-skeleton.ts

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import { estimateTokens } from './budget.js';
66

77
const execFileAsync = promisify(execFile);
88

9-
export async function skeletonDirectory(root: string, inputPath: string, options: { maxFiles?: number; budget?: number; publicOnly?: boolean; include?: string[]; exclude?: string[]; sort?: string; changedOnly?: boolean; respectAiIgnore?: boolean } = {}) {
9+
export async function skeletonDirectory(root: string, inputPath: string, options: { maxFiles?: number; budget?: number; publicOnly?: boolean; include?: string[]; exclude?: string[]; sort?: string; changedOnly?: boolean; respectAiIgnore?: boolean; mode?: 'full' | 'summary' } = {}) {
1010
const files = await sortFiles(root, await walkSourceFiles(root, inputPath, { maxFiles: options.maxFiles ?? 100, include: options.include, exclude: options.exclude, respectAiIgnore: options.respectAiIgnore }), options.sort ?? 'path', Boolean(options.changedOnly));
1111
const skeletons = [];
1212
let used = 0;
@@ -21,10 +21,11 @@ export async function skeletonDirectory(root: string, inputPath: string, options
2121
skeletons.push(skeleton);
2222
used += cost;
2323
}
24-
return { files: skeletons.length, skeletons, warnings: [], truncated: truncated || files.length >= (options.maxFiles ?? 100), tokenEstimate: used };
24+
return { files: skeletons.length, skeletons, mode: options.mode ?? 'full', warnings: [], truncated: truncated || files.length >= (options.maxFiles ?? 100), tokenEstimate: used };
2525
}
2626

2727
export function renderDirectorySkeleton(data: Awaited<ReturnType<typeof skeletonDirectory>>): string {
28+
if (data.mode === 'summary') return renderDirectorySummary(data);
2829
return data.skeletons.map((skeleton) => {
2930
const hidden = Math.max(0, estimateTokens('x'.repeat(skeleton.totalLines * 80)) - skeleton.tokenEstimate);
3031
const rendered = renderSkeleton(skeleton);
@@ -33,6 +34,21 @@ export function renderDirectorySkeleton(data: Awaited<ReturnType<typeof skeleton
3334
}).join('\n\n');
3435
}
3536

37+
/**
 * Renders the compact "summary" view of a directory skeleton.
 *
 * Each file becomes a header line followed by one line per top-level symbol
 * (imports, variables, constants and properties are left out) and one line per
 * direct child of that symbol. Private children are hidden unless their name
 * starts with `rpc_`. File sections are separated by a blank line.
 *
 * @param data Result of `skeletonDirectory`.
 * @returns The rendered summary text.
 */
function renderDirectorySummary(data: Awaited<ReturnType<typeof skeletonDirectory>>): string {
  const hiddenKinds = new Set<string>(['import', 'variable', 'constant', 'property']);
  const sections: string[] = [];

  for (const skeleton of data.skeletons) {
    const lines = [`═══ ${skeleton.file} (${skeleton.totalLines} lines) ═══`];
    for (const symbol of skeleton.symbols) {
      if (hiddenKinds.has(symbol.kind)) continue;
      lines.push(`${String(symbol.startLine).padStart(4)} ${symbol.kind.toUpperCase().padEnd(10)} ${symbol.qualifiedName}`);

      const children = symbol.children ?? [];
      for (const child of children) {
        const hiddenPrivate = child.visibility === 'private' && !child.name.startsWith('rpc_');
        if (hiddenPrivate) continue;
        lines.push(`${String(child.startLine).padStart(4)} ${child.kind.toUpperCase().padEnd(8)} ${child.name}`);
      }
    }
    sections.push(lines.join('\n'));
  }

  return sections.join('\n\n');
}
51+
3652
async function sortFiles(root: string, files: Awaited<ReturnType<typeof walkSourceFiles>>, mode: string, changedOnly: boolean) {
3753
if (changedOnly) {
3854
const changed = await getChangedFiles(root);

src/core/graph.ts

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,38 @@ export function summarizeGraph(edges: ImportEdge[], exports: ExportEntry[], file
5959
}
6060

6161
/**
 * Resolves an import specifier to a file in `fileSet`.
 *
 * Python importers (`from` ends in `.py`) are delegated to
 * `resolvePythonImport`. For every other importer only relative specifiers
 * (starting with `.`) are considered: the specifier is joined onto the
 * importer's directory and probed as-is, with common source extensions, and as
 * a directory containing `index.ts` / `index.js`.
 *
 * As a last resort a JavaScript extension on the specifier is swapped for its
 * TypeScript counterpart (`./x.js` -> `./x.ts`), which is how TypeScript ESM
 * projects write imports of `.ts` sources. These candidates are probed after
 * all the others, so a real `.js` file always wins.
 *
 * @param from Root-relative POSIX path of the importing file.
 * @param source Import specifier exactly as written in the importing file.
 * @param fileSet Root-relative POSIX paths of all known files.
 * @returns The matching path from `fileSet`, or `undefined` when nothing matches.
 */
export function resolveImport(from: string, source: string, fileSet: Set<string>): string | undefined {
  if (/\.py$/i.test(from)) return resolvePythonImport(from, source, fileSet);
  if (!source.startsWith('.')) return undefined;
  const base = path.posix.normalize(path.posix.join(path.posix.dirname(from), source));
  const candidates = [base, `${base}.ts`, `${base}.tsx`, `${base}.js`, `${base}.jsx`, `${base}.py`, `${base}.go`, `${base}.rs`, path.posix.join(base, 'index.ts'), path.posix.join(base, 'index.js')];

  // TypeScript resolves a `.js`-style specifier to the matching TypeScript source.
  const tsExtensionsFor: Record<string, string[]> = { '.js': ['.ts', '.tsx'], '.jsx': ['.tsx'], '.mjs': ['.mts'], '.cjs': ['.cts'] };
  const jsExtension = /\.(?:[cm]?js|jsx)$/.exec(base)?.[0];
  if (jsExtension) {
    const stem = base.slice(0, -jsExtension.length);
    for (const tsExtension of tsExtensionsFor[jsExtension] ?? []) candidates.push(`${stem}${tsExtension}`);
  }

  return candidates.find((candidate) => fileSet.has(candidate));
}
68+
69+
/**
 * Resolves a Python module reference to a file in `fileSet`.
 *
 * `source` must be a dotted module name, optionally prefixed with dots for a
 * relative import; anything else yields `undefined`. The module is looked up
 * first as `<module>.py` and then as `<module>/__init__.py` relative to the
 * root. If neither exists, any file whose path ends with one of those two
 * forms is accepted, preferring the shortest path and breaking ties with
 * `localeCompare`.
 *
 * @param from Root-relative POSIX path of the importing file.
 * @param source Module reference as written in the import statement.
 * @param fileSet Root-relative POSIX paths of all known files.
 * @returns The resolved path, or `undefined` when nothing matches.
 */
function resolvePythonImport(from: string, source: string, fileSet: Set<string>): string | undefined {
  const moduleReference = /^(?:\.+[A-Za-z_]\w*(?:\.\w+)*|[A-Za-z_]\w*(?:\.\w+)*)$/;
  if (!moduleReference.test(source)) return undefined;

  let modulePath: string | undefined;
  if (source.startsWith('.')) modulePath = relativePythonModulePath(from, source);
  else modulePath = source.replace(/\./g, '/');
  if (!modulePath) return undefined;

  const moduleFile = `${modulePath}.py`;
  const packageInit = path.posix.join(modulePath, '__init__.py');
  if (fileSet.has(moduleFile)) return moduleFile;
  if (fileSet.has(packageInit)) return packageInit;

  // Exact matches were ruled out above, so only nested locations remain.
  const moduleSuffix = `/${modulePath}.py`;
  const initSuffix = `/${modulePath}/__init__.py`;
  let best: string | undefined;
  for (const filePath of fileSet) {
    if (!filePath.endsWith(moduleSuffix) && !filePath.endsWith(initSuffix)) continue;
    const isBetter = best === undefined
      || filePath.length < best.length
      || (filePath.length === best.length && filePath.localeCompare(best) < 0);
    if (isBetter) best = filePath;
  }
  return best;
}
84+
85+
/**
 * Converts a Python relative module reference into a root-relative path stem.
 *
 * One leading dot refers to the importing file's own directory and every
 * additional dot climbs one directory. The remaining dotted name is appended
 * as path segments, e.g. `..other.thing` imported from `pkg/sub/mod.py`
 * yields `pkg/other/thing`.
 *
 * @param from Root-relative POSIX path of the importing file.
 * @param source Module reference; when it has no leading dot the dots are
 *               simply replaced by `/`.
 * @returns The path stem without extension, or `undefined` when the reference
 *          climbs above the root or produces an empty path.
 */
function relativePythonModulePath(from: string, source: string): string | undefined {
  const match = source.match(/^(\.+)(.*)$/);
  if (!match) return source.replace(/\./g, '/');

  const [, dots = '', rest = ''] = match;
  // `dirname` yields '.' for a file directly under the root; that placeholder is
  // not a real directory segment and must not leak into the result as "./name".
  const parts = path.posix.dirname(from).split('/').filter((part) => part !== '' && part !== '.');

  // The first dot stays in the current directory; only the extra dots climb.
  const climb = dots.length - 1;
  // Climbing past the root cannot name a known file, so report "unresolvable"
  // instead of silently re-anchoring the import at the root.
  if (climb > parts.length) return undefined;

  const baseParts = parts.slice(0, parts.length - climb);
  const restParts = rest ? rest.split('.') : [];
  const moduleParts = [...baseParts, ...restParts];
  return moduleParts.length ? moduleParts.join('/') : undefined;
}

src/core/reader.ts

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { SCHEMA_VERSION } from '../types.js';
22
import { resolveInsideRoot, toRelative } from '../utils/paths.js';
3-
import { readTextFileSafe } from '../utils/security.js';
3+
import { readTextFileLinesSafe, readTextFileSafe } from '../utils/security.js';
44
import { estimateTokens } from './budget.js';
55
import { flattenSymbols, skeletonSourceAsync } from './skeleton.js';
66

@@ -15,24 +15,19 @@ export interface ReadOptions {
1515
export async function readCode(root: string, inputPath: string, options: ReadOptions) {
1616
const absolutePath = resolveInsideRoot(root, inputPath);
1717
const maxBytes = options.maxBytes ?? 65536;
18-
const { text: source } = await readTextFileSafe(absolutePath, maxBytes, root);
1918
const relativePath = toRelative(root, absolutePath);
20-
const allLines = source.split(/\r?\n/);
2119
const warnings: string[] = [];
20+
21+
if (options.lines) return readLineRange(root, absolutePath, relativePath, options.lines, Math.max(0, options.context ?? 0), maxBytes, warnings);
22+
23+
const { text: source } = await readTextFileSafe(absolutePath, maxBytes, root);
24+
const allLines = source.split(/\r?\n/);
2225
let startLine = 1;
2326
let endLine = allLines.length;
2427
let label = relativePath;
2528
let symbolId: string | undefined;
2629

27-
if (options.lines) {
28-
const match = options.lines.match(/^(\d+):(\d+)$/);
29-
if (!match) throw new Error('Invalid --lines format, expected start:end');
30-
startLine = Number(match[1]);
31-
endLine = Number(match[2]);
32-
if (startLine < 1 || endLine < startLine || endLine > allLines.length) throw new Error(`Invalid line range: ${options.lines}`);
33-
label = `${relativePath}:${startLine}..${endLine}`;
34-
} else {
35-
if (!options.symbolId && !options.symbol) throw new Error('Either --symbol-id, --symbol, or --lines is required');
30+
if (!options.symbolId && !options.symbol) throw new Error('Either --symbol-id, --symbol, or --lines is required');
3631
const skeleton = await skeletonSourceAsync(root, relativePath, source);
3732
const symbols = flattenSymbols(skeleton.symbols);
3833
const parsedId = options.symbolId ? parseSymbolId(options.symbolId) : undefined;
@@ -56,7 +51,6 @@ export async function readCode(root: string, inputPath: string, options: ReadOpt
5651
endLine = match.endLine;
5752
label = `${relativePath}:${startLine}..${endLine} - ${match.kind} ${match.qualifiedName}`;
5853
symbolId = match.symbolId;
59-
}
6054

6155
const context = Math.max(0, options.context ?? 0);
6256
startLine = Math.max(1, startLine - context);
@@ -83,6 +77,42 @@ export async function readCode(root: string, inputPath: string, options: ReadOpt
8377
};
8478
}
8579

80+
/**
 * Reads an explicit `start:end` line range, widened by `context` lines on both
 * sides, and returns it as line-numbered text.
 *
 * A requested end beyond the last line is clamped and reported through a
 * `line_range_clamped` warning. Context padding that runs past the end of the
 * file is clamped silently, because the requested range itself was valid.
 *
 * @param root Project root, forwarded to `readTextFileLinesSafe`.
 * @param absolutePath Absolute path of the file to read.
 * @param relativePath Path used for the `file` and `label` fields of the result.
 * @param lines Range in `start:end` form (1-based, inclusive).
 * @param context Number of extra lines to include before and after the range.
 * @param maxBytes Upper bound for the byte length of the numbered output;
 *                 longer output is cut with `truncateUtf8` and flagged as truncated.
 * @param warnings Collector that receives warnings and is returned in the result.
 * @returns The read result; `startLine`/`endLine` describe the lines actually returned.
 * @throws Error when `lines` is malformed, `start < 1`, `end < start`, or
 *         `start` lies beyond the last line of the file.
 */
async function readLineRange(root: string, absolutePath: string, relativePath: string, lines: string, context: number, maxBytes: number, warnings: string[]) {
  const match = lines.match(/^(\d+):(\d+)$/);
  if (!match) throw new Error('Invalid --lines format, expected start:end');
  const requestedStart = Number(match[1]);
  const requestedEnd = Number(match[2]);
  if (requestedStart < 1 || requestedEnd < requestedStart) throw new Error(`Invalid line range: ${lines}`);

  const readStart = Math.max(1, requestedStart - context);
  const readEnd = requestedEnd + context;
  const { text, lineCount } = await readTextFileLinesSafe(absolutePath, readStart, readEnd, root);
  if (requestedStart > lineCount) throw new Error(`Invalid line range: ${lines}; file has ${lineCount} lines`);
  const actualEnd = Math.min(readEnd, lineCount);
  // Warn only when the caller's own range was cut short, not when merely the
  // context padding reached past the end of the file.
  if (requestedEnd > lineCount) warnings.push(`line_range_clamped:file_has_${lineCount}_lines`);

  // NOTE(review): assumes `text` holds exactly the lines readStart..actualEnd
  // without a trailing newline - confirm against readTextFileLinesSafe.
  const fragmentLines = text ? text.split(/\r?\n/) : [];
  let content = fragmentLines.map((line, index) => `${String(readStart + index).padStart(4)} | ${line}`).join('\n');
  let truncated = false;
  if (Buffer.byteLength(content) > maxBytes) {
    content = truncateUtf8(content, maxBytes);
    truncated = true;
  }
  return {
    schemaVersion: SCHEMA_VERSION,
    file: relativePath,
    label: `${relativePath}:${readStart}..${actualEnd}`,
    symbolId: undefined,
    startLine: readStart,
    endLine: actualEnd,
    content,
    text: content,
    warnings,
    truncated,
    tokenEstimate: estimateTokens(content),
  };
}
115+
86116
function truncateUtf8(value: string, maxBytes: number): string {
87117
let bytes = 0;
88118
let end = 0;

0 commit comments

Comments
 (0)