Skip to content

Commit cc8a759

Browse files
AlbinoGeekclaude
andcommitted
feat(git_diff_summary): structured token-efficient diff viewer
Raw git diff floods LLMs with thousands of tokens. This tool wraps git diff with per-file parsing, configurable truncation (maxLinesPerFile, maxFiles), automatic exclusion of lock files / vendored deps, glob-based fileFilter, and support for staged/HEAD/range targets — returning structured JSON or compact Markdown output that typically cuts token usage ~60% vs raw diff. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent a868122 commit cc8a759

File tree

2 files changed

+391
-0
lines changed

2 files changed

+391
-0
lines changed
Lines changed: 389 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,389 @@
1+
import { matchesGlob } from "node:path";
2+
3+
import type { FastMCP } from "fastmcp";
4+
import { z } from "zod";
5+
6+
import { gitTopLevel, isSafeGitUpstreamToken, spawnGitAsync } from "./git.js";
7+
import { jsonRespond, spreadDefined, spreadWhen } from "./json.js";
8+
import { requireGitAndRoots } from "./roots.js";
9+
import { WorkspacePickSchema } from "./schemas.js";
10+
11+
// ---------------------------------------------------------------------------
12+
// Constants
13+
// ---------------------------------------------------------------------------
14+
15+
/**
 * Glob patterns excluded from the summary when the caller does not pass
 * `excludePatterns`. Each pattern is tested against both the full
 * repository-relative path and the file's basename.
 */
const DEFAULT_EXCLUDE_PATTERNS: string[] = [
  // Dependency lock files
  "*.lock",
  "*.lockb",
  "bun.lock",
  "package-lock.json",
  "yarn.lock",
  "pnpm-lock.yaml",
  // Minified assets
  "*.min.js",
  "*.min.css",
  // Vendored dependencies and build output
  "vendor/**",
  "node_modules/**",
  "dist/**",
];
28+
29+
// ---------------------------------------------------------------------------
30+
// Types
31+
// ---------------------------------------------------------------------------
32+
33+
/** One file's entry in the diff summary. */
interface FileDiff {
  /** Repository-relative path (the old path for deleted files, otherwise the new path). */
  path: string;
  /** Kind of change detected from the diff's extended header lines. */
  status: "modified" | "added" | "deleted" | "renamed";
  /** Added-line count reported for this file. */
  additions: number;
  /** Removed-line count reported for this file. */
  deletions: number;
  /** Previous path; only set when `status` is "renamed". */
  oldPath?: string;
  /** True when `diff` was cut short by the per-file line limit. */
  truncated: boolean;
  /** Diff text for the file (everything after the "diff --git" header line), possibly truncated. */
  diff: string;
}
42+
43+
/** Top-level result returned by the `git_diff_summary` tool. */
interface DiffSummary {
  /** Human-readable label of what was diffed (e.g. "staged changes" or the requested range). */
  range: string;
  /** Number of per-file sections found in the raw diff, before exclusion and filtering. */
  totalFiles: number;
  /** Sum of `additions` over the entries in `files`. */
  totalAdditions: number;
  /** Sum of `deletions` over the entries in `files`. */
  totalDeletions: number;
  /** Per-file entries that survived exclusion, filtering and the file-count limit. */
  files: Array<FileDiff>;
  /** How many otherwise-included files were dropped by the file-count limit; omitted when zero. */
  truncatedFiles?: number;
  /** Paths removed by the exclude patterns; omitted when none were excluded. */
  excludedFiles?: string[];
}
52+
53+
// ---------------------------------------------------------------------------
54+
// Helpers
55+
// ---------------------------------------------------------------------------
56+
57+
/**
 * Convert the file column of a `git diff --stat` line into the file's new path.
 * Rename notation is collapsed: "dir/{old => new}/f" becomes "dir/new/f" and
 * "old => new" becomes "new". Any other value is returned unchanged.
 */
function normalizeStatPath(filePart: string): string {
  if (!filePart.includes(" => ")) return filePart;
  const braced = /^(.*)\{(.*) => (.*)\}(.*)$/.exec(filePart);
  if (braced) {
    const [, prefix = "", , renamedTo = "", suffix = ""] = braced;
    // An empty side ("a/{b => }/c") would otherwise leave a doubled slash.
    return `${prefix}${renamedTo}${suffix}`.replace(/\/{2,}/g, "/");
  }
  return filePart.slice(filePart.indexOf(" => ") + " => ".length);
}

/**
 * Parse `git diff --stat` output into per-file stats.
 * Format: " path/to/file | N ++++---"
 * Final line: " N files changed, N insertions(+), N deletions(-)"
 *
 * The +/- graph is scaled by git to fit the output width, so the symbols are
 * only a ratio once a change is large. The leading number N is the exact
 * total of changed lines; when the graph is shorter than N the split between
 * additions and deletions is estimated from the graph's ratio, and the two
 * values always sum to N.
 *
 * Lines without a numeric count (e.g. binary files, "Bin 0 -> 12 bytes") are
 * recorded with zero additions and deletions. Renamed files are keyed by
 * their new path. Paths that git abbreviated with a leading "..." cannot be
 * recovered and are keyed as printed.
 *
 * @param stat Raw stdout of `git diff --stat`.
 * @returns Map from file path to its addition/deletion counts.
 */
function parseStatOutput(stat: string): Map<string, { additions: number; deletions: number }> {
  const result = new Map<string, { additions: number; deletions: number }>();
  for (const line of stat.split("\n")) {
    // The stat column never contains "|", so the last pipe is the separator
    // even when the file name itself contains one. Lines without a pipe are
    // the summary line or blank.
    const pipeIdx = line.lastIndexOf("|");
    if (pipeIdx < 0) continue;

    const filePart = normalizeStatPath(line.slice(0, pipeIdx).trim());
    const statPart = line.slice(pipeIdx + 1).trim();

    const counted = /^(\d+)\s*([+-]*)$/.exec(statPart);
    if (!counted) {
      // Binary or otherwise non-countable entry: no line counts available.
      result.set(filePart, { additions: 0, deletions: 0 });
      continue;
    }

    const total = Number(counted[1] ?? "0");
    const graph = counted[2] ?? "";
    const plus = (graph.match(/\+/g) ?? []).length;
    const minus = graph.length - plus;
    const symbols = plus + minus;

    if (symbols === 0) {
      result.set(filePart, { additions: 0, deletions: 0 });
    } else if (total <= symbols) {
      // Unscaled graph: one symbol per changed line.
      result.set(filePart, { additions: plus, deletions: minus });
    } else {
      // Scaled graph: distribute the exact total using the graph's ratio.
      const additions = Math.round((total * plus) / symbols);
      result.set(filePart, { additions, deletions: total - additions });
    }
  }
  return result;
}
77+
78+
/**
 * Split raw `git diff` output into one entry per file.
 *
 * A new section begins at every line that starts with "diff --git ". Text in
 * front of the first such line is discarded. For each section, `header` is
 * its first line (without the newline) and `body` is everything after it.
 *
 * @param diff Raw stdout of `git diff`.
 * @returns Sections in the order they appear in the diff.
 */
function parseDiffOutput(diff: string): Array<{ header: string; body: string }> {
  return diff
    .split(/(?=^diff --git )/m)
    .filter((section) => section.startsWith("diff --git "))
    .map((section) => {
      const eol = section.indexOf("\n");
      if (eol < 0) {
        // Header-only section with no trailing newline.
        return { header: section, body: "" };
      }
      return { header: section.slice(0, eol), body: section.slice(eol + 1) };
    });
}
95+
96+
/**
 * Extract file paths and status from a diff chunk header + body.
 * Header: "diff --git a/old b/new"
 * Body may contain "rename from", "rename to", "new file mode", "deleted file mode".
 *
 * Status markers are only looked for in the extended header, i.e. the part of
 * the body before the first "@@ " hunk line.
 *
 * Path resolution:
 * - For renames, the paths come from the "rename from" / "rename to" lines.
 * - Otherwise both sides of the header name the same path, so the header is
 *   split at the point where the two halves are identical. This stays correct
 *   for paths that themselves contain " b/".
 * - If neither applies, the header is split with a greedy regex.
 *
 * Headers with quoted paths (git quotes paths containing special characters)
 * are not decoded; `path` is then an empty string.
 *
 * @param header The "diff --git ..." line of the chunk.
 * @param body Everything after the header line.
 * @returns `path` (old path for deleted files, new path otherwise), `oldPath`
 *   (renames only, otherwise undefined) and `status`.
 */
function extractFileInfo(
  header: string,
  body: string,
): {
  path: string;
  oldPath?: string;
  status: FileDiff["status"];
} {
  const hunkStart = body.search(/^@@ /m);
  const meta = hunkStart >= 0 ? body.slice(0, hunkStart) : body;

  let status: FileDiff["status"] = "modified";
  let oldPath: string | undefined;
  let renamedTo: string | undefined;

  if (/^new file mode/m.test(meta)) {
    status = "added";
  } else if (/^deleted file mode/m.test(meta)) {
    status = "deleted";
  } else if (/^rename from /m.test(meta)) {
    status = "renamed";
    oldPath = /^rename from (.+)$/m.exec(meta)?.[1];
    renamedTo = /^rename to (.+)$/m.exec(meta)?.[1];
  }

  let aPath: string | undefined;
  let bPath: string | undefined;

  // Unchanged path on both sides: header is "diff --git a/P b/P".
  const headerPrefix = "diff --git a/";
  const separator = " b/";
  if (status !== "renamed" && header.startsWith(headerPrefix)) {
    const rest = header.slice(headerPrefix.length);
    const half = (rest.length - separator.length) / 2;
    if (
      Number.isInteger(half) &&
      half >= 1 &&
      rest.slice(half, half + separator.length) === separator &&
      rest.slice(0, half) === rest.slice(half + separator.length)
    ) {
      aPath = rest.slice(0, half);
      bPath = aPath;
    }
  }

  if (aPath === undefined || bPath === undefined) {
    const headerMatch = /^diff --git a\/(.+) b\/(.+)$/.exec(header);
    aPath = oldPath ?? headerMatch?.[1] ?? "";
    bPath = renamedTo ?? headerMatch?.[2] ?? aPath;
  }

  const path = status === "deleted" ? aPath : bPath;
  return { path, oldPath, status };
}
130+
131+
/**
 * Truncate a diff body to at most `maxLines` lines.
 *
 * Every line of the body counts toward the limit, including the extended
 * header lines ("index ...", "--- a/...", "+++ b/...") that precede the
 * first hunk. A single trailing newline is not counted as an extra line, so
 * a body of exactly `maxLines` lines is returned unchanged and not flagged.
 *
 * @param body Diff text for one file (without the "diff --git" header line).
 * @param maxLines Maximum number of lines to keep.
 * @returns `text` (the original body, or its first `maxLines` lines joined
 *   with "\n") and `truncated` (true only when lines were dropped).
 */
function truncateDiffBody(body: string, maxLines: number): { text: string; truncated: boolean } {
  // Splitting "a\nb\n" yields a phantom empty last element; ignore it when counting.
  const content = body.endsWith("\n") ? body.slice(0, -1) : body;
  const lines = content.split("\n");
  if (lines.length <= maxLines) {
    return { text: body, truncated: false };
  }
  return { text: lines.slice(0, maxLines).join("\n"), truncated: true };
}
142+
143+
/**
 * Report whether a file path is matched by at least one glob pattern.
 *
 * Backslashes in the path are converted to forward slashes first. Each
 * pattern is then tried against the whole path and against the final path
 * segment alone, so a simple pattern such as "*.lock" also matches files in
 * subdirectories.
 *
 * @param filePath Path to test.
 * @param patterns Glob patterns; an empty list never matches.
 */
function matchesAnyPattern(filePath: string, patterns: string[]): boolean {
  const posixPath = filePath.replace(/\\/g, "/");
  const leaf = posixPath.slice(posixPath.lastIndexOf("/") + 1);
  return patterns.some((glob) => matchesGlob(posixPath, glob) || matchesGlob(leaf, glob));
}
154+
155+
/**
 * Build the `git diff` argument list for the `range` parameter.
 *
 * - undefined, empty or whitespace-only: no arguments (unstaged changes).
 * - "staged" / "cached" (case-insensitive): `--cached`.
 * - "head" (case-insensitive): `HEAD`.
 * - "A..B" or "A...B": passed through once both sides pass
 *   `isSafeGitUpstreamToken`.
 * - Anything else is treated as a single ref and must pass
 *   `isSafeGitUpstreamToken`.
 *
 * Surrounding whitespace is ignored.
 *
 * @param range Raw range string supplied by the caller.
 * @returns `{ ok: true, args }` on success, or `{ ok: false, error }` with an
 *   "unsafe_range_token: ..." message when a token is rejected.
 */
function buildDiffArgs(
  range: string | undefined,
): { ok: true; args: string[] } | { ok: false; error: string } {
  const trimmed = range?.trim() ?? "";
  if (trimmed === "") {
    return { ok: true, args: [] };
  }

  const keyword = trimmed.toLowerCase();
  if (keyword === "staged" || keyword === "cached") {
    return { ok: true, args: ["--cached"] };
  }
  if (keyword === "head") {
    return { ok: true, args: ["HEAD"] };
  }

  // Range like "A..B" or "A...B": validate each side separately.
  const separatorMatch = /^(.+?)(\.{2,3})(.+)$/.exec(trimmed);
  if (separatorMatch) {
    const [, left = "", sep = "", right = ""] = separatorMatch;
    if (!isSafeGitUpstreamToken(left) || !isSafeGitUpstreamToken(right)) {
      return { ok: false, error: `unsafe_range_token: ${range}` };
    }
    return { ok: true, args: [`${left}${sep}${right}`] };
  }

  // Single ref
  if (!isSafeGitUpstreamToken(trimmed)) {
    return { ok: false, error: `unsafe_range_token: ${range}` };
  }
  return { ok: true, args: [trimmed] };
}
187+
188+
/**
 * Human-readable label for the range.
 *
 * @param range Raw range string supplied by the caller.
 * @param diffArgs Arguments that were built for `git diff` from that range.
 * @returns "unstaged changes" when no range was given (undefined, empty or
 *   whitespace-only), "staged changes" when the diff runs with `--cached`,
 *   otherwise the range with surrounding whitespace removed.
 */
function rangeLabel(range: string | undefined, diffArgs: string[]): string {
  const trimmed = range?.trim() ?? "";
  if (trimmed === "") return "unstaged changes";
  if (diffArgs[0] === "--cached") return "staged changes";
  return trimmed;
}
194+
195+
// ---------------------------------------------------------------------------
196+
// Tool registration
197+
// ---------------------------------------------------------------------------
198+
199+
/**
 * Count added and removed lines inside the hunks of one per-file diff body.
 * Lines before the first "@@" hunk header (such as "--- a/x" and "+++ b/x")
 * are ignored.
 */
function countHunkLines(body: string): { additions: number; deletions: number } {
  let additions = 0;
  let deletions = 0;
  let inHunk = false;
  for (const line of body.split("\n")) {
    if (line.startsWith("@@")) {
      inHunk = true;
      continue;
    }
    if (!inHunk) continue;
    if (line.startsWith("+")) additions += 1;
    else if (line.startsWith("-")) deletions += 1;
  }
  return { additions, deletions };
}

/**
 * Register the `git_diff_summary` tool on the given server.
 *
 * The tool runs `git diff --stat` and `git diff` for the requested range in
 * the repository containing the first workspace root, then:
 * 1. splits the diff into per-file sections,
 * 2. drops files matching the exclude patterns (the built-in defaults unless
 *    `excludePatterns` is given) and, if `fileFilter` is set, files that do
 *    not match it,
 * 3. keeps at most `maxFiles` files and at most `maxLinesPerFile` lines each,
 * 4. returns the result as JSON when `args.format === "json"`, otherwise as
 *    Markdown.
 *
 * Both git invocations pin the output format (no color, no external diff
 * driver, "a/" and "b/" prefixes) so that user configuration cannot change
 * what the parsers see, and end with "--" so a range is never read as a path.
 *
 * Per-file counts come from the `--stat` output; when a file has no stat
 * entry (for example because git abbreviated a long path), the counts are
 * taken from the file's hunks instead.
 *
 * `totalFiles` counts every file section in the diff, while the addition and
 * deletion totals cover only the files that are actually emitted.
 *
 * NOTE(review): `args.format` is not declared in the schema extension below;
 * presumably it is supplied by `WorkspacePickSchema` — confirm.
 *
 * @param server Server instance the tool is added to.
 */
export function registerGitDiffSummaryTool(server: FastMCP): void {
  server.addTool({
    name: "git_diff_summary",
    description:
      "Structured, token-efficient diff viewer. Returns per-file diffs with additions/deletions, " +
      "truncated to configurable line limits, with noise files (lock files, dist, etc.) excluded by default. " +
      "Use `range` to target staged, HEAD, or any revision range. See docs/mcp-tools.md.",
    annotations: {
      readOnlyHint: true,
    },
    parameters: WorkspacePickSchema.extend({
      range: z
        .string()
        .optional()
        .describe(
          'Diff range. Examples: "staged", "HEAD~3..HEAD", "main...feature". ' +
            "Default: unstaged changes.",
        ),
      fileFilter: z
        .string()
        .optional()
        .describe('Glob pattern to restrict output to matching files, e.g. "*.ts", "src/**".'),
      maxLinesPerFile: z
        .number()
        .int()
        .min(1)
        .max(2000)
        .optional()
        .default(50)
        .describe("Max diff lines to include per file. Default: 50."),
      maxFiles: z
        .number()
        .int()
        .min(1)
        .max(500)
        .optional()
        .default(30)
        .describe("Max files to include in output. Default: 30."),
      excludePatterns: z
        .array(z.string())
        .optional()
        .describe(
          "Glob patterns to exclude. Defaults to common noise: lock files, dist, vendor, etc.",
        ),
    }),
    execute: async (args) => {
      // --- Standard prelude ---
      const pre = requireGitAndRoots(server, args, undefined);
      if (!pre.ok) return jsonRespond(pre.error);

      const rootInput = pre.roots[0];
      if (!rootInput) return jsonRespond({ error: "no_workspace_root" });

      const gitTop = gitTopLevel(rootInput);
      if (!gitTop) {
        return jsonRespond({ error: "not_a_git_repository", path: rootInput });
      }

      // --- Build git diff args ---
      const diffArgsResult = buildDiffArgs(args.range);
      if (!diffArgsResult.ok) {
        return jsonRespond({ error: diffArgsResult.error });
      }
      const diffArgs = diffArgsResult.args;

      // Pin the output format regardless of user git configuration
      // (color.ui, diff.noprefix, diff.mnemonicPrefix, diff.external).
      const stableFlags = ["--no-color", "--no-ext-diff", "--src-prefix=a/", "--dst-prefix=b/"];

      // --- Run git diff --stat ---
      const statResult = await spawnGitAsync(gitTop, [
        "diff",
        "--stat",
        ...stableFlags,
        ...diffArgs,
        "--",
      ]);
      if (!statResult.ok) {
        return jsonRespond({
          error: "git_diff_failed",
          detail: (statResult.stderr || statResult.stdout).trim(),
        });
      }
      const statMap = parseStatOutput(statResult.stdout);

      // --- Run git diff ---
      const diffResult = await spawnGitAsync(gitTop, ["diff", ...stableFlags, ...diffArgs, "--"]);
      if (!diffResult.ok) {
        return jsonRespond({
          error: "git_diff_failed",
          detail: (diffResult.stderr || diffResult.stdout).trim(),
        });
      }

      // --- Parse diff chunks ---
      const chunks = parseDiffOutput(diffResult.stdout);
      const totalFiles = chunks.length;

      // --- Apply excludePatterns and fileFilter ---
      const excludePatterns =
        args.excludePatterns !== undefined ? args.excludePatterns : DEFAULT_EXCLUDE_PATTERNS;
      const excludedFiles: string[] = [];
      // File info is extracted once per chunk and carried along with it.
      const included: Array<{
        chunk: (typeof chunks)[number];
        info: ReturnType<typeof extractFileInfo>;
      }> = [];

      for (const chunk of chunks) {
        const info = extractFileInfo(chunk.header, chunk.body);
        if (matchesAnyPattern(info.path, excludePatterns)) {
          excludedFiles.push(info.path);
          continue;
        }
        if (args.fileFilter && !matchesAnyPattern(info.path, [args.fileFilter])) {
          continue;
        }
        included.push({ chunk, info });
      }

      // --- Truncate to maxFiles ---
      const maxFiles = args.maxFiles ?? 30;
      const maxLinesPerFile = args.maxLinesPerFile ?? 50;
      const truncatedFileCount = included.length > maxFiles ? included.length - maxFiles : 0;
      const processed = included.slice(0, maxFiles);

      // --- Build FileDiff entries ---
      let totalAdditions = 0;
      let totalDeletions = 0;
      const files: FileDiff[] = [];

      for (const { chunk, info } of processed) {
        // Fall back to counting hunk lines when the stat output has no entry
        // under this path, instead of reporting +0 −0.
        const stat = statMap.get(info.path) ?? countHunkLines(chunk.body);
        totalAdditions += stat.additions;
        totalDeletions += stat.deletions;

        const { text: diffText, truncated } = truncateDiffBody(chunk.body, maxLinesPerFile);
        files.push({
          path: info.path,
          status: info.status,
          additions: stat.additions,
          deletions: stat.deletions,
          ...spreadDefined("oldPath", info.oldPath),
          truncated,
          diff: diffText,
        });
      }

      const rangeStr = rangeLabel(args.range, diffArgs);
      const summary: DiffSummary = {
        range: rangeStr,
        totalFiles,
        totalAdditions,
        totalDeletions,
        files,
        ...spreadWhen(truncatedFileCount > 0, { truncatedFiles: truncatedFileCount }),
        ...spreadWhen(excludedFiles.length > 0, { excludedFiles }),
      };

      // --- Format output ---
      if (args.format === "json") {
        return jsonRespond(summary as unknown as Record<string, unknown>);
      }

      // --- Markdown output ---
      const lines: string[] = [];
      lines.push(`# Diff: ${rangeStr}`, "");

      // Summary line
      const fileWord = totalFiles === 1 ? "file" : "files";
      let summaryLine = `**${totalFiles} ${fileWord} changed** (+${totalAdditions} \u2212${totalDeletions})`;
      if (excludedFiles.length > 0) {
        const excWord = excludedFiles.length === 1 ? "file" : "files";
        summaryLine += `, ${excludedFiles.length} ${excWord} excluded (${excludedFiles.join(", ")})`;
      }
      if (truncatedFileCount > 0) {
        summaryLine += `, ${truncatedFileCount} more file(s) omitted (maxFiles=${maxFiles})`;
      }
      lines.push(summaryLine, "");

      for (const file of files) {
        // Section header
        const statusTag = file.status !== "modified" ? `, ${file.status}` : "";
        const renameTag = file.oldPath ? ` (from ${file.oldPath})` : "";
        lines.push(
          `## ${file.path}${renameTag} (+${file.additions} \u2212${file.deletions}${statusTag})`,
        );

        if (file.diff) {
          lines.push("```diff", file.diff.trimEnd(), "```");
        } else {
          lines.push("_(no diff content)_");
        }
        if (file.truncated) {
          lines.push(`_(diff truncated at ${maxLinesPerFile} lines)_`);
        }
        lines.push("");
      }

      return lines.join("\n");
    },
  });
}

0 commit comments

Comments
 (0)