Skip to content

Commit 7734787

Browse files
authored
fix(diff): skip binary file contents in reviews (#187)
1 parent 0610021 commit 7734787

7 files changed

Lines changed: 232 additions & 8 deletions

File tree

src/core/binary.ts

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import type { FileDiffMetadata } from "@pierre/diffs";
2+
import fs from "node:fs";
3+
4+
const BINARY_SNIFF_BYTES = 8_000;
5+
const BINARY_CONTROL_BYTE_RATIO = 0.3;
6+
7+
/** Return whether one diff patch explicitly marks the file contents as binary. */
8+
export function patchLooksBinary(patch: string) {
9+
return (
10+
/(^|\n)Binary files .* differ(?:\n|$)/.test(patch) || patch.includes("\nGIT binary patch\n")
11+
);
12+
}
13+
14+
/** Build placeholder metadata for one skipped binary file without inventing fake hunks. */
15+
export function createSkippedBinaryMetadata(
16+
name: string,
17+
type: FileDiffMetadata["type"] = "change",
18+
): FileDiffMetadata {
19+
return {
20+
name,
21+
type,
22+
hunks: [],
23+
splitLineCount: 0,
24+
unifiedLineCount: 0,
25+
isPartial: true,
26+
additionLines: [],
27+
deletionLines: [],
28+
cacheKey: `${name}:binary-skipped`,
29+
};
30+
}
31+
32+
/** Read only a small prefix from disk so binary detection never loads the whole file. */
33+
function readFilePrefix(path: string) {
34+
let fd: number | undefined;
35+
36+
try {
37+
fd = fs.openSync(path, "r");
38+
const buffer = Buffer.alloc(BINARY_SNIFF_BYTES);
39+
const bytesRead = fs.readSync(fd, buffer, 0, buffer.length, 0);
40+
return buffer.subarray(0, bytesRead);
41+
} finally {
42+
if (fd !== undefined) {
43+
fs.closeSync(fd);
44+
}
45+
}
46+
}
47+
48+
/** Return whether one byte is a strong binary signal instead of normal text content. */
49+
function isBinarySignalByte(byte: number) {
50+
return byte < 0x07 || (byte > 0x0d && byte < 0x20) || byte === 0x7f;
51+
}
52+
53+
/** Detect likely binary files from a small prefix using Git-style control-byte heuristics. */
54+
export function isProbablyBinaryFile(path: string) {
55+
let prefix: Uint8Array;
56+
57+
try {
58+
prefix = readFilePrefix(path);
59+
} catch {
60+
return false;
61+
}
62+
63+
if (prefix.length === 0) {
64+
return false;
65+
}
66+
67+
let binarySignalBytes = 0;
68+
69+
for (const byte of prefix) {
70+
if (byte === 0) {
71+
return true;
72+
}
73+
74+
if (isBinarySignalByte(byte)) {
75+
binarySignalBytes += 1;
76+
}
77+
}
78+
79+
return binarySignalBytes / prefix.length >= BINARY_CONTROL_BYTE_RATIO;
80+
}

src/core/loaders.test.ts

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,54 @@ describe("loadAppBootstrap", () => {
149149
expect(bootstrap.changeset.files[0]?.agent?.annotations).toHaveLength(1);
150150
});
151151

152+
test("skips binary file-pair diffs instead of reading their contents", async () => {
153+
const dir = mkdtempSync(join(tmpdir(), "hunk-binary-diff-"));
154+
tempDirs.push(dir);
155+
156+
const left = join(dir, "before.png");
157+
const right = join(dir, "after.png");
158+
159+
writeFileSync(left, Buffer.from([0, 1, 2, 3, 4, 5]));
160+
writeFileSync(right, Buffer.from([0, 1, 2, 9, 8, 7]));
161+
162+
const bootstrap = await loadAppBootstrap({
163+
kind: "diff",
164+
left,
165+
right,
166+
options: {
167+
mode: "auto",
168+
},
169+
});
170+
171+
expect(bootstrap.changeset.files).toHaveLength(1);
172+
expect(bootstrap.changeset.files[0]?.path).toBe("after.png");
173+
expect(bootstrap.changeset.files[0]?.previousPath).toBe("before.png");
174+
expect(bootstrap.changeset.files[0]?.isBinary).toBe(true);
175+
expect(bootstrap.changeset.files[0]?.metadata.hunks).toHaveLength(0);
176+
});
177+
178+
test("marks git binary diffs as skipped binary content", async () => {
179+
const dir = createTempRepo("hunk-git-binary-");
180+
const file = join(dir, "image.png");
181+
182+
writeFileSync(file, Buffer.from([0, 1, 2, 3, 4]));
183+
git(dir, "add", "image.png");
184+
git(dir, "commit", "-m", "initial");
185+
186+
writeFileSync(file, Buffer.from([0, 1, 9, 3, 4, 5]));
187+
188+
const bootstrap = await loadFromRepo(dir, {
189+
kind: "git",
190+
staged: false,
191+
options: { mode: "auto" },
192+
});
193+
194+
expect(bootstrap.changeset.files).toHaveLength(1);
195+
expect(bootstrap.changeset.files[0]?.path).toBe("image.png");
196+
expect(bootstrap.changeset.files[0]?.isBinary).toBe(true);
197+
expect(bootstrap.changeset.files[0]?.metadata.hunks).toHaveLength(0);
198+
});
199+
152200
test("loads git working tree changes from a temporary repo", async () => {
153201
const dir = createTempRepo("hunk-git-");
154202

src/core/loaders.ts

Lines changed: 74 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import {
88
import { createTwoFilesPatch } from "diff";
99
import { resolve as resolvePath } from "node:path";
1010
import { findAgentFileContext, loadAgentContext } from "./agent";
11+
import { createSkippedBinaryMetadata, isProbablyBinaryFile, patchLooksBinary } from "./binary";
1112
import {
1213
buildGitDiffArgs,
1314
buildGitShowArgs,
@@ -132,15 +133,20 @@ function findPatchChunk(metadata: FileDiffMetadata, chunks: string[], index: num
132133
);
133134
}
134135

136+
interface BuildDiffFileOptions {
137+
isUntracked?: boolean;
138+
previousPath?: string;
139+
isBinary?: boolean;
140+
}
141+
135142
/** Build the normalized per-file model used by the UI regardless of input mode. */
136143
function buildDiffFile(
137144
metadata: FileDiffMetadata,
138145
patch: string,
139146
index: number,
140147
sourcePrefix: string,
141148
agentContext: AgentContext | null,
142-
isUntracked?: boolean,
143-
previousPath?: string,
149+
{ isUntracked, previousPath, isBinary }: BuildDiffFileOptions = {},
144150
): DiffFile {
145151
return {
146152
id: `${sourcePrefix}:${index}:${metadata.name}`,
@@ -152,6 +158,7 @@ function buildDiffFile(
152158
metadata,
153159
agent: findAgentFileContext(agentContext, metadata.name, metadata.prevName),
154160
isUntracked,
161+
isBinary: isBinary ?? patchLooksBinary(patch),
155162
};
156163
}
157164

@@ -237,7 +244,9 @@ function buildUntrackedDiffFile(
237244
index,
238245
sourcePrefix,
239246
agentContext,
240-
true, // isUntracked
247+
{
248+
isUntracked: true,
249+
},
241250
);
242251
}
243252

@@ -326,6 +335,52 @@ function normalizePatchChangeset(
326335
};
327336
}
328337

338+
/** Return the change type to show when direct file comparison skips binary contents. */
339+
function resolveBinaryComparisonType(
340+
leftPath: string,
341+
rightPath: string,
342+
): FileDiffMetadata["type"] {
343+
if (leftPath === "/dev/null") {
344+
return "new";
345+
}
346+
347+
if (rightPath === "/dev/null") {
348+
return "deleted";
349+
}
350+
351+
return "change";
352+
}
353+
354+
/** Build a placeholder changeset for direct file comparisons that include binary content. */
355+
function buildBinaryFileDiffChangeset(
356+
input: FileCommandInput | DiffToolCommandInput,
357+
displayPath: string,
358+
title: string,
359+
leftPath: string,
360+
rightPath: string,
361+
agentContext: AgentContext | null,
362+
) {
363+
return {
364+
id: `pair:${displayPath}`,
365+
sourceLabel: input.kind === "difftool" ? "git difftool" : "file compare",
366+
title,
367+
agentSummary: agentContext?.summary,
368+
files: [
369+
buildDiffFile(
370+
createSkippedBinaryMetadata(displayPath, resolveBinaryComparisonType(leftPath, rightPath)),
371+
`Binary file skipped: ${basename(input.left)}${basename(input.right)}\n`,
372+
0,
373+
displayPath,
374+
agentContext,
375+
{
376+
previousPath: basename(input.left),
377+
isBinary: true,
378+
},
379+
),
380+
],
381+
} satisfies Changeset;
382+
}
383+
329384
/** Build a changeset by diffing two concrete files on disk. */
330385
async function loadFileDiffChangeset(
331386
input: FileCommandInput | DiffToolCommandInput,
@@ -334,8 +389,6 @@ async function loadFileDiffChangeset(
334389
) {
335390
const leftPath = resolvePath(cwd, input.left);
336391
const rightPath = resolvePath(cwd, input.right);
337-
const leftText = await Bun.file(leftPath).text();
338-
const rightText = await Bun.file(rightPath).text();
339392
const displayPath =
340393
input.kind === "difftool" ? (input.path ?? basename(input.right)) : basename(input.right);
341394
const title =
@@ -345,6 +398,19 @@ async function loadFileDiffChangeset(
345398
? displayPath
346399
: `${basename(input.left)}${basename(input.right)}`;
347400

401+
if (isProbablyBinaryFile(leftPath) || isProbablyBinaryFile(rightPath)) {
402+
return buildBinaryFileDiffChangeset(
403+
input,
404+
displayPath,
405+
title,
406+
leftPath,
407+
rightPath,
408+
agentContext,
409+
);
410+
}
411+
412+
const leftText = await Bun.file(leftPath).text();
413+
const rightText = await Bun.file(rightPath).text();
348414
const oldFile: FileContents = {
349415
name: displayPath,
350416
contents: leftText,
@@ -367,7 +433,9 @@ async function loadFileDiffChangeset(
367433
title,
368434
agentSummary: agentContext?.summary,
369435
files: [
370-
buildDiffFile(metadata, patch, 0, displayPath, agentContext, undefined, basename(input.left)),
436+
buildDiffFile(metadata, patch, 0, displayPath, agentContext, {
437+
previousPath: basename(input.left),
438+
}),
371439
],
372440
} satisfies Changeset;
373441
}

src/core/types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ export interface DiffFile {
4040
metadata: FileDiffMetadata;
4141
agent: AgentFileContext | null;
4242
isUntracked?: boolean;
43+
isBinary?: boolean;
4344
}
4445

4546
export interface Changeset {

src/opentui/HunkDiffView.tsx

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { useMemo } from "react";
2+
import { patchLooksBinary } from "../core/binary";
23
import type { DiffFile } from "../core/types";
34
import { findMaxLineNumber } from "../ui/diff/codeColumns";
45
import { buildSplitRows, buildStackRows } from "../ui/diff/pierre";
@@ -26,12 +27,15 @@ function countDiffStats(metadata: HunkDiffFile["metadata"]) {
2627

2728
/** Adapt the public diff shape into Hunk's internal file model without exposing app-only fields. */
2829
function toInternalDiffFile(diff: HunkDiffFile): DiffFile {
30+
const patch = diff.patch ?? "";
31+
2932
return {
3033
agent: null,
3134
id: diff.id,
35+
isBinary: patchLooksBinary(patch),
3236
language: diff.language,
3337
metadata: diff.metadata,
34-
patch: diff.patch ?? "",
38+
patch,
3539
path: diff.path ?? diff.metadata.name,
3640
previousPath: diff.metadata.prevName,
3741
stats: countDiffStats(diff.metadata),

src/ui/components/ui-components.test.tsx

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ function createWrapBootstrap(): AppBootstrap {
300300
});
301301
}
302302

303-
function createEmptyDiffFile(type: "rename-pure" | "new" | "deleted"): DiffFile {
303+
function createEmptyDiffFile(type: "change" | "rename-pure" | "new" | "deleted"): DiffFile {
304304
return {
305305
id: `empty:${type}`,
306306
path: `${type}.ts`,
@@ -1811,6 +1811,25 @@ describe("UI components", () => {
18111811
6,
18121812
);
18131813
expect(deletedFileFrame).toContain("The file is marked as deleted.");
1814+
1815+
const binaryFileFrame = await captureFrame(
1816+
<PierreDiffView
1817+
file={{
1818+
...createEmptyDiffFile("change"),
1819+
id: "empty:binary",
1820+
isBinary: true,
1821+
path: "image.png",
1822+
}}
1823+
layout="split"
1824+
theme={theme}
1825+
width={72}
1826+
selectedHunkIndex={0}
1827+
scrollable={false}
1828+
/>,
1829+
76,
1830+
6,
1831+
);
1832+
expect(binaryFileFrame).toContain("Binary file skipped");
18141833
});
18151834

18161835
test("PierreDiffView reuses highlighted rows after unmounting and remounting a file section", async () => {

src/ui/diff/renderRows.tsx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -555,6 +555,10 @@ export function diffMessage(file: DiffFile) {
555555
return "No textual hunks. This change only renames the file.";
556556
}
557557

558+
if (file.isBinary) {
559+
return "Binary file skipped";
560+
}
561+
558562
if (file.metadata.type === "new") {
559563
return "No textual hunks. The file is marked as new.";
560564
}

0 commit comments

Comments
 (0)