Skip to content

Commit 65c051a

Browse files
committed
fix: binary file handling
1 parent d73a4ab commit 65c051a

9 files changed

Lines changed: 167 additions & 49 deletions

File tree

src/electron/workers/nodeGitWorker.ts

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,19 @@ const blobCache = new LRUCache<string, { binary: boolean; text: string | null }>
1717
let blobCacheHits = 0
1818
const gitCache: Record<string, any> = Object.create(null)
1919
const WORKDIR = '__WORKDIR__'
20+
const BINARY_EXTS = [
21+
'.png','.jpg','.jpeg','.gif','.bmp','.webp','.ico',
22+
'.pdf','.zip','.rar','.7z','.tar','.gz','.tgz',
23+
'.mp3','.wav','.flac',
24+
'.mp4','.mov','.avi','.mkv','.webm',
25+
'.exe','.dll','.bin','.dmg','.pkg','.iso',
26+
'.woff','.woff2','.ttf','.otf',
27+
'.svg'
28+
]
29+
function isBinaryPathLocal(p: string): boolean {
30+
const lower = p.toLowerCase()
31+
return BINARY_EXTS.some(ext => lower.endsWith(ext))
32+
}
2033

2134
// Helper function to parse packed-refs
2235
async function parsePackedRefs(repoPath: string): Promise<string[]> {
@@ -127,6 +140,20 @@ parentPort?.on('message', async (m: Msg) => {
127140
return
128141
}
129142
case 'readFile': {
143+
// Fast path: known-binary extension => no content read
144+
if (isBinaryPathLocal(m.filepath)) {
145+
if (m.ref !== WORKDIR) {
146+
ok(m.id, { binary: true, text: null, notFound: false }); return
147+
}
148+
// WORKDIR existence without reading file
149+
const fileAbs = path.join(repoPath, m.filepath)
150+
const exists = await fs.promises
151+
.stat(fileAbs)
152+
.then(() => true)
153+
.catch(() => false)
154+
ok(m.id, { binary: exists, text: null, notFound: !exists })
155+
return
156+
}
130157
if (m.ref !== WORKDIR) {
131158
const commitOid = await git.resolveRef({ fs, dir: repoPath, ref: m.ref }).catch(() => null as any)
132159
if (!commitOid) { ok(m.id, { binary: false, text: null, notFound: true }); return }

src/web/src/App.tsx

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import { buildUnifiedDiffForStatus } from './utils/diff'
2121
import { countTokens } from './utils/tokenizer'
2222
// Globally shared token counts
2323
import { TokenCountsProvider, useTokenCountsContext } from './context/TokenCountsContext'
24+
import { isBinaryPath } from './utils/binary'
2425
import { logError } from './utils/logger'
2526
import { debounce } from './utils/debounce'
2627

@@ -580,6 +581,11 @@ function App() {
580581

581582
async function previewFile(path: string, status: FileDiffStatus): Promise<void> {
582583
if (!gitClient) return
584+
// Hard guard: no previews for binary files (prevents heavy reads)
585+
if (isBinaryPath(path)) {
586+
setNotif('Binary file preview is not supported.')
587+
return
588+
}
583589
try {
584590
const toFetchBase = status !== 'add'
585591
const toFetchCompare = status !== 'remove'
@@ -589,6 +595,13 @@ function App() {
589595
toFetchCompare && compareBranch ? gitClient.readFile(compareBranch, path) : Promise.resolve(undefined),
590596
])
591597

598+
// If worker reports binary, bail out as well
599+
const baseBin = (baseRes as any)?.binary
600+
const compareBin = (compareRes as any)?.binary
601+
if (baseBin || compareBin) {
602+
setNotif('Binary file preview is not supported.')
603+
return
604+
}
592605
setPreviewPath(path)
593606
setPreviewStatus(status)
594607
setPreviewData({
@@ -654,19 +667,27 @@ function App() {
654667
// File sections
655668
const fileSections: string[] = []
656669
const includeBinaryNow = (includeBinaryCheckboxRef.current?.checked ?? includeBinaryAsPathsRef.current)
657-
const pathsToProcess = includeBinaryNow ? selected : selected.filter((p) => !isLikelyBinaryPath(p))
670+
const pathsToProcess = includeBinaryNow ? selected : selected.filter((p) => !isBinaryPath(p))
658671
const fileReadPromises = pathsToProcess.map((path) => {
659672
const status = statusByPath.get(path) ?? 'unchanged'
660673
const needBase = status !== 'add'
661674
const needCompare = status !== 'remove'
675+
// Avoid heavy reads for binary paths — we only emit a header line
676+
if (isBinaryPath(path)) {
677+
return Promise.resolve({
678+
path, status,
679+
baseRes: { binary: true, text: null },
680+
compareRes: { binary: true, text: null },
681+
})
682+
}
662683
return Promise.all([
663684
needBase ? gitClient.readFile(baseBranch, path) : Promise.resolve(undefined),
664685
needCompare ? gitClient.readFile(compareBranch, path) : Promise.resolve(undefined),
665686
]).then(([baseRes, compareRes]) => ({ path, status, baseRes, compareRes }))
666687
})
667688
const fileContents = await Promise.all(fileReadPromises)
668689
for (const { path, status, baseRes, compareRes } of fileContents) {
669-
const isBinary = (baseRes as { binary?: boolean } | undefined)?.binary || (compareRes as { binary?: boolean } | undefined)?.binary || isLikelyBinaryPath(path)
690+
const isBinary = (baseRes as { binary?: boolean } | undefined)?.binary || (compareRes as { binary?: boolean } | undefined)?.binary || isBinaryPath(path)
670691
const header = `## FILE: ${path} (${status.toUpperCase()})\n\n`
671692
if (isBinary) {
672693
// When we filtered out likely-binary paths earlier and still hit binary here (e.g. unknown ext),
@@ -741,17 +762,7 @@ function App() {
741762
if (lower.endsWith('.html') || lower.endsWith('.htm')) return 'html'
742763
return ''
743764
}
744-
function isLikelyBinaryPath(p: string): boolean {
745-
const lower = p.toLowerCase()
746-
const binaryExts = [
747-
'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.webp', '.ico',
748-
'.pdf', '.zip', '.rar', '.7z', '.tar', '.gz', '.tgz',
749-
'.mp3', '.wav', '.flac', '.mp4', '.mov', '.avi', '.mkv', '.webm',
750-
'.exe', '.dll', '.bin', '.dmg', '.pkg', '.iso',
751-
'.woff', '.woff2', '.ttf', '.otf'
752-
]
753-
return binaryExts.some((ext) => lower.endsWith(ext))
754-
}
765+
// binary detection now centralized in utils/binary.ts (isBinaryPath)
755766

756767
// Small helper: use context to feed TokenUsage without prop-drilling
757768
function TokenUsageWithContext({
@@ -847,6 +858,7 @@ function App() {
847858
selectedPaths={selectedPaths}
848859
statusByPath={statusByPath}
849860
diffContextLines={diffContextLines}
861+
includeBinaryPaths={includeBinaryAsPaths}
850862
>
851863
<div className={`app-container${!projectLoaded ? ' landing-full' : ''}`} id="gc-app">
852864
<header className="header">
@@ -1135,7 +1147,7 @@ function App() {
11351147
const curr = Array.from(selectedPathsRef.current)
11361148
const removed: string[] = []
11371149
for (const p of curr) {
1138-
if (isLikelyBinaryPath(p)) {
1150+
if (isBinaryPath(p)) {
11391151
removed.push(p)
11401152
toggleSelect(p)
11411153
}

src/web/src/components/FileTreeView.tsx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,8 +151,9 @@ export function FileTreeView({
151151
type="button"
152152
onClick={() => onPreviewFile(node.path, st)}
153153
className="btn btn-ghost btn-icon ml-auto"
154-
title="Preview"
154+
title={node.isLikelyBinary ? 'Preview disabled for binary files' : 'Preview'}
155155
aria-label="Preview"
156+
disabled={node.isLikelyBinary}
156157
>
157158
<Search size={14} />
158159
</button>

src/web/src/components/SelectedFilesPanel.tsx

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { useMemo, useState } from 'react'
22
import { ArrowUpDown, Search, X, FilePenLine, FilePlus2, FileMinus2, File as FileIcon, FileArchive } from 'lucide-react'
33
import { useTokenCountsContext } from '../context/TokenCountsContext'
44
import type { FileDiffStatus } from '../hooks/useFileTree'
5+
import { isBinaryPath } from '../utils/binary'
56

67
type SelectedEntry = {
78
path: string
@@ -33,9 +34,7 @@ export function SelectedFilesPanel({ selectedPaths, statusByPath, onUnselect, on
3334
const st = statusByPath.get(path) ?? 'unchanged'
3435
const tokens = counts.get(path) ?? 0
3536
const name = path.includes('/') ? path.slice(path.lastIndexOf('/') + 1) : path
36-
const lower = path.toLowerCase()
37-
const exts = ['.png','.jpg','.jpeg','.gif','.webp','.svg','.ico','.pdf','.zip','.gz','.tgz','.rar','.7z','.mp4','.mp3','.wav','.mov','.avi','.mkv','.woff','.woff2','.ttf']
38-
const isLikelyBinary = exts.some((e) => lower.endsWith(e))
37+
const isLikelyBinary = isBinaryPath(path)
3938
entries.push({ path, name, status: st, tokens, isLikelyBinary })
4039
}
4140
const q = (filterText || '').trim().toLowerCase()
@@ -123,9 +122,10 @@ export function SelectedFilesPanel({ selectedPaths, statusByPath, onUnselect, on
123122
<button
124123
type="button"
125124
onClick={() => onPreview(it.path, it.status)}
126-
title="Preview"
125+
title={it.isLikelyBinary ? 'Preview disabled for binary files' : 'Preview'}
127126
aria-label="Preview"
128127
className="btn btn-ghost btn-icon"
128+
disabled={it.isLikelyBinary}
129129
>
130130
<Search size={14} />
131131
</button>

src/web/src/context/TokenCountsContext.tsx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ type ProviderProps = {
2020
selectedPaths: Set<string>
2121
statusByPath: Map<string, FileDiffStatus>
2222
diffContextLines: number
23+
includeBinaryPaths?: boolean
2324
children: React.ReactNode
2425
}
2526

@@ -30,6 +31,7 @@ export function TokenCountsProvider({
3031
selectedPaths,
3132
statusByPath,
3233
diffContextLines,
34+
includeBinaryPaths = true,
3335
children,
3436
}: ProviderProps) {
3537
const [progress, setProgress] = useState<ProgressState>({ completed: 0, total: 0, percent: 0 })
@@ -41,6 +43,7 @@ export function TokenCountsProvider({
4143
selectedPaths,
4244
statusByPath,
4345
diffContextLines,
46+
includeBinaryPaths,
4447
onBatch: (done, totalFiles) => {
4548
const pct =
4649
totalFiles <= 0

src/web/src/hooks/useFileTree.ts

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { useCallback, useState } from 'react'
22
import type { GitWorkerClient } from '../utils/gitWorkerClient'
33
import type { AppStatus } from '../types/appStatus'
4+
import { isBinaryPath } from '../utils/binary'
45

56
export type FileDiffStatus = 'modify' | 'add' | 'remove' | 'unchanged'
67

@@ -39,12 +40,7 @@ export function useFileTree(setAppStatus?: (s: AppStatus) => void) {
3940
return node
4041
}
4142

42-
const likelyBinary = (p: string): boolean => {
43-
const lower = p.toLowerCase()
44-
// Heuristic similar to App.tsx; keep in sync
45-
const exts = ['.png','.jpg','.jpeg','.gif','.webp','.svg','.ico','.pdf','.zip','.gz','.tgz','.rar','.7z','.mp4','.mp3','.wav','.mov','.avi','.mkv','.woff','.woff2','.ttf']
46-
return exts.some((e) => lower.endsWith(e))
47-
}
43+
const likelyBinary = (p: string): boolean => isBinaryPath(p)
4844

4945
for (const fullPath of allPaths) {
5046
const parts = fullPath.split('/')

src/web/src/hooks/useTokenCounts.ts

Lines changed: 56 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import type { GitEngine, TokenizerEngine } from '../platform/types'
33
import { createTokenizer } from '../platform/tokenizerFactory'
44
import type { FileDiffStatus } from './useFileTree'
55
import { buildUnifiedDiffForStatus } from '../utils/diff'
6+
import { isBinaryPath } from '../utils/binary'
67

78
export type TokenCounts = Map<string, number>
89

@@ -13,11 +14,22 @@ type Args = {
1314
selectedPaths: Set<string>
1415
statusByPath: Map<string, FileDiffStatus>
1516
diffContextLines: number
17+
includeBinaryPaths?: boolean
1618
tokenizer?: TokenizerEngine
1719
onBatch?: (completed: number, total: number) => void
1820
}
1921

20-
export function useTokenCounts({ gitClient, baseRef, compareRef, selectedPaths, statusByPath, diffContextLines, tokenizer, onBatch }: Args) {
22+
export function useTokenCounts({
23+
gitClient,
24+
baseRef,
25+
compareRef,
26+
selectedPaths,
27+
statusByPath,
28+
diffContextLines,
29+
includeBinaryPaths = true,
30+
tokenizer,
31+
onBatch,
32+
}: Args) {
2133
const [counts, setCounts] = useState<TokenCounts>(new Map())
2234
const [busy, setBusy] = useState(false)
2335
const tok: TokenizerEngine = useMemo(() => tokenizer ?? createTokenizer(), [tokenizer])
@@ -49,31 +61,51 @@ export function useTokenCounts({ gitClient, baseRef, compareRef, selectedPaths,
4961
const batch = selectedList.slice(i, i + BATCH_SIZE)
5062
await Promise.all(
5163
batch.map(async (path) => {
52-
const status = statusByPath.get(path) ?? 'unchanged'
53-
const needBase = status !== 'add'
54-
const needCompare = status !== 'remove'
55-
const [baseRes, compareRes] = await Promise.all([
56-
needBase && baseRef ? gitClient.readFile(baseRef, path) : Promise.resolve(undefined as any),
57-
needCompare && compareRef ? gitClient.readFile(compareRef, path) : Promise.resolve(undefined as any),
58-
])
59-
// Mirror final output generation logic
60-
const MAX_CONTEXT = 999
61-
const ctx = diffContextLines >= MAX_CONTEXT ? Number.MAX_SAFE_INTEGER : diffContextLines
62-
let textForCount = ''
63-
if (status === 'modify' || status === 'add' || status === 'remove') {
64-
const isBinary = Boolean((baseRes as any)?.binary) || Boolean((compareRes as any)?.binary)
65-
if (isBinary) {
66-
textForCount = ''
67-
} else if (status === 'add' && ctx === Number.MAX_SAFE_INTEGER) {
68-
textForCount = (compareRes as { text?: string } | undefined)?.text ?? ''
64+
const status = statusByPath.get(path) ?? 'unchanged'
65+
const looksBinary = isBinaryPath(path)
66+
let textForCount = ''
67+
68+
// Fast path: known-binary files never load content
69+
if (looksBinary) {
70+
if (includeBinaryPaths) {
71+
// Mirror the exact header we output during copy
72+
const header = `## FILE: ${path} (${(status || 'unchanged').toUpperCase()})\n\n`
73+
textForCount = header
74+
} else {
75+
textForCount = ''
76+
}
6977
} else {
70-
textForCount = buildUnifiedDiffForStatus(status, path, baseRes as any, compareRes as any, { context: ctx }) || ''
78+
// Textual path -> maybe fetch content/diff
79+
const needBase = status !== 'add'
80+
const needCompare = status !== 'remove'
81+
const [baseRes, compareRes] = await Promise.all([
82+
needBase && baseRef ? gitClient.readFile(baseRef, path) : Promise.resolve(undefined as any),
83+
needCompare && compareRef ? gitClient.readFile(compareRef, path) : Promise.resolve(undefined as any),
84+
])
85+
// Mirror final output generation logic
86+
const MAX_CONTEXT = 999
87+
const ctx = diffContextLines >= MAX_CONTEXT ? Number.MAX_SAFE_INTEGER : diffContextLines
88+
if (status === 'modify' || status === 'add' || status === 'remove') {
89+
const isBinary = Boolean((baseRes as any)?.binary) || Boolean((compareRes as any)?.binary)
90+
if (isBinary) {
91+
// Edge: unknown ext but worker says binary; treat same as looksBinary
92+
if (includeBinaryPaths) {
93+
const header = `## FILE: ${path} (${(status || 'unchanged').toUpperCase()})\n\n`
94+
textForCount = header
95+
} else {
96+
textForCount = ''
97+
}
98+
} else if (status === 'add' && ctx === Number.MAX_SAFE_INTEGER) {
99+
textForCount = (compareRes as { text?: string } | undefined)?.text ?? ''
100+
} else {
101+
textForCount = buildUnifiedDiffForStatus(status, path, baseRes as any, compareRes as any, { context: ctx }) || ''
102+
}
103+
} else {
104+
const isBinary = Boolean((baseRes as any)?.binary)
105+
const oldText = isBinary || (baseRes as any)?.notFound ? '' : (baseRes as any)?.text ?? ''
106+
textForCount = oldText
107+
}
71108
}
72-
} else {
73-
const isBinary = Boolean((baseRes as any)?.binary)
74-
const oldText = isBinary || (baseRes as any)?.notFound ? '' : (baseRes as any)?.text ?? ''
75-
textForCount = oldText
76-
}
77109
const n = textForCount ? await tok.count(textForCount) : 0
78110
next.set(path, n)
79111
}),

src/web/src/utils/binary.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// Centralized binary-file heuristics to keep UI, workers, and counters in sync.
2+
// Note: We treat SVG as binary here for safety/perf (often very large).
3+
const BINARY_EXTS = [
4+
'.png','.jpg','.jpeg','.gif','.bmp','.webp','.ico',
5+
'.pdf','.zip','.rar','.7z','.tar','.gz','.tgz',
6+
'.mp3','.wav','.flac',
7+
'.mp4','.mov','.avi','.mkv','.webm',
8+
'.exe','.dll','.bin','.dmg','.pkg','.iso',
9+
'.woff','.woff2','.ttf','.otf',
10+
'.svg'
11+
]
12+
13+
export function isBinaryPath(p: string): boolean {
14+
const lower = p.toLowerCase()
15+
return BINARY_EXTS.some(ext => lower.endsWith(ext))
16+
}
17+
18+
export { BINARY_EXTS }

src/web/src/workers/gitWorker.ts

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,20 @@ import LightningFS from '@isomorphic-git/lightning-fs'
1616
import * as BufferModule from 'buffer'
1717
import ProcessModule from 'process'
1818
import * as GIT from 'isomorphic-git'
19+
// Lightweight duplicate; workers can't import app utils directly. Keep in sync with utils/binary.ts.
20+
const BINARY_EXTS = [
21+
'.png','.jpg','.jpeg','.gif','.bmp','.webp','.ico',
22+
'.pdf','.zip','.rar','.7z','.tar','.gz','.tgz',
23+
'.mp3','.wav','.flac',
24+
'.mp4','.mov','.avi','.mkv','.webm',
25+
'.exe','.dll','.bin','.dmg','.pkg','.iso',
26+
'.woff','.woff2','.ttf','.otf',
27+
'.svg'
28+
]
29+
function isBinaryPathLocal(p: string): boolean {
30+
const lower = p.toLowerCase()
31+
return BINARY_EXTS.some((ext) => lower.endsWith(ext))
32+
}
1933

2034
;(self as any).Buffer = (self as any).Buffer || (BufferModule as any).Buffer
2135
;(self as any).process = (self as any).process || (ProcessModule as any)
@@ -449,6 +463,21 @@ async function handleReadFile(
449463
): Promise<ResOk> {
450464
if (!pfs) throw new Error('Repository is not initialized in worker')
451465

466+
// Skip heavy reads for known-binary extensions
467+
if (isBinaryPathLocal(filepath)) {
468+
if (ref === WORKDIR_SENTINEL) {
469+
// Best-effort existence check without loading file contents
470+
try {
471+
await pfs.stat('/' + filepath)
472+
return { id, type: 'ok', data: { binary: true, text: null, notFound: false } }
473+
} catch {
474+
return { id, type: 'ok', data: { binary: false, text: null, notFound: true } }
475+
}
476+
}
477+
// For commit refs we assume existence (paths come from list/diff); avoid blob load
478+
return { id, type: 'ok', data: { binary: true, text: null, notFound: false } }
479+
}
480+
452481
// Read raw to detect binary first
453482
if (ref === WORKDIR_SENTINEL) {
454483
try {

0 commit comments

Comments
 (0)