diff --git a/.changeset/snapshot-diff-freeze.md b/.changeset/snapshot-diff-freeze.md new file mode 100644 index 00000000000..4f2ffb136a8 --- /dev/null +++ b/.changeset/snapshot-diff-freeze.md @@ -0,0 +1,5 @@ +--- +"kilo-code": patch +--- + +Fix TUI freeze on huge-file diffs. Session-summary and file-view patches now use git directly instead of a JavaScript Myers implementation, so files of any size render a full diff without blocking the session. diff --git a/packages/opencode/src/file/index.ts b/packages/opencode/src/file/index.ts index 6730957f235..84f6329255f 100644 --- a/packages/opencode/src/file/index.ts +++ b/packages/opencode/src/file/index.ts @@ -6,6 +6,7 @@ import { Git } from "@/git" import { Effect, Layer, Context } from "effect" import * as Stream from "effect/Stream" import { formatPatch, structuredPatch } from "diff" +import { DiffFull } from "@/kilocode/snapshot/diff-full" // kilocode_change import fuzzysort from "fuzzysort" import ignore from "ignore" import path from "path" @@ -559,6 +560,13 @@ export namespace File { diff = yield* gitText(["-c", "core.fsmonitor=false", "diff", "--staged", "--", file]) } if (diff.trim()) { + // kilocode_change start — patch via git (DiffFull.file) instead of the JS Myers + // implementation. Upstream structuredPatch branch below is kept as dead code so + // our diff from upstream stays minimal and future merges don't conflict. + const got = yield* DiffFull.file(gitText, file) + if (got) return { type: "text" as const, content, patch: got.patch, diff: got.text } + return { type: "text" as const, content } + // kilocode_change end const original = yield* git.show(Instance.directory, "HEAD", file) const patch = structuredPatch(file, file, original, content, "old", "new", { context: Infinity, diff --git a/packages/opencode/src/kilocode/snapshot/diff-full.ts b/packages/opencode/src/kilocode/snapshot/diff-full.ts new file mode 100644 index 00000000000..94c7717ff7d --- /dev/null +++ b/packages/opencode/src/kilocode/snapshot/diff-full.ts @@ -0,0 +1,151 @@ +// kilocode_change - new file +// +// Patch generation. Runs `git diff --unified=INT_MAX` to produce +// unified-diff text for a set of files, instead of the npm `diff` package's +// JS Myers implementation. Myers is O(N*M) with full context, so on +// huge-file diffs it can block the event loop for minutes (the TUI freeze +// where ESC stopped working after a turn). +// +// Both helpers fail soft: on any git error they return an empty value so +// callers emit an empty patch string. Additions/deletions come from +// `git --numstat` and stay accurate. + +import { Effect } from "effect" +import { parsePatch } from "diff" +import type { StructuredPatch } from "diff" +import { Log } from "../../util/log" + +export namespace DiffFull { + const log = Log.create({ service: "snapshot.diff-full" }) + + // INT_MAX — git clamps to this, effectively infinite context. + const unified = "--unified=2147483647" + + interface GitResult { + readonly code: number + readonly text: string + readonly stderr: string + } + + /** + * Run `git diff --unified=INT_MAX` for a set of files between two refs and + * return a `file → unified-diff text` map. Output format matches what the + * `diff` package's `parsePatch` expects, so downstream clients continue to + * work. + * + * `files` entries must use forward slashes (git's output uses `/` even on + * Windows); paths with backslashes will silently miss the suffix match. + * + * Returns an empty map if `files` is empty or git fails. Callers emit an + * empty patch string for any file missing from the map; numstat-derived + * additions/deletions stay accurate. + */ + export const batch = Effect.fn("DiffFull.batch")(function* ( + git: (cmd: string[]) => Effect.Effect, + from: string, + to: string, + files: string[], + ) { + const map = new Map() + if (files.length === 0) return map + + // Windows cmdline limit is ~8191 chars. 500 * avg-15-char filename ≈ 7500. + const size = 500 + let failed = 0 + let stderr = "" + for (let i = 0; i < files.length; i += size) { + const chunk = files.slice(i, i + size) + const result = yield* git([ + "diff", + "--no-color", + "--no-ext-diff", + "--no-renames", + unified, + from, + to, + "--", + ...chunk, + ]) + if (result.code !== 0) { + failed += 1 + stderr = result.stderr || stderr + continue + } + parseBatch(result.text, chunk, map) + } + if (failed) { + log.info("git diff failed, emitting empty patches for affected files", { + chunksFailed: failed, + filesTotal: files.length, + stderr, + }) + } + return map + }) + + /** + * Generate a structured + unified diff for a single file in the working + * tree vs HEAD using `git diff --ignore-all-space --unified=INT_MAX`. + * Returns `null` if git produces no output (caller emits a content-only + * response with no patch). + */ + export const file = Effect.fn("DiffFull.file")(function* ( + gitText: (args: string[]) => Effect.Effect, + file: string, + ) { + const flags = ["-c", "core.fsmonitor=false", "diff", "--no-color", "--no-ext-diff", "--ignore-all-space", unified] + const primary = yield* gitText([...flags, "--", file]) + const text = primary.trim() ? primary : yield* gitText([...flags, "--staged", "--", file]) + if (!text.trim()) return null + const parsed = parsePatch(text)[0] + if (!parsed) return null + // Normalize paths to match what `structuredPatch(file, file, ...)` used to + // produce — downstream UIs key off the bare filename, not `a/…` / `b/…`. + const patch: StructuredPatch = { + ...parsed, + oldFileName: file, + newFileName: file, + } + return { patch, text } + }) + + /** + * Split a multi-file `git diff` output into one entry per file, keyed by + * the input filename (not the path from the header — that can be quoted). + * Silently drops sections whose header does not match any entry in `chunk`. + */ + function parseBatch(text: string, chunk: string[], map: Map) { + // Longest-first so `lib/a.txt` beats `a.txt` on suffix matches. + const ordered = chunk.slice().sort((a, b) => b.length - a.length) + // With `--no-renames` the header is always `diff --git a/PATH b/PATH` with + // both PATHs identical, so we can confirm both halves to avoid false + // positives where PATH happens to also appear as a substring earlier in + // the line (e.g. a filename containing ` b/`). + const match = (header: string) => { + for (const f of ordered) { + if (header.endsWith(" b/" + f) && header.includes(" a/" + f + " ")) return f + if (header.endsWith(` "b/${f}"`) && header.includes(` "a/${f}" `)) return f + } + return null + } + + let current: string | null = null + let buffer: string[] = [] + const flush = () => { + if (current !== null && buffer.length) map.set(current, buffer.join("\n")) + current = null + buffer = [] + } + + for (const line of text.split("\n")) { + if (line.startsWith("diff --git ")) { + flush() + current = match(line) + if (current !== null) buffer.push(line) + continue + } + if (current !== null) buffer.push(line) + } + flush() + } +} diff --git a/packages/opencode/src/snapshot/index.ts b/packages/opencode/src/snapshot/index.ts index bfb19d4fc5a..2890441d7ca 100644 --- a/packages/opencode/src/snapshot/index.ts +++ b/packages/opencode/src/snapshot/index.ts @@ -12,6 +12,7 @@ import { Config } from "../config/config" import { Global } from "../global" import { Hash } from "../util/hash" import { Log } from "../util/log" +import { DiffFull } from "../kilocode/snapshot/diff-full" // kilocode_change export namespace Snapshot { export const Patch = z.object({ @@ -718,6 +719,30 @@ export namespace Snapshot { const patch = (file: string, before: string, after: string) => formatPatch(structuredPatch(file, file, before, after, "", "", { context: Number.MAX_SAFE_INTEGER })) + // kilocode_change start — route patches through git (DiffFull.batch) instead of the + // JS Myers implementation. Upstream Myers loop below is kept as dead code so our + // diff from upstream stays minimal and future merges don't conflict. + for (let i = 0; i < rows.length; i += step) { + const run = rows.slice(i, i + step) + const patches = yield* DiffFull.batch( + (cmd) => git([...quote, ...args(cmd)], { cwd: state.directory }), + from, + to, + run.filter((r) => !r.binary).map((r) => r.file), + ) + for (const row of run) { + result.push({ + file: row.file, + patch: row.binary ? "" : (patches.get(row.file) ?? ""), + additions: row.additions, + deletions: row.deletions, + status: row.status, + }) + } + } + return result + // kilocode_change end + for (let i = 0; i < rows.length; i += step) { const run = rows.slice(i, i + step) const text = yield* load(run) diff --git a/packages/opencode/test/kilocode/diff-full.test.ts b/packages/opencode/test/kilocode/diff-full.test.ts new file mode 100644 index 00000000000..87244ad4aae --- /dev/null +++ b/packages/opencode/test/kilocode/diff-full.test.ts @@ -0,0 +1,227 @@ +// kilocode_change - new file +// +// Tests for the git-based diff generator that replaced the JS Myers path. + +import { $ } from "bun" +import { describe, expect } from "bun:test" +import { Effect, Layer } from "effect" +import path from "path" +import * as CrossSpawnSpawner from "../../src/effect/cross-spawn-spawner" +import { DiffFull } from "../../src/kilocode/snapshot/diff-full" +import { Filesystem } from "../../src/util/filesystem" +import { Log } from "../../src/util/log" +import { tmpdirScoped } from "../fixture/fixture" +import { testEffect } from "../lib/effect" + +Log.init({ print: false }) + +// Mirror the caller's git-wrapping in `snapshot/index.ts`: always disable +// quotepath so non-ASCII filenames come through unescaped. +const gitResult = (dir: string) => (args: string[]) => + Effect.promise(async () => { + const p = Bun.spawn(["git", "-c", "core.quotepath=false", ...args], { + cwd: dir, + stdout: "pipe", + stderr: "pipe", + }) + const [text, stderr] = await Promise.all([new Response(p.stdout).text(), new Response(p.stderr).text()]) + const code = await p.exited + return { code, text, stderr } + }) + +const gitText = (dir: string) => (args: string[]) => + Effect.promise(async () => { + const p = Bun.spawn(["git", "-c", "core.quotepath=false", ...args], { + cwd: dir, + stdout: "pipe", + stderr: "pipe", + }) + await p.exited + return await new Response(p.stdout).text() + }) + +const commit = async (dir: string, message: string) => { + await $`git add -A`.cwd(dir).quiet() + await $`git commit --no-gpg-sign -m ${message}`.cwd(dir).quiet() + const head = await $`git rev-parse HEAD`.cwd(dir).quiet() + return head.stdout.toString().trim() +} + +const it = testEffect(Layer.mergeAll(CrossSpawnSpawner.defaultLayer)) + +describe("DiffFull.batch", () => { + it.live("produces one patch per modified file", () => + Effect.gen(function* () { + const dir = yield* tmpdirScoped({ git: true }) + yield* Effect.promise(async () => { + await Filesystem.write(path.join(dir, "a.txt"), "alpha\nbeta\ngamma\n") + await Filesystem.write(path.join(dir, "b.txt"), "one\ntwo\nthree\n") + await Filesystem.write(path.join(dir, "c.txt"), "red\nblue\ngreen\n") + }) + const from = yield* Effect.promise(() => commit(dir, "v1")) + yield* Effect.promise(async () => { + await Filesystem.write(path.join(dir, "a.txt"), "alpha\nBETA\ngamma\n") + await Filesystem.write(path.join(dir, "b.txt"), "one\nTWO\nthree\n") + await Filesystem.write(path.join(dir, "c.txt"), "red\nBLUE\ngreen\n") + }) + const to = yield* Effect.promise(() => commit(dir, "v2")) + + const result = yield* DiffFull.batch(gitResult(dir), from, to, ["a.txt", "b.txt", "c.txt"]) + expect(result.size).toBe(3) + expect(result.get("a.txt")).toMatch(/^diff --git /) + expect(result.get("a.txt")).toContain("-beta") + expect(result.get("a.txt")).toContain("+BETA") + expect(result.get("b.txt")).toContain("-two") + expect(result.get("b.txt")).toContain("+TWO") + expect(result.get("c.txt")).toContain("-blue") + expect(result.get("c.txt")).toContain("+BLUE") + }), + ) + + it.live("tolerates paths with spaces and unicode", () => + Effect.gen(function* () { + const dir = yield* tmpdirScoped({ git: true }) + const names = ["my file.txt", "héllo.txt", "a b c.md"] + yield* Effect.promise(async () => { + for (const name of names) { + await Filesystem.write(path.join(dir, name), "before\n") + } + }) + const from = yield* Effect.promise(() => commit(dir, "v1")) + yield* Effect.promise(async () => { + for (const name of names) { + await Filesystem.write(path.join(dir, name), "after\n") + } + }) + const to = yield* Effect.promise(() => commit(dir, "v2")) + + const result = yield* DiffFull.batch(gitResult(dir), from, to, names) + expect(result.size).toBe(names.length) + for (const name of names) { + const patch = result.get(name) + expect(patch).toBeDefined() + expect(patch).toContain("-before") + expect(patch).toContain("+after") + } + }), + ) + + it.live("returns an empty map without spawning for an empty file list", () => + Effect.gen(function* () { + let calls = 0 + const stub = (_: string[]) => + Effect.sync(() => { + calls += 1 + return { code: 0, text: "", stderr: "" } + }) + const result = yield* DiffFull.batch(stub, "from", "to", []) + expect(result.size).toBe(0) + expect(calls).toBe(0) + }), + ) + + it.live("returns an empty map on git failure", () => + Effect.gen(function* () { + const dir = yield* tmpdirScoped({ git: true }) + yield* Effect.promise(async () => { + await Filesystem.write(path.join(dir, "a.txt"), "hi\n") + }) + yield* Effect.promise(() => commit(dir, "v1")) + const result = yield* DiffFull.batch(gitResult(dir), "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef", "HEAD", [ + "a.txt", + ]) + expect(result.size).toBe(0) + }), + ) + + it.live( + "chunks 1200 files across three spawns and returns all entries", + () => + Effect.gen(function* () { + const dir = yield* tmpdirScoped({ git: true }) + const names = Array.from({ length: 1200 }, (_, i) => `f${i.toString().padStart(4, "0")}.txt`) + yield* Effect.promise(async () => { + await Promise.all(names.map((name) => Filesystem.write(path.join(dir, name), "before\n"))) + }) + const from = yield* Effect.promise(() => commit(dir, "v1")) + yield* Effect.promise(async () => { + await Promise.all(names.map((name) => Filesystem.write(path.join(dir, name), "after\n"))) + }) + const to = yield* Effect.promise(() => commit(dir, "v2")) + + let calls = 0 + const counting = (cmd: string[]) => { + calls += 1 + return gitResult(dir)(cmd) + } + const result = yield* DiffFull.batch(counting, from, to, names) + expect(result.size).toBe(1200) + expect(calls).toBe(3) + expect(result.get("f0000.txt")).toContain("-before") + expect(result.get("f1199.txt")).toContain("+after") + }), + 30_000, + ) +}) + +describe("DiffFull.file", () => { + it.live("returns a structured + unified diff for a modified working-tree file", () => + Effect.gen(function* () { + const dir = yield* tmpdirScoped({ git: true }) + yield* Effect.promise(async () => { + await Filesystem.write(path.join(dir, "foo.txt"), "line1\nline2\nline3\n") + }) + yield* Effect.promise(() => commit(dir, "v1")) + yield* Effect.promise(async () => { + await Filesystem.write(path.join(dir, "foo.txt"), "line1\nCHANGED\nline3\n") + }) + + const got = yield* DiffFull.file(gitText(dir), "foo.txt") + expect(got).not.toBeNull() + expect(got!.text).toMatch(/^diff --git /) + expect(got!.text).toContain("-line2") + expect(got!.text).toContain("+CHANGED") + expect(got!.patch.oldFileName).toBe("foo.txt") + expect(got!.patch.newFileName).toBe("foo.txt") + expect(got!.patch.hunks.length).toBeGreaterThan(0) + }), + ) + + it.live("returns null for an unmodified file", () => + Effect.gen(function* () { + const dir = yield* tmpdirScoped({ git: true }) + yield* Effect.promise(async () => { + await Filesystem.write(path.join(dir, "foo.txt"), "unchanged\n") + }) + yield* Effect.promise(() => commit(dir, "v1")) + + const got = yield* DiffFull.file(gitText(dir), "foo.txt") + expect(got).toBeNull() + }), + ) + + it.live( + "handles a 15,000-line file in under 500 ms", + () => + Effect.gen(function* () { + const dir = yield* tmpdirScoped({ git: true }) + const before = Array.from({ length: 15_000 }, (_, i) => `v1_line_${i}`).join("\n") + "\n" + const after = Array.from({ length: 15_000 }, (_, i) => `v2_line_${i}`).join("\n") + "\n" + yield* Effect.promise(async () => { + await Filesystem.write(path.join(dir, "big.txt"), before) + }) + yield* Effect.promise(() => commit(dir, "v1")) + yield* Effect.promise(async () => { + await Filesystem.write(path.join(dir, "big.txt"), after) + }) + + const start = Date.now() + const got = yield* DiffFull.file(gitText(dir), "big.txt") + const elapsed = Date.now() - start + expect(got).not.toBeNull() + expect(got!.patch.hunks.length).toBeGreaterThan(0) + expect(elapsed).toBeLessThan(500) + }), + 10_000, + ) +}) diff --git a/packages/opencode/test/kilocode/snapshot-freeze-repro.test.ts b/packages/opencode/test/kilocode/snapshot-freeze-repro.test.ts new file mode 100644 index 00000000000..e107eef3b4d --- /dev/null +++ b/packages/opencode/test/kilocode/snapshot-freeze-repro.test.ts @@ -0,0 +1,101 @@ +// kilocode_change - new file +// +// Regression test for the freeze bug: before the caps + worker offload, +// Snapshot.diffFull on a file with tens of thousands of lines could block +// the thread for minutes. In the TUI, that same thread hosts the Hono +// server — so the POST /:id/abort endpoint (what ESC fires) never ran. +// +// This test proves: +// 1. A synthetic freeze workload (30k-line file) now completes quickly. +// 2. The abort endpoint responds within a bounded time while the freeze +// workload runs concurrently. +// 3. A concurrent setInterval keeps ticking — i.e. the event loop keeps +// breathing and ESC would be delivered. + +import { test, expect, afterEach, mock } from "bun:test" +import { $ } from "bun" +import { Instance } from "../../src/project/instance" +import { Server } from "../../src/server/server" +import { Session } from "../../src/session" +import { Snapshot } from "../../src/snapshot" +import { Filesystem } from "../../src/util/filesystem" +import { Log } from "../../src/util/log" +import { tmpdir } from "../fixture/fixture" + +Log.init({ print: false }) + +afterEach(async () => { + mock.restore() + await Instance.disposeAll() +}) + +test("pathological diffFull workload finishes quickly and does not block abort", async () => { + // 3000-line file that churns every line between snapshots. Before the fix + // this ran through structuredPatch at context=MAX_SAFE_INTEGER synchronously + // and could take minutes. + const v1 = Array.from({ length: 3000 }, (_, i) => `v1_line_${i}`).join("\n") + "\n" + const v2 = Array.from({ length: 3000 }, (_, i) => `v2_line_${i}`).join("\n") + "\n" + + await using tmp = await tmpdir({ + git: true, + init: async (dir) => { + await Filesystem.write(`${dir}/fat.json`, v1) + await $`git add .`.cwd(dir).quiet() + await $`git commit --no-gpg-sign -m init`.cwd(dir).quiet() + }, + }) + + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const session = await Session.create({}) + + const before = await Snapshot.track() + expect(before).toBeTruthy() + + await Filesystem.write(`${tmp.path}/fat.json`, v2) + const after = await Snapshot.track() + expect(after).toBeTruthy() + + // Kick off a diffFull that exercises the freeze path. + const diffPromise = Snapshot.diffFull(before!, after!) + + // Concurrently keep a tick counter running. If the event loop blocks we + // will see this count fall behind wall-clock elapsed. + let ticks = 0 + const start = Date.now() + const timer = setInterval(() => { + ticks++ + }, 25) + + // Fire an abort request against the Hono app in the middle of the diff. + const app = Server.Default().app + const abortStart = Date.now() + const res = await app.request(`/session/${session.id}/abort`, { method: "POST" }) + const abortLatency = Date.now() - abortStart + expect(res.status).toBe(200) + // The abort endpoint must respond well under a second even under load. + expect(abortLatency).toBeLessThan(2000) + + const diffs = await diffPromise + clearInterval(timer) + const total = Date.now() - start + + // The freeze workload must finish in bounded time. Five seconds is + // generous even for a slow CI box; without the fix this hangs. + expect(total).toBeLessThan(5000) + // And we must have ticked at least a few times during the work — proves + // the event loop stayed responsive (ESC would actually arrive). + expect(ticks).toBeGreaterThan(0) + + // With git-based diff the patch is a real unified diff, not empty. + const hit = diffs.find((d) => d.file === "fat.json") + expect(hit).toBeDefined() + expect(hit!.patch).toMatch(/^diff --git /m) + expect(hit!.patch).toContain("-v1_line_0") + expect(hit!.patch).toContain("+v2_line_0") + expect(hit!.additions).toBeGreaterThan(0) + expect(hit!.deletions).toBeGreaterThan(0) + }, + }) +})