|
| 1 | +#!/usr/bin/env node |
| 2 | +// Claude Code Stop hook — stale-process-sweeper. |
| 3 | +// |
| 4 | +// Fires at turn-end. Finds Node test/build worker processes that the |
| 5 | +// session left behind (test runner crashed mid-run, hook timed out, |
| 6 | +// user interrupted `Bash`, etc.) and kills them so they don't pile up |
| 7 | +// across turns and exhaust system memory. |
| 8 | +// |
| 9 | +// What's swept: |
| 10 | +// - vitest workers (`vitest/dist/workers/forks` and the threads pool) |
| 11 | +// - vitest itself (orphan parent runners that survived a SIGINT) |
// - tsgo type-check daemons (plain `tsc` processes are not matched)
| 13 | +// - type-coverage workers |
| 14 | +// - esbuild service processes |
| 15 | +// |
| 16 | +// What's NOT swept: |
| 17 | +// - Anything spawned by a still-living shell (PPID alive) |
| 18 | +// - Anything matching the user's editors / IDEs / terminals |
| 19 | +// - The Claude Code process itself |
| 20 | +// |
| 21 | +// The hook is fast (one `ps` call + a few regex matches + a couple of |
| 22 | +// `kill -0` probes) and silent on the happy path. It only writes to |
| 23 | +// stderr when it actually killed something — that's a useful signal. |
| 24 | +// |
// Stop hooks receive JSON on stdin (we drain it but never parse it;
// the body shape is irrelevant to our work) and the exit code is
// advisory.
| 27 | + |
| 28 | +import { spawnSync } from 'node:child_process' |
| 29 | +import process from 'node:process' |
| 30 | + |
// Command-line patterns that identify a stale test/build worker. Each
// regex is tested against the full command string reported by `ps`, so
// it must be specific enough that ordinary user processes (a plain
// `node` invocation, an editor's language server) never match. The
// first matching entry wins and its `name` is used in the report.
const STALE_PATTERNS: Array<{ name: string; rx: RegExp }> = [
  // Vitest pool workers: the `forks` and `threads` worker entry points.
  // Typical leak: the parent runner crashes or is interrupted and its
  // workers stay alive.
  {
    name: 'vitest-worker',
    rx: /vitest\/dist\/workers\/(forks|threads)/,
  },
  // Vitest parent runner, e.g. `node ... vitest/dist/cli.js run`.
  {
    name: 'vitest-runner',
    rx: /vitest\/dist\/(cli|node)\.[mc]?js/,
  },
  // tsgo type checker. Only the executable itself is matched: `tsgo`
  // must be a whole argv token or the basename of a path token,
  // optionally with a `.js` launcher suffix. A bare word match would
  // also hit unrelated commands that merely mention "tsgo" (a script
  // inside a directory named `tsgo`, a file called `notes-tsgo.md`).
  // Note: `tsc --watch` is not matched by any pattern in this list.
  {
    name: 'tsgo',
    rx: /(?:^|[\s\/])tsgo(?:\.js)?(?=\s|$)/,
  },
  // type-coverage CLI entry point.
  {
    name: 'type-coverage',
    rx: /type-coverage\/bin\/type-coverage/,
  },
  // esbuild's long-lived service helper (`... --service=<version>`).
  // The optional path segment covers both the `esbuild/bin/...` layout
  // and the per-platform `@esbuild/<platform>/bin/...` layout.
  {
    name: 'esbuild-service',
    rx: /esbuild\/(?:[\w.-]+\/)?(?:bin|lib)\/.*\bservice\b/,
  },
]
| 67 | + |
/** One process as reported by a single row of `ps` output. */
interface ProcRow {
  /** Process id. */
  pid: number
  /** Parent process id. */
  ppid: number
  /** Resident set size as printed by `ps`; 0 when `ps` printed a non-number. */
  rss: number
  /** Full command line as printed by `ps`. */
  command: string
}

// One `ps -o pid=,ppid=,rss=,command=` row: three whitespace-separated
// fields followed by the command, which may itself contain spaces.
const PS_ROW_RX = /^\s*(\d+)\s+(\d+)\s+(\S+)\s+(\S.*)$/

/**
 * Takes a snapshot of all processes via a single `ps` call.
 *
 * @returns One row per process, or an empty array when `ps` could not
 *   be run, exited non-zero, or produced no output. Never throws for a
 *   failing `ps`; the caller treats "no rows" as "nothing to sweep".
 */
function listProcesses(): ProcRow[] {
  // -A: all processes; -o with trailing `=`: custom columns, no header.
  // -ww: unlimited width. Without it `ps` may truncate the command
  // column when its output is not a terminal, which would cut off the
  // long node_modules paths the stale-process patterns look for.
  const result = spawnSync(
    'ps',
    ['-A', '-ww', '-o', 'pid=,ppid=,rss=,command='],
    {
      encoding: 'utf8',
      // The default cap is 1 MiB; a busy machine with long command
      // lines can exceed that, which would make spawnSync fail and the
      // sweep silently do nothing.
      maxBuffer: 64 * 1024 * 1024,
    },
  )
  if (result.error || result.status !== 0 || !result.stdout) {
    return []
  }
  const rows: ProcRow[] = []
  for (const line of result.stdout.split('\n')) {
    const fields = PS_ROW_RX.exec(line.trimEnd())
    if (!fields) {
      // Blank line or a row without all four columns.
      continue
    }
    const [, pidText = '', ppidText = '', rssText = '', command = ''] = fields
    const pid = Number.parseInt(pidText, 10)
    const ppid = Number.parseInt(ppidText, 10)
    if (!Number.isFinite(pid) || !Number.isFinite(ppid)) {
      continue
    }
    // Keep the row even if rss is not numeric; report it as 0 so later
    // arithmetic never produces NaN.
    const parsedRss = Number.parseInt(rssText, 10)
    const rss = Number.isFinite(parsedRss) ? parsedRss : 0
    rows.push({ pid, ppid, rss, command })
  }
  return rows
}
| 109 | + |
/**
 * Reports whether a process with the given pid currently exists.
 *
 * @param pid Process id to probe.
 * @returns `true` if the process exists (even if we are not allowed to
 *   signal it), `false` if it does not exist or `pid` is 1 or lower.
 */
function isAlive(pid: number): boolean {
  if (pid <= 1) {
    // PID 0 / 1 are the kernel / init. A process parented there is
    // treated as an orphan, so report "not alive" without probing.
    // This guard also keeps zero and negative values away from
    // process.kill.
    return false
  }
  try {
    // Signal 0 performs the existence/permission check without
    // delivering a signal.
    process.kill(pid, 0)
    return true
  } catch (e) {
    // EPERM means the process exists but belongs to another user, so
    // it is alive. Anything else (typically ESRCH) means it is gone.
    return (e as { code?: unknown } | null | undefined)?.code === 'EPERM'
  }
}
| 123 | + |
/**
 * Looks up which stale-process pattern, if any, a process matches.
 *
 * @param row Process row whose `command` is tested.
 * @returns The `name` of the first entry in STALE_PATTERNS whose regex
 *   matches the command, or `undefined` when none match.
 */
function classify(row: ProcRow): string | undefined {
  const hit = STALE_PATTERNS.find(pattern => pattern.rx.test(row.command))
  return hit === undefined ? undefined : hit.name
}
| 132 | + |
/**
 * Scans the process table once and sends SIGTERM to every orphaned
 * process whose command matches a stale-process pattern.
 *
 * @returns `killed`: one entry per process that was successfully
 *   signalled (pid, pattern name, rss divided by 1024 and rounded);
 *   `skipped`: how many matching processes were left alone because
 *   their parent is still alive.
 */
function sweep(): { killed: Array<{ pid: number; name: string; rssMb: number }>; skipped: number } {
  const reaped: Array<{ pid: number; name: string; rssMb: number }> = []
  let liveParentCount = 0
  const selfPid = process.pid
  const parentPid = process.ppid

  for (const proc of listProcesses()) {
    // This process and its direct parent are never candidates.
    const isProtected = proc.pid === selfPid || proc.pid === parentPid
    const label = isProtected ? undefined : classify(proc)
    if (!label) {
      continue
    }
    // A matching process is only swept when it is an orphan: either
    // re-parented to PID 1 or its recorded parent no longer exists.
    // If the parent is alive the process belongs to a run that is
    // still in progress, so it is counted and left untouched.
    if (proc.ppid !== 1 && isAlive(proc.ppid)) {
      liveParentCount += 1
      continue
    }
    try {
      // SIGTERM only, and no waiting for the process to exit. A
      // process that ignores SIGTERM will simply be signalled again by
      // a later sweep; this function never escalates to SIGKILL.
      process.kill(proc.pid, 'SIGTERM')
    } catch {
      // Already gone, or we lack permission — not recorded as killed.
      continue
    }
    reaped.push({
      pid: proc.pid,
      name: label,
      rssMb: Math.round(proc.rss / 1024),
    })
  }
  return { killed: reaped, skipped: liveParentCount }
}
| 175 | + |
// Upper bound on how long main() waits for stdin to finish before
// sweeping anyway.
const STDIN_DRAIN_TIMEOUT_MS = 2000

/**
 * Entry point: drains the hook payload from stdin, then runs the sweep
 * exactly once.
 *
 * The sweep starts as soon as stdin ends, closes, errors, or is already
 * unreadable. If none of that happens within STDIN_DRAIN_TIMEOUT_MS
 * (for example stdin was inherited and is never closed) the sweep runs
 * anyway so the hook cannot hang.
 */
function main() {
  let started = false
  const runOnce = (): void => {
    // Several triggers can fire for the same stream; only the first
    // one may start the sweep.
    if (started) {
      return
    }
    started = true
    runSweep()
  }

  const stdin = process.stdin
  // An 'error' event with no listener would throw and crash the hook;
  // treat a broken stdin the same as an empty one.
  stdin.on('error', runOnce)
  stdin.on('end', runOnce)
  stdin.on('close', runOnce)
  // The payload is not needed, but it is consumed so that 'end' fires.
  stdin.on('data', () => {})
  stdin.resume()

  // If stdin is already closed (some hook runners don't pipe input),
  // run immediately.
  if (stdin.readable === false) {
    runOnce()
    return
  }
  // Fallback for a stdin that stays open forever.
  setTimeout(runOnce, STDIN_DRAIN_TIMEOUT_MS)
}
| 189 | + |
/**
 * Runs one sweep, reports to stderr only if something was killed, and
 * always terminates the process with exit code 0.
 *
 * Any exception thrown by the sweep is logged to stderr and swallowed,
 * so a failure here never surfaces as a non-zero exit.
 */
function runSweep(): void {
  let result: { killed: Array<{ pid: number; name: string; rssMb: number }>; skipped: number }
  try {
    result = sweep()
  } catch (e) {
    // Hooks must never crash a Claude turn. Log and exit clean. The
    // thrown value is not guaranteed to be an Error, so narrow before
    // reading `.message`.
    const reason = e instanceof Error ? e.message : String(e)
    process.stderr.write(
      `[stale-process-sweeper] unexpected error: ${reason}\n`,
    )
    process.exit(0)
  }
  if (result.killed.length > 0) {
    // Sum of the per-process rssMb values recorded by the sweep.
    const totalMb = result.killed.reduce((sum, k) => sum + k.rssMb, 0)
    const breakdown = result.killed
      .map(k => `${k.name}=${k.pid}(${k.rssMb}MB)`)
      .join(', ')
    process.stderr.write(
      `[stale-process-sweeper] reaped ${result.killed.length} stale ` +
        `worker(s), ~${totalMb}MB freed: ${breakdown}\n`,
    )
  }
  process.exit(0)
}
| 213 | + |
// Script entry point: start draining stdin and schedule the sweep.
main()
0 commit comments