Skip to content

Commit 837dbb8

Browse files
committed
fix: prevent orphan Claude processes on Neovim exit
Fixes coder#1

- Add defense-in-depth PID recovery from sessions and terminal buffers
- Kill the entire process tree (not just direct children) using process groups
- Follow up with SIGKILL for any survivors after a graceful SIGTERM
- Add a retry mechanism for PID tracking in snacks.lua (handles a delayed job_id)
- Track PIDs in the external terminal provider
- Add a VimLeavePre autocmd to call cleanup_all() before the server stops
- Validate the cleanup_strategy config option

Tests:

- Unit tests for defense-in-depth PID recovery
- Integration tests with real processes verifying actual termination
1 parent e28a7d9 commit 837dbb8

10 files changed

Lines changed: 1549 additions & 2 deletions

File tree

lua/claudecode/config.lua

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,19 @@ function M.validate(config)
126126
end
127127
end
128128

129+
-- Validate cleanup_strategy if present
130+
if config.terminal.cleanup_strategy ~= nil then
131+
local valid_strategies = { "pkill_children", "jobstop_only", "aggressive", "none" }
132+
local is_valid_strategy = false
133+
for _, strategy in ipairs(valid_strategies) do
134+
if config.terminal.cleanup_strategy == strategy then
135+
is_valid_strategy = true
136+
break
137+
end
138+
end
139+
assert(is_valid_strategy, "terminal.cleanup_strategy must be one of: " .. table.concat(valid_strategies, ", "))
140+
end
141+
129142
local valid_log_levels = { "trace", "debug", "info", "warn", "error" }
130143
local is_valid_log_level = false
131144
for _, level in ipairs(valid_log_levels) do

lua/claudecode/init.lua

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,12 @@ function M.setup(opts)
374374
vim.api.nvim_create_autocmd("VimLeavePre", {
375375
group = vim.api.nvim_create_augroup("ClaudeCodeShutdown", { clear = true }),
376376
callback = function()
377+
-- Kill all Claude terminal processes first to prevent orphans
378+
local ok, terminal = pcall(require, "claudecode.terminal")
379+
if ok and terminal.cleanup_all then
380+
terminal.cleanup_all()
381+
end
382+
377383
if M.state.server then
378384
M.stop()
379385
else

lua/claudecode/terminal.lua

Lines changed: 259 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,91 @@ local claudecode_server_module = require("claudecode.server.init")
99
local osc_handler = require("claudecode.terminal.osc_handler")
1010
local session_manager = require("claudecode.session")
1111

12+
-- Use global to survive module reloads (Fix 3: Plugin Reload Protection)
13+
---@type table<number, number> Map of job_id -> unix_pid
14+
_G._claudecode_tracked_pids = _G._claudecode_tracked_pids or {}
15+
local tracked_pids = _G._claudecode_tracked_pids
16+
17+
-- Buffer to session mapping for cleanup on BufUnload (Fix 1: Zombie Sessions)
18+
---@type table<number, string> Map of bufnr -> session_id
19+
_G._claudecode_buffer_to_session = _G._claudecode_buffer_to_session or {}
20+
local buffer_to_session = _G._claudecode_buffer_to_session
21+
22+
---Terminate processes orphaned by a previous load of this module (Fix 3: Plugin Reload Protection).
---Walks the PID table that survives reloads; for every job id that Neovim can no
---longer resolve, sends SIGTERM to the recorded PID's direct children and then to
---the PID itself, and drops the entry from tracking.
local function cleanup_orphaned_pids()
  for tracked_job, tracked_pid in pairs(tracked_pids) do
    -- A failing jobpid() call is treated as "this job no longer exists".
    local job_alive = pcall(vim.fn.jobpid, tracked_job)
    if not job_alive then
      if tracked_pid and tracked_pid > 0 then
        -- Children first, then the tracked process; failures are ignored (best effort).
        pcall(vim.fn.system, "pkill -TERM -P " .. tracked_pid .. " 2>/dev/null")
        pcall(vim.fn.system, "kill -TERM " .. tracked_pid .. " 2>/dev/null")
      end
      -- Clearing an existing key while iterating with pairs() is permitted in Lua.
      tracked_pids[tracked_job] = nil
    end
  end
end
38+
39+
-- Run cleanup on module load
40+
cleanup_orphaned_pids()
41+
42+
---Remember the OS PID behind a terminal job so it can be killed on exit.
---Does nothing when `job_id` is nil/false or when the PID lookup fails.
---@param job_id number The Neovim job ID
function M.track_terminal_pid(job_id)
  if job_id then
    local succeeded, unix_pid = pcall(vim.fn.jobpid, job_id)
    -- Only positive PIDs from a successful lookup are recorded.
    if succeeded and unix_pid and unix_pid > 0 then
      tracked_pids[job_id] = unix_pid
    end
  end
end
53+
54+
---Forget a terminal job's PID (called when the terminal exits normally).
---@param job_id number The Neovim job ID
function M.untrack_terminal_pid(job_id)
  if not job_id then
    return
  end
  tracked_pids[job_id] = nil
end
61+
62+
---Associate a buffer with a session so the session can be cleaned up on BufUnload (Fix 1).
---Ignored unless both arguments are truthy.
---@param bufnr number The buffer number
---@param session_id string The session ID
function M.register_buffer_session(bufnr, session_id)
  if not (bufnr and session_id) then
    return
  end
  buffer_to_session[bufnr] = session_id
end
70+
71+
---Drop a buffer's session association (called when the session is properly destroyed).
---@param bufnr number The buffer number
function M.unregister_buffer_session(bufnr)
  if not bufnr then
    return
  end
  buffer_to_session[bufnr] = nil
end
78+
79+
-- Global BufUnload handler that destroys sessions whose buffer disappears (Fix 1: Zombie Sessions).
-- Covers :bd! and other direct buffer deletions that bypass close_session().
vim.api.nvim_create_autocmd("BufUnload", {
  group = vim.api.nvim_create_augroup("ClaudeCodeBufferCleanup", { clear = true }),
  callback = function(args)
    local orphan_id = buffer_to_session[args.buf]
    if not orphan_id then
      return
    end

    -- Forget the mapping first, then destroy the session only if it is still registered.
    buffer_to_session[args.buf] = nil
    if not session_manager.get_session(orphan_id) then
      return
    end

    require("claudecode.logger").debug("terminal", "Auto-destroying orphaned session on BufUnload: " .. orphan_id)
    session_manager.destroy_session(orphan_id)
  end,
})
96+
1297
---@type ClaudeCodeTerminalConfig
1398
local defaults = {
1499
split_side = "right",
@@ -33,6 +118,12 @@ local defaults = {
33118
-- Smart ESC handling: timeout in ms to wait for second ESC before sending ESC to terminal
34119
-- Set to nil or 0 to disable smart ESC handling (use simple keymap instead)
35120
esc_timeout = 200,
121+
-- Process cleanup strategy when Neovim exits
122+
-- "pkill_children" - Kill child processes first, then shell (recommended, fixes race condition)
123+
-- "jobstop_only" - Only use Neovim's jobstop (relies on shell forwarding SIGTERM)
124+
-- "aggressive" - Use SIGKILL for guaranteed termination (may leave state)
125+
-- "none" - Don't kill processes on exit (manual cleanup)
126+
cleanup_strategy = "pkill_children",
36127
-- Tab bar for session switching (optional)
37128
tabs = {
38129
enabled = false, -- Off by default
@@ -711,6 +802,18 @@ function M.setup(user_term_config, p_terminal_cmd, p_env)
711802
vim.log.levels.WARN
712803
)
713804
end
805+
elseif k == "cleanup_strategy" then
806+
local valid_strategies = { pkill_children = true, jobstop_only = true, aggressive = true, none = true }
807+
if valid_strategies[v] then
808+
defaults.cleanup_strategy = v
809+
else
810+
vim.notify(
811+
"claudecode.terminal.setup: Invalid value for cleanup_strategy: "
812+
.. tostring(v)
813+
.. ". Must be one of: pkill_children, jobstop_only, aggressive, none.",
814+
vim.log.levels.WARN
815+
)
816+
end
714817
elseif k == "tabs" then
715818
if type(v) == "table" then
716819
defaults.tabs = defaults.tabs or {}
@@ -1195,4 +1298,160 @@ function M.ensure_session()
11951298
return session_manager.ensure_session()
11961299
end
11971300

1301+
---Cleanup all terminal processes (called on Neovim exit).
---Ensures no orphan Claude processes remain by killing all terminal jobs.
---Uses the configured cleanup_strategy to determine how processes are terminated:
---  "pkill_children" - SIGTERM to process group, children and the process, short wait, then SIGKILL
---  "aggressive"     - SIGKILL to children and the process immediately
---  "jobstop_only"   - no signals; only vim.fn.jobstop()
---  "none"           - nothing is killed or stopped; tracking is just cleared
---Implements defense-in-depth: recovers PIDs from sessions and terminal buffers
---even if they weren't properly tracked.
---NOTE(review): buffer recovery considers every buffer whose buftype is "terminal",
---not only ones created by this plugin - confirm that is intended.
function M.cleanup_all()
  local logger = require("claudecode.logger")
  local strategy = defaults.cleanup_strategy or "pkill_children"

  ---Record the PID behind `job_id` unless the job is already tracked.
  ---@param job_id any Candidate Neovim job ID (ignored when nil/false)
  ---@param origin string Human-readable source of the job ID, used in the log line
  local function recover_pid(job_id, origin)
    if not job_id or tracked_pids[job_id] then
      return
    end
    local pid_ok, pid = pcall(vim.fn.jobpid, job_id)
    if pid_ok and type(pid) == "number" and pid > 0 then
      tracked_pids[job_id] = pid
      logger.debug("terminal", "Recovered PID " .. pid .. " from " .. origin)
    end
  end

  ---Replace the tracking table (both the module upvalue and the reload-surviving global).
  local function reset_tracking()
    tracked_pids = {}
    _G._claudecode_tracked_pids = tracked_pids
  end

  -- Defense-in-depth: Recover PIDs from session manager.
  -- list_sessions() is guarded so a failure here cannot abort the rest of the cleanup.
  local session_mgr_ok, session_mgr = pcall(require, "claudecode.session")
  if session_mgr_ok and session_mgr.list_sessions then
    local list_ok, sessions = pcall(session_mgr.list_sessions)
    if list_ok and type(sessions) == "table" then
      for _, session in ipairs(sessions) do
        recover_pid(session.terminal_jobid, "session " .. tostring(session.id))
      end
    end
  end

  -- Defense-in-depth: Recover PIDs from terminal buffers that have no tracked job.
  local list_bufs_ok, bufs = pcall(vim.api.nvim_list_bufs)
  if list_bufs_ok and bufs then
    for _, bufnr in ipairs(bufs) do
      local valid_ok, is_valid = pcall(vim.api.nvim_buf_is_valid, bufnr)
      if valid_ok and is_valid then
        local buftype_ok, buftype = pcall(vim.api.nvim_get_option_value, "buftype", { buf = bufnr })
        if buftype_ok and buftype == "terminal" then
          local job_ok, job_id = pcall(vim.api.nvim_buf_get_var, bufnr, "terminal_job_id")
          if job_ok then
            recover_pid(job_id, "terminal buffer " .. bufnr)
          end
        end
      end
    end
  end

  -- Collect PIDs and job IDs first (don't stop jobs yet - that's the race condition!)
  local pids_to_kill, job_ids_to_stop = {}, {}
  for job_id, pid in pairs(tracked_pids) do
    -- The table lives in _G, so be defensive about what it contains before
    -- the value is interpolated into a shell command.
    if type(pid) == "number" and pid > 0 then
      pids_to_kill[#pids_to_kill + 1] = pid
    end
    job_ids_to_stop[#job_ids_to_stop + 1] = job_id
  end

  logger.debug("terminal", "cleanup_all: strategy=" .. strategy .. ", found " .. #pids_to_kill .. " PIDs")

  -- Handle "none" strategy - clear tracking but don't kill or stop anything
  if strategy == "none" then
    logger.debug("terminal", "cleanup_all: strategy=none, skipping process cleanup")
    reset_tracking()
    return
  end

  ---Build one shell command that signals every collected PID.
  ---@param signal string Signal name without the "SIG" prefix, e.g. "TERM" or "KILL"
  ---@param include_group boolean Also signal the process group whose id equals the PID
  ---@return string
  local function build_signal_cmd(signal, include_group)
    local parts = {}
    for _, pid in ipairs(pids_to_kill) do
      if include_group then
        -- Negative PID targets the process group (catches all descendants in that group)
        parts[#parts + 1] = string.format("kill -%s -%d 2>/dev/null", signal, pid)
      end
      -- Direct children, then the process itself
      parts[#parts + 1] = string.format("pkill -%s -P %d 2>/dev/null", signal, pid)
      parts[#parts + 1] = string.format("kill -%s %d 2>/dev/null", signal, pid)
    end
    -- Trailing "true" keeps the overall exit status at zero
    return table.concat(parts, "; ") .. "; true"
  end

  ---Run a kill command, logging it through the plugin logger; errors never propagate.
  ---@param label string Short description used in log lines
  ---@param cmd string Shell command to execute
  local function run_kill(label, cmd)
    logger.debug("terminal", "cleanup_all: " .. label .. ": " .. cmd)
    local ran_ok, err = pcall(vim.fn.system, cmd)
    if not ran_ok then
      logger.debug("terminal", "cleanup_all: " .. label .. " failed: " .. tostring(err))
    end
  end

  if #pids_to_kill > 0 then
    if strategy == "pkill_children" then
      -- Kill children FIRST to fix the race condition: this must happen BEFORE
      -- jobstop(), otherwise the shell is killed before its children.
      run_kill("pkill_children command", build_signal_cmd("TERM", true))

      -- Give processes ~100ms to die gracefully. Prefer libuv's blocking sleep
      -- (no subprocess, no reliance on fractional `sleep`); fall back to the shell.
      local uv = vim.uv or vim.loop
      if uv and uv.sleep then
        uv.sleep(100)
      else
        pcall(vim.fn.system, "sleep 0.1")
      end

      -- Second pass: kill any survivors with SIGKILL
      run_kill("SIGKILL followup", build_signal_cmd("KILL", true))
      logger.debug("terminal", "cleanup_all: killed process trees of PIDs: " .. table.concat(pids_to_kill, ", "))
    elseif strategy == "aggressive" then
      -- SIGKILL children and the process itself straight away
      run_kill("aggressive kill command", build_signal_cmd("KILL", false))
      logger.debug("terminal", "cleanup_all: aggressively killed PIDs: " .. table.concat(pids_to_kill, ", "))
    end
  end

  -- Stop jobs via Neovim API (all strategies except "none")
  for _, job_id in ipairs(job_ids_to_stop) do
    pcall(vim.fn.jobstop, job_id)
  end

  -- Clear tracked PIDs (update both local and global)
  reset_tracking()
end
1456+
11981457
return M

lua/claudecode/terminal/external.lua

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,11 +137,24 @@ function M.open(cmd_string, env_table)
137137
cleanup_state()
138138
return
139139
end
140+
141+
-- Track PID for cleanup on Neovim exit
142+
local terminal_ok, terminal_module = pcall(require, "claudecode.terminal")
143+
if terminal_ok and terminal_module.track_terminal_pid then
144+
terminal_module.track_terminal_pid(jobid)
145+
logger.debug("terminal", "Tracked external terminal PID for job_id: " .. tostring(jobid))
146+
end
140147
end
141148

142149
function M.close()
143150
if is_valid() then
144-
-- Try to stop the job gracefully
151+
-- Kill child processes first (Fix 2: same pattern as native/snacks)
152+
-- Shell wrappers like fish don't forward SIGTERM to child processes
153+
local pid_ok, pid = pcall(vim.fn.jobpid, jobid)
154+
if pid_ok and pid and pid > 0 then
155+
pcall(vim.fn.system, "pkill -TERM -P " .. pid .. " 2>/dev/null")
156+
end
157+
-- Then stop the job gracefully
145158
vim.fn.jobstop(jobid)
146159
cleanup_state()
147160
end

0 commit comments

Comments
 (0)