Skip to content

Commit a7e17f1

Browse files
committed
fix(agents): improve process termination with multi-layer kill strategy
Resolves #87 and #9 by implementing a robust three-tier process termination approach:

1. ProcessRegistry kill - primary method using run_id tracking
2. ClaudeProcessState kill - fallback via stored process handle
3. System kill command - last resort using PID and OS commands

Key improvements:

- Enhanced logging throughout the termination flow for better debugging
- Graceful fallback between termination methods
- Proper UI state management even when backend termination fails
- Track run_id in the AgentExecution component for targeted process killing
- Comprehensive error handling with user-friendly feedback
- Consistent event emission for UI synchronization

This ensures agents can be properly stopped without requiring an application restart, addressing the core issue where STOP requests were ignored and processes continued running.
1 parent e8c54d7 commit a7e17f1

4 files changed

Lines changed: 170 additions & 42 deletions

File tree

src-tauri/src/commands/claude.rs

Lines changed: 76 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -917,50 +917,99 @@ pub async fn cancel_claude_execution(
917917
session_id
918918
);
919919

920-
let killed = if let Some(sid) = &session_id {
921-
// Try to find and kill via ProcessRegistry first
920+
let mut killed = false;
921+
let mut attempted_methods = Vec::new();
922+
923+
// Method 1: Try to find and kill via ProcessRegistry using session ID
924+
if let Some(sid) = &session_id {
922925
let registry = app.state::<crate::process::ProcessRegistryState>();
923-
if let Ok(Some(process_info)) = registry.0.get_claude_session_by_id(sid) {
924-
match registry.0.kill_process(process_info.run_id).await {
925-
Ok(success) => success,
926-
Err(e) => {
927-
log::warn!("Failed to kill via registry: {}", e);
928-
false
926+
match registry.0.get_claude_session_by_id(sid) {
927+
Ok(Some(process_info)) => {
928+
log::info!("Found process in registry for session {}: run_id={}, PID={}",
929+
sid, process_info.run_id, process_info.pid);
930+
match registry.0.kill_process(process_info.run_id).await {
931+
Ok(success) => {
932+
if success {
933+
log::info!("Successfully killed process via registry");
934+
killed = true;
935+
} else {
936+
log::warn!("Registry kill returned false");
937+
}
938+
}
939+
Err(e) => {
940+
log::warn!("Failed to kill via registry: {}", e);
941+
}
929942
}
943+
attempted_methods.push("registry");
944+
}
945+
Ok(None) => {
946+
log::warn!("Session {} not found in ProcessRegistry", sid);
947+
}
948+
Err(e) => {
949+
log::error!("Error querying ProcessRegistry: {}", e);
930950
}
931-
} else {
932-
false
933951
}
934-
} else {
935-
false
936-
};
952+
}
937953

938-
// If registry kill didn't work, try the legacy approach
954+
// Method 2: Try the legacy approach via ClaudeProcessState
939955
if !killed {
940956
let claude_state = app.state::<ClaudeProcessState>();
941957
let mut current_process = claude_state.current_process.lock().await;
942958

943959
if let Some(mut child) = current_process.take() {
944960
// Try to get the PID before killing
945961
let pid = child.id();
946-
log::info!("Attempting to kill Claude process with PID: {:?}", pid);
962+
log::info!("Attempting to kill Claude process via ClaudeProcessState with PID: {:?}", pid);
947963

948964
// Kill the process
949965
match child.kill().await {
950966
Ok(_) => {
951-
log::info!("Successfully killed Claude process");
967+
log::info!("Successfully killed Claude process via ClaudeProcessState");
968+
killed = true;
952969
}
953970
Err(e) => {
954-
log::error!("Failed to kill Claude process: {}", e);
955-
return Err(format!("Failed to kill Claude process: {}", e));
971+
log::error!("Failed to kill Claude process via ClaudeProcessState: {}", e);
972+
973+
// Method 3: If we have a PID, try system kill as last resort
974+
if let Some(pid) = pid {
975+
log::info!("Attempting system kill as last resort for PID: {}", pid);
976+
let kill_result = if cfg!(target_os = "windows") {
977+
std::process::Command::new("taskkill")
978+
.args(["/F", "/PID", &pid.to_string()])
979+
.output()
980+
} else {
981+
std::process::Command::new("kill")
982+
.args(["-KILL", &pid.to_string()])
983+
.output()
984+
};
985+
986+
match kill_result {
987+
Ok(output) if output.status.success() => {
988+
log::info!("Successfully killed process via system command");
989+
killed = true;
990+
}
991+
Ok(output) => {
992+
let stderr = String::from_utf8_lossy(&output.stderr);
993+
log::error!("System kill failed: {}", stderr);
994+
}
995+
Err(e) => {
996+
log::error!("Failed to execute system kill command: {}", e);
997+
}
998+
}
999+
}
9561000
}
9571001
}
1002+
attempted_methods.push("claude_state");
9581003
} else {
959-
log::warn!("No active Claude process to cancel");
1004+
log::warn!("No active Claude process in ClaudeProcessState");
9601005
}
9611006
}
9621007

963-
// Emit cancellation events
1008+
if !killed && attempted_methods.is_empty() {
1009+
log::warn!("No active Claude process found to cancel");
1010+
}
1011+
1012+
// Always emit cancellation events for UI consistency
9641013
if let Some(sid) = session_id {
9651014
let _ = app.emit(&format!("claude-cancelled:{}", sid), true);
9661015
tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
@@ -972,6 +1021,12 @@ pub async fn cancel_claude_execution(
9721021
tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
9731022
let _ = app.emit("claude-complete", false);
9741023

1024+
if killed {
1025+
log::info!("Claude process cancellation completed successfully");
1026+
} else if !attempted_methods.is_empty() {
1027+
log::warn!("Claude process cancellation attempted but process may have already exited. Attempted methods: {:?}", attempted_methods);
1028+
}
1029+
9751030
Ok(())
9761031
}
9771032

@@ -2063,3 +2118,4 @@ pub async fn track_session_messages(
20632118
}
20642119
Ok(())
20652120
}
2121+

src-tauri/src/process/registry.rs

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,7 @@ impl ProcessRegistry {
213213
if let Some(handle) = processes.get(&run_id) {
214214
(handle.info.pid, handle.child.clone())
215215
} else {
216+
warn!("Process {} not found in registry", run_id);
216217
return Ok(false); // Process not found
217218
}
218219
};
@@ -233,16 +234,25 @@ impl ProcessRegistry {
233234
}
234235
Err(e) => {
235236
error!("Failed to send kill signal to process {}: {}", run_id, e);
236-
return Err(format!("Failed to kill process: {}", e));
237+
// Don't return error here, try fallback method
238+
false
237239
}
238240
}
239241
} else {
240-
false // Process already killed
242+
warn!("No child handle available for process {} (PID: {}), attempting system kill", run_id, pid);
243+
false // Process handle not available, try fallback
241244
}
242245
};
243246

247+
// If direct kill didn't work, try system command as fallback
244248
if !kill_sent {
245-
return Ok(false);
249+
info!("Attempting fallback kill for process {} (PID: {})", run_id, pid);
250+
match self.kill_process_by_pid(run_id, pid) {
251+
Ok(true) => return Ok(true),
252+
Ok(false) => warn!("Fallback kill also failed for process {} (PID: {})", run_id, pid),
253+
Err(e) => error!("Error during fallback kill: {}", e),
254+
}
255+
// Continue with the rest of the cleanup even if fallback failed
246256
}
247257

248258
// Wait for the process to exit (with timeout)
@@ -297,6 +307,8 @@ impl ProcessRegistry {
297307
if let Ok(mut child_guard) = child_arc.lock() {
298308
*child_guard = None;
299309
}
310+
// One more attempt with system kill
311+
let _ = self.kill_process_by_pid(run_id, pid);
300312
}
301313
}
302314

src/components/AgentExecution.tsx

Lines changed: 60 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ export const AgentExecution: React.FC<AgentExecutionProps> = ({
9292
const fullscreenMessagesEndRef = useRef<HTMLDivElement>(null);
9393
const unlistenRefs = useRef<UnlistenFn[]>([]);
9494
const elapsedTimeIntervalRef = useRef<NodeJS.Timeout | null>(null);
95+
const [runId, setRunId] = useState<number | null>(null);
9596

9697
// Filter out messages that shouldn't be displayed
9798
const displayableMessages = React.useMemo(() => {
@@ -266,24 +267,24 @@ export const AgentExecution: React.FC<AgentExecutionProps> = ({
266267
};
267268

268269
const handleExecute = async () => {
269-
if (!projectPath || !task.trim()) return;
270-
271-
let runId: number | null = null;
272-
273270
try {
274271
setIsRunning(true);
275-
setError(null);
272+
setExecutionStartTime(Date.now());
276273
setMessages([]);
277274
setRawJsonlOutput([]);
278-
setExecutionStartTime(Date.now());
279-
setElapsedTime(0);
280-
setTotalTokens(0);
281-
282-
// Execute the agent with model override and get run ID
283-
runId = await api.executeAgent(agent.id!, projectPath, task, model);
275+
setRunId(null);
276+
277+
// Clear any existing listeners
278+
unlistenRefs.current.forEach(unlisten => unlisten());
279+
unlistenRefs.current = [];
280+
281+
// Execute the agent and get the run ID
282+
const executionRunId = await api.executeAgent(agent.id!, projectPath, task, model);
283+
console.log("Agent execution started with run ID:", executionRunId);
284+
setRunId(executionRunId);
284285

285286
// Set up event listeners with run ID isolation
286-
const outputUnlisten = await listen<string>(`agent-output:${runId}`, (event) => {
287+
const outputUnlisten = await listen<string>(`agent-output:${executionRunId}`, (event) => {
287288
try {
288289
// Store raw JSONL
289290
setRawJsonlOutput(prev => [...prev, event.payload]);
@@ -296,20 +297,20 @@ export const AgentExecution: React.FC<AgentExecutionProps> = ({
296297
}
297298
});
298299

299-
const errorUnlisten = await listen<string>(`agent-error:${runId}`, (event) => {
300+
const errorUnlisten = await listen<string>(`agent-error:${executionRunId}`, (event) => {
300301
console.error("Agent error:", event.payload);
301302
setError(event.payload);
302303
});
303304

304-
const completeUnlisten = await listen<boolean>(`agent-complete:${runId}`, (event) => {
305+
const completeUnlisten = await listen<boolean>(`agent-complete:${executionRunId}`, (event) => {
305306
setIsRunning(false);
306307
setExecutionStartTime(null);
307308
if (!event.payload) {
308309
setError("Agent execution failed");
309310
}
310311
});
311312

312-
const cancelUnlisten = await listen<boolean>(`agent-cancelled:${runId}`, () => {
313+
const cancelUnlisten = await listen<boolean>(`agent-cancelled:${executionRunId}`, () => {
313314
setIsRunning(false);
314315
setExecutionStartTime(null);
315316
setError("Agent execution was cancelled");
@@ -318,16 +319,41 @@ export const AgentExecution: React.FC<AgentExecutionProps> = ({
318319
unlistenRefs.current = [outputUnlisten, errorUnlisten, completeUnlisten, cancelUnlisten];
319320
} catch (err) {
320321
console.error("Failed to execute agent:", err);
321-
setError("Failed to execute agent");
322322
setIsRunning(false);
323323
setExecutionStartTime(null);
324+
setRunId(null);
325+
// Show error in messages
326+
setMessages(prev => [...prev, {
327+
type: "result",
328+
subtype: "error",
329+
is_error: true,
330+
result: `Failed to execute agent: ${err instanceof Error ? err.message : 'Unknown error'}`,
331+
duration_ms: 0,
332+
usage: {
333+
input_tokens: 0,
334+
output_tokens: 0
335+
}
336+
}]);
324337
}
325338
};
326339

327340
const handleStop = async () => {
328341
try {
329-
// TODO: Implement actual stop functionality via API
330-
// For now, just update the UI state
342+
if (!runId) {
343+
console.error("No run ID available to stop");
344+
return;
345+
}
346+
347+
// Call the API to kill the agent session
348+
const success = await api.killAgentSession(runId);
349+
350+
if (success) {
351+
console.log(`Successfully stopped agent session ${runId}`);
352+
} else {
353+
console.warn(`Failed to stop agent session ${runId} - it may have already finished`);
354+
}
355+
356+
// Update UI state
331357
setIsRunning(false);
332358
setExecutionStartTime(null);
333359

@@ -349,6 +375,22 @@ export const AgentExecution: React.FC<AgentExecutionProps> = ({
349375
}]);
350376
} catch (err) {
351377
console.error("Failed to stop agent:", err);
378+
// Still update UI state even if the backend call failed
379+
setIsRunning(false);
380+
setExecutionStartTime(null);
381+
382+
// Show error message
383+
setMessages(prev => [...prev, {
384+
type: "result",
385+
subtype: "error",
386+
is_error: true,
387+
result: `Failed to stop execution: ${err instanceof Error ? err.message : 'Unknown error'}`,
388+
duration_ms: elapsedTime * 1000,
389+
usage: {
390+
input_tokens: totalTokens,
391+
output_tokens: 0
392+
}
393+
}]);
352394
}
353395
};
354396

src/components/ClaudeCodeSession.tsx

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -606,7 +606,25 @@ export const ClaudeCodeSession: React.FC<ClaudeCodeSessionProps> = ({
606606
setError(null);
607607
} catch (err) {
608608
console.error("Failed to cancel execution:", err);
609-
setError("Failed to cancel execution");
609+
610+
// Even if backend fails, we should update UI to reflect stopped state
611+
// Add error message but still stop the UI loading state
612+
const errorMessage: ClaudeStreamMessage = {
613+
type: "system",
614+
subtype: "error",
615+
result: `Failed to cancel execution: ${err instanceof Error ? err.message : 'Unknown error'}. The process may still be running in the background.`,
616+
timestamp: new Date().toISOString()
617+
};
618+
setMessages(prev => [...prev, errorMessage]);
619+
620+
// Clean up listeners anyway
621+
unlistenRefs.current.forEach(unlisten => unlisten());
622+
unlistenRefs.current = [];
623+
624+
// Reset states to allow user to continue
625+
setIsLoading(false);
626+
hasActiveSessionRef.current = false;
627+
setError(null);
610628
} finally {
611629
setIsCancelling(false);
612630
}

0 commit comments

Comments
 (0)