diff --git a/.changeset/cold-pumas-hide.md b/.changeset/cold-pumas-hide.md new file mode 100644 index 0000000000..a2fb5b8d3c --- /dev/null +++ b/.changeset/cold-pumas-hide.md @@ -0,0 +1,6 @@ +--- +"@e2b/python-sdk": patch +"e2b": patch +--- + +Fix `commands.kill()` / command handle `kill()` leaving child processes running. envd's `SendSignal` only signals the single process it manages, so processes spawned by the command kept running (and kept consuming resources) after `kill()`. `kill()` now terminates the command together with its whole process tree. diff --git a/packages/js-sdk/src/sandbox/commands/index.ts b/packages/js-sdk/src/sandbox/commands/index.ts index 108a750739..bcf1219f20 100644 --- a/packages/js-sdk/src/sandbox/commands/index.ts +++ b/packages/js-sdk/src/sandbox/commands/index.ts @@ -1,10 +1,4 @@ -import { - Client, - Code, - ConnectError, - Transport, - createClient, -} from '@connectrpc/connect' +import { Client, Transport, createClient } from '@connectrpc/connect' import { compareVersions } from 'compare-versions' import { @@ -16,16 +10,40 @@ import { Username, } from '../../connectionConfig' import { handleProcessStartEvent } from '../../envd/api' -import { - Process as ProcessService, - Signal, -} from '../../envd/process/process_pb' +import { Process as ProcessService } from '../../envd/process/process_pb' import { authenticationHeader, handleRpcError } from '../../envd/rpc' import { ENVD_COMMANDS_STDIN, ENVD_ENVD_CLOSE } from '../../envd/versions' import { SandboxError } from '../../errors' import { CommandHandle, CommandResult } from './commandHandle' export { Pty } from './pty' +/** + * Build a shell command that terminates `pid` together with all of its + * descendant processes. + * + * envd's `SendSignal` RPC only delivers a signal to the single process it + * manages, so any child processes the command spawned keep running after a + * plain kill (see https://github.com/e2b-dev/E2B/issues/1034). This walks the + * process tree with `pgrep` and sends `SIGKILL` from the leaves up so the whole + * tree is reliably stopped. + * + * The command prints `1` if `pid` existed (so the caller can report whether a + * process was found) or `0` otherwise. It degrades gracefully to killing just + * `pid` if `pgrep` is unavailable in the sandbox. + */ +function killProcessTreeCommand(pid: number): string { + return ( + '__e2b_kill_tree() { ' + + 'local child; ' + + 'for child in $(pgrep -P "$1" 2>/dev/null); do __e2b_kill_tree "$child"; done; ' + + 'kill -KILL "$1" 2>/dev/null || true; ' + + '}; ' + + `if kill -0 ${pid} 2>/dev/null; then __e2b_found=1; else __e2b_found=0; fi; ` + + `__e2b_kill_tree ${pid}; ` + + 'echo "$__e2b_found"' + ) +} + /** * Options for sending a command request. */ @@ -259,7 +277,7 @@ export class Commands { /** * Kill a running command specified by its process ID. - * It uses `SIGKILL` signal to kill the command. + * It uses `SIGKILL` signal to kill the command and all of its child processes. * * @param pid process ID of the command. You can get the list of running commands using {@link Commands.list}. * @param opts connection options. @@ -267,35 +285,15 @@ export class Commands { * @returns `true` if the command was killed, `false` if the command was not found. */ async kill(pid: number, opts?: CommandRequestOpts): Promise { - try { - await this.rpc.sendSignal( - { - process: { - selector: { - case: 'pid', - value: pid, - }, - }, - signal: Signal.SIGKILL, - }, - { - signal: this.connectionConfig.getSignal( - opts?.requestTimeoutMs, - opts?.signal - ), - } - ) - - return true - } catch (err) { - if (err instanceof ConnectError) { - if (err.code === Code.NotFound) { - return false - } - } - - throw handleRpcError(err) - } + // envd's SendSignal RPC only signals the single process it manages, so any + // child processes the command spawned would keep running. Kill the whole + // process tree from inside the sandbox instead (see issue #1034). + const result = await this.run(killProcessTreeCommand(pid), { + requestTimeoutMs: opts?.requestTimeoutMs, + signal: opts?.signal, + }) + + return result.stdout.trim() === '1' } /** diff --git a/packages/js-sdk/tests/sandbox/commands/kill.test.ts b/packages/js-sdk/tests/sandbox/commands/kill.test.ts index 4eddcc4460..f8a31612e8 100644 --- a/packages/js-sdk/tests/sandbox/commands/kill.test.ts +++ b/packages/js-sdk/tests/sandbox/commands/kill.test.ts @@ -7,13 +7,40 @@ sandboxTest('kill process', async ({ sandbox }) => { const cmd = await sandbox.commands.run('sleep 10', { background: true }) const pid = cmd.pid - await sandbox.commands.kill(pid) + await expect(sandbox.commands.kill(pid)).resolves.toBe(true) await expect(sandbox.commands.run(`kill -0 ${pid}`)).rejects.toThrowError( ProcessExitError ) }) +sandboxTest('kill process tree', async ({ sandbox }) => { + // Regression test for https://github.com/e2b-dev/E2B/issues/1034 + // + // envd's SendSignal RPC only signals the single process it manages, so child + // processes the command spawned used to keep running after kill(). Killing the + // command must terminate its whole process tree. + const cmd = await sandbox.commands.run('sleep 120 & sleep 120 & wait', { + background: true, + }) + + // Capture the child PIDs while the leader is still alive. + const children = await sandbox.commands.run(`pgrep -P ${cmd.pid}`) + const childPids = children.stdout.split(/\s+/).filter(Boolean) + expect(childPids).toHaveLength(2) + + await expect(cmd.kill()).resolves.toBe(true) + + // The leader and every child must be gone. + await expect(sandbox.commands.run(`kill -0 ${cmd.pid}`)).rejects.toThrowError( + ProcessExitError + ) + const alive = await sandbox.commands.run( + `for p in ${childPids.join(' ')}; do kill -0 $p 2>/dev/null && echo $p; done; true` + ) + expect(alive.stdout.trim()).toBe('') +}) + sandboxTest('kill non-existing process', async ({ sandbox }) => { const nonExistingPid = 999999 diff --git a/packages/python-sdk/e2b/sandbox/commands/main.py b/packages/python-sdk/e2b/sandbox/commands/main.py index 67d52ed35e..dddac0007b 100644 --- a/packages/python-sdk/e2b/sandbox/commands/main.py +++ b/packages/python-sdk/e2b/sandbox/commands/main.py @@ -2,6 +2,33 @@ from typing import Dict, List, Optional +def kill_process_tree_command(pid: int) -> str: + """ + Build a shell command that terminates the process ``pid`` together with all + of its descendant processes. + + envd's ``SendSignal`` RPC only delivers a signal to the single process it + manages, so any child processes the command spawned keep running after a + plain ``kill`` (see https://github.com/e2b-dev/E2B/issues/1034). This walks + the process tree with ``pgrep`` and sends ``SIGKILL`` from the leaves up so + the whole tree is reliably stopped. + + The command prints ``1`` if ``pid`` existed (so the caller can report whether + a process was found) or ``0`` otherwise. It degrades gracefully to killing + just ``pid`` if ``pgrep`` is unavailable in the sandbox. + """ + return ( + "__e2b_kill_tree() { " + "local child; " + 'for child in $(pgrep -P "$1" 2>/dev/null); do __e2b_kill_tree "$child"; done; ' + 'kill -KILL "$1" 2>/dev/null || true; ' + "}; " + f"if kill -0 {pid} 2>/dev/null; then __e2b_found=1; else __e2b_found=0; fi; " + f"__e2b_kill_tree {pid}; " + 'echo "$__e2b_found"' + ) + + @dataclass class ProcessInfo: """ diff --git a/packages/python-sdk/e2b/sandbox_async/commands/command.py b/packages/python-sdk/e2b/sandbox_async/commands/command.py index 32b75fd26b..83cad56a16 100644 --- a/packages/python-sdk/e2b/sandbox_async/commands/command.py +++ b/packages/python-sdk/e2b/sandbox_async/commands/command.py @@ -1,6 +1,5 @@ from typing import Dict, List, Literal, Optional, Union, overload -import e2b_connect import httpcore from packaging.version import Version from e2b.connection_config import ( @@ -13,7 +12,7 @@ from e2b.envd.rpc import authentication_header, handle_rpc_exception from e2b.envd.versions import ENVD_COMMANDS_STDIN from e2b.exceptions import SandboxException -from e2b.sandbox.commands.main import ProcessInfo +from e2b.sandbox.commands.main import ProcessInfo, kill_process_tree_command from e2b.sandbox.commands.command_handle import CommandResult from e2b.sandbox_async.commands.command_handle import AsyncCommandHandle, Stderr, Stdout from e2b.sandbox_async.utils import OutputHandler @@ -81,29 +80,21 @@ async def kill( ) -> bool: """ Kill a running command specified by its process ID. - It uses `SIGKILL` signal to kill the command. + It uses `SIGKILL` signal to kill the command and all of its child processes. :param pid: Process ID of the command. You can get the list of processes using `sandbox.commands.list()` :param request_timeout: Timeout for the request in **seconds** :return: `True` if the command was killed, `False` if the command was not found """ - try: - await self._rpc.asend_signal( - process_pb2.SendSignalRequest( - process=process_pb2.ProcessSelector(pid=pid), - signal=process_pb2.Signal.SIGNAL_SIGKILL, - ), - request_timeout=self._connection_config.get_request_timeout( - request_timeout - ), - ) - return True - except Exception as e: - if isinstance(e, e2b_connect.ConnectException): - if e.status == e2b_connect.Code.not_found: - return False - raise handle_rpc_exception(e) + # envd's SendSignal RPC only signals the single process it manages, so any + # child processes the command spawned would keep running. Kill the whole + # process tree from inside the sandbox instead (see issue #1034). + result = await self.run( + kill_process_tree_command(pid), + request_timeout=request_timeout, + ) + return result.stdout.strip() == "1" async def send_stdin( self, diff --git a/packages/python-sdk/e2b/sandbox_sync/commands/command.py b/packages/python-sdk/e2b/sandbox_sync/commands/command.py index 512b7d9923..90d59fe388 100644 --- a/packages/python-sdk/e2b/sandbox_sync/commands/command.py +++ b/packages/python-sdk/e2b/sandbox_sync/commands/command.py @@ -1,6 +1,5 @@ from typing import Callable, Dict, List, Literal, Optional, Union, overload -import e2b_connect import httpcore from packaging.version import Version from e2b.connection_config import ( @@ -13,7 +12,7 @@ from e2b.envd.rpc import authentication_header, handle_rpc_exception from e2b.envd.versions import ENVD_COMMANDS_STDIN from e2b.exceptions import SandboxException -from e2b.sandbox.commands.main import ProcessInfo +from e2b.sandbox.commands.main import ProcessInfo, kill_process_tree_command from e2b.sandbox.commands.command_handle import CommandResult from e2b.sandbox_sync.commands.command_handle import CommandHandle @@ -80,29 +79,21 @@ def kill( ) -> bool: """ Kills a running command specified by its process ID. - It uses `SIGKILL` signal to kill the command. + It uses `SIGKILL` signal to kill the command and all of its child processes. :param pid: Process ID of the command. You can get the list of processes using `sandbox.commands.list()` :param request_timeout: Timeout for the request in **seconds** :return: `True` if the command was killed, `False` if the command was not found """ - try: - self._rpc.send_signal( - process_pb2.SendSignalRequest( - process=process_pb2.ProcessSelector(pid=pid), - signal=process_pb2.Signal.SIGNAL_SIGKILL, - ), - request_timeout=self._connection_config.get_request_timeout( - request_timeout - ), - ) - return True - except Exception as e: - if isinstance(e, e2b_connect.ConnectException): - if e.status == e2b_connect.Code.not_found: - return False - raise handle_rpc_exception(e) + # envd's SendSignal RPC only signals the single process it manages, so any + # child processes the command spawned would keep running. Kill the whole + # process tree from inside the sandbox instead (see issue #1034). + result = self.run( + kill_process_tree_command(pid), + request_timeout=request_timeout, + ) + return result.stdout.strip() == "1" def send_stdin( self, diff --git a/packages/python-sdk/tests/async/sandbox_async/commands/test_cmd_kill.py b/packages/python-sdk/tests/async/sandbox_async/commands/test_cmd_kill.py index a92f9b5a8a..663a84b15d 100644 --- a/packages/python-sdk/tests/async/sandbox_async/commands/test_cmd_kill.py +++ b/packages/python-sdk/tests/async/sandbox_async/commands/test_cmd_kill.py @@ -7,12 +7,39 @@ async def test_kill_process(async_sandbox: AsyncSandbox): cmd = await async_sandbox.commands.run("sleep 10", background=True) pid = cmd.pid - await async_sandbox.commands.kill(pid) + assert await async_sandbox.commands.kill(pid) with pytest.raises(CommandExitException): await async_sandbox.commands.run(f"kill -0 {pid}") +async def test_kill_process_tree(async_sandbox: AsyncSandbox): + # Regression test for https://github.com/e2b-dev/E2B/issues/1034 + # + # envd's SendSignal RPC only signals the single process it manages, so child + # processes the command spawned used to keep running after kill(). Killing the + # command must terminate its whole process tree. + cmd = await async_sandbox.commands.run( + "sleep 120 & sleep 120 & wait", + background=True, + ) + + # Capture the child PIDs while the leader is still alive. + children = await async_sandbox.commands.run(f"pgrep -P {cmd.pid}") + child_pids = children.stdout.split() + assert len(child_pids) == 2 + + assert await cmd.kill() + + # The leader and every child must be gone. + with pytest.raises(CommandExitException): + await async_sandbox.commands.run(f"kill -0 {cmd.pid}") + alive = await async_sandbox.commands.run( + f"for p in {' '.join(child_pids)}; do kill -0 $p 2>/dev/null && echo $p; done; true" + ) + assert alive.stdout.strip() == "" + + async def test_kill_non_existing_process(async_sandbox: AsyncSandbox): non_existing_pid = 999999 diff --git a/packages/python-sdk/tests/sync/sandbox_sync/commands/test_cmd_kill.py b/packages/python-sdk/tests/sync/sandbox_sync/commands/test_cmd_kill.py index 45e7ca27bd..2c0d239cf8 100644 --- a/packages/python-sdk/tests/sync/sandbox_sync/commands/test_cmd_kill.py +++ b/packages/python-sdk/tests/sync/sandbox_sync/commands/test_cmd_kill.py @@ -7,12 +7,39 @@ def test_kill_process(sandbox: Sandbox): cmd = sandbox.commands.run("sleep 10", background=True) pid = cmd.pid - sandbox.commands.kill(pid) + assert sandbox.commands.kill(pid) with pytest.raises(CommandExitException): sandbox.commands.run(f"kill -0 {pid}") +def test_kill_process_tree(sandbox: Sandbox): + # Regression test for https://github.com/e2b-dev/E2B/issues/1034 + # + # envd's SendSignal RPC only signals the single process it manages, so child + # processes the command spawned used to keep running after kill(). Killing the + # command must terminate its whole process tree. + cmd = sandbox.commands.run( + "sleep 120 & sleep 120 & wait", + background=True, + ) + + # Capture the child PIDs while the leader is still alive. + children = sandbox.commands.run(f"pgrep -P {cmd.pid}") + child_pids = children.stdout.split() + assert len(child_pids) == 2 + + assert cmd.kill() + + # The leader and every child must be gone. + with pytest.raises(CommandExitException): + sandbox.commands.run(f"kill -0 {cmd.pid}") + alive = sandbox.commands.run( + f"for p in {' '.join(child_pids)}; do kill -0 $p 2>/dev/null && echo $p; done; true" + ) + assert alive.stdout.strip() == "" + + def test_kill_non_existing_process(sandbox): non_existing_pid = 999999