diff --git a/components/execd/pkg/runtime/command.go b/components/execd/pkg/runtime/command.go index 893956366..fcabe1414 100644 --- a/components/execd/pkg/runtime/command.go +++ b/components/execd/pkg/runtime/command.go @@ -170,7 +170,32 @@ func (c *Controller) runCommand(ctx context.Context, request *ExecuteCodeRequest safego.Go(func() { for { select { + case <-done: + // cmd.Wait() has returned (or start failed). The pid is + // about to be — or already has been — reaped, so we + // must not signal it. Execute()'s defer cancel() fires + // after every foreground command, including successful + // ones, so without this gate the SIGKILL below would + // run on a recycled pid/pgid and could kill an + // unrelated process group. + return case <-ctx.Done(): + // Re-check `done` to avoid a race with cmd.Wait() + // returning concurrently. If cmd.Wait() has just + // finished, the leader pid may be reaped and recycled + // at any moment; signaling -pid would then target a + // foreign process group. + select { + case <-done: + return + default: + } + // Genuine cancellation (timeout, client disconnect, + // Interrupt). Kill the whole process group so children + // don't outlive the cancelled context. + if cmd.Process != nil { + _ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) + } return case sig := <-signals: if sig == nil { diff --git a/components/execd/pkg/runtime/command_signal_test.go b/components/execd/pkg/runtime/command_signal_test.go new file mode 100644 index 000000000..bd23b52d3 --- /dev/null +++ b/components/execd/pkg/runtime/command_signal_test.go @@ -0,0 +1,246 @@ +// Copyright 2025 Alibaba Group Holding Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !windows +// +build !windows + +package runtime + +import ( + "context" + "errors" + "os" + "os/exec" + "path/filepath" + "strconv" + "strings" + "sync" + "syscall" + "testing" + "time" + + "github.com/alibaba/opensandbox/execd/pkg/jupyter/execute" + "github.com/stretchr/testify/require" +) + +// TestRunCommand_CancelKillsChildren verifies that cancelling the context +// terminates not only the bash group leader but also its descendant +// processes. Regression test for +// https://github.com/alibaba/OpenSandbox/issues/922. +func TestRunCommand_CancelKillsChildren(t *testing.T) { + if _, err := exec.LookPath("bash"); err != nil { + t.Skip("bash not found in PATH") + } + + pidFile := filepath.Join(t.TempDir(), "child.pid") + + c := NewController("", "") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + started := make(chan struct{}) + var once sync.Once + + req := &ExecuteCodeRequest{ + // Spawn a sleep child, record its pid, then wait so the bash + // leader stays alive until the context is cancelled. + Code: `sleep 30 & echo $! > "` + pidFile + `"; echo READY; wait`, + Cwd: t.TempDir(), + Timeout: 30 * time.Second, + Hooks: ExecuteResultHook{ + OnExecuteInit: func(_ string) {}, + OnExecuteStdout: func(s string) { + if strings.TrimSpace(s) == "READY" { + once.Do(func() { close(started) }) + } + }, + OnExecuteStderr: func(_ string) {}, + OnExecuteError: func(_ *execute.ErrorOutput) {}, + OnExecuteComplete: func(_ time.Duration) {}, + }, + } + + done := make(chan struct{}) + go func() { + _ = c.runCommand(ctx, req) + close(done) + }() + + select { + case <-started: + case <-time.After(10 * time.Second): + cancel() + <-done + t.Fatal("command did not emit READY in time") + } + + pidBytes, err := os.ReadFile(pidFile) + require.NoError(t, err, "expected child pid file") + childPid, err := strconv.Atoi(strings.TrimSpace(string(pidBytes))) + require.NoError(t, err) + require.Positive(t, childPid) + + require.NoError(t, syscall.Kill(childPid, 0), "child should be alive before cancel") + + cancel() + + select { + case <-done: + case <-time.After(5 * time.Second): + t.Fatal("runCommand did not return after cancel") + } + + deadline := time.Now().Add(2 * time.Second) + for time.Now().Before(deadline) { + if err := syscall.Kill(childPid, 0); err != nil { + require.True(t, errors.Is(err, syscall.ESRCH), + "unexpected liveness probe error: %v", err) + return + } + time.Sleep(50 * time.Millisecond) + } + t.Fatalf("child pid %d still alive 2s after cancel — process leak", childPid) +} + +// TestInterrupt_AfterFinished_ReturnsError verifies that an Interrupt +// arriving after the command has completed does not signal a recycled PID. +// Without this guard, group-wide kill would amplify the stale-PID hazard +// to every process in an unrelated process group. +func TestInterrupt_AfterFinished_ReturnsError(t *testing.T) { + if _, err := exec.LookPath("bash"); err != nil { + t.Skip("bash not found in PATH") + } + + c := NewController("", "") + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + var session string + completeCh := make(chan struct{}, 1) + req := &ExecuteCodeRequest{ + Code: `echo done`, + Cwd: t.TempDir(), + Timeout: 5 * time.Second, + Hooks: ExecuteResultHook{ + OnExecuteInit: func(s string) { session = s }, + OnExecuteStdout: func(_ string) {}, + OnExecuteStderr: func(_ string) {}, + OnExecuteError: func(_ *execute.ErrorOutput) {}, + OnExecuteComplete: func(_ time.Duration) { completeCh <- struct{}{} }, + }, + } + require.NoError(t, c.runCommand(ctx, req)) + + select { + case <-completeCh: + case <-time.After(3 * time.Second): + t.Fatal("command did not complete in time") + } + require.NotEmpty(t, session) + + err := c.Interrupt(session) + require.Error(t, err, "Interrupt on finished session must error") + require.Contains(t, err.Error(), "not running") + + snap := c.commandSnapshot(session) + require.NotNil(t, snap) + require.False(t, snap.running, "running flag should be cleared") + require.Equal(t, 0, snap.pid, "pid should be cleared to avoid stale-PID kill") +} + +// TestKillPid_ZombieLeaderDoesNotFail verifies that killPid does not +// return an error when a group leader becomes a zombie before its parent +// has reaped it. kill(-pid, 0) keeps reporting the group as observable +// while the zombie lingers, but SIGKILL has already been delivered and +// the kernel will tear the group down once Wait() runs. Treating that +// state as a failure caused Interrupt to surface a 500 even though the +// kill succeeded. +func TestKillPid_ZombieLeaderDoesNotFail(t *testing.T) { + if _, err := exec.LookPath("bash"); err != nil { + t.Skip("bash not found in PATH") + } + + cmd := exec.Command("bash", "-c", `sleep 30 & wait`) + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + require.NoError(t, cmd.Start()) + // Deliberately omit a reaper goroutine so the leader stays as a + // zombie after kill — that is the condition we want to exercise. + t.Cleanup(func() { + _ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) + _, _ = cmd.Process.Wait() + }) + + // Give bash a moment to spawn the sleep child so the group has more + // than just the leader. + time.Sleep(100 * time.Millisecond) + + c := &Controller{} + require.NoError(t, c.killPid(cmd.Process.Pid), + "slow post-SIGKILL teardown must not be reported as a hard failure") +} + +// TestKillPid_TerminatesEntireProcessGroup verifies that killPid signals +// the whole process group, not just the leader. Regression test for +// https://github.com/alibaba/OpenSandbox/issues/922. +func TestKillPid_TerminatesEntireProcessGroup(t *testing.T) { + if _, err := exec.LookPath("bash"); err != nil { + t.Skip("bash not found in PATH") + } + + pidFile := filepath.Join(t.TempDir(), "child.pid") + cmd := exec.Command("bash", "-c", + `sleep 30 & echo $! > "`+pidFile+`"; wait`) + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + require.NoError(t, cmd.Start()) + // Reap the leader concurrently so it doesn't linger as a zombie that + // keeps the process group "alive" from killPid's liveness probe + // perspective. Mirrors how runCommand's cmd.Wait() reaps in production. + waitDone := make(chan struct{}) + go func() { + _, _ = cmd.Process.Wait() + close(waitDone) + }() + t.Cleanup(func() { + _ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) + <-waitDone + }) + + var childPid int + deadline := time.Now().Add(3 * time.Second) + for time.Now().Before(deadline) { + if data, err := os.ReadFile(pidFile); err == nil { + if pid, perr := strconv.Atoi(strings.TrimSpace(string(data))); perr == nil && pid > 0 { + childPid = pid + break + } + } + time.Sleep(50 * time.Millisecond) + } + require.Positive(t, childPid, "failed to capture child pid") + require.NoError(t, syscall.Kill(childPid, 0), "child should be alive before kill") + + c := &Controller{} + require.NoError(t, c.killPid(cmd.Process.Pid)) + + deadline = time.Now().Add(2 * time.Second) + for time.Now().Before(deadline) { + if err := syscall.Kill(childPid, 0); err != nil { + require.True(t, errors.Is(err, syscall.ESRCH), + "unexpected liveness probe error: %v", err) + return + } + time.Sleep(50 * time.Millisecond) + } + t.Fatalf("child pid %d still alive 2s after killPid — process leak", childPid) +} diff --git a/components/execd/pkg/runtime/command_status.go b/components/execd/pkg/runtime/command_status.go index 6dbc6d4f2..c0883d0fc 100644 --- a/components/execd/pkg/runtime/command_status.go +++ b/components/execd/pkg/runtime/command_status.go @@ -40,6 +40,9 @@ type CommandOutput struct { } func (c *Controller) commandSnapshot(session string) *commandKernel { + c.mu.RLock() + defer c.mu.RUnlock() + var kernel *commandKernel if v, ok := c.commandClientMap.Load(session); ok { kernel, _ = v.(*commandKernel) @@ -128,4 +131,8 @@ func (c *Controller) markCommandFinished(session string, exitCode int, errMsg st kernel.errMsg = errMsg kernel.running = false kernel.finishedAt = &now + // Clear the PID so a late or retried Interrupt cannot signal a recycled + // process. Group-wide kill would otherwise amplify the impact of a + // stale-PID hit to every process in the unrelated process group. + kernel.pid = 0 } diff --git a/components/execd/pkg/runtime/interrupt.go b/components/execd/pkg/runtime/interrupt.go index b9cd2a545..3419f1ae7 100644 --- a/components/execd/pkg/runtime/interrupt.go +++ b/components/execd/pkg/runtime/interrupt.go @@ -20,13 +20,9 @@ package runtime import ( "errors" "fmt" - "os" - "strings" "syscall" "time" - "github.com/alibaba/opensandbox/internal/safego" - "github.com/alibaba/opensandbox/execd/pkg/log" ) @@ -38,8 +34,16 @@ func (c *Controller) Interrupt(sessionID string) error { log.Warning("Interrupting Jupyter kernel %s", kernel.kernelID) return kernel.client.InterruptKernel(kernel.kernelID) case c.getCommandKernel(sessionID) != nil: - kernel := c.getCommandKernel(sessionID) - return c.killPid(kernel.pid) + // Snapshot under c.mu so running/pid are observed consistently with + // markCommandFinished. killPid signals the entire process group, so + // guarding against a stale PID is critical: a late Interrupt on a + // finished session must not blast SIGTERM/SIGKILL at an unrelated + // process group that has reused the PID. + snapshot := c.commandSnapshot(sessionID) + if snapshot == nil || !snapshot.running || snapshot.pid <= 0 { + return fmt.Errorf("command session %s is not running", sessionID) + } + return c.killPid(snapshot.pid) case c.getBashSession(sessionID) != nil: return c.closeBashSession(sessionID) default: @@ -48,53 +52,71 @@ func (c *Controller) Interrupt(sessionID string) error { } // killPid sends SIGTERM followed by SIGKILL if needed. +// +// Commands are launched with Setpgid: true, so pid is also the process group +// id. We signal the entire group via syscall.Kill(-pid, sig) so child and +// grandchild processes are terminated, not just the group leader. +// +// kill(2) on a process group only guarantees delivery to at least one +// member, and kill(-pid, 0) keeps reporting the group as observable while +// any unreaped zombie lingers. The probe loops below are therefore +// best-effort logging — once a kill signal has been delivered, a slow or +// asynchronous teardown is not treated as a hard failure that would +// surface as a 500 from Interrupt. func (c *Controller) killPid(pid int) error { - process, err := os.FindProcess(pid) - if err != nil { - return err + if pid <= 0 { + return fmt.Errorf("invalid pid %d", pid) } - log.Warning("Attempting to terminate process %d", pid) + log.Warning("Attempting to terminate process group %d", pid) - if err := process.Signal(syscall.SIGTERM); err != nil { - if strings.Contains(err.Error(), "already finished") { + sigtermDelivered := false + if err := syscall.Kill(-pid, syscall.SIGTERM); err != nil { + if errors.Is(err, syscall.ESRCH) { return nil } - log.Warning("SIGTERM failed for pid %d: %v, trying SIGKILL", pid, err) + log.Warning("SIGTERM failed for pgroup %d: %v, trying SIGKILL", pid, err) } else { - done := make(chan error, 1) - safego.Go(func() { - _, err := process.Wait() - done <- err - }) - - select { - case err := <-done: - if err == nil { - log.Info("Process %d terminated gracefully", pid) - return nil + sigtermDelivered = true + // Probe the group for liveness. os.Process.Wait() doesn't apply + // because the leader is not a child of this goroutine. + deadline := time.Now().Add(3 * time.Second) + for time.Now().Before(deadline) { + if err := syscall.Kill(-pid, 0); err != nil { + if errors.Is(err, syscall.ESRCH) { + log.Info("Process group %d terminated gracefully", pid) + return nil + } } - case <-time.After(3 * time.Second): - log.Warning("Process %d did not terminate after SIGTERM, using SIGKILL", pid) + time.Sleep(50 * time.Millisecond) } + log.Warning("Process group %d did not exit after SIGTERM, escalating to SIGKILL", pid) } - if err := process.Signal(syscall.SIGKILL); err != nil { - if strings.Contains(err.Error(), "already finished") { + if err := syscall.Kill(-pid, syscall.SIGKILL); err != nil { + if errors.Is(err, syscall.ESRCH) { + return nil + } + if sigtermDelivered { + // SIGTERM was already delivered to at least one member, so the + // kill is in flight. SIGKILL failure here is commonly EPERM on + // a group reduced to zombies — the kernel will reap them once + // the parent runs Wait(). Surface as a warning rather than a + // hard error. + log.Warning("SIGKILL on pgroup %d failed: %v; teardown likely already in progress", pid, err) return nil } - return fmt.Errorf("failed to kill process %d: %w", pid, err) + return fmt.Errorf("failed to kill process group %d: %w", pid, err) } for range 3 { - if err := process.Signal(syscall.Signal(0)); err != nil { - if strings.Contains(err.Error(), "already finished") || - strings.Contains(err.Error(), "no such process") { - log.Info("Process %d confirmed terminated", pid) + if err := syscall.Kill(-pid, 0); err != nil { + if errors.Is(err, syscall.ESRCH) { + log.Info("Process group %d confirmed terminated", pid) return nil } } time.Sleep(50 * time.Millisecond) } - - return fmt.Errorf("process %d might still be running", pid) + log.Warning("Process group %d still observable after SIGKILL; teardown may complete asynchronously", pid) + return nil } diff --git a/components/execd/pkg/runtime/interrupt_windows.go b/components/execd/pkg/runtime/interrupt_windows.go index 6e1044d77..bbcd3ccdb 100644 --- a/components/execd/pkg/runtime/interrupt_windows.go +++ b/components/execd/pkg/runtime/interrupt_windows.go @@ -35,8 +35,14 @@ func (c *Controller) Interrupt(sessionID string) error { log.Warning("Interrupting Jupyter kernel %s", kernel.kernelID) return kernel.client.InterruptKernel(kernel.kernelID) case c.getCommandKernel(sessionID) != nil: - kernel := c.getCommandKernel(sessionID) - return c.killPid(kernel.pid) + // Guard against a stale PID after the command has finished: the + // kernel is retained in commandClientMap, so a late Interrupt could + // otherwise terminate an unrelated process that reused the PID. + snapshot := c.commandSnapshot(sessionID) + if snapshot == nil || !snapshot.running || snapshot.pid <= 0 { + return fmt.Errorf("command session %s is not running", sessionID) + } + return c.killPid(snapshot.pid) default: return errors.New("no such session") }