From c4098f194e9663cc0bb56c74aa8eb2c04e1f0bd2 Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Sun, 17 May 2026 00:32:10 -0700 Subject: [PATCH 1/7] feat(envd): run envd at SCHED_FIFO 1, reset user processes via wrapper Mirrors envd's existing Nice=-20 CPU priority with a real-time scheduling class so envd preempts customer SCHED_OTHER work during pause/resume storms. Uses the lowest RT priority (1) so envd cannot starve higher-priority kernel threads or other RT services; the default kernel.sched_rt_runtime_us throttle (95% per 1s) caps total RT bandwidth so a hypothetical envd busy-loop cannot DoS the system. User-spawned processes are reset to SCHED_OTHER (nice 0, no ambient caps) via the existing /bin/sh wrapper: CAP_SYS_NICE is passed through setuid via SysProcAttr.AmbientCaps so chrt(1) can drop the RT policy, then setpriv(1) strips the ambient cap so the user command cannot re-raise itself. socat intentionally inherits SCHED_FIFO + Nice=-20: port forwarding is infrastructure-critical and dropping connections under load is much worse than the small RT budget cost. 
--- packages/envd/internal/port/forward.go | 3 +++ .../services/process/handler/handler.go | 23 +++++++++++++++---- .../process/handler/handler_caps_linux.go | 13 +++++++++++ .../process/handler/handler_caps_other.go | 7 ++++++ .../build/core/rootfs/files/envd.service.tpl | 8 +++++++ 5 files changed, 49 insertions(+), 5 deletions(-) create mode 100644 packages/envd/internal/services/process/handler/handler_caps_linux.go create mode 100644 packages/envd/internal/services/process/handler/handler_caps_other.go diff --git a/packages/envd/internal/port/forward.go b/packages/envd/internal/port/forward.go index eaf8978f7a..2b364c8d9c 100644 --- a/packages/envd/internal/port/forward.go +++ b/packages/envd/internal/port/forward.go @@ -143,6 +143,9 @@ func (f *Forwarder) startPortForwarding(ctx context.Context, p *PortToForward) { cgroupFD, ok := f.cgroupManager.GetFileDescriptor(cgroups.ProcessTypeSocat) + // socat intentionally inherits envd's SCHED_FIFO + Nice=-20 — port + // forwarding is infrastructure-critical and dropping connections under + // load is much worse than the small RT budget cost. cmd.SysProcAttr = &syscall.SysProcAttr{ Setpgid: true, } diff --git a/packages/envd/internal/services/process/handler/handler.go b/packages/envd/internal/services/process/handler/handler.go index 82e17d2796..2ec39294f4 100644 --- a/packages/envd/internal/services/process/handler/handler.go +++ b/packages/envd/internal/services/process/handler/handler.go @@ -96,12 +96,20 @@ func New( // User command string for logging (without the internal wrapper details). userCmd := strings.Join(append([]string{req.GetProcess().GetCmd()}, req.GetProcess().GetArgs()...), " ") - // Wrap the command in a shell that sets the OOM score and nice value before exec-ing the actual command. - // This eliminates the race window where grandchildren could inherit the parent's protected OOM score (-1000) - // or high CPU priority (nice -20) before the post-start calls had a chance to correct them. 
- // nice(1) applies a relative adjustment, so we compute the delta from the current (inherited) nice to the target. + // Wrap the command in a shell that resets envd's elevated priorities before exec-ing the actual command. + // This eliminates the race window where grandchildren could inherit envd's protected OOM score (-1000), + // real-time CPU class (SCHED_FIFO 1) or CPU priority (nice -20) before any post-start fixup runs. + // chrt(1) lowers SCHED_FIFO back to SCHED_OTHER. It needs CAP_SYS_NICE, which is supplied via + // SysProcAttr.AmbientCaps below; setpriv(1) then strips the ambient cap so the final user command + // cannot raise itself back to RT. + // nice(1) applies a relative adjustment, so we compute the delta from the current (inherited) nice + // to the target (0). This is a no-op while the process is still SCHED_FIFO (RT processes ignore + // nice), but takes effect once chrt has switched the policy to SCHED_OTHER. niceDelta := defaultNice - currentNice() - oomWrapperScript := fmt.Sprintf(`echo %d > /proc/$$/oom_score_adj && exec /usr/bin/nice -n %d "${@}"`, defaultOomScore, niceDelta) + oomWrapperScript := fmt.Sprintf( + `echo %d > /proc/$$/oom_score_adj && exec /usr/bin/chrt --other 0 /usr/bin/setpriv --ambient-caps -all -- /usr/bin/nice -n %d "${@}"`, + defaultOomScore, niceDelta, + ) wrapperArgs := append([]string{"-c", oomWrapperScript, "--", req.GetProcess().GetCmd()}, req.GetProcess().GetArgs()...) cmd := exec.CommandContext(ctx, "/bin/sh", wrapperArgs...) @@ -131,6 +139,11 @@ func New( }, } applyCgroupFD(cmd.SysProcAttr, cgroupFD, ok) + // Pass CAP_SYS_NICE through setuid so the wrapper can run + // `chrt --other 0` (lowering from SCHED_FIFO to SCHED_OTHER requires it). + // setpriv in the wrapper drops this from the ambient set before the user + // command is exec-ed. No-op on non-Linux platforms. 
+ applyAmbientCapSysNice(cmd.SysProcAttr) resolvedPath, err := permissions.ExpandAndResolve(req.GetProcess().GetCwd(), user, defaults.Workdir) if err != nil { diff --git a/packages/envd/internal/services/process/handler/handler_caps_linux.go b/packages/envd/internal/services/process/handler/handler_caps_linux.go new file mode 100644 index 0000000000..0b367ee841 --- /dev/null +++ b/packages/envd/internal/services/process/handler/handler_caps_linux.go @@ -0,0 +1,13 @@ +//go:build linux + +package handler + +import ( + "syscall" + + "golang.org/x/sys/unix" +) + +func applyAmbientCapSysNice(attr *syscall.SysProcAttr) { + attr.AmbientCaps = append(attr.AmbientCaps, unix.CAP_SYS_NICE) +} diff --git a/packages/envd/internal/services/process/handler/handler_caps_other.go b/packages/envd/internal/services/process/handler/handler_caps_other.go new file mode 100644 index 0000000000..501c32dc8e --- /dev/null +++ b/packages/envd/internal/services/process/handler/handler_caps_other.go @@ -0,0 +1,7 @@ +//go:build !linux + +package handler + +import "syscall" + +func applyAmbientCapSysNice(_ *syscall.SysProcAttr) {} diff --git a/packages/orchestrator/pkg/template/build/core/rootfs/files/envd.service.tpl b/packages/orchestrator/pkg/template/build/core/rootfs/files/envd.service.tpl index 3fc1eab0fc..229fd7ffb9 100644 --- a/packages/orchestrator/pkg/template/build/core/rootfs/files/envd.service.tpl +++ b/packages/orchestrator/pkg/template/build/core/rootfs/files/envd.service.tpl @@ -17,6 +17,14 @@ LimitCORE=infinity ExecStartPre=/bin/sh -c 'mountpoint -q /etc/ssl/certs || (mkdir -p /run/e2b/certs && mount --bind /run/e2b/certs /etc/ssl/certs) && ([ -s /etc/ssl/certs/ca-certificates.crt ] || update-ca-certificates)' ExecStart=/bin/bash -l -c "/usr/bin/envd" Nice=-20 +# Realtime CPU scheduling with the lowest RT priority (1) so envd preempts +# user-space SCHED_OTHER work but cannot starve higher-priority kernel +# threads or other RT services. 
The default kernel.sched_rt_runtime_us +# throttle (95% per 1s) caps total RT bandwidth so envd cannot DoS the +# system even if it loops. User processes spawned by envd are reset to +# SCHED_OTHER via the in-process wrapper using AmbientCaps + chrt(1). +CPUSchedulingPolicy=fifo +CPUSchedulingPriority=1 OOMPolicy=continue OOMScoreAdjust=-1000 Environment="GOMEMLIMIT={{ .MemoryLimit }}MiB" From bad68076c9d9c17994525ab75358fa8d385d91ea Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Sun, 17 May 2026 02:33:05 -0700 Subject: [PATCH 2/7] chore: trim verbose comments --- packages/envd/internal/port/forward.go | 4 +--- .../services/process/handler/handler.go | 18 ++++-------------- .../build/core/rootfs/files/envd.service.tpl | 6 ------ 3 files changed, 5 insertions(+), 23 deletions(-) diff --git a/packages/envd/internal/port/forward.go b/packages/envd/internal/port/forward.go index 2b364c8d9c..709b79dfc4 100644 --- a/packages/envd/internal/port/forward.go +++ b/packages/envd/internal/port/forward.go @@ -143,9 +143,7 @@ func (f *Forwarder) startPortForwarding(ctx context.Context, p *PortToForward) { cgroupFD, ok := f.cgroupManager.GetFileDescriptor(cgroups.ProcessTypeSocat) - // socat intentionally inherits envd's SCHED_FIFO + Nice=-20 — port - // forwarding is infrastructure-critical and dropping connections under - // load is much worse than the small RT budget cost. + // socat intentionally inherits envd's SCHED_FIFO + Nice=-20 (infra-critical). cmd.SysProcAttr = &syscall.SysProcAttr{ Setpgid: true, } diff --git a/packages/envd/internal/services/process/handler/handler.go b/packages/envd/internal/services/process/handler/handler.go index 2ec39294f4..d305d012e3 100644 --- a/packages/envd/internal/services/process/handler/handler.go +++ b/packages/envd/internal/services/process/handler/handler.go @@ -96,15 +96,8 @@ func New( // User command string for logging (without the internal wrapper details). 
userCmd := strings.Join(append([]string{req.GetProcess().GetCmd()}, req.GetProcess().GetArgs()...), " ") - // Wrap the command in a shell that resets envd's elevated priorities before exec-ing the actual command. - // This eliminates the race window where grandchildren could inherit envd's protected OOM score (-1000), - // real-time CPU class (SCHED_FIFO 1) or CPU priority (nice -20) before any post-start fixup runs. - // chrt(1) lowers SCHED_FIFO back to SCHED_OTHER. It needs CAP_SYS_NICE, which is supplied via - // SysProcAttr.AmbientCaps below; setpriv(1) then strips the ambient cap so the final user command - // cannot raise itself back to RT. - // nice(1) applies a relative adjustment, so we compute the delta from the current (inherited) nice - // to the target (0). This is a no-op while the process is still SCHED_FIFO (RT processes ignore - // nice), but takes effect once chrt has switched the policy to SCHED_OTHER. + // Wrap in a shell that resets oom_score_adj, lowers SCHED_FIFO to SCHED_OTHER (chrt, needs + // CAP_SYS_NICE supplied via AmbientCaps), drops the ambient cap (setpriv), and resets nice. niceDelta := defaultNice - currentNice() oomWrapperScript := fmt.Sprintf( `echo %d > /proc/$$/oom_score_adj && exec /usr/bin/chrt --other 0 /usr/bin/setpriv --ambient-caps -all -- /usr/bin/nice -n %d "${@}"`, @@ -139,11 +132,8 @@ func New( }, } applyCgroupFD(cmd.SysProcAttr, cgroupFD, ok) - // Pass CAP_SYS_NICE through setuid so the wrapper can run - // `chrt --other 0` (lowering from SCHED_FIFO to SCHED_OTHER requires it). - // setpriv in the wrapper drops this from the ambient set before the user - // command is exec-ed. No-op on non-Linux platforms. - applyAmbientCapSysNice(cmd.SysProcAttr) + // CAP_SYS_NICE so the wrapper's chrt(1) can drop SCHED_FIFO to SCHED_OTHER. 
+ applyAmbientCapSysNice(cmd.SysProcAttr) resolvedPath, err := permissions.ExpandAndResolve(req.GetProcess().GetCwd(), user, defaults.Workdir) if err != nil { diff --git a/packages/orchestrator/pkg/template/build/core/rootfs/files/envd.service.tpl b/packages/orchestrator/pkg/template/build/core/rootfs/files/envd.service.tpl index 229fd7ffb9..3bbce29ea3 100644 --- a/packages/orchestrator/pkg/template/build/core/rootfs/files/envd.service.tpl +++ b/packages/orchestrator/pkg/template/build/core/rootfs/files/envd.service.tpl @@ -17,12 +17,6 @@ LimitCORE=infinity ExecStartPre=/bin/sh -c 'mountpoint -q /etc/ssl/certs || (mkdir -p /run/e2b/certs && mount --bind /run/e2b/certs /etc/ssl/certs) && ([ -s /etc/ssl/certs/ca-certificates.crt ] || update-ca-certificates)' ExecStart=/bin/bash -l -c "/usr/bin/envd" Nice=-20 -# Realtime CPU scheduling with the lowest RT priority (1) so envd preempts -# user-space SCHED_OTHER work but cannot starve higher-priority kernel -# threads or other RT services. The default kernel.sched_rt_runtime_us -# throttle (95% per 1s) caps total RT bandwidth so envd cannot DoS the -# system even if it loops. User processes spawned by envd are reset to -# SCHED_OTHER via the in-process wrapper using AmbientCaps + chrt(1). 
CPUSchedulingPolicy=fifo CPUSchedulingPriority=1 OOMPolicy=continue From d02cab55df9f272d2d3b737b241ce568bef32402 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sun, 17 May 2026 09:38:31 +0000 Subject: [PATCH 3/7] chore: auto-commit generated changes --- packages/envd/internal/services/process/handler/handler.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/envd/internal/services/process/handler/handler.go b/packages/envd/internal/services/process/handler/handler.go index d305d012e3..1360ba222f 100644 --- a/packages/envd/internal/services/process/handler/handler.go +++ b/packages/envd/internal/services/process/handler/handler.go @@ -132,8 +132,8 @@ func New( }, } applyCgroupFD(cmd.SysProcAttr, cgroupFD, ok) - // CAP_SYS_NICE so the wrapper's chrt(1) can drop SCHED_FIFO to SCHED_OTHER. - applyAmbientCapSysNice(cmd.SysProcAttr) + // CAP_SYS_NICE so the wrapper's chrt(1) can drop SCHED_FIFO to SCHED_OTHER. + applyAmbientCapSysNice(cmd.SysProcAttr) resolvedPath, err := permissions.ExpandAndResolve(req.GetProcess().GetCwd(), user, defaults.Workdir) if err != nil { From 70c1d4c944639ef44feb69f82ac291bd95c5d28e Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Sun, 17 May 2026 23:04:19 -0700 Subject: [PATCH 4/7] polish: rename wrapper var + tighten comments --- .../internal/services/process/handler/handler.go | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/packages/envd/internal/services/process/handler/handler.go b/packages/envd/internal/services/process/handler/handler.go index 1360ba222f..393d9e6452 100644 --- a/packages/envd/internal/services/process/handler/handler.go +++ b/packages/envd/internal/services/process/handler/handler.go @@ -96,14 +96,16 @@ func New( // User command string for logging (without the internal wrapper details). 
userCmd := strings.Join(append([]string{req.GetProcess().GetCmd()}, req.GetProcess().GetArgs()...), " ") - // Wrap in a shell that resets oom_score_adj, lowers SCHED_FIFO to SCHED_OTHER (chrt, needs - // CAP_SYS_NICE supplied via AmbientCaps), drops the ambient cap (setpriv), and resets nice. + // Reset everything we inherit from envd so the child runs at user + // priority: write oom_score_adj, then chrt SCHED_FIFO->SCHED_OTHER + // (needs CAP_SYS_NICE supplied via AmbientCaps below), setpriv to drop + // that cap, and finally nice for SCHED_OTHER weight. niceDelta := defaultNice - currentNice() - oomWrapperScript := fmt.Sprintf( + wrapperScript := fmt.Sprintf( `echo %d > /proc/$$/oom_score_adj && exec /usr/bin/chrt --other 0 /usr/bin/setpriv --ambient-caps -all -- /usr/bin/nice -n %d "${@}"`, defaultOomScore, niceDelta, ) - wrapperArgs := append([]string{"-c", oomWrapperScript, "--", req.GetProcess().GetCmd()}, req.GetProcess().GetArgs()...) + wrapperArgs := append([]string{"-c", wrapperScript, "--", req.GetProcess().GetCmd()}, req.GetProcess().GetArgs()...) cmd := exec.CommandContext(ctx, "/bin/sh", wrapperArgs...) uid, gid, err := permissions.GetUserIdUints(user) @@ -132,8 +134,7 @@ func New( }, } applyCgroupFD(cmd.SysProcAttr, cgroupFD, ok) - // CAP_SYS_NICE so the wrapper's chrt(1) can drop SCHED_FIFO to SCHED_OTHER. 
- applyAmbientCapSysNice(cmd.SysProcAttr) + applyAmbientCapSysNice(cmd.SysProcAttr) // chrt(1) needs CAP_SYS_NICE to drop SCHED_FIFO resolvedPath, err := permissions.ExpandAndResolve(req.GetProcess().GetCwd(), user, defaults.Workdir) if err != nil { From 1acfcc4e9361dcaea17f0578239d86692b21e3e7 Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Sun, 17 May 2026 23:11:11 -0700 Subject: [PATCH 5/7] polish: trim comments --- packages/envd/internal/port/forward.go | 2 +- packages/envd/internal/services/process/handler/handler.go | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/packages/envd/internal/port/forward.go b/packages/envd/internal/port/forward.go index 709b79dfc4..f3f945692b 100644 --- a/packages/envd/internal/port/forward.go +++ b/packages/envd/internal/port/forward.go @@ -143,7 +143,7 @@ func (f *Forwarder) startPortForwarding(ctx context.Context, p *PortToForward) { cgroupFD, ok := f.cgroupManager.GetFileDescriptor(cgroups.ProcessTypeSocat) - // socat intentionally inherits envd's SCHED_FIFO + Nice=-20 (infra-critical). + // socat keeps envd's SCHED_FIFO + Nice=-20 by design. cmd.SysProcAttr = &syscall.SysProcAttr{ Setpgid: true, } diff --git a/packages/envd/internal/services/process/handler/handler.go b/packages/envd/internal/services/process/handler/handler.go index 393d9e6452..de0573a414 100644 --- a/packages/envd/internal/services/process/handler/handler.go +++ b/packages/envd/internal/services/process/handler/handler.go @@ -96,10 +96,8 @@ func New( // User command string for logging (without the internal wrapper details). userCmd := strings.Join(append([]string{req.GetProcess().GetCmd()}, req.GetProcess().GetArgs()...), " ") - // Reset everything we inherit from envd so the child runs at user - // priority: write oom_score_adj, then chrt SCHED_FIFO->SCHED_OTHER - // (needs CAP_SYS_NICE supplied via AmbientCaps below), setpriv to drop - // that cap, and finally nice for SCHED_OTHER weight. 
+ // Reset oom_score_adj, drop SCHED_FIFO via chrt, drop the SYS_NICE + // ambient cap, then apply nice. niceDelta := defaultNice - currentNice() wrapperScript := fmt.Sprintf( `echo %d > /proc/$$/oom_score_adj && exec /usr/bin/chrt --other 0 /usr/bin/setpriv --ambient-caps -all -- /usr/bin/nice -n %d "${@}"`, From fbce53f61e090eac1db205aa20554efd74645001 Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Mon, 18 May 2026 01:10:39 -0700 Subject: [PATCH 6/7] use systemd AmbientCapabilities; drop Go-side AmbientCaps --- .../internal/services/process/handler/handler.go | 1 - .../services/process/handler/handler_caps_linux.go | 13 ------------- .../services/process/handler/handler_caps_other.go | 7 ------- .../build/core/rootfs/files/envd.service.tpl | 1 + 4 files changed, 1 insertion(+), 21 deletions(-) delete mode 100644 packages/envd/internal/services/process/handler/handler_caps_linux.go delete mode 100644 packages/envd/internal/services/process/handler/handler_caps_other.go diff --git a/packages/envd/internal/services/process/handler/handler.go b/packages/envd/internal/services/process/handler/handler.go index de0573a414..bb90ffc220 100644 --- a/packages/envd/internal/services/process/handler/handler.go +++ b/packages/envd/internal/services/process/handler/handler.go @@ -132,7 +132,6 @@ func New( }, } applyCgroupFD(cmd.SysProcAttr, cgroupFD, ok) - applyAmbientCapSysNice(cmd.SysProcAttr) // chrt(1) needs CAP_SYS_NICE to drop SCHED_FIFO resolvedPath, err := permissions.ExpandAndResolve(req.GetProcess().GetCwd(), user, defaults.Workdir) if err != nil { diff --git a/packages/envd/internal/services/process/handler/handler_caps_linux.go b/packages/envd/internal/services/process/handler/handler_caps_linux.go deleted file mode 100644 index 0b367ee841..0000000000 --- a/packages/envd/internal/services/process/handler/handler_caps_linux.go +++ /dev/null @@ -1,13 +0,0 @@ -//go:build linux - -package handler - -import ( - "syscall" - - "golang.org/x/sys/unix" -) - -func 
applyAmbientCapSysNice(attr *syscall.SysProcAttr) { - attr.AmbientCaps = append(attr.AmbientCaps, unix.CAP_SYS_NICE) -} diff --git a/packages/envd/internal/services/process/handler/handler_caps_other.go b/packages/envd/internal/services/process/handler/handler_caps_other.go deleted file mode 100644 index 501c32dc8e..0000000000 --- a/packages/envd/internal/services/process/handler/handler_caps_other.go +++ /dev/null @@ -1,7 +0,0 @@ -//go:build !linux - -package handler - -import "syscall" - -func applyAmbientCapSysNice(_ *syscall.SysProcAttr) {} diff --git a/packages/orchestrator/pkg/template/build/core/rootfs/files/envd.service.tpl b/packages/orchestrator/pkg/template/build/core/rootfs/files/envd.service.tpl index 3bbce29ea3..8843b3c7dc 100644 --- a/packages/orchestrator/pkg/template/build/core/rootfs/files/envd.service.tpl +++ b/packages/orchestrator/pkg/template/build/core/rootfs/files/envd.service.tpl @@ -19,6 +19,7 @@ ExecStart=/bin/bash -l -c "/usr/bin/envd" Nice=-20 CPUSchedulingPolicy=fifo CPUSchedulingPriority=1 +AmbientCapabilities=CAP_SYS_NICE OOMPolicy=continue OOMScoreAdjust=-1000 Environment="GOMEMLIMIT={{ .MemoryLimit }}MiB" From 27c8fa037acaea7670792fb6aedaf04692aec0d4 Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Mon, 18 May 2026 01:30:48 -0700 Subject: [PATCH 7/7] debug: temporarily remove SCHED_FIFO directives to isolate CI failure --- .../pkg/template/build/core/rootfs/files/envd.service.tpl | 3 --- 1 file changed, 3 deletions(-) diff --git a/packages/orchestrator/pkg/template/build/core/rootfs/files/envd.service.tpl b/packages/orchestrator/pkg/template/build/core/rootfs/files/envd.service.tpl index 8843b3c7dc..3fc1eab0fc 100644 --- a/packages/orchestrator/pkg/template/build/core/rootfs/files/envd.service.tpl +++ b/packages/orchestrator/pkg/template/build/core/rootfs/files/envd.service.tpl @@ -17,9 +17,6 @@ LimitCORE=infinity ExecStartPre=/bin/sh -c 'mountpoint -q /etc/ssl/certs || (mkdir -p /run/e2b/certs && mount --bind /run/e2b/certs 
/etc/ssl/certs) && ([ -s /etc/ssl/certs/ca-certificates.crt ] || update-ca-certificates)' ExecStart=/bin/bash -l -c "/usr/bin/envd" Nice=-20 -CPUSchedulingPolicy=fifo -CPUSchedulingPriority=1 -AmbientCapabilities=CAP_SYS_NICE OOMPolicy=continue OOMScoreAdjust=-1000 Environment="GOMEMLIMIT={{ .MemoryLimit }}MiB"