Skip to content

Commit a9f9629

Browse files
authored
feat(sandbox): pre-pause guest reclaim via envd (#2551)
Adds an opt-in pre-pause step that runs `sync`, `drop_caches`, `compact_memory`, and `fstrim -av` on the live VM via envd's Process service to shrink the memfile/rootfs diff. Each step is wrapped in `timeout -s KILL` with its own cap, so a stuck step (most realistically a slow `sync` on a large dirty backlog) cannot starve the rest — and a killed step does not abort the chain (`;`-separated, not `&&`). Pausing FC is unaffected by an in-flight guest `sync` we time out: FC only drains in-flight virtio I/O before completing the pause; any unflushed dirty pages stay in the memfile snapshot and converge on resume. Per-step timeouts trade reclaim payoff, never correctness — `drop_caches` is documented non-destructive, `fstrim` consults FS allocation metadata not pagecache, and a partial `compact_memory` is just less-compacted. Disabled by default — the LD flag's null default leaves every step at 0 (skipped). Missing keys, zero, negative, and wrong-type values all collapse to "skip". The orchestrator skips the envd call entirely when the chain is empty. The outer `Connect-Timeout-Ms` is the sum of per-step caps plus a small slack. Single LD flag, one rule per cohort: - `guest-pause-reclaim` (JSON) — per-step caps in milliseconds keyed by step name, evaluated against sandbox / team / template LD contexts so targeting is configured in LaunchDarkly. Example value: ```json {"sync":500,"drop_caches":200,"compact_memory":1000,"fstrim":500} ``` `resume-build` exposes `-reclaim` to inject the example values into the offline LD store for local testing. Pairs cleanly with #2553 (disable proactive compaction in the guest base image), but is independent of it and of FPH (#2552). Split out from #2550.
1 parent cec0e81 commit a9f9629

5 files changed

Lines changed: 197 additions & 32 deletions

File tree

packages/orchestrator/cmd/resume-build/main.go

Lines changed: 21 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ import (
77
"fmt"
88
"log"
99
"math"
10-
"net/http"
1110
"os"
1211
"os/signal"
1312
"path/filepath"
@@ -16,11 +15,11 @@ import (
1615
"syscall"
1716
"time"
1817

19-
"connectrpc.com/connect"
2018
"github.com/containernetworking/plugins/pkg/ns"
2119
"github.com/coreos/go-iptables/iptables"
2220
"github.com/google/uuid"
2321
"github.com/launchdarkly/go-sdk-common/v3/ldlog"
22+
"github.com/launchdarkly/go-sdk-common/v3/ldvalue"
2423
"github.com/vishvananda/netlink"
2524
"golang.org/x/sys/unix"
2625

@@ -39,11 +38,8 @@ import (
3938
"github.com/e2b-dev/infra/packages/orchestrator/pkg/tcpfirewall"
4039
"github.com/e2b-dev/infra/packages/orchestrator/pkg/template/build/core/rootfs"
4140
"github.com/e2b-dev/infra/packages/orchestrator/pkg/template/metadata"
42-
"github.com/e2b-dev/infra/packages/shared/pkg/consts"
4341
"github.com/e2b-dev/infra/packages/shared/pkg/featureflags"
44-
"github.com/e2b-dev/infra/packages/shared/pkg/grpc"
4542
"github.com/e2b-dev/infra/packages/shared/pkg/grpc/envd/process"
46-
"github.com/e2b-dev/infra/packages/shared/pkg/grpc/envd/process/processconnect"
4743
"github.com/e2b-dev/infra/packages/shared/pkg/logger"
4844
sbxlogger "github.com/e2b-dev/infra/packages/shared/pkg/logger/sandbox"
4945
"github.com/e2b-dev/infra/packages/shared/pkg/storage"
@@ -73,8 +69,20 @@ func main() {
7369
optimize := flag.Bool("optimize", false, "collect fresh prefetch mapping after pause (resumes snapshot to record page faults)")
7470
shell := flag.Bool("shell", false, "attach an interactive PTY shell via envd (no sshd required in the sandbox)")
7571

72+
// Enables the pre-pause reclaim chain with sensible per-step caps.
73+
reclaim := flag.Bool("reclaim", false, "enable pre-pause reclaim chain (fstrim 500ms, sync 500ms, drop_caches 200ms, compact 1s)")
74+
7675
flag.Parse()
7776

77+
if *reclaim {
78+
featureflags.NewJSONFlag("guest-pause-reclaim", ldvalue.FromJSONMarshal(map[string]int{
79+
"sync": 500,
80+
"drop_caches": 200,
81+
"compact_memory": 1000,
82+
"fstrim": 500,
83+
}))
84+
}
85+
7886
if *fromBuild == "" {
7987
log.Fatal("-from-build required")
8088
}
@@ -1184,32 +1192,15 @@ func printTemplateInfo(ctx context.Context, tmpl template.Template, meta metadat
11841192
}
11851193
}
11861194

1187-
// runCommandInSandbox runs a command inside the sandbox via envd
1188-
func runCommandInSandbox(ctx context.Context, sbx *sandbox.Sandbox, command string) error {
1189-
// Connect directly to envd on the sandbox
1190-
envdURL := fmt.Sprintf("http://%s:%d", sbx.Slot.HostIPString(), consts.DefaultEnvdServerPort)
1191-
1192-
hc := http.Client{
1193-
Timeout: 10 * time.Minute,
1194-
Transport: sandbox.SandboxHttpTransport,
1195-
}
1196-
1197-
processC := processconnect.NewProcessClient(&hc, envdURL)
1195+
// runCommandInSandboxTimeout caps how long a single resume-build command may
1196+
// run before envd kills it. Restores the prior 10-minute upper bound that the
1197+
// shared http.Client used to enforce, so a stuck command can't block the CLI.
1198+
const runCommandInSandboxTimeout = 10 * time.Minute
11981199

1199-
req := connect.NewRequest(&process.StartRequest{
1200-
Process: &process.ProcessConfig{
1201-
Cmd: "/bin/bash",
1202-
Args: []string{"-l", "-c", command},
1203-
},
1204-
})
1205-
grpc.SetUserHeader(req.Header(), "root")
1206-
1207-
// Set access token if available
1208-
if sbx.Config.Envd.AccessToken != nil {
1209-
req.Header().Set("X-Access-Token", *sbx.Config.Envd.AccessToken)
1210-
}
1211-
1212-
stream, err := processC.Start(ctx, req)
1200+
// runCommandInSandbox runs a command inside the sandbox via envd as a
1201+
// login shell so /etc/profile is sourced.
1202+
func runCommandInSandbox(ctx context.Context, sbx *sandbox.Sandbox, command string) error {
1203+
stream, err := sbx.StartEnvdShell(ctx, "/bin/bash", []string{"-l", "-c", command}, "root", runCommandInSandboxTimeout)
12131204
if err != nil {
12141205
return fmt.Errorf("failed to start process: %w", err)
12151206
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
package sandbox
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"net/http"
7+
"strconv"
8+
"time"
9+
10+
"connectrpc.com/connect"
11+
12+
"github.com/e2b-dev/infra/packages/shared/pkg/consts"
13+
"github.com/e2b-dev/infra/packages/shared/pkg/grpc"
14+
"github.com/e2b-dev/infra/packages/shared/pkg/grpc/envd/process"
15+
"github.com/e2b-dev/infra/packages/shared/pkg/grpc/envd/process/processconnect"
16+
)
17+
18+
// StartEnvdShell opens a streaming Process.Start call against the sandbox's
19+
// envd. timeout > 0 sets Connect-Timeout-Ms so envd kills the process at
20+
// the deadline. Caller owns the returned stream.
21+
func (s *Sandbox) StartEnvdShell(
22+
ctx context.Context,
23+
shell string,
24+
shellArgs []string,
25+
user string,
26+
timeout time.Duration,
27+
) (*connect.ServerStreamForClient[process.StartResponse], error) {
28+
addr := fmt.Sprintf("http://%s:%d", s.Slot.HostIPString(), consts.DefaultEnvdServerPort)
29+
pc := processconnect.NewProcessClient(&http.Client{Transport: sandboxHttpClient.Transport}, addr)
30+
31+
req := connect.NewRequest(&process.StartRequest{
32+
Process: &process.ProcessConfig{Cmd: shell, Args: shellArgs},
33+
})
34+
if timeout > 0 {
35+
req.Header().Set("Connect-Timeout-Ms", strconv.FormatInt(timeout.Milliseconds(), 10))
36+
}
37+
if s.Config.Envd.AccessToken != nil {
38+
req.Header().Set("X-Access-Token", *s.Config.Envd.AccessToken)
39+
}
40+
grpc.SetUserHeader(req.Header(), user)
41+
42+
return pc.Start(ctx, req)
43+
}
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
package sandbox
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"strings"
7+
"time"
8+
9+
"go.uber.org/zap"
10+
11+
"github.com/e2b-dev/infra/packages/shared/pkg/featureflags"
12+
"github.com/e2b-dev/infra/packages/shared/pkg/logger"
13+
)
14+
15+
// Slack covers shell start + envd round-trip overhead.
16+
const reclaimOuterSlack = 500 * time.Millisecond
17+
18+
// Order: fstrim → sync → drop_caches → compact_memory. fstrim runs first so
19+
// that the fs metadata it pulls into the page cache and the superblock dirties
20+
// (last-trim timestamps) get flushed by sync and evicted by drop_caches in the
21+
// same pass; compact_memory then consolidates the minimal RSS so the snapshot
22+
// has long contiguous zero runs that compress well. Each step is disabled at
23+
// sub-ms cap. Returns ("", 0) when every step is disabled.
24+
func (s *Sandbox) buildReclaimScript(ctx context.Context) (string, time.Duration) {
25+
cfg := featureflags.GetReclaimConfig(ctx, s.featureFlags,
26+
featureflags.SandboxContext(s.Runtime.SandboxID),
27+
featureflags.TeamContext(s.Runtime.TeamID),
28+
featureflags.TemplateContext(s.Runtime.TemplateID),
29+
)
30+
31+
steps := []struct {
32+
cap time.Duration
33+
cmd string
34+
}{
35+
{cfg.Fstrim, "fstrim -av"},
36+
{cfg.Sync, "sync"},
37+
{cfg.DropCaches, "echo 3 > /proc/sys/vm/drop_caches"},
38+
{cfg.CompactMemory, "echo 1 > /proc/sys/vm/compact_memory"},
39+
}
40+
41+
var (
42+
parts []string
43+
sum time.Duration
44+
)
45+
for _, st := range steps {
46+
// %.3f at <1ms renders as 0.000 → GNU timeout reads as "no timeout".
47+
if st.cap < time.Millisecond {
48+
continue
49+
}
50+
parts = append(parts, fmt.Sprintf("timeout -s KILL %.3f sh -c %q >/dev/null 2>&1 || rc=$?", st.cap.Seconds(), st.cmd))
51+
sum += st.cap
52+
}
53+
if len(parts) == 0 {
54+
return "", 0
55+
}
56+
57+
return "rc=0; " + strings.Join(parts, "; ") + "; exit $rc", sum + reclaimOuterSlack
58+
}
59+
60+
// bestEffortReclaim runs the reclaim chain via envd before pause.
61+
func (s *Sandbox) bestEffortReclaim(ctx context.Context) {
62+
script, timeout := s.buildReclaimScript(ctx)
63+
if script == "" {
64+
return
65+
}
66+
67+
ctx, span := tracer.Start(ctx, "envd-reclaim")
68+
defer span.End()
69+
70+
rcCtx, cancel := context.WithTimeout(ctx, timeout)
71+
defer cancel()
72+
73+
stream, err := s.StartEnvdShell(rcCtx, "/bin/sh", []string{"-c", script}, "root", timeout)
74+
if err != nil {
75+
logger.L().Warn(ctx, "envd reclaim failed", logger.WithSandboxID(s.Runtime.SandboxID), zap.Error(err))
76+
77+
return
78+
}
79+
defer stream.Close()
80+
81+
var exitCode int32
82+
for stream.Receive() {
83+
if end := stream.Msg().GetEvent().GetEnd(); end != nil {
84+
exitCode = end.GetExitCode()
85+
}
86+
}
87+
if err := stream.Err(); err != nil {
88+
logger.L().Warn(ctx, "envd reclaim stream error", logger.WithSandboxID(s.Runtime.SandboxID), zap.Error(err))
89+
90+
return
91+
}
92+
if exitCode != 0 {
93+
logger.L().Warn(ctx, "envd reclaim non-zero exit", logger.WithSandboxID(s.Runtime.SandboxID), zap.Int32("exit_code", exitCode))
94+
}
95+
}

packages/orchestrator/pkg/sandbox/sandbox.go

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,8 @@ type Sandbox struct {
217217
files *storage.SandboxFiles
218218
cleanup *Cleanup
219219

220+
featureFlags *featureflags.Client
221+
220222
process *fc.Process
221223
cgroupHandle *cgroup.CgroupHandle
222224

@@ -457,7 +459,8 @@ func (f *Factory) CreateSandbox(
457459
files: sandboxFiles,
458460
process: fcHandle,
459461

460-
cleanup: cleanup,
462+
cleanup: cleanup,
463+
featureFlags: f.featureFlags,
461464

462465
APIStoredConfig: apiConfigToStore,
463466

@@ -797,7 +800,8 @@ func (f *Factory) ResumeSandbox(
797800
files: sandboxFiles,
798801
process: fcHandle,
799802

800-
cleanup: cleanup,
803+
cleanup: cleanup,
804+
featureFlags: f.featureFlags,
801805

802806
APIStoredConfig: apiConfigToStore,
803807
CABundle: f.egressProxy.CABundle(),
@@ -1051,6 +1055,11 @@ func (s *Sandbox) Pause(
10511055
// Stop the health check before pausing the VM
10521056
s.Checks.Stop()
10531057

1058+
// Best-effort pre-pause guest reclaim (fstrim, sync, drop_caches,
1059+
// compact_memory) on the live VM via envd. Per-step caps are LD-flag-driven;
1060+
// all default to 0 which disables the chain entirely. Non-fatal.
1061+
s.bestEffortReclaim(ctx)
1062+
10541063
if err := s.process.Pause(ctx); err != nil {
10551064
return nil, fmt.Errorf("failed to pause VM: %w", err)
10561065
}

packages/shared/pkg/featureflags/flags.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"fmt"
66
"strings"
7+
"time"
78

89
"github.com/launchdarkly/go-sdk-common/v3/ldcontext"
910
"github.com/launchdarkly/go-sdk-common/v3/ldvalue"
@@ -213,6 +214,32 @@ var (
213214
MinChunkerReadSizeKB = NewIntFlag("min-chunker-read-size-kb", 16)
214215
)
215216

217+
// ReclaimConfigFlag holds per-step caps in milliseconds for the pre-pause
218+
// reclaim chain. Missing/zero/negative values disable the step.
219+
// Example: {"sync":500,"drop_caches":200,"compact_memory":1000,"fstrim":500}
220+
var ReclaimConfigFlag = NewJSONFlag("guest-pause-reclaim", ldvalue.Null())
221+
222+
type ReclaimConfig struct {
223+
Sync time.Duration
224+
DropCaches time.Duration
225+
CompactMemory time.Duration
226+
Fstrim time.Duration
227+
}
228+
229+
func GetReclaimConfig(ctx context.Context, ff *Client, contexts ...ldcontext.Context) ReclaimConfig {
230+
v := ff.JSONFlag(ctx, ReclaimConfigFlag, contexts...)
231+
ms := func(key string) time.Duration {
232+
return time.Duration(v.GetByKey(key).IntValue()) * time.Millisecond
233+
}
234+
235+
return ReclaimConfig{
236+
Sync: ms("sync"),
237+
DropCaches: ms("drop_caches"),
238+
CompactMemory: ms("compact_memory"),
239+
Fstrim: ms("fstrim"),
240+
}
241+
}
242+
216243
type StringFlag struct {
217244
name string
218245
fallback string

0 commit comments

Comments
 (0)