Skip to content

Commit acc8ac4

Browse files
authored
Use UFFD only for initial snapshot fork restores (#270)
* Use UFFD only for initial snapshot fork restore * Defer UFFD fork memory clone until standby * Document one-shot UFFD fork lifecycle * Add UFFD fork standby lifecycle test * Clean up UFFD snapshot ownership * Reproduce chained UFFD fork backing path bug * Preserve deferred UFFD backing path across chained forks * Fix UFFD one-shot standby lifecycle * Guard Firecracker snapshot source alias during UFFD forks * Bump UFFD pager version for one-shot restore * Install isolated UFFD pager units in CI * Release UFFD source lock before fork restore * Remove UFFD restore source lock * Clear deferred Firecracker memory on stop * Assert UFFD lifecycle guest state persists * Use regexp for UFFD systemd instance sanitizer * Consolidate UFFD restore state cleanup
1 parent 2211346 commit acc8ac4

34 files changed

Lines changed: 1209 additions & 70 deletions

.github/workflows/test.yml

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,29 @@ jobs:
9595
- name: Build
9696
run: make build
9797

98+
# Installs the hypeman-uffd@.service systemd template and reloads systemd.
# Each instance runs the pager binary with --version-key set to the instance
# name (%i). The Environment= lines provide defaults (4294967296 bytes = 4 GiB
# cache); the optional /run/hypeman/uffd/%i.env file (leading "-" means a
# missing file is not an error) is read as well and takes precedence.
- name: Install UFFD pager systemd template
99+
run: |
100+
sudo mkdir -p /run/hypeman/uffd
101+
sudo tee /etc/systemd/system/hypeman-uffd@.service > /dev/null <<'EOF'
102+
[Unit]
103+
Description=Hypeman UFFD Pager (%i)
104+
Documentation=https://github.com/kernel/hypeman
105+
After=network.target
106+
107+
[Service]
108+
Type=simple
109+
Environment="HYPEMAN_UFFD_BINARY=/opt/hypeman/bin/hypeman-uffd-pager"
110+
Environment="HYPEMAN_UFFD_DATA_DIR=/var/lib/hypeman"
111+
Environment="HYPEMAN_UFFD_VERSION_KEY=%i"
112+
Environment="HYPEMAN_UFFD_CACHE_MAX_BYTES=4294967296"
113+
EnvironmentFile=-/run/hypeman/uffd/%i.env
114+
ExecStart=/bin/sh -c 'exec "$HYPEMAN_UFFD_BINARY" --data-dir "$HYPEMAN_UFFD_DATA_DIR" --version-key "$HYPEMAN_UFFD_VERSION_KEY" --cache-max-bytes "$HYPEMAN_UFFD_CACHE_MAX_BYTES"'
115+
Restart=on-failure
116+
RestartSec=5
117+
KillMode=process
118+
EOF
119+
sudo systemctl daemon-reload
120+
98121
- name: Prewarm test cache
99122
env:
100123
HYPEMAN_TEST_REGISTRY: 127.0.0.1:5001
@@ -131,13 +154,25 @@ jobs:
131154
TLS_ALLOWED_DOMAINS: '*.hypeman-development.com'
132155
HYPEMAN_TEST_PREWARM_STRICT: "1"
133156
HYPEMAN_TEST_REGISTRY: 127.0.0.1:5001
157+
HYPEMAN_UFFD_PAGER_BINARY: ${{ runner.temp }}/hypeman-uffd-pager-${{ github.run_id }}-${{ github.run_attempt }}
158+
HYPEMAN_UFFD_SYSTEMD_INSTANCE_PREFIX: ci-${{ github.run_id }}-${{ github.run_attempt }}
134159
run: |
160+
cp "$PWD/bin/hypeman-uffd-pager" "$HYPEMAN_UFFD_PAGER_BINARY"
161+
chmod +x "$HYPEMAN_UFFD_PAGER_BINARY"
135162
export HYPEMAN_TEST_PREWARM_DIR="$HOME/.cache/hypeman-ci/linux-amd64"
136163
make test TEST_TIMEOUT=20m
137164
138165
- name: Cleanup
139166
if: always()
140-
run: sudo rm -rf "/tmp/hm-net-${{ github.run_id }}-${{ github.run_attempt }}"
167+
run: |
168+
units="$(systemctl list-units --all --full --plain 'hypeman-uffd@ci-${{ github.run_id }}-${{ github.run_attempt }}-*.service' --no-legend | awk '{print $1}' || true)"
169+
if [ -n "$units" ]; then
170+
echo "$units" | xargs -r sudo systemctl stop || true
171+
echo "$units" | xargs -r sudo systemctl reset-failed || true
172+
fi
173+
sudo rm -f /run/hypeman/uffd/ci-${{ github.run_id }}-${{ github.run_attempt }}-*.env
174+
sudo rm -rf "/tmp/hm-net-${{ github.run_id }}-${{ github.run_attempt }}"
175+
rm -f "${{ runner.temp }}/hypeman-uffd-pager-${{ github.run_id }}-${{ github.run_attempt }}"
141176
142177
test-darwin:
143178
runs-on: [self-hosted, macos, arm64]

Makefile

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ SHELL := /bin/bash
44
# Directory where local binaries will be installed
55
BIN_DIR ?= $(CURDIR)/bin
66
GO_TEST_TIMEOUT ?= 300s
7+
UFFD_PAGER_BINARY ?= $(BIN_DIR)/hypeman-uffd-pager
78

89
$(BIN_DIR):
910
mkdir -p $(BIN_DIR)
@@ -256,6 +257,9 @@ build-linux: ensure-ch-binaries ensure-firecracker-binaries ensure-caddy-binarie
256257
go build -tags containers_image_openpgp -o $(BIN_DIR)/hypeman ./cmd/api
257258
go build -o $(BIN_DIR)/hypeman-uffd-pager ./cmd/uffd-pager
258259

260+
# Build the UFFD pager binary that test-linux depends on and passes to the
# tests by default via HYPEMAN_UFFD_PAGER_BINARY.
#
# No Go sources are listed as prerequisites, so as a plain file target this
# would be considered up to date forever once the binary exists, and the tests
# could silently run against a stale pager. Declaring it phony re-runs
# `go build` on every request; Go's own build cache keeps that cheap when
# nothing changed. $(BIN_DIR) stays order-only so its mtime is irrelevant.
.PHONY: $(BIN_DIR)/hypeman-uffd-pager
$(BIN_DIR)/hypeman-uffd-pager: | $(BIN_DIR)
	go build -o $@ ./cmd/uffd-pager
262+
259263
# Build all binaries
260264
build-all: build
261265

@@ -289,19 +293,23 @@ else
289293
endif
290294

291295
# Linux tests (as root for network capabilities)
292-
test-linux: ensure-ch-binaries ensure-firecracker-binaries ensure-caddy-binaries build-embedded
296+
test-linux: ensure-ch-binaries ensure-firecracker-binaries ensure-caddy-binaries build-embedded $(BIN_DIR)/hypeman-uffd-pager
293297
@VERBOSE_FLAG=""; \
294298
TEST_PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:$$PATH"; \
295299
if [ -n "$(VERBOSE)" ]; then VERBOSE_FLAG="-v"; fi; \
296300
if [ -n "$(TEST)" ]; then \
297301
echo "Running specific test: $(TEST)"; \
298302
sudo env "PATH=$$TEST_PATH" "DOCKER_CONFIG=$${DOCKER_CONFIG:-$$HOME/.docker}" "CI=$${CI:-}" \
303+
"HYPEMAN_UFFD_PAGER_BINARY=$${HYPEMAN_UFFD_PAGER_BINARY:-$(UFFD_PAGER_BINARY)}" \
304+
"HYPEMAN_UFFD_SYSTEMD_INSTANCE_PREFIX=$${HYPEMAN_UFFD_SYSTEMD_INSTANCE_PREFIX:-}" \
299305
"HYPEMAN_TEST_PREWARM_DIR=$${HYPEMAN_TEST_PREWARM_DIR:-}" \
300306
"HYPEMAN_TEST_PREWARM_STRICT=$${HYPEMAN_TEST_PREWARM_STRICT:-}" \
301307
"HYPEMAN_TEST_REGISTRY=$${HYPEMAN_TEST_REGISTRY:-}" \
302308
go test -tags containers_image_openpgp -run=$(TEST) $$VERBOSE_FLAG -timeout=$(TEST_TIMEOUT) ./...; \
303309
else \
304310
sudo env "PATH=$$TEST_PATH" "DOCKER_CONFIG=$${DOCKER_CONFIG:-$$HOME/.docker}" "CI=$${CI:-}" \
311+
"HYPEMAN_UFFD_PAGER_BINARY=$${HYPEMAN_UFFD_PAGER_BINARY:-$(UFFD_PAGER_BINARY)}" \
312+
"HYPEMAN_UFFD_SYSTEMD_INSTANCE_PREFIX=$${HYPEMAN_UFFD_SYSTEMD_INSTANCE_PREFIX:-}" \
305313
"HYPEMAN_TEST_PREWARM_DIR=$${HYPEMAN_TEST_PREWARM_DIR:-}" \
306314
"HYPEMAN_TEST_PREWARM_STRICT=$${HYPEMAN_TEST_PREWARM_STRICT:-}" \
307315
"HYPEMAN_TEST_REGISTRY=$${HYPEMAN_TEST_REGISTRY:-}" \

lib/forkvm/README.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,20 @@ instead of reusing the source identity.
5959
- Network override fields are supplied at snapshot load to bind the fork to its
6060
own TAP device.
6161
- Vsock CID remains stable for snapshot-based flows.
62+
- When the Firecracker snapshot memory backend is configured as UFFD, UFFD is
63+
used as a one-shot acceleration for the first restore of a newly forked
64+
standby snapshot. The fork initially reuses the source snapshot memory as the
65+
pager backing file instead of cloning the large memory file during fanout.
66+
- The cost of that deferred memory clone is paid when the fork later enters standby. Before
67+
Firecracker writes the fork's diff snapshot, Hypeman materializes the fork's
68+
own `snapshot-latest/memory` file from the original backing memory. After that
69+
point the fork has a normal on-disk snapshot base, independent from the source.
70+
- Subsequent direct restores of that same fork use Firecracker's normal
71+
file-backed memory backend. If that standby fork is itself forked again, the
72+
new child gets its own one-shot UFFD restore.
73+
- This keeps UFFD on the high-fanout path where shared snapshot cache is most
74+
useful, while preserving the normal Firecracker diff-snapshot lifecycle for
75+
per-instance standby/resume cycles.
6276

6377
## VZ (Virtualization.framework)
6478

lib/forkvm/copy.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,22 @@ type copyState struct {
3333
reflinkDead bool
3434
}
3535

36+
// CopyOptions tunes CopyGuestDirectoryWithOptions. The zero value adds no
// extra skips on top of the built-in skip rules.
type CopyOptions struct {
	// SkipRelativePaths is a set of paths, relative to the source directory,
	// that are left out of the copy. Each walked entry's cleaned relative
	// path is looked up in this set.
	// NOTE(review): a key that names a directory appears to skip only the
	// directory entry itself rather than its whole subtree — confirm against
	// the walk callback before relying on directory keys.
	SkipRelativePaths map[string]struct{}
}
40+
3641
// CopyGuestDirectory recursively copies a guest directory to a new destination.
3742
// Regular files are cloned via reflink (FICLONE) when the underlying filesystem
3843
// supports it; otherwise we fall back to a sparse extent copy
3944
// (SEEK_DATA/SEEK_HOLE). Runtime sockets and logs are skipped because they are
4045
// host-runtime artifacts.
4146
func CopyGuestDirectory(srcDir, dstDir string) error {
47+
return CopyGuestDirectoryWithOptions(srcDir, dstDir, CopyOptions{})
48+
}
49+
50+
// CopyGuestDirectoryWithOptions is CopyGuestDirectory with optional path skips.
51+
func CopyGuestDirectoryWithOptions(srcDir, dstDir string, opts CopyOptions) error {
4252
srcInfo, err := os.Stat(srcDir)
4353
if err != nil {
4454
return fmt.Errorf("stat source directory: %w", err)
@@ -68,9 +78,15 @@ func CopyGuestDirectory(srcDir, dstDir string) error {
6878
if relPath == "." {
6979
return nil
7080
}
81+
if _, ok := opts.SkipRelativePaths[filepath.Clean(relPath)]; ok {
82+
return nil
83+
}
7184
if d.IsDir() && shouldSkipDirectory(relPath) {
7285
return filepath.SkipDir
7386
}
87+
if !d.IsDir() && shouldSkipRuntimeSocket(relPath) {
88+
return nil
89+
}
7490
if !d.IsDir() && shouldSkipRegularFile(relPath) {
7591
return nil
7692
}
@@ -115,6 +131,26 @@ func CopyGuestDirectory(srcDir, dstDir string) error {
115131
})
116132
}
117133

134+
// CopyRegularFile copies one regular file using the same reflink-first behavior
135+
// as CopyGuestDirectory.
136+
func CopyRegularFile(srcPath, dstPath string) error {
137+
info, err := os.Stat(srcPath)
138+
if err != nil {
139+
return fmt.Errorf("stat source file: %w", err)
140+
}
141+
if !info.Mode().IsRegular() {
142+
return fmt.Errorf("source path is not a regular file: %s", srcPath)
143+
}
144+
if err := os.MkdirAll(filepath.Dir(dstPath), 0755); err != nil {
145+
return fmt.Errorf("create destination parent: %w", err)
146+
}
147+
state := &copyState{}
148+
if reflinkDisabled.Load() {
149+
state.reflinkDead = true
150+
}
151+
return copyRegularFile(state, srcPath, dstPath, info.Mode().Perm())
152+
}
153+
118154
// copyRegularFile clones path to dstPath, preferring FICLONE reflink and
119155
// falling back to sparse extent copy. The state object lets us short-circuit
120156
// future reflink attempts once we observe an "unsupported" signal from the
@@ -142,3 +178,7 @@ func shouldSkipDirectory(relPath string) bool {
142178
// shouldSkipRegularFile reports whether relPath ends in one of the temporary
// suffixes (".lz4.tmp" or ".zst.tmp") that are excluded from guest copies.
func shouldSkipRegularFile(relPath string) bool {
	for _, tmpSuffix := range [...]string{".lz4.tmp", ".zst.tmp"} {
		if strings.HasSuffix(relPath, tmpSuffix) {
			return true
		}
	}
	return false
}
181+
182+
// shouldSkipRuntimeSocket reports whether the final path element of relPath
// ends in ".sock". Only the name is inspected, not the file type, so a stale
// regular file with that name is treated the same as a real socket.
func shouldSkipRuntimeSocket(relPath string) bool {
	const socketSuffix = ".sock"
	name := filepath.Base(relPath)
	return strings.HasSuffix(name, socketSuffix)
}

lib/forkvm/copy_sparse_unix_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ func TestCopyGuestDirectory_SkipsSocketRuntimeArtifacts(t *testing.T) {
8181
dst := filepath.Join(base, "dst")
8282
require.NoError(t, os.MkdirAll(src, 0755))
8383
require.NoError(t, os.WriteFile(filepath.Join(src, "metadata.json"), []byte(`{"id":"abc"}`), 0644))
84+
require.NoError(t, os.WriteFile(filepath.Join(src, "vsock.sock"), []byte("stale regular artifact"), 0644))
8485

8586
socketPath := filepath.Join(src, fmt.Sprintf("vz-%d.sock", time.Now().UnixNano()))
8687
listener, err := net.Listen("unix", socketPath)
@@ -90,6 +91,7 @@ func TestCopyGuestDirectory_SkipsSocketRuntimeArtifacts(t *testing.T) {
9091
require.NoError(t, CopyGuestDirectory(src, dst))
9192

9293
assert.NoFileExists(t, filepath.Join(dst, filepath.Base(socketPath)))
94+
assert.NoFileExists(t, filepath.Join(dst, "vsock.sock"))
9395
assert.FileExists(t, filepath.Join(dst, "metadata.json"))
9496
}
9597

lib/forkvm/copy_test.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,43 @@ func TestCopyGuestDirectory(t *testing.T) {
4444
assert.Equal(t, "metadata.json", linkTarget)
4545
}
4646

47+
func TestCopyGuestDirectoryWithOptionsSkipsRelativePaths(t *testing.T) {
48+
src := filepath.Join(t.TempDir(), "src")
49+
dst := filepath.Join(t.TempDir(), "dst")
50+
51+
require.NoError(t, os.MkdirAll(filepath.Join(src, "snapshots", "snapshot-latest"), 0755))
52+
require.NoError(t, os.WriteFile(filepath.Join(src, "snapshots", "snapshot-latest", "memory"), []byte("memory"), 0644))
53+
require.NoError(t, os.WriteFile(filepath.Join(src, "snapshots", "snapshot-latest", "state"), []byte("state"), 0644))
54+
require.NoError(t, os.WriteFile(filepath.Join(src, "overlay.raw"), []byte("overlay"), 0644))
55+
56+
require.NoError(t, CopyGuestDirectoryWithOptions(src, dst, CopyOptions{
57+
SkipRelativePaths: map[string]struct{}{
58+
filepath.Join("snapshots", "snapshot-latest", "memory"): {},
59+
},
60+
}))
61+
62+
assert.NoFileExists(t, filepath.Join(dst, "snapshots", "snapshot-latest", "memory"))
63+
assert.FileExists(t, filepath.Join(dst, "snapshots", "snapshot-latest", "state"))
64+
assert.FileExists(t, filepath.Join(dst, "overlay.raw"))
65+
}
66+
67+
func TestCopyRegularFile(t *testing.T) {
68+
src := filepath.Join(t.TempDir(), "src", "memory")
69+
dst := filepath.Join(t.TempDir(), "dst", "snapshots", "snapshot-latest", "memory")
70+
71+
require.NoError(t, os.MkdirAll(filepath.Dir(src), 0755))
72+
require.NoError(t, os.WriteFile(src, []byte("memory"), 0640))
73+
74+
require.NoError(t, CopyRegularFile(src, dst))
75+
76+
got, err := os.ReadFile(dst)
77+
require.NoError(t, err)
78+
assert.Equal(t, []byte("memory"), got)
79+
info, err := os.Stat(dst)
80+
require.NoError(t, err)
81+
assert.Equal(t, os.FileMode(0640), info.Mode().Perm())
82+
}
83+
4784
func TestCopyGuestDirectory_DoesNotSkipTmpSuffixedDirectories(t *testing.T) {
4885
src := filepath.Join(t.TempDir(), "src")
4986
dst := filepath.Join(t.TempDir(), "dst")

lib/guestmemory/controller_test.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,11 @@ func (s *stubHypervisor) Shutdown(ctx context.Context) error { return nil }
4747
func (s *stubHypervisor) GetVMInfo(ctx context.Context) (*hypervisor.VMInfo, error) {
4848
return &hypervisor.VMInfo{State: hypervisor.StateRunning}, nil
4949
}
50-
func (s *stubHypervisor) Pause(ctx context.Context) error { return nil }
51-
func (s *stubHypervisor) Resume(ctx context.Context) error { return nil }
52-
func (s *stubHypervisor) Snapshot(ctx context.Context, destPath string) error { return nil }
50+
func (s *stubHypervisor) Pause(ctx context.Context) error { return nil }
51+
func (s *stubHypervisor) Resume(ctx context.Context) error { return nil }
52+
func (s *stubHypervisor) Snapshot(ctx context.Context, destPath string, _ hypervisor.SnapshotOptions) error {
53+
return nil
54+
}
5355
func (s *stubHypervisor) ResizeMemory(ctx context.Context, bytes int64) error { return nil }
5456
func (s *stubHypervisor) ResizeMemoryAndWait(ctx context.Context, bytes int64, timeout time.Duration) error {
5557
return nil

lib/hypervisor/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ Before using optional features, callers check capabilities:
3333

3434
```go
3535
if hv.Capabilities().SupportsSnapshot {
36-
hv.Snapshot(ctx, path)
36+
hv.Snapshot(ctx, path, hypervisor.SnapshotOptions{})
3737
}
3838
```
3939

lib/hypervisor/cloudhypervisor/cloudhypervisor.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ func (c *CloudHypervisor) Resume(ctx context.Context) error {
160160
}
161161

162162
// Snapshot creates a VM snapshot.
163-
func (c *CloudHypervisor) Snapshot(ctx context.Context, destPath string) error {
163+
func (c *CloudHypervisor) Snapshot(ctx context.Context, destPath string, _ hypervisor.SnapshotOptions) error {
164164
snapshotURL := "file://" + destPath
165165
snapshotConfig := vmm.VmSnapshotConfig{DestinationUrl: &snapshotURL}
166166
resp, err := c.client.PutVmSnapshotWithResponse(ctx, snapshotConfig)

lib/hypervisor/firecracker/config_test.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,37 @@ func TestSnapshotLoadParamsSupportsUFFDBackend(t *testing.T) {
101101
assert.Equal(t, "/tmp/pager.sock", load.MemBackend.BackendPath)
102102
}
103103

104+
func TestMaterializeDeferredSnapshotMemory(t *testing.T) {
105+
t.Parallel()
106+
107+
sourcePath := filepath.Join(t.TempDir(), "source-memory")
108+
snapshotDir := filepath.Join(t.TempDir(), "snapshot-latest")
109+
require.NoError(t, os.WriteFile(sourcePath, []byte("memory"), 0644))
110+
111+
require.NoError(t, materializeDeferredSnapshotMemory(snapshotDir, sourcePath))
112+
113+
got, err := os.ReadFile(filepath.Join(snapshotDir, "memory"))
114+
require.NoError(t, err)
115+
assert.Equal(t, []byte("memory"), got)
116+
}
117+
118+
func TestMaterializeDeferredSnapshotMemoryUsesRetainedSnapshotAlternate(t *testing.T) {
119+
t.Parallel()
120+
121+
root := t.TempDir()
122+
sourcePath := filepath.Join(root, "snapshots", "snapshot-base", "memory")
123+
alternatePath := filepath.Join(root, "snapshots", "snapshot-latest", "memory")
124+
destPath := filepath.Join(t.TempDir(), "snapshot-latest")
125+
require.NoError(t, os.MkdirAll(filepath.Dir(alternatePath), 0755))
126+
require.NoError(t, os.WriteFile(alternatePath, []byte("memory"), 0644))
127+
128+
require.NoError(t, materializeDeferredSnapshotMemory(destPath, sourcePath))
129+
130+
got, err := os.ReadFile(filepath.Join(destPath, "memory"))
131+
require.NoError(t, err)
132+
assert.Equal(t, []byte("memory"), got)
133+
}
134+
104135
func TestToBalloonConfig(t *testing.T) {
105136
cfg := hypervisor.VMConfig{
106137
GuestMemory: hypervisor.GuestMemoryConfig{

0 commit comments

Comments
 (0)