Skip to content

Commit 0f0137b

Browse files
committed
test: verify firecracker fork-from-snapshot CoW isolation
Adds TestFirecrackerForkFromSnapshot_DoesNotMutateSource. It puts a firecracker source into Standby, forks it, restores the fork, writes a divergent guest sentinel, then takes a diff snapshot of the fork. Across the fork's full lifecycle (and after the fork is deleted) the source's snapshot mem-file must keep the same inode and SHA-256, and the source must still be restorable with its pre-fork guest state. Includes a soft disk-usage check (gated on a FICLONE probe) that the fork lifecycle consumes substantially less than a full guest-mem copy on reflink-capable filesystems. This documents the CoW isolation properties of reflink-based fork-from-snapshot and guards against fan-out optimizations that would share an inode with the source's snapshot mem-file.
1 parent 58b9a9f commit 0f0137b

1 file changed

Lines changed: 253 additions & 0 deletions

File tree

lib/instances/firecracker_test.go

Lines changed: 253 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,16 @@ package instances
44

55
import (
66
"context"
7+
"crypto/sha256"
8+
"encoding/hex"
79
"fmt"
10+
"io"
811
"net"
912
"net/http"
1013
"os"
1114
"path/filepath"
1215
"strings"
16+
"syscall"
1317
"testing"
1418
"time"
1519

@@ -26,6 +30,7 @@ import (
2630
"github.com/stretchr/testify/assert"
2731
"github.com/stretchr/testify/require"
2832
"github.com/vishvananda/netlink"
33+
"golang.org/x/sys/unix"
2934
)
3035

3136
func setupTestManagerForFirecrackerWithNetworkConfig(t *testing.T, networkCfg config.NetworkConfig) (*manager, string) {
@@ -551,3 +556,251 @@ func TestFirecrackerSnapshotFeature(t *testing.T) {
551556
forkName: "fc-snapshot-fork",
552557
})
553558
}
559+
560+
// TestFirecrackerForkFromSnapshot_DoesNotMutateSource verifies CoW isolation
561+
// between a firecracker source's standby snapshot and a fork derived from it.
562+
// A fork must end up with its own mem-file inode (reflink-cloned, not
563+
// hardlinked) so that mutating the fork — including taking a diff snapshot of
564+
// the fork after divergence — never alters the source's snapshot bytes. This
565+
// guards against the family of hazards where fan-out optimizations
566+
// inadvertently share an inode with the source and let later writes propagate
567+
// back through it.
568+
func TestFirecrackerForkFromSnapshot_DoesNotMutateSource(t *testing.T) {
569+
t.Parallel()
570+
requireFirecrackerIntegrationPrereqs(t)
571+
572+
mgr, tmpDir := setupTestManagerForFirecrackerNoNetwork(t)
573+
ctx := context.Background()
574+
p := paths.New(tmpDir)
575+
576+
imageManager, err := images.NewManager(p, 1, nil)
577+
require.NoError(t, err)
578+
createNginxImageAndWait(t, ctx, imageManager)
579+
580+
systemManager := system.NewManager(p)
581+
require.NoError(t, systemManager.EnsureSystemFiles(ctx))
582+
583+
const guestMemBytes = int64(1024 * 1024 * 1024)
584+
585+
source, err := mgr.CreateInstance(ctx, CreateInstanceRequest{
586+
Name: "fc-fork-isolation-src",
587+
Image: integrationTestImageRef(t, "docker.io/library/nginx:alpine"),
588+
Size: guestMemBytes,
589+
OverlaySize: 10 * 1024 * 1024 * 1024,
590+
Vcpus: 1,
591+
NetworkEnabled: false,
592+
Hypervisor: hypervisor.TypeFirecracker,
593+
})
594+
require.NoError(t, err)
595+
sourceID := source.Id
596+
sourceDeleted := false
597+
t.Cleanup(func() {
598+
if !sourceDeleted {
599+
_ = mgr.DeleteInstance(context.Background(), sourceID)
600+
}
601+
})
602+
603+
source, err = waitForInstanceState(ctx, mgr, sourceID, StateRunning, integrationTestTimeout(20*time.Second))
604+
require.NoError(t, err)
605+
require.NoError(t, waitForExecAgent(ctx, mgr, sourceID, 30*time.Second))
606+
607+
const sourceSentinelPath = "/tmp/source-sentinel.txt"
608+
const sourceSentinelContents = "source-only"
609+
output, exitCode, err := execCommand(ctx, source, "sh", "-c",
610+
fmt.Sprintf("printf %q > %s && sync", sourceSentinelContents, sourceSentinelPath))
611+
require.NoError(t, err)
612+
require.Equalf(t, 0, exitCode, "write source sentinel: %s", output)
613+
614+
// Source standby produces a full firecracker snapshot. We hold the source
615+
// in Standby for the entire fork lifecycle below so the snapshot mem-file
616+
// stays at snapshot-latest/memory and is comparable across phases.
617+
source, err = mgr.StandbyInstance(ctx, sourceID, StandbyInstanceRequest{})
618+
require.NoError(t, err)
619+
require.Equal(t, StateStandby, source.State)
620+
require.True(t, source.HasSnapshot)
621+
622+
sourceMemPath := filepath.Join(p.InstanceSnapshotLatest(sourceID), "memory")
623+
sourceBefore, err := fingerprintFile(sourceMemPath)
624+
require.NoError(t, err, "fingerprint source mem-file after standby")
625+
626+
reflinkOK := probeReflinkSupport(t, tmpDir)
627+
var statBefore syscall.Statfs_t
628+
require.NoError(t, syscall.Statfs(tmpDir, &statBefore))
629+
freeBefore := int64(statBefore.Bavail) * statBefore.Bsize
630+
631+
fork, err := mgr.ForkInstance(ctx, sourceID, ForkInstanceRequest{
632+
Name: "fc-fork-isolation-fork",
633+
})
634+
require.NoError(t, err)
635+
forkID := fork.Id
636+
forkDeleted := false
637+
t.Cleanup(func() {
638+
if !forkDeleted {
639+
_ = mgr.DeleteInstance(context.Background(), forkID)
640+
}
641+
})
642+
require.Equal(t, StateStandby, fork.State)
643+
644+
// Fork's mem-file must be a separate inode from the source's. Hardlinking
645+
// or symlinking would share the inode and allow later writes to corrupt
646+
// the source.
647+
forkMemPath := filepath.Join(p.InstanceSnapshotLatest(forkID), "memory")
648+
forkAfterCreate, err := fingerprintFile(forkMemPath)
649+
require.NoError(t, err, "fingerprint fork mem-file after fork")
650+
require.NotEqual(t, sourceBefore.inode, forkAfterCreate.inode,
651+
"fork mem-file must not share an inode with the source")
652+
653+
sourceAfterFork, err := fingerprintFile(sourceMemPath)
654+
require.NoError(t, err)
655+
require.Equal(t, sourceBefore.inode, sourceAfterFork.inode,
656+
"source mem-file inode must not change after fork creation")
657+
require.Equal(t, sourceBefore.sha, sourceAfterFork.sha,
658+
"source mem-file bytes must not change after fork creation")
659+
660+
// Restore the fork: it should see the source's pre-fork guest state.
661+
fork, err = mgr.RestoreInstance(ctx, forkID)
662+
require.NoError(t, err)
663+
fork, err = waitForInstanceState(ctx, mgr, forkID, StateRunning, integrationTestTimeout(20*time.Second))
664+
require.NoError(t, err)
665+
require.NoError(t, waitForExecAgent(ctx, mgr, forkID, 30*time.Second))
666+
667+
output, exitCode, err = execCommand(ctx, fork, "cat", sourceSentinelPath)
668+
require.NoError(t, err)
669+
require.Equal(t, 0, exitCode)
670+
require.Equal(t, sourceSentinelContents, strings.TrimSpace(output))
671+
672+
// Diverge the fork: write a fork-only sentinel, then standby the fork.
673+
// Firecracker's second standby produces a diff snapshot against the fork's
674+
// retained base — this is the operation most likely to corrupt the source
675+
// if the fork's mem-file were sharing the source's inode.
676+
const forkSentinelPath = "/tmp/fork-sentinel.txt"
677+
const forkSentinelContents = "fork-only"
678+
output, exitCode, err = execCommand(ctx, fork, "sh", "-c",
679+
fmt.Sprintf("printf %q > %s && sync", forkSentinelContents, forkSentinelPath))
680+
require.NoError(t, err)
681+
require.Equalf(t, 0, exitCode, "write fork sentinel: %s", output)
682+
683+
fork, err = mgr.StandbyInstance(ctx, forkID, StandbyInstanceRequest{})
684+
require.NoError(t, err)
685+
require.Equal(t, StateStandby, fork.State)
686+
687+
// Source mem-file must STILL be byte-identical after the fork's full
688+
// lifecycle (restore + write + standby/diff-snapshot).
689+
sourceAfterForkStandby, err := fingerprintFile(sourceMemPath)
690+
require.NoError(t, err)
691+
require.Equal(t, sourceBefore.inode, sourceAfterForkStandby.inode,
692+
"source mem-file inode must not change after fork standby")
693+
require.Equal(t, sourceBefore.sha, sourceAfterForkStandby.sha,
694+
"source mem-file bytes must not change after fork standby")
695+
696+
// Soft disk-usage assertion: on reflink-capable filesystems, the fork
697+
// lifecycle should consume substantially less than a full guest-mem copy
698+
// because pages are shared CoW. Gated on FICLONE probe — ext4 etc. fall
699+
// back to sparse copy which produces full physical copies, so the bound
700+
// would not hold there.
701+
var statAfter syscall.Statfs_t
702+
require.NoError(t, syscall.Statfs(tmpDir, &statAfter))
703+
freeAfter := int64(statAfter.Bavail) * statAfter.Bsize
704+
consumed := freeBefore - freeAfter
705+
t.Logf("fork lifecycle disk-usage delta: consumed=%d guestMem=%d reflink=%v",
706+
consumed, guestMemBytes, reflinkOK)
707+
if reflinkOK {
708+
assert.Less(t, consumed, guestMemBytes/2,
709+
"fork lifecycle should consume substantially less than full guest mem on reflink-capable fs")
710+
}
711+
712+
// Delete the fork — its inode goes away. On a reflink-capable fs, deleting
713+
// a CoW clone must not affect the source's blocks. Verify the source
714+
// mem-file is still readable and byte-identical after the unlink.
715+
require.NoError(t, mgr.DeleteInstance(ctx, forkID))
716+
forkDeleted = true
717+
718+
sourceAfterForkDelete, err := fingerprintFile(sourceMemPath)
719+
require.NoError(t, err, "source mem-file should still be readable after fork delete")
720+
require.Equal(t, sourceBefore.inode, sourceAfterForkDelete.inode,
721+
"source mem-file inode must not change after fork delete")
722+
require.Equal(t, sourceBefore.sha, sourceAfterForkDelete.sha,
723+
"source mem-file bytes must not change after fork delete")
724+
725+
// Strongest end-to-end check: the source snapshot must still be restorable
726+
// after the fork's full lifecycle. Verify the source's sentinel survived
727+
// and the fork-only sentinel did not leak across.
728+
source, err = mgr.RestoreInstance(ctx, sourceID)
729+
require.NoError(t, err)
730+
source, err = waitForInstanceState(ctx, mgr, sourceID, StateRunning, integrationTestTimeout(20*time.Second))
731+
require.NoError(t, err)
732+
require.NoError(t, waitForExecAgent(ctx, mgr, sourceID, 30*time.Second))
733+
734+
output, exitCode, err = execCommand(ctx, source, "cat", sourceSentinelPath)
735+
require.NoError(t, err)
736+
require.Equal(t, 0, exitCode)
737+
require.Equal(t, sourceSentinelContents, strings.TrimSpace(output))
738+
739+
_, exitCode, err = execCommand(ctx, source, "test", "-f", forkSentinelPath)
740+
require.NoError(t, err)
741+
require.NotEqual(t, 0, exitCode, "source must not see the fork-only sentinel")
742+
743+
require.NoError(t, mgr.DeleteInstance(ctx, sourceID))
744+
sourceDeleted = true
745+
}
746+
747+
// fileFingerprint identifies a file by the inode it resolves to and by a hash
// of its contents.
type fileFingerprint struct {
	inode uint64 // inode number taken from syscall.Stat_t.Ino
	sha   string // hex-encoded SHA-256 of the file's bytes
}

// fingerprintFile returns the inode number and the hex-encoded SHA-256 of the
// file at path.
//
// The file is opened once and both the stat and the hash are taken from that
// open handle, so the inode and the digest always describe the same file even
// if path is replaced while the fingerprint is being computed.
//
// It returns an error if the file cannot be opened, stat'ed or read, or if the
// platform's stat result is not a *syscall.Stat_t.
func fingerprintFile(path string) (fileFingerprint, error) {
	f, err := os.Open(path)
	if err != nil {
		return fileFingerprint{}, fmt.Errorf("open %s: %w", path, err)
	}
	defer f.Close()

	// Stat the handle rather than the path: a second path lookup could
	// resolve to a different inode than the one being hashed below.
	st, err := f.Stat()
	if err != nil {
		return fileFingerprint{}, fmt.Errorf("stat %s: %w", path, err)
	}
	sys, ok := st.Sys().(*syscall.Stat_t)
	if !ok {
		return fileFingerprint{}, fmt.Errorf("unexpected stat type for %s", path)
	}

	// Stream the contents through the hash so memory use stays constant
	// regardless of file size.
	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		return fileFingerprint{}, fmt.Errorf("read %s: %w", path, err)
	}
	return fileFingerprint{inode: sys.Ino, sha: hex.EncodeToString(h.Sum(nil))}, nil
}
772+
773+
// probeReflinkSupport returns true if FICLONE works on the given directory.
774+
// Used to gate the soft disk-usage assertion: on ext4 and other non-reflink
775+
// filesystems the copy falls back to sparse full-copy semantics, so the
776+
// "fork should consume much less than guest-mem" bound would not hold.
777+
func probeReflinkSupport(t *testing.T, dir string) bool {
778+
t.Helper()
779+
srcPath := filepath.Join(dir, ".reflink-probe-src")
780+
dstPath := filepath.Join(dir, ".reflink-probe-dst")
781+
defer func() {
782+
_ = os.Remove(srcPath)
783+
_ = os.Remove(dstPath)
784+
}()
785+
if err := os.WriteFile(srcPath, []byte("reflink-probe"), 0644); err != nil {
786+
t.Logf("reflink probe: write src failed: %v", err)
787+
return false
788+
}
789+
src, err := os.Open(srcPath)
790+
if err != nil {
791+
t.Logf("reflink probe: open src failed: %v", err)
792+
return false
793+
}
794+
defer src.Close()
795+
dst, err := os.OpenFile(dstPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
796+
if err != nil {
797+
t.Logf("reflink probe: open dst failed: %v", err)
798+
return false
799+
}
800+
defer dst.Close()
801+
if err := unix.IoctlFileClone(int(dst.Fd()), int(src.Fd())); err != nil {
802+
t.Logf("reflink probe: FICLONE failed: %v", err)
803+
return false
804+
}
805+
return true
806+
}

0 commit comments

Comments
 (0)