@@ -4,12 +4,16 @@ package instances
44
55import (
66 "context"
7+ "crypto/sha256"
8+ "encoding/hex"
79 "fmt"
10+ "io"
811 "net"
912 "net/http"
1013 "os"
1114 "path/filepath"
1215 "strings"
16+ "syscall"
1317 "testing"
1418 "time"
1519
@@ -26,6 +30,7 @@ import (
2630 "github.com/stretchr/testify/assert"
2731 "github.com/stretchr/testify/require"
2832 "github.com/vishvananda/netlink"
33+ "golang.org/x/sys/unix"
2934)
3035
3136func setupTestManagerForFirecrackerWithNetworkConfig (t * testing.T , networkCfg config.NetworkConfig ) (* manager , string ) {
@@ -551,3 +556,254 @@ func TestFirecrackerSnapshotFeature(t *testing.T) {
551556 forkName : "fc-snapshot-fork" ,
552557 })
553558}
559+
560+ // TestFirecrackerForkIsolation verifies CoW isolation between a firecracker
561+ // source's standby snapshot and a fork derived from it. A fork must end up
562+ // with its own mem-file inode (reflink-cloned, not hardlinked) so that
563+ // mutating the fork — including taking a diff snapshot of the fork after
564+ // divergence — never alters the source's snapshot bytes. This guards against
565+ // the family of hazards where fan-out optimizations inadvertently share an
566+ // inode with the source and let later writes propagate back through it.
567+ //
568+ // Test name is kept short on purpose: t.TempDir() embeds the test name, and
569+ // firecracker's API socket path under that tempdir must fit within SUN_LEN
570+ // (108 bytes on Linux).
571+ func TestFirecrackerForkIsolation (t * testing.T ) {
572+ t .Parallel ()
573+ requireFirecrackerIntegrationPrereqs (t )
574+
575+ mgr , tmpDir := setupTestManagerForFirecrackerNoNetwork (t )
576+ ctx := context .Background ()
577+ p := paths .New (tmpDir )
578+
579+ imageManager , err := images .NewManager (p , 1 , nil )
580+ require .NoError (t , err )
581+ createNginxImageAndWait (t , ctx , imageManager )
582+
583+ systemManager := system .NewManager (p )
584+ require .NoError (t , systemManager .EnsureSystemFiles (ctx ))
585+
586+ const guestMemBytes = int64 (1024 * 1024 * 1024 )
587+
588+ source , err := mgr .CreateInstance (ctx , CreateInstanceRequest {
589+ Name : "fc-fork-isolation-src" ,
590+ Image : integrationTestImageRef (t , "docker.io/library/nginx:alpine" ),
591+ Size : guestMemBytes ,
592+ OverlaySize : 10 * 1024 * 1024 * 1024 ,
593+ Vcpus : 1 ,
594+ NetworkEnabled : false ,
595+ Hypervisor : hypervisor .TypeFirecracker ,
596+ })
597+ require .NoError (t , err )
598+ sourceID := source .Id
599+ sourceDeleted := false
600+ t .Cleanup (func () {
601+ if ! sourceDeleted {
602+ _ = mgr .DeleteInstance (context .Background (), sourceID )
603+ }
604+ })
605+
606+ source , err = waitForInstanceState (ctx , mgr , sourceID , StateRunning , integrationTestTimeout (20 * time .Second ))
607+ require .NoError (t , err )
608+ require .NoError (t , waitForExecAgent (ctx , mgr , sourceID , 30 * time .Second ))
609+
610+ const sourceSentinelPath = "/tmp/source-sentinel.txt"
611+ const sourceSentinelContents = "source-only"
612+ output , exitCode , err := execCommand (ctx , source , "sh" , "-c" ,
613+ fmt .Sprintf ("printf %q > %s && sync" , sourceSentinelContents , sourceSentinelPath ))
614+ require .NoError (t , err )
615+ require .Equalf (t , 0 , exitCode , "write source sentinel: %s" , output )
616+
617+ // Source standby produces a full firecracker snapshot. We hold the source
618+ // in Standby for the entire fork lifecycle below so the snapshot mem-file
619+ // stays at snapshot-latest/memory and is comparable across phases.
620+ source , err = mgr .StandbyInstance (ctx , sourceID , StandbyInstanceRequest {})
621+ require .NoError (t , err )
622+ require .Equal (t , StateStandby , source .State )
623+ require .True (t , source .HasSnapshot )
624+
625+ sourceMemPath := filepath .Join (p .InstanceSnapshotLatest (sourceID ), "memory" )
626+ sourceBefore , err := fingerprintFile (sourceMemPath )
627+ require .NoError (t , err , "fingerprint source mem-file after standby" )
628+
629+ reflinkOK := probeReflinkSupport (t , tmpDir )
630+ var statBefore syscall.Statfs_t
631+ require .NoError (t , syscall .Statfs (tmpDir , & statBefore ))
632+ freeBefore := int64 (statBefore .Bavail ) * statBefore .Bsize
633+
634+ fork , err := mgr .ForkInstance (ctx , sourceID , ForkInstanceRequest {
635+ Name : "fc-fork-isolation-fork" ,
636+ })
637+ require .NoError (t , err )
638+ forkID := fork .Id
639+ forkDeleted := false
640+ t .Cleanup (func () {
641+ if ! forkDeleted {
642+ _ = mgr .DeleteInstance (context .Background (), forkID )
643+ }
644+ })
645+ require .Equal (t , StateStandby , fork .State )
646+
647+ // Fork's mem-file must be a separate inode from the source's. Hardlinking
648+ // or symlinking would share the inode and allow later writes to corrupt
649+ // the source.
650+ forkMemPath := filepath .Join (p .InstanceSnapshotLatest (forkID ), "memory" )
651+ forkAfterCreate , err := fingerprintFile (forkMemPath )
652+ require .NoError (t , err , "fingerprint fork mem-file after fork" )
653+ require .NotEqual (t , sourceBefore .inode , forkAfterCreate .inode ,
654+ "fork mem-file must not share an inode with the source" )
655+
656+ sourceAfterFork , err := fingerprintFile (sourceMemPath )
657+ require .NoError (t , err )
658+ require .Equal (t , sourceBefore .inode , sourceAfterFork .inode ,
659+ "source mem-file inode must not change after fork creation" )
660+ require .Equal (t , sourceBefore .sha , sourceAfterFork .sha ,
661+ "source mem-file bytes must not change after fork creation" )
662+
663+ // Restore the fork: it should see the source's pre-fork guest state.
664+ fork , err = mgr .RestoreInstance (ctx , forkID )
665+ require .NoError (t , err )
666+ fork , err = waitForInstanceState (ctx , mgr , forkID , StateRunning , integrationTestTimeout (20 * time .Second ))
667+ require .NoError (t , err )
668+ require .NoError (t , waitForExecAgent (ctx , mgr , forkID , 30 * time .Second ))
669+
670+ output , exitCode , err = execCommand (ctx , fork , "cat" , sourceSentinelPath )
671+ require .NoError (t , err )
672+ require .Equal (t , 0 , exitCode )
673+ require .Equal (t , sourceSentinelContents , strings .TrimSpace (output ))
674+
675+ // Diverge the fork: write a fork-only sentinel, then standby the fork.
676+ // Firecracker's second standby produces a diff snapshot against the fork's
677+ // retained base — this is the operation most likely to corrupt the source
678+ // if the fork's mem-file were sharing the source's inode.
679+ const forkSentinelPath = "/tmp/fork-sentinel.txt"
680+ const forkSentinelContents = "fork-only"
681+ output , exitCode , err = execCommand (ctx , fork , "sh" , "-c" ,
682+ fmt .Sprintf ("printf %q > %s && sync" , forkSentinelContents , forkSentinelPath ))
683+ require .NoError (t , err )
684+ require .Equalf (t , 0 , exitCode , "write fork sentinel: %s" , output )
685+
686+ fork , err = mgr .StandbyInstance (ctx , forkID , StandbyInstanceRequest {})
687+ require .NoError (t , err )
688+ require .Equal (t , StateStandby , fork .State )
689+
690+ // Source mem-file must STILL be byte-identical after the fork's full
691+ // lifecycle (restore + write + standby/diff-snapshot).
692+ sourceAfterForkStandby , err := fingerprintFile (sourceMemPath )
693+ require .NoError (t , err )
694+ require .Equal (t , sourceBefore .inode , sourceAfterForkStandby .inode ,
695+ "source mem-file inode must not change after fork standby" )
696+ require .Equal (t , sourceBefore .sha , sourceAfterForkStandby .sha ,
697+ "source mem-file bytes must not change after fork standby" )
698+
699+ // Soft disk-usage assertion: on reflink-capable filesystems, the fork
700+ // lifecycle should consume substantially less than a full guest-mem copy
701+ // because pages are shared CoW. Gated on FICLONE probe — ext4 etc. fall
702+ // back to sparse copy which produces full physical copies, so the bound
703+ // would not hold there.
704+ var statAfter syscall.Statfs_t
705+ require .NoError (t , syscall .Statfs (tmpDir , & statAfter ))
706+ freeAfter := int64 (statAfter .Bavail ) * statAfter .Bsize
707+ consumed := freeBefore - freeAfter
708+ t .Logf ("fork lifecycle disk-usage delta: consumed=%d guestMem=%d reflink=%v" ,
709+ consumed , guestMemBytes , reflinkOK )
710+ if reflinkOK {
711+ assert .Less (t , consumed , guestMemBytes / 2 ,
712+ "fork lifecycle should consume substantially less than full guest mem on reflink-capable fs" )
713+ }
714+
715+ // Delete the fork — its inode goes away. On a reflink-capable fs, deleting
716+ // a CoW clone must not affect the source's blocks. Verify the source
717+ // mem-file is still readable and byte-identical after the unlink.
718+ require .NoError (t , mgr .DeleteInstance (ctx , forkID ))
719+ forkDeleted = true
720+
721+ sourceAfterForkDelete , err := fingerprintFile (sourceMemPath )
722+ require .NoError (t , err , "source mem-file should still be readable after fork delete" )
723+ require .Equal (t , sourceBefore .inode , sourceAfterForkDelete .inode ,
724+ "source mem-file inode must not change after fork delete" )
725+ require .Equal (t , sourceBefore .sha , sourceAfterForkDelete .sha ,
726+ "source mem-file bytes must not change after fork delete" )
727+
728+ // Strongest end-to-end check: the source snapshot must still be restorable
729+ // after the fork's full lifecycle. Verify the source's sentinel survived
730+ // and the fork-only sentinel did not leak across.
731+ source , err = mgr .RestoreInstance (ctx , sourceID )
732+ require .NoError (t , err )
733+ source , err = waitForInstanceState (ctx , mgr , sourceID , StateRunning , integrationTestTimeout (20 * time .Second ))
734+ require .NoError (t , err )
735+ require .NoError (t , waitForExecAgent (ctx , mgr , sourceID , 30 * time .Second ))
736+
737+ output , exitCode , err = execCommand (ctx , source , "cat" , sourceSentinelPath )
738+ require .NoError (t , err )
739+ require .Equal (t , 0 , exitCode )
740+ require .Equal (t , sourceSentinelContents , strings .TrimSpace (output ))
741+
742+ _ , exitCode , err = execCommand (ctx , source , "test" , "-f" , forkSentinelPath )
743+ require .NoError (t , err )
744+ require .NotEqual (t , 0 , exitCode , "source must not see the fork-only sentinel" )
745+
746+ require .NoError (t , mgr .DeleteInstance (ctx , sourceID ))
747+ sourceDeleted = true
748+ }
749+
// fileFingerprint captures a file's identity and content at one point in
// time: the inode number reported by stat and the hex-encoded SHA-256 digest
// of its bytes.
type fileFingerprint struct {
	inode uint64
	sha   string
}

// fingerprintFile opens path and returns its inode number together with the
// hex-encoded SHA-256 digest of its full contents.
//
// The inode is taken from the already-open file rather than from a separate
// path lookup, so the inode and the digest always describe the same file even
// if path is renamed or replaced while the fingerprint is being computed.
//
// On any failure it returns a zero fileFingerprint and an error that wraps
// the underlying cause and names the step (open, stat, or read) that failed.
func fingerprintFile(path string) (fileFingerprint, error) {
	f, err := os.Open(path)
	if err != nil {
		return fileFingerprint{}, fmt.Errorf("open %s: %w", path, err)
	}
	defer f.Close()

	st, err := f.Stat()
	if err != nil {
		return fileFingerprint{}, fmt.Errorf("stat %s: %w", path, err)
	}
	// The inode number is only exposed through the platform-specific stat
	// structure behind FileInfo.Sys().
	sys, ok := st.Sys().(*syscall.Stat_t)
	if !ok {
		return fileFingerprint{}, fmt.Errorf("unexpected stat type for %s", path)
	}

	// Stream the contents through the hash so memory use stays constant no
	// matter how large the file is.
	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		return fileFingerprint{}, fmt.Errorf("read %s: %w", path, err)
	}
	return fileFingerprint{inode: sys.Ino, sha: hex.EncodeToString(h.Sum(nil))}, nil
}
775+
776+ // probeReflinkSupport returns true if FICLONE works on the given directory.
777+ // Used to gate the soft disk-usage assertion: on ext4 and other non-reflink
778+ // filesystems the copy falls back to sparse full-copy semantics, so the
779+ // "fork should consume much less than guest-mem" bound would not hold.
780+ func probeReflinkSupport (t * testing.T , dir string ) bool {
781+ t .Helper ()
782+ srcPath := filepath .Join (dir , ".reflink-probe-src" )
783+ dstPath := filepath .Join (dir , ".reflink-probe-dst" )
784+ defer func () {
785+ _ = os .Remove (srcPath )
786+ _ = os .Remove (dstPath )
787+ }()
788+ if err := os .WriteFile (srcPath , []byte ("reflink-probe" ), 0644 ); err != nil {
789+ t .Logf ("reflink probe: write src failed: %v" , err )
790+ return false
791+ }
792+ src , err := os .Open (srcPath )
793+ if err != nil {
794+ t .Logf ("reflink probe: open src failed: %v" , err )
795+ return false
796+ }
797+ defer src .Close ()
798+ dst , err := os .OpenFile (dstPath , os .O_CREATE | os .O_TRUNC | os .O_WRONLY , 0644 )
799+ if err != nil {
800+ t .Logf ("reflink probe: open dst failed: %v" , err )
801+ return false
802+ }
803+ defer dst .Close ()
804+ if err := unix .IoctlFileClone (int (dst .Fd ()), int (src .Fd ())); err != nil {
805+ t .Logf ("reflink probe: FICLONE failed: %v" , err )
806+ return false
807+ }
808+ return true
809+ }
0 commit comments