diff --git a/lib/forkvm/copy.go b/lib/forkvm/copy.go
index 6dc6eecc..fda4a48f 100644
--- a/lib/forkvm/copy.go
+++ b/lib/forkvm/copy.go
@@ -7,13 +7,37 @@ import (
 	"os"
 	"path/filepath"
 	"strings"
+	"sync/atomic"
 )
 
-var ErrSparseCopyUnsupported = errors.New("sparse copy unsupported")
+var (
+	ErrSparseCopyUnsupported = errors.New("sparse copy unsupported")
+	ErrReflinkUnsupported    = errors.New("reflink unsupported")
+)
+
+// reflinkDisabled, when true, forces CopyGuestDirectory to skip the FICLONE
+// fast path entirely. Tests set this; production code leaves it untouched.
+var reflinkDisabled atomic.Bool
+
+// SetReflinkDisabled toggles the FICLONE fast path. Intended for tests that
+// need to exercise the sparse-copy fallback explicitly.
+func SetReflinkDisabled(disabled bool) {
+	reflinkDisabled.Store(disabled)
+}
+
+// copyState carries per-call state for a single CopyGuestDirectory walk.
+// reflinkDead is set once FICLONE has been rejected as unsupported (or when
+// reflink is disabled up front); later files then skip the reflink attempt
+// instead of re-paying the rejection on every file.
+type copyState struct {
+	reflinkDead bool
+}
 
 // CopyGuestDirectory recursively copies a guest directory to a new destination.
-// Regular files are copied using sparse extent copy only (SEEK_DATA/SEEK_HOLE).
-// Runtime sockets and logs are skipped because they are host-runtime artifacts.
+// Regular files are cloned via reflink (FICLONE) when the underlying filesystem
+// supports it; otherwise we fall back to a sparse extent copy
+// (SEEK_DATA/SEEK_HOLE). Runtime sockets and logs are skipped because they are
+// host-runtime artifacts.
 func CopyGuestDirectory(srcDir, dstDir string) error {
 	srcInfo, err := os.Stat(srcDir)
 	if err != nil {
@@ -27,6 +51,11 @@ func CopyGuestDirectory(srcDir, dstDir string) error {
 		return fmt.Errorf("create destination directory: %w", err)
 	}
 
+	state := &copyState{}
+	if reflinkDisabled.Load() {
+		state.reflinkDead = true
+	}
+
 	return filepath.WalkDir(srcDir, func(path string, d fs.DirEntry, walkErr error) error {
 		if walkErr != nil {
 			return walkErr
@@ -61,7 +90,7 @@ func CopyGuestDirectory(srcDir, dstDir string) error {
 			return nil
 
 		case mode.IsRegular():
-			if err := copyRegularFileSparse(path, dstPath, mode.Perm()); err != nil {
+			if err := copyRegularFile(state, path, dstPath, mode.Perm()); err != nil {
 				return fmt.Errorf("copy file %s: %w", path, err)
 			}
 			return nil
@@ -86,6 +115,26 @@ func CopyGuestDirectory(srcDir, dstDir string) error {
 	})
 }
 
+// copyRegularFile clones srcPath to dstPath, preferring FICLONE reflink and
+// falling back to sparse extent copy. The state object lets us short-circuit
+// future reflink attempts once we observe an "unsupported" signal from the
+// destination filesystem in the current copy.
+func copyRegularFile(state *copyState, srcPath, dstPath string, perms fs.FileMode) error {
+	if state == nil || !state.reflinkDead { // a nil state means: always attempt the reflink first
+		err := copyRegularFileReflink(srcPath, dstPath, perms)
+		if err == nil {
+			return nil
+		}
+		if !errors.Is(err, ErrReflinkUnsupported) {
+			return err // genuine failure: surface it instead of masking it with the fallback copy
+		}
+		if state != nil {
+			state.reflinkDead = true // later calls sharing this state go straight to the sparse copy
+		}
+	}
+	return copyRegularFileSparse(srcPath, dstPath, perms)
+}
+
 func shouldSkipDirectory(relPath string) bool {
 	return relPath == "logs"
 }
diff --git a/lib/forkvm/copy_reflink_linux.go b/lib/forkvm/copy_reflink_linux.go
new file mode 100644
index 00000000..b16664c8
--- /dev/null
+++ b/lib/forkvm/copy_reflink_linux.go
@@ -0,0 +1,65 @@
+//go:build linux
+
+package forkvm
+
+import (
+	"errors"
+	"fmt"
+	"io/fs"
+	"os"
+
+	"golang.org/x/sys/unix"
+)
+
+// copyRegularFileReflink attempts to clone srcPath to dstPath via FICLONE
+// (reflink). On filesystems that support copy-on-write at the block layer
+// (btrfs, xfs with reflink=1, zfs, bcachefs), this is effectively
+// instantaneous and consumes no additional space until pages diverge.
+// dstPath is created or truncated with perms and removed again on failure.
+// The error wraps ErrReflinkUnsupported when the filesystem or kernel rejects
+// the operation; callers should then fall back to a full-copy path.
+func copyRegularFileReflink(srcPath, dstPath string, perms fs.FileMode) (retErr error) {
+	src, err := os.Open(srcPath)
+	if err != nil {
+		return err
+	}
+	defer src.Close()
+
+	dst, err := os.OpenFile(dstPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, perms)
+	if err != nil {
+		return err
+	}
+	defer func() { // retErr is a named result so this cleanup can both read and override it
+		if cerr := dst.Close(); retErr == nil && cerr != nil {
+			retErr = cerr
+		}
+		if retErr != nil {
+			_ = os.Remove(dstPath) // best-effort: do not leave a failed clone target behind
+		}
+	}()
+
+	if err := unix.IoctlFileClone(int(dst.Fd()), int(src.Fd())); err != nil {
+		if isReflinkUnsupportedError(err) {
+			return fmt.Errorf("%w: FICLONE rejected for %s: %v", ErrReflinkUnsupported, srcPath, err) // only the sentinel is wrapped; the errno is flattened via %v
+		}
+		return fmt.Errorf("FICLONE %s -> %s: %w", srcPath, dstPath, err)
+	}
+	return nil
+}
+
+// isReflinkUnsupportedError reports whether an FICLONE failure means the
+// clone cannot be served for these files, so the caller should fall back to
+// a regular copy. Any errno not listed below (e.g. EIO, ENOSPC) is a hard error.
+func isReflinkUnsupportedError(err error) bool {
+	switch {
+	case errors.Is(err, unix.EINVAL),
+		errors.Is(err, unix.ENOTSUP),
+		errors.Is(err, unix.EOPNOTSUPP),
+		errors.Is(err, unix.EXDEV),
+		errors.Is(err, unix.ETXTBSY), // NOTE(review): looks per-file rather than fs-wide, yet copyRegularFile makes the sentinel sticky - confirm intended
+		errors.Is(err, unix.EISDIR), // NOTE(review): same concern as ETXTBSY - confirm
+		errors.Is(err, unix.ENOTTY):
+		return true
+	}
+	return false
+}
diff --git a/lib/forkvm/copy_reflink_other.go b/lib/forkvm/copy_reflink_other.go
new file mode 100644
index 00000000..f2a4fcad
--- /dev/null
+++ b/lib/forkvm/copy_reflink_other.go
@@ -0,0 +1,17 @@
+//go:build !linux
+
+package forkvm
+
+import (
+	"fmt"
+	"io/fs"
+)
+
+// copyRegularFileReflink is unavailable on non-Linux platforms. On macOS APFS
+// supports clonefile(2) and could be wired up here, but we currently only
+// rely on the sparse-copy fallback off-Linux.
+func copyRegularFileReflink(srcPath, dstPath string, perms fs.FileMode) error {
+	_ = dstPath // unused here: this stub never touches the destination
+	_ = perms
+	return fmt.Errorf("%w: reflink unsupported on this platform: %s", ErrReflinkUnsupported, srcPath)
+}
diff --git a/lib/forkvm/copy_reflink_test.go b/lib/forkvm/copy_reflink_test.go
new file mode 100644
index 00000000..5c200b4b
--- /dev/null
+++ b/lib/forkvm/copy_reflink_test.go
@@ -0,0 +1,56 @@
+package forkvm
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// TestCopyGuestDirectory_ReflinkFallback exercises the sparse-copy fallback
+// path. The reflink fast path is fs-dependent and not portable across CI
+// runners; this test forces it off and verifies the copied file contents.
+func TestCopyGuestDirectory_ReflinkFallback(t *testing.T) {
+	SetReflinkDisabled(true)
+	t.Cleanup(func() { SetReflinkDisabled(false) }) // restore the package-level default for other tests
+
+	src := filepath.Join(t.TempDir(), "src")
+	dst := filepath.Join(t.TempDir(), "dst")
+
+	require.NoError(t, os.MkdirAll(src, 0755))
+	require.NoError(t, os.WriteFile(filepath.Join(src, "rootfs.ext4"), []byte("rootfs-bytes"), 0644))
+	require.NoError(t, os.WriteFile(filepath.Join(src, "config.json"), []byte(`{"x":1}`), 0644))
+
+	require.NoError(t, CopyGuestDirectory(src, dst))
+
+	got, err := os.ReadFile(filepath.Join(dst, "rootfs.ext4"))
+	require.NoError(t, err)
+	assert.Equal(t, "rootfs-bytes", string(got))
+
+	got, err = os.ReadFile(filepath.Join(dst, "config.json"))
+	require.NoError(t, err)
+	assert.Equal(t, `{"x":1}`, string(got))
+}
+
+// TestCopyGuestDirectory_ReflinkAttempted verifies that with reflink enabled
+// (the default), the copy still produces a correct destination whether
+// FICLONE succeeds or the code falls back transparently. It only checks file
+// contents, so it cannot tell which of the two paths ran; it is a smoke test
+// for the new fast path, not proof that a reflink happened.
+func TestCopyGuestDirectory_ReflinkAttempted(t *testing.T) {
+	SetReflinkDisabled(false)
+
+	src := filepath.Join(t.TempDir(), "src")
+	dst := filepath.Join(t.TempDir(), "dst")
+
+	require.NoError(t, os.MkdirAll(src, 0755))
+	require.NoError(t, os.WriteFile(filepath.Join(src, "rootfs.ext4"), []byte("rootfs-bytes"), 0644))
+
+	require.NoError(t, CopyGuestDirectory(src, dst))
+
+	got, err := os.ReadFile(filepath.Join(dst, "rootfs.ext4"))
+	require.NoError(t, err)
+	assert.Equal(t, "rootfs-bytes", string(got))
+}
diff --git a/lib/forkvm/copy_sparse_unix_test.go b/lib/forkvm/copy_sparse_unix_test.go
index 7cf351df..109c3d5a 100644
--- a/lib/forkvm/copy_sparse_unix_test.go
+++ b/lib/forkvm/copy_sparse_unix_test.go
@@ -46,6 +46,9 @@ func TestCopyGuestDirectory_PreservesSparseFiles(t *testing.T) {
 }
 
 func TestCopyGuestDirectory_FailsWhenSparseSeekingUnsupported(t *testing.T) {
+	SetReflinkDisabled(true)
+	t.Cleanup(func() { SetReflinkDisabled(false) })
+
 	src := filepath.Join(t.TempDir(), "src")
 	dst := filepath.Join(t.TempDir(), "dst")
 	require.NoError(t, os.MkdirAll(src, 0755))
diff --git a/lib/instances/reflink_check_linux_test.go b/lib/instances/reflink_check_linux_test.go
new file mode 100644
index 00000000..a50aedad
--- /dev/null
+++ b/lib/instances/reflink_check_linux_test.go
@@ -0,0 +1,168 @@
+//go:build linux
+
+package instances
+
+import (
+	"io/fs"
+	"os"
+	"path/filepath"
+	"testing"
+	"unsafe"
+
+	"github.com/stretchr/testify/require"
+	"golang.org/x/sys/unix"
+)
+
+const (
+	fsIOCFiemap      = 0xC020660B
+	fiemapFlagSync   = 0x1
+	fiemapMaxExtents = 64
+
+	// Extent flags (linux/fiemap.h) set on extents whose Physical field is not
+	// a usable on-disk address, so it must not be compared across files.
+	fiemapExtentUnknown    = 0x2
+	fiemapExtentDataInline = 0x200
+)
+
+// fiemapHeader is the fixed-size header of a FIEMAP request.
+type fiemapHeader struct {
+	Start         uint64
+	Length        uint64
+	Flags         uint32
+	MappedExtents uint32
+	ExtentCount   uint32
+	Reserved      uint32
+}
+
+// fiemapExtent is one extent record filled in by the kernel.
+type fiemapExtent struct {
+	Logical    uint64
+	Physical   uint64
+	Length     uint64
+	Reserved64 [2]uint64
+	Flags      uint32
+	Reserved   [3]uint32
+}
+
+// fiemapRequest is the buffer handed to the ioctl: header plus extent slots.
+type fiemapRequest struct {
+	Header  fiemapHeader
+	Extents [fiemapMaxExtents]fiemapExtent
+}
+
+// fileExtents returns the extents the FIEMAP ioctl reports for path. The
+// request spans the whole file (Length is all ones) but has room for at most
+// fiemapMaxExtents records, so heavily fragmented files are truncated.
+func fileExtents(path string) ([]fiemapExtent, error) {
+	f, err := os.Open(path)
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+
+	var req fiemapRequest
+	req.Header.Length = ^uint64(0)
+	req.Header.Flags = fiemapFlagSync
+	req.Header.ExtentCount = fiemapMaxExtents
+
+	if _, _, errno := unix.Syscall(unix.SYS_IOCTL, f.Fd(), uintptr(fsIOCFiemap), uintptr(unsafe.Pointer(&req))); errno != 0 {
+		return nil, errno
+	}
+	return req.Extents[:req.Header.MappedExtents], nil
+}
+
+// assertCopyReflinked walks srcDir and verifies that at least one regular
+// file shares a physical extent with its counterpart under dstDir. A
+// successful FICLONE leaves the destination pointing at the source's
+// extents, so FIEMAP will report identical fe_physical offsets. If every
+// inspected pair has disjoint extents, the FICLONE fast path silently
+// degraded to a full byte copy and we want to fail loudly. Requires a
+// reflink-capable filesystem under the test's scratch directory (XFS with
+// reflink=1 in CI).
+func assertCopyReflinked(t *testing.T, srcDir, dstDir string) {
+	t.Helper()
+
+	type candidate struct {
+		rel  string
+		size int64
+	}
+	var candidates []candidate
+	require.NoError(t, filepath.WalkDir(srcDir, func(path string, d fs.DirEntry, err error) error {
+		if err != nil {
+			return err
+		}
+		if !d.Type().IsRegular() {
+			return nil
+		}
+		info, err := d.Info()
+		if err != nil {
+			return err
+		}
+		if info.Size() == 0 {
+			return nil
+		}
+		rel, err := filepath.Rel(srcDir, path)
+		if err != nil {
+			return err
+		}
+		candidates = append(candidates, candidate{rel: rel, size: info.Size()})
+		return nil
+	}))
+	require.NotEmpty(t, candidates, "no non-empty regular files under %s", srcDir)
+
+	var inspected, shared int
+	for _, c := range candidates {
+		dstPath := filepath.Join(dstDir, c.rel)
+		if _, err := os.Stat(dstPath); err != nil {
+			continue
+		}
+		srcExtents, err := fileExtents(filepath.Join(srcDir, c.rel))
+		if err != nil {
+			t.Logf("FIEMAP %s: %v", c.rel, err)
+			continue
+		}
+		dstExtents, err := fileExtents(dstPath)
+		if err != nil {
+			t.Logf("FIEMAP %s: %v", dstPath, err)
+			continue
+		}
+		inspected++
+		if extentsShareAny(srcExtents, dstExtents) {
+			shared++
+		}
+	}
+	require.NotZero(t, inspected, "no files inspected for reflink sharing")
+	require.NotZero(t, shared,
+		"no files shared physical extents between %s and %s; FICLONE fast path produced full byte copies",
+		srcDir, dstDir)
+}
+
+// extentsShareAny reports whether any extent in b starts at the same physical
+// offset as an extent in a. Extents without a usable on-disk address (see
+// hasDiskAddress) are ignored so they cannot produce a false match.
+func extentsShareAny(a, b []fiemapExtent) bool {
+	if len(a) == 0 || len(b) == 0 {
+		return false
+	}
+	seen := make(map[uint64]struct{}, len(a))
+	for _, e := range a {
+		if hasDiskAddress(e) {
+			seen[e.Physical] = struct{}{}
+		}
+	}
+	for _, e := range b {
+		if !hasDiskAddress(e) {
+			continue
+		}
+		if _, ok := seen[e.Physical]; ok {
+			return true
+		}
+	}
+	return false
+}
+
+// hasDiskAddress reports whether e.Physical can be compared between files,
+// i.e. the extent is flagged neither as unknown-location nor as inline data.
+func hasDiskAddress(e fiemapExtent) bool {
+	return e.Flags&(fiemapExtentUnknown|fiemapExtentDataInline) == 0
+}
diff --git a/lib/instances/reflink_check_other_test.go b/lib/instances/reflink_check_other_test.go
new file mode 100644
index 00000000..9a9b418f
--- /dev/null
+++ b/lib/instances/reflink_check_other_test.go
@@ -0,0 +1,10 @@
+//go:build !linux
+
+package instances
+
+import "testing"
+
+func assertCopyReflinked(t *testing.T, srcDir, dstDir string) {
+	t.Helper()
+	t.Logf("reflink assertion skipped on non-Linux (src=%s dst=%s)", srcDir, dstDir)
+}
diff --git a/lib/instances/snapshot_integration_scenario_test.go b/lib/instances/snapshot_integration_scenario_test.go
index 805a6203..fa351c00 100644
--- a/lib/instances/snapshot_integration_scenario_test.go
+++ b/lib/instances/snapshot_integration_scenario_test.go
@@ -106,4 +106,6 @@ func runStandbySnapshotScenario(t *testing.T, mgr *manager, tmpDir string, cfg s
 	currentFork, err := mgr.GetInstance(ctx, forkID)
 	requireNoErr(err)
 	require.Equal(t, StateStandby, currentFork.State)
+
+	assertCopyReflinked(t, p.SnapshotGuestDir(snapshot.Id), p.InstanceDir(forkID))
 }