From 4d0e227ce57d33c980c1a6047f0c66b336755466 Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Fri, 27 Mar 2026 18:19:12 -0700 Subject: [PATCH 1/9] Add zero copy Signed-off-by: Babis Chalios --- .../orchestrator/pkg/sandbox/block/cache.go | 68 ++++++++++++------- .../orchestrator/pkg/sandbox/diffcreator.go | 6 +- .../orchestrator/pkg/sandbox/rootfs/direct.go | 2 +- .../orchestrator/pkg/sandbox/rootfs/nbd.go | 3 +- .../orchestrator/pkg/sandbox/rootfs/rootfs.go | 3 +- packages/orchestrator/pkg/sandbox/sandbox.go | 2 +- .../shared/pkg/storage/header/metadata.go | 5 ++ 7 files changed, 56 insertions(+), 33 deletions(-) diff --git a/packages/orchestrator/pkg/sandbox/block/cache.go b/packages/orchestrator/pkg/sandbox/block/cache.go index d9690b1de0..9de10dc289 100644 --- a/packages/orchestrator/pkg/sandbox/block/cache.go +++ b/packages/orchestrator/pkg/sandbox/block/cache.go @@ -4,11 +4,9 @@ import ( "context" "errors" "fmt" - "io" "math" "math/rand" "os" - "slices" "sync" "sync/atomic" "syscall" @@ -120,8 +118,8 @@ func (c *Cache) Sync() error { return nil } -func (c *Cache) ExportToDiff(ctx context.Context, out io.Writer) (*header.DiffMetadata, error) { - ctx, childSpan := tracer.Start(ctx, "export-to-diff") +func (c *Cache) ExportToDiff(ctx context.Context, out *os.File) (*header.DiffMetadata, error) { + _, childSpan := tracer.Start(ctx, "export-to-diff") defer childSpan.End() c.mu.Lock() @@ -146,16 +144,52 @@ func (c *Cache) ExportToDiff(ctx context.Context, out io.Writer) (*header.DiffMe builder := header.NewDiffMetadataBuilder(c.size, c.blockSize) - for _, offset := range c.dirtySortedKeys() { - block := (*c.mmap)[offset : offset+c.blockSize] + // We don't need to sort the keys as the bitset handles the ordering. 
+ c.dirty.Range(func(key, _ any) bool { + builder.AddDirtyOffset(key.(int64)) - err := builder.Process(ctx, block, out, offset) - if err != nil { - return nil, fmt.Errorf("error processing block %d: %w", offset, err) + return true + }) + + diffMetadata := builder.Build() + + f, err := os.Open(c.filePath) + if err != nil { + return nil, fmt.Errorf("error opening file: %w", err) + } + defer f.Close() + + var writeOffset int64 + + for r := range BitsetRanges(diffMetadata.Dirty, diffMetadata.BlockSize) { + remaining := int(r.Size) + readOffset := r.Start + + // The kernel may return short writes (e.g. capped at MAX_RW_COUNT on non-reflink filesystems), + // so we loop until the full range is copied. The offset pointers are advanced by the kernel. + for remaining > 0 { + // On XFS this uses reflink automatically. + n, err := unix.CopyFileRange( + int(f.Fd()), + &readOffset, + int(out.Fd()), + &writeOffset, + remaining, + 0, + ) + if err != nil { + return nil, fmt.Errorf("error copying file range: %w", err) + } + + if n == 0 { + return nil, fmt.Errorf("copy_file_range returned 0 with %d bytes remaining", remaining) + } + + remaining -= n } } - return builder.Build(), nil + return diffMetadata, nil } func (c *Cache) ReadAt(b []byte, off int64) (int, error) { @@ -291,20 +325,6 @@ func (c *Cache) WriteAtWithoutLock(b []byte, off int64) (int, error) { return n, nil } -// dirtySortedKeys returns a sorted list of dirty keys. -// Key represents a block offset. -func (c *Cache) dirtySortedKeys() []int64 { - var keys []int64 - c.dirty.Range(func(key, _ any) bool { - keys = append(keys, key.(int64)) - - return true - }) - slices.Sort(keys) - - return keys -} - // FileSize returns the size of the cache on disk. // The size might differ from the dirty size, as it may not be fully on disk. 
func (c *Cache) FileSize() (int64, error) { diff --git a/packages/orchestrator/pkg/sandbox/diffcreator.go b/packages/orchestrator/pkg/sandbox/diffcreator.go index a55221d984..bd4a27e30a 100644 --- a/packages/orchestrator/pkg/sandbox/diffcreator.go +++ b/packages/orchestrator/pkg/sandbox/diffcreator.go @@ -2,14 +2,14 @@ package sandbox import ( "context" - "io" + "os" "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/rootfs" "github.com/e2b-dev/infra/packages/shared/pkg/storage/header" ) type DiffCreator interface { - process(ctx context.Context, out io.Writer) (*header.DiffMetadata, error) + process(ctx context.Context, out *os.File) (*header.DiffMetadata, error) } type RootfsDiffCreator struct { @@ -17,6 +17,6 @@ type RootfsDiffCreator struct { closeHook func(context.Context) error } -func (r *RootfsDiffCreator) process(ctx context.Context, out io.Writer) (*header.DiffMetadata, error) { +func (r *RootfsDiffCreator) process(ctx context.Context, out *os.File) (*header.DiffMetadata, error) { return r.rootfs.ExportDiff(ctx, out, r.closeHook) } diff --git a/packages/orchestrator/pkg/sandbox/rootfs/direct.go b/packages/orchestrator/pkg/sandbox/rootfs/direct.go index edea9b73ef..13bc98cb4d 100644 --- a/packages/orchestrator/pkg/sandbox/rootfs/direct.go +++ b/packages/orchestrator/pkg/sandbox/rootfs/direct.go @@ -69,7 +69,7 @@ func (o *DirectProvider) Start(_ context.Context) error { func (o *DirectProvider) ExportDiff( ctx context.Context, - out io.Writer, + out *os.File, stopSandbox func(context.Context) error, ) (*header.DiffMetadata, error) { ctx, childSpan := tracer.Start(ctx, "direct-provider-export") diff --git a/packages/orchestrator/pkg/sandbox/rootfs/nbd.go b/packages/orchestrator/pkg/sandbox/rootfs/nbd.go index a29baf9e5a..6a3f63e457 100644 --- a/packages/orchestrator/pkg/sandbox/rootfs/nbd.go +++ b/packages/orchestrator/pkg/sandbox/rootfs/nbd.go @@ -4,7 +4,6 @@ import ( "context" "errors" "fmt" - "io" "os" "go.uber.org/zap" @@ -71,7 +70,7 @@ func (o 
*NBDProvider) Start(ctx context.Context) error { func (o *NBDProvider) ExportDiff( ctx context.Context, - out io.Writer, + out *os.File, closeSandbox func(ctx context.Context) error, ) (*header.DiffMetadata, error) { ctx, span := tracer.Start(ctx, "cow-export") diff --git a/packages/orchestrator/pkg/sandbox/rootfs/rootfs.go b/packages/orchestrator/pkg/sandbox/rootfs/rootfs.go index b6802a3a26..b3940536b6 100644 --- a/packages/orchestrator/pkg/sandbox/rootfs/rootfs.go +++ b/packages/orchestrator/pkg/sandbox/rootfs/rootfs.go @@ -3,7 +3,6 @@ package rootfs import ( "context" "fmt" - "io" "os" "syscall" @@ -22,7 +21,7 @@ type Provider interface { Start(ctx context.Context) error Close(ctx context.Context) error Path() (string, error) - ExportDiff(ctx context.Context, out io.Writer, closeSandbox func(context.Context) error) (*header.DiffMetadata, error) + ExportDiff(ctx context.Context, out *os.File, closeSandbox func(context.Context) error) (*header.DiffMetadata, error) } // flush flushes the data to the operating system's buffer. 
diff --git a/packages/orchestrator/pkg/sandbox/sandbox.go b/packages/orchestrator/pkg/sandbox/sandbox.go index a03d94efb3..3e4048d8c6 100644 --- a/packages/orchestrator/pkg/sandbox/sandbox.go +++ b/packages/orchestrator/pkg/sandbox/sandbox.go @@ -1194,7 +1194,7 @@ func pauseProcessRootfs( return nil, nil, fmt.Errorf("failed to create rootfs diff: %w", err) } - rootfsDiffMetadata, err := diffCreator.process(ctx, rootfsDiffFile) + rootfsDiffMetadata, err := diffCreator.process(ctx, rootfsDiffFile.File) if err != nil { err = errors.Join(err, rootfsDiffFile.Close()) diff --git a/packages/shared/pkg/storage/header/metadata.go b/packages/shared/pkg/storage/header/metadata.go index 32dac10d19..574dea78bf 100644 --- a/packages/shared/pkg/storage/header/metadata.go +++ b/packages/shared/pkg/storage/header/metadata.go @@ -114,6 +114,7 @@ type DiffMetadataBuilder struct { func NewDiffMetadataBuilder(size, blockSize int64) *DiffMetadataBuilder { return &DiffMetadataBuilder{ + // TODO: We might be able to start with 0 as preallocating here actually takes space. 
dirty: bitset.New(uint(TotalBlocks(size, blockSize))), empty: bitset.New(0), @@ -121,6 +122,10 @@ func NewDiffMetadataBuilder(size, blockSize int64) *DiffMetadataBuilder { } } +func (b *DiffMetadataBuilder) AddDirtyOffset(offset int64) { + b.dirty.Set(uint(BlockIdx(offset, b.blockSize))) +} + func (b *DiffMetadataBuilder) Process(ctx context.Context, block []byte, out io.Writer, offset int64) error { blockIdx := BlockIdx(offset, b.blockSize) From f0e47e07cfe4f00d97576b33247e1cc57f113fca Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Fri, 27 Mar 2026 18:33:23 -0700 Subject: [PATCH 2/9] Cleanup Signed-off-by: Babis Chalios --- packages/orchestrator/pkg/sandbox/block/cache.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/orchestrator/pkg/sandbox/block/cache.go b/packages/orchestrator/pkg/sandbox/block/cache.go index 9de10dc289..d5adb6af6f 100644 --- a/packages/orchestrator/pkg/sandbox/block/cache.go +++ b/packages/orchestrator/pkg/sandbox/block/cache.go @@ -159,6 +159,8 @@ func (c *Cache) ExportToDiff(ctx context.Context, out *os.File) (*header.DiffMet } defer f.Close() + src := int(f.Fd()) + dst := int(out.Fd()) var writeOffset int64 for r := range BitsetRanges(diffMetadata.Dirty, diffMetadata.BlockSize) { @@ -170,9 +172,9 @@ func (c *Cache) ExportToDiff(ctx context.Context, out *os.File) (*header.DiffMet for remaining > 0 { // On XFS this uses reflink automatically. 
n, err := unix.CopyFileRange( - int(f.Fd()), + src, &readOffset, - int(out.Fd()), + dst, &writeOffset, remaining, 0, From 897ca0fa894b9d246aa080b312cff19e738e28a7 Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Fri, 27 Mar 2026 18:48:47 -0700 Subject: [PATCH 3/9] Add measurement Signed-off-by: Babis Chalios --- .../orchestrator/pkg/sandbox/block/cache.go | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/packages/orchestrator/pkg/sandbox/block/cache.go b/packages/orchestrator/pkg/sandbox/block/cache.go index d5adb6af6f..3e70b9775e 100644 --- a/packages/orchestrator/pkg/sandbox/block/cache.go +++ b/packages/orchestrator/pkg/sandbox/block/cache.go @@ -15,9 +15,11 @@ import ( "github.com/bits-and-blooms/bitset" "github.com/edsrzf/mmap-go" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" "golang.org/x/sys/unix" "github.com/e2b-dev/infra/packages/shared/pkg/storage/header" + "github.com/e2b-dev/infra/packages/shared/pkg/telemetry" ) const ( @@ -119,7 +121,7 @@ func (c *Cache) Sync() error { } func (c *Cache) ExportToDiff(ctx context.Context, out *os.File) (*header.DiffMetadata, error) { - _, childSpan := tracer.Start(ctx, "export-to-diff") + ctx, childSpan := tracer.Start(ctx, "export-to-diff") defer childSpan.End() c.mu.Lock() @@ -137,11 +139,14 @@ func (c *Cache) ExportToDiff(ctx context.Context, out *os.File) (*header.DiffMet }, nil } + flushStart := time.Now() err := c.mmap.Flush() if err != nil { return nil, fmt.Errorf("error flushing mmap: %w", err) } + telemetry.SetAttributes(ctx, attribute.Int64("flush_ms", time.Since(flushStart).Milliseconds())) + buildStart := time.Now() builder := header.NewDiffMetadataBuilder(c.size, c.blockSize) // We don't need to sort the keys as the bitset handles the ordering. 
@@ -152,6 +157,7 @@ func (c *Cache) ExportToDiff(ctx context.Context, out *os.File) (*header.DiffMet }) diffMetadata := builder.Build() + telemetry.SetAttributes(ctx, attribute.Int64("build_metadata_ms", time.Since(buildStart).Milliseconds())) f, err := os.Open(c.filePath) if err != nil { @@ -163,12 +169,13 @@ func (c *Cache) ExportToDiff(ctx context.Context, out *os.File) (*header.DiffMet dst := int(out.Fd()) var writeOffset int64 + copyStart := time.Now() for r := range BitsetRanges(diffMetadata.Dirty, diffMetadata.BlockSize) { remaining := int(r.Size) readOffset := r.Start // The kernel may return short writes (e.g. capped at MAX_RW_COUNT on non-reflink filesystems), - // so we loop until the full range is copied. The offset pointers are advanced by the kernel. + // so we loop until the full range is copied. The offset pointers are advanced by the kernel. for remaining > 0 { // On XFS this uses reflink automatically. n, err := unix.CopyFileRange( @@ -191,6 +198,12 @@ func (c *Cache) ExportToDiff(ctx context.Context, out *os.File) (*header.DiffMet } } + telemetry.SetAttributes(ctx, + attribute.Int64("copy_ms", time.Since(copyStart).Milliseconds()), + attribute.Int64("total_size_bytes", c.size), + attribute.Int64("dirty_size_bytes", int64(diffMetadata.Dirty.Count())*c.blockSize), + ) + return diffMetadata, nil } From 7a8675d23c82be041b51259ff5d2df90d873788f Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Fri, 27 Mar 2026 18:51:27 -0700 Subject: [PATCH 4/9] Add ranges metric Signed-off-by: Babis Chalios --- packages/orchestrator/pkg/sandbox/block/cache.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packages/orchestrator/pkg/sandbox/block/cache.go b/packages/orchestrator/pkg/sandbox/block/cache.go index 3e70b9775e..ebac4240b3 100644 --- a/packages/orchestrator/pkg/sandbox/block/cache.go +++ b/packages/orchestrator/pkg/sandbox/block/cache.go @@ -168,9 +168,11 @@ func (c *Cache) ExportToDiff(ctx context.Context, out *os.File) (*header.DiffMet src := 
int(f.Fd()) dst := int(out.Fd()) var writeOffset int64 + var totalRanges int64 copyStart := time.Now() for r := range BitsetRanges(diffMetadata.Dirty, diffMetadata.BlockSize) { + totalRanges++ remaining := int(r.Size) readOffset := r.Start @@ -202,6 +204,7 @@ func (c *Cache) ExportToDiff(ctx context.Context, out *os.File) (*header.DiffMet attribute.Int64("copy_ms", time.Since(copyStart).Milliseconds()), attribute.Int64("total_size_bytes", c.size), attribute.Int64("dirty_size_bytes", int64(diffMetadata.Dirty.Count())*c.blockSize), + attribute.Int64("total_ranges", totalRanges), ) return diffMetadata, nil From c625d8c3addec0b7abb1b8f776e4848fb5ad38b2 Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Sat, 28 Mar 2026 18:48:15 -0700 Subject: [PATCH 5/9] Cleanup flush Signed-off-by: Babis Chalios --- .../orchestrator/pkg/sandbox/block/cache.go | 43 ++++++------------- 1 file changed, 12 insertions(+), 31 deletions(-) diff --git a/packages/orchestrator/pkg/sandbox/block/cache.go b/packages/orchestrator/pkg/sandbox/block/cache.go index ebac4240b3..52534e1d70 100644 --- a/packages/orchestrator/pkg/sandbox/block/cache.go +++ b/packages/orchestrator/pkg/sandbox/block/cache.go @@ -100,26 +100,6 @@ func (c *Cache) isClosed() bool { return c.closed.Load() } -func (c *Cache) Sync() error { - c.mu.Lock() - defer c.mu.Unlock() - - if c.isClosed() { - return NewErrCacheClosed(c.filePath) - } - - if c.mmap == nil { - return nil - } - - err := c.mmap.Flush() - if err != nil { - return fmt.Errorf("error syncing cache: %w", err) - } - - return nil -} - func (c *Cache) ExportToDiff(ctx context.Context, out *os.File) (*header.DiffMetadata, error) { ctx, childSpan := tracer.Start(ctx, "export-to-diff") defer childSpan.End() @@ -139,12 +119,20 @@ func (c *Cache) ExportToDiff(ctx context.Context, out *os.File) (*header.DiffMet }, nil } - flushStart := time.Now() - err := c.mmap.Flush() + f, err := os.Open(c.filePath) + if err != nil { + return nil, fmt.Errorf("error opening file: %w", err) 
+ } + defer f.Close() + + src := int(f.Fd()) + + // Explicit mmap flush is not necessary, because the kernel will handle that as part of the copy_file_range syscall. + // Calling sync_file_range marks the range for writeback and starts it early. + err = unix.SyncFileRange(src, 0, c.size, unix.SYNC_FILE_RANGE_WRITE) if err != nil { - return nil, fmt.Errorf("error flushing mmap: %w", err) + return nil, fmt.Errorf("error syncing file: %w", err) } - telemetry.SetAttributes(ctx, attribute.Int64("flush_ms", time.Since(flushStart).Milliseconds())) buildStart := time.Now() builder := header.NewDiffMetadataBuilder(c.size, c.blockSize) @@ -159,13 +147,6 @@ func (c *Cache) ExportToDiff(ctx context.Context, out *os.File) (*header.DiffMet diffMetadata := builder.Build() telemetry.SetAttributes(ctx, attribute.Int64("build_metadata_ms", time.Since(buildStart).Milliseconds())) - f, err := os.Open(c.filePath) - if err != nil { - return nil, fmt.Errorf("error opening file: %w", err) - } - defer f.Close() - - src := int(f.Fd()) dst := int(out.Fd()) var writeOffset int64 var totalRanges int64 From 957cd8a0c95d2c866848bb1bb1711442ca30298d Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Thu, 2 Apr 2026 12:49:52 +0200 Subject: [PATCH 6/9] fix(nbd): ensure cache closed on failures In nbd.ExportDiff() we were returning early in case something was going wrong downstream without calling cache.Close(). This causes a leak of mmap'ed memory. Make sure we call cache.Close() in those cases. cache.Close() itself might fail. In that case, log a warning and return the initial error.
Signed-off-by: Babis Chalios --- packages/orchestrator/pkg/sandbox/rootfs/nbd.go | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/packages/orchestrator/pkg/sandbox/rootfs/nbd.go b/packages/orchestrator/pkg/sandbox/rootfs/nbd.go index 6a3f63e457..6fa0f85ac2 100644 --- a/packages/orchestrator/pkg/sandbox/rootfs/nbd.go +++ b/packages/orchestrator/pkg/sandbox/rootfs/nbd.go @@ -92,12 +92,26 @@ func (o *NBDProvider) ExportDiff( select { case <-o.finishedOperations: case <-ctx.Done(): + // Close the cache to avoid leaking the mmaped memory. Log an error + // if that failed + closeErr := cache.Close() + if closeErr != nil { + logger.L().Warn(ctx, "error closing cache", zap.Error(closeErr)) + } + return nil, fmt.Errorf("timeout waiting for overlay device to be released") } telemetry.ReportEvent(ctx, "sandbox stopped") m, err := cache.ExportToDiff(ctx, out) if err != nil { + // Close the cache to avoid leaking the mmaped memory. Log an error + // if that failed + closeErr := cache.Close() + if closeErr != nil { + logger.L().Warn(ctx, "error closing cache", zap.Error(closeErr)) + } + return nil, fmt.Errorf("error exporting cache: %w", err) } From c0d4dc1a2651fee48374748ffebe32e6f03b1017 Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Thu, 2 Apr 2026 12:56:11 +0200 Subject: [PATCH 7/9] fix(cache): don't hard fail on SyncFileRange SyncFileRange is an optimization to let the kernel start writing back data from the mmap to the disk on XFS. If that fails, we can rely on CopyFileRange to actually perform the operation. Just log a warning on SyncFileRange failures and hard fail on CopyFileRange.
Signed-off-by: Babis Chalios --- packages/orchestrator/pkg/sandbox/block/cache.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/orchestrator/pkg/sandbox/block/cache.go b/packages/orchestrator/pkg/sandbox/block/cache.go index 52534e1d70..c7a78b9895 100644 --- a/packages/orchestrator/pkg/sandbox/block/cache.go +++ b/packages/orchestrator/pkg/sandbox/block/cache.go @@ -16,8 +16,10 @@ import ( "github.com/edsrzf/mmap-go" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" + "go.uber.org/zap" "golang.org/x/sys/unix" + "github.com/e2b-dev/infra/packages/shared/pkg/logger" "github.com/e2b-dev/infra/packages/shared/pkg/storage/header" "github.com/e2b-dev/infra/packages/shared/pkg/telemetry" ) @@ -129,9 +131,10 @@ func (c *Cache) ExportToDiff(ctx context.Context, out *os.File) (*header.DiffMet // Explicit mmap flush is not necessary, because the kernel will handle that as part of the copy_file_range syscall. // Calling sync_file_range marks the range for writeback and starts it early. + // This is just an optimization, so if it fails just log a warning and let copy_file_range do the actual work. err = unix.SyncFileRange(src, 0, c.size, unix.SYNC_FILE_RANGE_WRITE) if err != nil { - return nil, fmt.Errorf("error syncing file: %w", err) + logger.L().Warn(ctx, "error syncing file", zap.Error(err)) } buildStart := time.Now() From d354f78efe71e43a1037926985fb153bb5073a4c Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Thu, 2 Apr 2026 14:53:25 +0200 Subject: [PATCH 8/9] fix(cache): add fallback for unsupported copy_file_range When copy_file_range fails with EXDEV, EOPNOTSUPP, or ENOSYS (e.g. cross-device copies or filesystems that don't support the syscall), fall back to a regular io.Copy for the remainder of the export. Other errors still hard-fail. 
Signed-off-by: Babis Chalios --- .../orchestrator/pkg/sandbox/block/cache.go | 50 +++++++++++++------ 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/packages/orchestrator/pkg/sandbox/block/cache.go b/packages/orchestrator/pkg/sandbox/block/cache.go index c7a78b9895..504223f956 100644 --- a/packages/orchestrator/pkg/sandbox/block/cache.go +++ b/packages/orchestrator/pkg/sandbox/block/cache.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "io" "math" "math/rand" "os" @@ -153,6 +154,7 @@ func (c *Cache) ExportToDiff(ctx context.Context, out *os.File) (*header.DiffMet dst := int(out.Fd()) var writeOffset int64 var totalRanges int64 + fallback := false copyStart := time.Now() for r := range BitsetRanges(diffMetadata.Dirty, diffMetadata.BlockSize) { @@ -163,24 +165,42 @@ func (c *Cache) ExportToDiff(ctx context.Context, out *os.File) (*header.DiffMet // The kernel may return short writes (e.g. capped at MAX_RW_COUNT on non-reflink filesystems), // so we loop until the full range is copied. The offset pointers are advanced by the kernel. for remaining > 0 { - // On XFS this uses reflink automatically. - n, err := unix.CopyFileRange( - src, - &readOffset, - dst, - &writeOffset, - remaining, - 0, - ) - if err != nil { - return nil, fmt.Errorf("error copying file range: %w", err) + if !fallback { + // On XFS this uses reflink automatically. 
+ n, err := unix.CopyFileRange( + src, + &readOffset, + dst, + &writeOffset, + remaining, + 0, + ) + switch { + case errors.Is(err, syscall.EXDEV) || errors.Is(err, syscall.EOPNOTSUPP) || errors.Is(err, syscall.ENOSYS): + fallback = true + logger.L().Warn(ctx, "copy_file_range unsupported, falling back to normal copy", zap.Error(err)) + case err != nil: + return nil, fmt.Errorf("error copying file range: %w", err) + case n == 0: + return nil, fmt.Errorf("copy_file_range returned 0 with %d bytes remaining", remaining) + default: + remaining -= n + } } - if n == 0 { - return nil, fmt.Errorf("copy_file_range returned 0 with %d bytes remaining", remaining) + // CopyFileRange failed. Falling back to normal copy + if fallback && remaining > 0 { + if _, err := out.Seek(writeOffset, io.SeekStart); err != nil { + return nil, fmt.Errorf("error seeking: %w", err) + } + sr := io.NewSectionReader(f, readOffset, int64(remaining)) + if _, err := io.Copy(out, sr); err != nil { + return nil, fmt.Errorf("error copying file range. %w", err) + } + + writeOffset += int64(remaining) + remaining = 0 } - - remaining -= n } } From 0af2e06fd9e26311fb077d5ffbe77f5abc930a19 Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Thu, 2 Apr 2026 15:30:39 +0200 Subject: [PATCH 9/9] fix(cache): use correct mmap-go constant for read-write mapping mmap.MapRegion expects the library's own constants (mmap.RDWR), not raw unix PROT_* flags. Passing unix.PROT_READ|unix.PROT_WRITE (=3) happened to work due to numerical coincidence, but is fragile. 
Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: Babis Chalios --- packages/orchestrator/pkg/sandbox/block/cache.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/orchestrator/pkg/sandbox/block/cache.go b/packages/orchestrator/pkg/sandbox/block/cache.go index 504223f956..7bf39d7a20 100644 --- a/packages/orchestrator/pkg/sandbox/block/cache.go +++ b/packages/orchestrator/pkg/sandbox/block/cache.go @@ -85,7 +85,7 @@ func NewCache(size, blockSize int64, filePath string, dirtyFile bool) (*Cache, e return nil, fmt.Errorf("size too big: %d > %d", size, math.MaxInt) } - mm, err := mmap.MapRegion(f, int(size), unix.PROT_READ|unix.PROT_WRITE, 0, 0) + mm, err := mmap.MapRegion(f, int(size), mmap.RDWR, 0, 0) if err != nil { return nil, fmt.Errorf("error mapping file: %w", err) }