@@ -20,6 +20,7 @@ import (
2020 "go.uber.org/zap"
2121 "golang.org/x/sys/unix"
2222
23+ "github.com/e2b-dev/infra/packages/shared/pkg/atomicbitset"
2324 "github.com/e2b-dev/infra/packages/shared/pkg/logger"
2425 "github.com/e2b-dev/infra/packages/shared/pkg/storage/header"
2526 "github.com/e2b-dev/infra/packages/shared/pkg/telemetry"
@@ -47,26 +48,17 @@ func NewErrCacheClosed(filePath string) *CacheClosedError {
4748}
4849
4950type Cache struct {
50- filePath string
51- size int64
52- blockSize int64
53- mmap * mmap.MMap
54- mu sync.RWMutex
55- dirty sync.Map
56- dirtyGranularity int64
57- dirtyFile bool
58- closed atomic.Bool
51+ filePath string
52+ size int64
53+ blockSize int64
54+ mmap * mmap.MMap
55+ mu sync.RWMutex
56+ dirty * atomicbitset.Bitset
57+ dirtyFile bool
58+ closed atomic.Bool
5959}
6060
61- // NewCache creates a cache with dirty tracking at blockSize granularity.
62- // When we are passing filePath that is a file that has content we want to server want to use dirtyFile = true.
6361func NewCache (size , blockSize int64 , filePath string , dirtyFile bool ) (* Cache , error ) {
64- return NewCacheWithDirtyGranularity (size , blockSize , blockSize , filePath , dirtyFile )
65- }
66-
67- // NewCacheWithDirtyGranularity creates a cache with dirty tracking at the specified granularity.
68- // For chunker caches, dirtyGranularity can be larger than blockSize to reduce dirty map overhead.
69- func NewCacheWithDirtyGranularity (size , blockSize , dirtyGranularity int64 , filePath string , dirtyFile bool ) (* Cache , error ) {
7062 f , err := os .OpenFile (filePath , os .O_RDWR | os .O_CREATE , 0o644 )
7163 if err != nil {
7264 return nil , fmt .Errorf ("error opening file: %w" , err )
@@ -76,11 +68,11 @@ func NewCacheWithDirtyGranularity(size, blockSize, dirtyGranularity int64, fileP
7668
7769 if size == 0 {
7870 return & Cache {
79- filePath : filePath ,
80- size : size ,
81- blockSize : blockSize ,
82- dirtyGranularity : dirtyGranularity ,
83- dirtyFile : dirtyFile ,
71+ filePath : filePath ,
72+ size : size ,
73+ blockSize : blockSize ,
74+ dirtyFile : dirtyFile ,
75+ dirty : atomicbitset . New () ,
8476 }, nil
8577 }
8678
@@ -100,12 +92,12 @@ func NewCacheWithDirtyGranularity(size, blockSize, dirtyGranularity int64, fileP
10092 }
10193
10294 return & Cache {
103- mmap : & mm ,
104- filePath : filePath ,
105- size : size ,
106- blockSize : blockSize ,
107- dirtyGranularity : dirtyGranularity ,
108- dirtyFile : dirtyFile ,
95+ mmap : & mm ,
96+ filePath : filePath ,
97+ size : size ,
98+ blockSize : blockSize ,
99+ dirtyFile : dirtyFile ,
100+ dirty : atomicbitset . New () ,
109101 }, nil
110102}
111103
@@ -117,10 +109,6 @@ func (c *Cache) ExportToDiff(ctx context.Context, out *os.File) (*header.DiffMet
117109 ctx , childSpan := tracer .Start (ctx , "export-to-diff" )
118110 defer childSpan .End ()
119111
120- if c .dirtyGranularity != c .blockSize {
121- return nil , fmt .Errorf ("ExportToDiff requires block-level dirty tracking (granularity %d != blockSize %d)" , c .dirtyGranularity , c .blockSize )
122- }
123-
124112 c .mu .Lock ()
125113 defer c .mu .Unlock ()
126114
@@ -129,11 +117,7 @@ func (c *Cache) ExportToDiff(ctx context.Context, out *os.File) (*header.DiffMet
129117 }
130118
131119 if c .mmap == nil {
132- return & header.DiffMetadata {
133- Dirty : bitset .New (0 ),
134- Empty : bitset .New (0 ),
135- BlockSize : c .blockSize ,
136- }, nil
120+ return header .NewDiffMetadata (c .blockSize , bitset .New (0 )), nil
137121 }
138122
139123 f , err := os .Open (c .filePath )
@@ -152,18 +136,7 @@ func (c *Cache) ExportToDiff(ctx context.Context, out *os.File) (*header.DiffMet
152136 logger .L ().Warn (ctx , "error syncing file" , zap .Error (err ))
153137 }
154138
155- buildStart := time .Now ()
156- builder := header .NewDiffMetadataBuilder (c .size , c .blockSize )
157-
158- // We don't need to sort the keys as the bitset handles the ordering.
159- c .dirty .Range (func (key , _ any ) bool {
160- builder .AddDirtyOffset (key .(int64 ))
161-
162- return true
163- })
164-
165- diffMetadata := builder .Build ()
166- telemetry .SetAttributes (ctx , attribute .Int64 ("build_metadata_ms" , time .Since (buildStart ).Milliseconds ()))
139+ diffMetadata := header .NewDiffMetadata (c .blockSize , c .dirty .BitSet ())
167140
168141 dst := int (out .Fd ())
169142 var writeOffset int64
@@ -336,45 +309,17 @@ func (c *Cache) sliceDirect(off, length int64) ([]byte, error) {
336309}
337310
338311func (c * Cache ) isCached (off , length int64 ) bool {
339- // Zero-length is vacuously true (no-op)
340- if length <= 0 {
341- return true
342- }
343-
344- // Make sure the offset is within the cache size
345- if off >= c .size {
346- return false
347- }
348-
349- // Cap if the length goes beyond the cache size, so we don't check for blocks that are out of bounds.
350- end := min (off + length , c .size )
312+ start := uint64 (header .BlockIdx (off , c .blockSize ))
313+ end := uint64 (header .BlockCeilIdx (min (off + length , c .size ), c .blockSize ))
351314
352- startKey := (off / c .dirtyGranularity ) * c .dirtyGranularity
353- endKey := ((end - 1 ) / c .dirtyGranularity ) * c .dirtyGranularity
354-
355- for key := startKey ; key <= endKey ; key += c .dirtyGranularity {
356- if _ , ok := c .dirty .Load (key ); ! ok {
357- return false
358- }
359- }
360-
361- return true
315+ return c .dirty .HasRange (start , end )
362316}
363317
364318func (c * Cache ) setIsCached (off , length int64 ) {
365- // Zero-length is a no-op
366- if length <= 0 {
367- return
368- }
369-
370- end := off + length
319+ start := uint64 (header .BlockIdx (off , c .blockSize ))
320+ end := uint64 (header .BlockCeilIdx (off + length , c .blockSize ))
371321
372- startKey := (off / c .dirtyGranularity ) * c .dirtyGranularity
373- endKey := ((end - 1 ) / c .dirtyGranularity ) * c .dirtyGranularity
374-
375- for key := startKey ; key <= endKey ; key += c .dirtyGranularity {
376- c .dirty .Store (key , struct {}{})
377- }
322+ c .dirty .SetRange (start , end )
378323}
379324
380325// When using WriteAtWithoutLock you must ensure thread safety, ideally by only writing to the same block once and then exposing the slice.
0 commit comments