Skip to content

Commit 481aa06

Browse files
levbValentaTomas
andauthored
perf(block): sync.Map -> atomic bitmap for cache dirty tracking (#2235)
Co-authored-by: ValentaTomas <valenta.and.thomas@gmail.com>
1 parent 0c9821b commit 481aa06

13 files changed

Lines changed: 274 additions & 106 deletions

File tree

packages/api/go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ require (
101101
github.com/beorn7/perks v1.0.1 // indirect
102102
github.com/bep/godartsass/v2 v2.3.2 // indirect
103103
github.com/bep/golibsass v1.2.0 // indirect
104-
github.com/bits-and-blooms/bitset v1.22.0 // indirect
104+
github.com/bits-and-blooms/bitset v1.24.2 // indirect
105105
github.com/bits-and-blooms/bloom/v3 v3.7.0 // indirect
106106
github.com/bytedance/gopkg v0.1.4 // indirect
107107
github.com/bytedance/sonic v1.15.0 // indirect

packages/api/go.sum

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

packages/orchestrator/go.mod

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ require (
2020
github.com/aws/aws-sdk-go-v2/config v1.32.6
2121
github.com/aws/aws-sdk-go-v2/credentials v1.19.6
2222
github.com/aws/aws-sdk-go-v2/service/ecr v1.44.0
23-
github.com/bits-and-blooms/bitset v1.22.0
23+
github.com/bits-and-blooms/bitset v1.24.2
2424
github.com/bmatcuk/doublestar/v4 v4.9.1
2525
github.com/caarlos0/env/v11 v11.3.1
2626
github.com/containernetworking/plugins v1.9.0
@@ -97,6 +97,7 @@ require (
9797
github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.54.0 // indirect
9898
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.54.0 // indirect
9999
github.com/Microsoft/go-winio v0.6.2 // indirect
100+
github.com/RoaringBitmap/roaring/v2 v2.16.1 // indirect
100101
github.com/andybalholm/brotli v1.2.0 // indirect
101102
github.com/armon/go-metrics v0.4.1 // indirect
102103
github.com/aws/aws-sdk-go-v2 v1.41.0 // indirect
@@ -254,6 +255,7 @@ require (
254255
github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect
255256
github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 // indirect
256257
github.com/morikuni/aec v1.0.0 // indirect
258+
github.com/mschoch/smat v0.2.0 // indirect
257259
github.com/oapi-codegen/oapi-codegen/v2 v2.5.1 // indirect
258260
github.com/oasdiff/yaml v0.0.0-20250309154309-f31be36b4037 // indirect
259261
github.com/oasdiff/yaml3 v0.0.0-20250309153720-d2182401db90 // indirect

packages/orchestrator/go.sum

Lines changed: 6 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

packages/orchestrator/pkg/sandbox/block/cache.go

Lines changed: 28 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"go.uber.org/zap"
2121
"golang.org/x/sys/unix"
2222

23+
"github.com/e2b-dev/infra/packages/shared/pkg/atomicbitset"
2324
"github.com/e2b-dev/infra/packages/shared/pkg/logger"
2425
"github.com/e2b-dev/infra/packages/shared/pkg/storage/header"
2526
"github.com/e2b-dev/infra/packages/shared/pkg/telemetry"
@@ -47,26 +48,17 @@ func NewErrCacheClosed(filePath string) *CacheClosedError {
4748
}
4849

4950
type Cache struct {
50-
filePath string
51-
size int64
52-
blockSize int64
53-
mmap *mmap.MMap
54-
mu sync.RWMutex
55-
dirty sync.Map
56-
dirtyGranularity int64
57-
dirtyFile bool
58-
closed atomic.Bool
51+
filePath string
52+
size int64
53+
blockSize int64
54+
mmap *mmap.MMap
55+
mu sync.RWMutex
56+
dirty *atomicbitset.Bitset
57+
dirtyFile bool
58+
closed atomic.Bool
5959
}
6060

61-
// NewCache creates a cache with dirty tracking at blockSize granularity.
62-
// When we are passing filePath that is a file that has content we want to server want to use dirtyFile = true.
6361
func NewCache(size, blockSize int64, filePath string, dirtyFile bool) (*Cache, error) {
64-
return NewCacheWithDirtyGranularity(size, blockSize, blockSize, filePath, dirtyFile)
65-
}
66-
67-
// NewCacheWithDirtyGranularity creates a cache with dirty tracking at the specified granularity.
68-
// For chunker caches, dirtyGranularity can be larger than blockSize to reduce dirty map overhead.
69-
func NewCacheWithDirtyGranularity(size, blockSize, dirtyGranularity int64, filePath string, dirtyFile bool) (*Cache, error) {
7062
f, err := os.OpenFile(filePath, os.O_RDWR|os.O_CREATE, 0o644)
7163
if err != nil {
7264
return nil, fmt.Errorf("error opening file: %w", err)
@@ -76,11 +68,11 @@ func NewCacheWithDirtyGranularity(size, blockSize, dirtyGranularity int64, fileP
7668

7769
if size == 0 {
7870
return &Cache{
79-
filePath: filePath,
80-
size: size,
81-
blockSize: blockSize,
82-
dirtyGranularity: dirtyGranularity,
83-
dirtyFile: dirtyFile,
71+
filePath: filePath,
72+
size: size,
73+
blockSize: blockSize,
74+
dirtyFile: dirtyFile,
75+
dirty: atomicbitset.New(),
8476
}, nil
8577
}
8678

@@ -100,12 +92,12 @@ func NewCacheWithDirtyGranularity(size, blockSize, dirtyGranularity int64, fileP
10092
}
10193

10294
return &Cache{
103-
mmap: &mm,
104-
filePath: filePath,
105-
size: size,
106-
blockSize: blockSize,
107-
dirtyGranularity: dirtyGranularity,
108-
dirtyFile: dirtyFile,
95+
mmap: &mm,
96+
filePath: filePath,
97+
size: size,
98+
blockSize: blockSize,
99+
dirtyFile: dirtyFile,
100+
dirty: atomicbitset.New(),
109101
}, nil
110102
}
111103

@@ -117,10 +109,6 @@ func (c *Cache) ExportToDiff(ctx context.Context, out *os.File) (*header.DiffMet
117109
ctx, childSpan := tracer.Start(ctx, "export-to-diff")
118110
defer childSpan.End()
119111

120-
if c.dirtyGranularity != c.blockSize {
121-
return nil, fmt.Errorf("ExportToDiff requires block-level dirty tracking (granularity %d != blockSize %d)", c.dirtyGranularity, c.blockSize)
122-
}
123-
124112
c.mu.Lock()
125113
defer c.mu.Unlock()
126114

@@ -129,11 +117,7 @@ func (c *Cache) ExportToDiff(ctx context.Context, out *os.File) (*header.DiffMet
129117
}
130118

131119
if c.mmap == nil {
132-
return &header.DiffMetadata{
133-
Dirty: bitset.New(0),
134-
Empty: bitset.New(0),
135-
BlockSize: c.blockSize,
136-
}, nil
120+
return header.NewDiffMetadata(c.blockSize, bitset.New(0)), nil
137121
}
138122

139123
f, err := os.Open(c.filePath)
@@ -152,18 +136,7 @@ func (c *Cache) ExportToDiff(ctx context.Context, out *os.File) (*header.DiffMet
152136
logger.L().Warn(ctx, "error syncing file", zap.Error(err))
153137
}
154138

155-
buildStart := time.Now()
156-
builder := header.NewDiffMetadataBuilder(c.size, c.blockSize)
157-
158-
// We don't need to sort the keys as the bitset handles the ordering.
159-
c.dirty.Range(func(key, _ any) bool {
160-
builder.AddDirtyOffset(key.(int64))
161-
162-
return true
163-
})
164-
165-
diffMetadata := builder.Build()
166-
telemetry.SetAttributes(ctx, attribute.Int64("build_metadata_ms", time.Since(buildStart).Milliseconds()))
139+
diffMetadata := header.NewDiffMetadata(c.blockSize, c.dirty.BitSet())
167140

168141
dst := int(out.Fd())
169142
var writeOffset int64
@@ -336,45 +309,17 @@ func (c *Cache) sliceDirect(off, length int64) ([]byte, error) {
336309
}
337310

338311
func (c *Cache) isCached(off, length int64) bool {
339-
// Zero-length is vacuously true (no-op)
340-
if length <= 0 {
341-
return true
342-
}
343-
344-
// Make sure the offset is within the cache size
345-
if off >= c.size {
346-
return false
347-
}
348-
349-
// Cap if the length goes beyond the cache size, so we don't check for blocks that are out of bounds.
350-
end := min(off+length, c.size)
312+
start := uint64(header.BlockIdx(off, c.blockSize))
313+
end := uint64(header.BlockCeilIdx(min(off+length, c.size), c.blockSize))
351314

352-
startKey := (off / c.dirtyGranularity) * c.dirtyGranularity
353-
endKey := ((end - 1) / c.dirtyGranularity) * c.dirtyGranularity
354-
355-
for key := startKey; key <= endKey; key += c.dirtyGranularity {
356-
if _, ok := c.dirty.Load(key); !ok {
357-
return false
358-
}
359-
}
360-
361-
return true
315+
return c.dirty.HasRange(start, end)
362316
}
363317

364318
func (c *Cache) setIsCached(off, length int64) {
365-
// Zero-length is a no-op
366-
if length <= 0 {
367-
return
368-
}
369-
370-
end := off + length
319+
start := uint64(header.BlockIdx(off, c.blockSize))
320+
end := uint64(header.BlockCeilIdx(off+length, c.blockSize))
371321

372-
startKey := (off / c.dirtyGranularity) * c.dirtyGranularity
373-
endKey := ((end - 1) / c.dirtyGranularity) * c.dirtyGranularity
374-
375-
for key := startKey; key <= endKey; key += c.dirtyGranularity {
376-
c.dirty.Store(key, struct{}{})
377-
}
322+
c.dirty.SetRange(start, end)
378323
}
379324

380325
// When using WriteAtWithoutLock you must ensure thread safety, ideally by only writing to the same block once and the exposing the slice.

packages/orchestrator/pkg/sandbox/block/chunk.go

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,6 @@ import (
2222
)
2323

2424
const (
25-
// ChunkerDirtyGranularity is the dirty tracking resolution for chunker caches.
26-
// Fetches happen in MemoryChunkSize (4MB) chunks, so tracking at blockSize (4KB)
27-
// is wasteful. 2MB gives 2 entries per fetch instead of 1024.
28-
ChunkerDirtyGranularity = 2 * 1024 * 1024 // 2 MiB
29-
3025
pullType = "pull-type"
3126
pullTypeLocal = "local"
3227
pullTypeRemote = "remote"
@@ -143,7 +138,7 @@ func NewFullFetchChunker(
143138
cachePath string,
144139
metrics metrics.Metrics,
145140
) (*FullFetchChunker, error) {
146-
cache, err := NewCacheWithDirtyGranularity(size, blockSize, ChunkerDirtyGranularity, cachePath, false)
141+
cache, err := NewCache(size, blockSize, cachePath, false)
147142
if err != nil {
148143
return nil, fmt.Errorf("failed to create file cache: %w", err)
149144
}

packages/orchestrator/pkg/sandbox/block/streaming_chunk.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ func NewStreamingChunker(
172172
minReadBatchSize int64,
173173
ff *featureflags.Client,
174174
) (*StreamingChunker, error) {
175-
cache, err := NewCacheWithDirtyGranularity(size, blockSize, ChunkerDirtyGranularity, cachePath, false)
175+
cache, err := NewCache(size, blockSize, cachePath, false)
176176
if err != nil {
177177
return nil, fmt.Errorf("failed to create file cache: %w", err)
178178
}
@@ -270,9 +270,7 @@ func (c *StreamingChunker) Slice(ctx context.Context, off, length int64) ([]byte
270270
return nil, fmt.Errorf("failed to ensure data at %d-%d: %w", off, off+length, err)
271271
}
272272

273-
// Use sliceDirect (no isCached check) since the waiter mechanism guarantees data is in the mmap.
274-
// With coarse dirty granularity, isCached may return false during an active fetch even though
275-
// the requested bytes have been written to the mmap.
273+
// sliceDirect skips isCached — the waiter already confirmed the data is in the mmap.
276274
b, cacheErr := c.cache.sliceDirect(off, length)
277275
if cacheErr != nil {
278276
timer.RecordRaw(ctx, length, chunkerAttrs.failLocalReadAgain)

packages/shared/go.mod

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,13 @@ require (
88
cloud.google.com/go/artifactregistry v1.17.1
99
cloud.google.com/go/storage v1.59.2
1010
connectrpc.com/connect v1.18.1
11+
github.com/RoaringBitmap/roaring/v2 v2.16.1
1112
github.com/aws/aws-sdk-go-v2 v1.41.0
1213
github.com/aws/aws-sdk-go-v2/config v1.32.6
1314
github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.74
1415
github.com/aws/aws-sdk-go-v2/service/ecr v1.44.0
1516
github.com/aws/aws-sdk-go-v2/service/s3 v1.79.3
16-
github.com/bits-and-blooms/bitset v1.22.0
17+
github.com/bits-and-blooms/bitset v1.24.2
1718
github.com/bsm/redislock v0.9.4
1819
github.com/dchest/uniuri v1.2.0
1920
github.com/gin-gonic/gin v1.12.0
@@ -270,6 +271,7 @@ require (
270271
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
271272
github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect
272273
github.com/morikuni/aec v1.0.0 // indirect
274+
github.com/mschoch/smat v0.2.0 // indirect
273275
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
274276
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
275277
github.com/oklog/ulid v1.3.1 // indirect

packages/shared/go.sum

Lines changed: 6 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
package atomicbitset
2+
3+
import (
4+
"sync"
5+
6+
roaring "github.com/RoaringBitmap/roaring/v2"
7+
"github.com/bits-and-blooms/bitset"
8+
)
9+
10+
type Bitset struct {
11+
mu sync.RWMutex
12+
bm *roaring.Bitmap
13+
}
14+
15+
func New() *Bitset {
16+
return &Bitset{
17+
bm: roaring.New(),
18+
}
19+
}
20+
21+
func (b *Bitset) HasRange(start, end uint64) bool {
22+
b.mu.RLock()
23+
defer b.mu.RUnlock()
24+
25+
return b.bm.CardinalityInRange(start, end) == end-start
26+
}
27+
28+
func (b *Bitset) SetRange(start, end uint64) {
29+
b.mu.Lock()
30+
defer b.mu.Unlock()
31+
32+
b.bm.AddRange(start, end)
33+
}
34+
35+
func (b *Bitset) BitSet() *bitset.BitSet {
36+
b.mu.RLock()
37+
defer b.mu.RUnlock()
38+
39+
return b.bm.ToBitSet()
40+
}

0 commit comments

Comments
 (0)