Skip to content

Commit c003ed5

Browse files
JAORMX and claude committed
Add layer-level caching for rootfs extraction
Introduce per-layer content-addressable caching keyed by DiffID (uncompressed content hash), so shared base layers across images are extracted only once. Uncached layers are extracted in parallel with bounded concurrency, then composed sequentially with OCI whiteout support. Falls back to flat mutate.Extract on failure. Security: path containment checks on whiteouts, symlink target validation during composition, shared atomic size budget across layers to prevent decompression bombs. Closes #19 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent b5db2bb commit c003ed5

7 files changed

Lines changed: 1159 additions & 4 deletions

File tree

image/cache.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,15 @@ func (c *Cache) BaseDir() string {
3535
return c.baseDir
3636
}
3737

38+
// LayerCache returns the per-layer cache, creating the layers/ subdirectory
39+
// lazily. Returns nil if the receiver is nil.
40+
func (c *Cache) LayerCache() *LayerCache {
41+
if c == nil {
42+
return nil
43+
}
44+
return NewLayerCache(filepath.Join(c.baseDir, "layers"))
45+
}
46+
3847
// Get returns the path to a cached rootfs for the given digest, and true
3948
// if it exists and appears valid. Returns ("", false) on a cache miss.
4049
// On a hit, the entry's modification time is updated so that Evict does
@@ -125,6 +134,14 @@ func (c *Cache) Evict(maxAge time.Duration) (int, error) {
125134
}
126135
}
127136

137+
// Also evict from the per-layer cache.
138+
lc := c.LayerCache()
139+
layerRemoved, err := lc.Evict(maxAge)
140+
if err != nil {
141+
return removed, fmt.Errorf("evict layer cache: %w", err)
142+
}
143+
removed += layerRemoved
144+
128145
return removed, nil
129146
}
130147

image/layer_cache.go

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package image
5+
6+
import (
7+
"fmt"
8+
"os"
9+
"path/filepath"
10+
"time"
11+
12+
v1 "github.com/google/go-containerregistry/pkg/v1"
13+
)
14+
15+
// LayerCache is a content-addressable, on-disk cache of individual OCI image
// layers. Entries are keyed by DiffID (the uncompressed content hash taken
// from the image config), so the key does not depend on the registry or on
// how the layer blob was compressed.
type LayerCache struct {
	// baseDir is the directory under which every cached layer and every
	// temporary extraction directory lives.
	baseDir string
}
21+
22+
// NewLayerCache creates a new layer cache rooted at baseDir. The directory is
23+
// created lazily when the first entry is stored.
24+
func NewLayerCache(baseDir string) *LayerCache {
25+
return &LayerCache{baseDir: baseDir}
26+
}
27+
28+
// Has returns true if a cached layer exists for the given DiffID.
29+
func (lc *LayerCache) Has(diffID v1.Hash) bool {
30+
_, ok := lc.Get(diffID)
31+
return ok
32+
}
33+
34+
// Get returns the path to a cached layer for the given DiffID, and true
35+
// if it exists and appears valid. Returns ("", false) on a cache miss.
36+
// On a hit, the entry's modification time is updated so that Evict does
37+
// not remove frequently used entries.
38+
func (lc *LayerCache) Get(diffID v1.Hash) (string, bool) {
39+
dir := lc.pathFor(diffID)
40+
41+
info, err := os.Stat(dir)
42+
if err != nil || !info.IsDir() {
43+
return "", false
44+
}
45+
46+
// Touch mtime so Evict treats this as recently used.
47+
now := time.Now()
48+
_ = os.Chtimes(dir, now, now)
49+
50+
return dir, true
51+
}
52+
53+
// Put records that tempDir contains the extracted layer for diffID.
54+
// It moves (renames) tempDir into the cache directory. After a
55+
// successful Put, tempDir should no longer be used directly;
56+
// callers should use the path returned by Get instead.
57+
func (lc *LayerCache) Put(diffID v1.Hash, tempDir string) error {
58+
if err := os.MkdirAll(lc.baseDir, 0o700); err != nil {
59+
return fmt.Errorf("create layer cache dir: %w", err)
60+
}
61+
62+
dst := lc.pathFor(diffID)
63+
64+
// If the destination already exists, another concurrent extraction may
65+
// have beaten us. Remove the duplicate we just extracted.
66+
if _, err := os.Stat(dst); err == nil {
67+
_ = os.RemoveAll(tempDir)
68+
return nil
69+
}
70+
71+
if err := os.Rename(tempDir, dst); err != nil {
72+
return fmt.Errorf("move layer to cache: %w", err)
73+
}
74+
75+
return nil
76+
}
77+
78+
// TempDir creates a temporary directory inside the cache's base directory.
79+
// This ensures os.Rename in Put stays on the same filesystem, avoiding
80+
// cross-device link errors (e.g. /tmp on tmpfs vs cache on a different mount).
81+
func (lc *LayerCache) TempDir() (string, error) {
82+
if err := os.MkdirAll(lc.baseDir, 0o700); err != nil {
83+
return "", fmt.Errorf("create layer cache dir: %w", err)
84+
}
85+
return os.MkdirTemp(lc.baseDir, "tmp-layer-*")
86+
}
87+
88+
// Evict removes cached entries whose modification time is older than maxAge.
89+
// Stale temporary directories (tmp-*) from interrupted extractions are also
90+
// cleaned if they are older than maxAge. Returns the number of entries removed.
91+
func (lc *LayerCache) Evict(maxAge time.Duration) (int, error) {
92+
entries, err := os.ReadDir(lc.baseDir)
93+
if err != nil {
94+
if os.IsNotExist(err) {
95+
return 0, nil
96+
}
97+
return 0, fmt.Errorf("read layer cache dir: %w", err)
98+
}
99+
100+
cutoff := time.Now().Add(-maxAge)
101+
removed := 0
102+
103+
for _, entry := range entries {
104+
info, err := entry.Info()
105+
if err != nil {
106+
continue
107+
}
108+
109+
if info.ModTime().Before(cutoff) {
110+
entryPath := filepath.Join(lc.baseDir, entry.Name())
111+
if err := os.RemoveAll(entryPath); err != nil {
112+
continue
113+
}
114+
removed++
115+
}
116+
}
117+
118+
return removed, nil
119+
}
120+
121+
// pathFor converts a DiffID into a filesystem path inside the cache directory.
122+
func (lc *LayerCache) pathFor(diffID v1.Hash) string {
123+
safe := diffID.Algorithm + "-" + diffID.Hex
124+
return filepath.Join(lc.baseDir, safe)
125+
}

image/layer_cache_test.go

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package image
5+
6+
import (
7+
"os"
8+
"path/filepath"
9+
"strings"
10+
"testing"
11+
"time"
12+
13+
v1 "github.com/google/go-containerregistry/pkg/v1"
14+
"github.com/stretchr/testify/assert"
15+
"github.com/stretchr/testify/require"
16+
)
17+
18+
// testDiffID is the fixed DiffID used as the cache key throughout these tests.
var testDiffID = v1.Hash{
	Algorithm: "sha256",
	Hex:       "deadbeef1234567890abcdef",
}
19+
20+
func TestLayerCache_Has_EmptyCache(t *testing.T) {
21+
t.Parallel()
22+
23+
tmpDir := t.TempDir()
24+
lc := NewLayerCache(tmpDir)
25+
26+
assert.False(t, lc.Has(testDiffID))
27+
}
28+
29+
func TestLayerCache_Get_UnknownDiffID(t *testing.T) {
30+
t.Parallel()
31+
32+
tmpDir := t.TempDir()
33+
lc := NewLayerCache(tmpDir)
34+
35+
path, ok := lc.Get(testDiffID)
36+
assert.False(t, ok)
37+
assert.Empty(t, path)
38+
}
39+
40+
func TestLayerCache_PutGet_RoundTrip(t *testing.T) {
41+
t.Parallel()
42+
43+
cacheDir := t.TempDir()
44+
lc := NewLayerCache(cacheDir)
45+
46+
// Create a temporary directory to simulate an extracted layer.
47+
layerDir := t.TempDir()
48+
markerFile := filepath.Join(layerDir, "marker.txt")
49+
err := os.WriteFile(markerFile, []byte("layer content"), 0o644)
50+
require.NoError(t, err)
51+
52+
// Put the layer into the cache.
53+
err = lc.Put(testDiffID, layerDir)
54+
require.NoError(t, err)
55+
56+
// The original layerDir should have been renamed (moved).
57+
_, err = os.Stat(layerDir)
58+
assert.True(t, os.IsNotExist(err), "original layer dir should no longer exist after Put")
59+
60+
// Get should return the cached path.
61+
cachedPath, ok := lc.Get(testDiffID)
62+
require.True(t, ok)
63+
assert.NotEmpty(t, cachedPath)
64+
65+
// Verify the marker file is present in the cached directory.
66+
data, err := os.ReadFile(filepath.Join(cachedPath, "marker.txt"))
67+
require.NoError(t, err)
68+
assert.Equal(t, "layer content", string(data))
69+
70+
// Has should also return true now.
71+
assert.True(t, lc.Has(testDiffID))
72+
}
73+
74+
func TestLayerCache_DoublePut_NoConcurrencyError(t *testing.T) {
75+
t.Parallel()
76+
77+
cacheDir := t.TempDir()
78+
lc := NewLayerCache(cacheDir)
79+
80+
// First put.
81+
layer1 := t.TempDir()
82+
err := os.WriteFile(filepath.Join(layer1, "first"), []byte("1"), 0o644)
83+
require.NoError(t, err)
84+
85+
err = lc.Put(testDiffID, layer1)
86+
require.NoError(t, err)
87+
88+
// Second put with a different dir should succeed without error.
89+
// The second dir should be cleaned up since the cache entry already exists.
90+
layer2 := t.TempDir()
91+
err = os.WriteFile(filepath.Join(layer2, "second"), []byte("2"), 0o644)
92+
require.NoError(t, err)
93+
94+
err = lc.Put(testDiffID, layer2)
95+
require.NoError(t, err)
96+
97+
// The second layer directory should have been removed.
98+
_, err = os.Stat(layer2)
99+
assert.True(t, os.IsNotExist(err), "second layer dir should be removed on duplicate Put")
100+
101+
// The original cached content should still be intact.
102+
cachedPath, ok := lc.Get(testDiffID)
103+
require.True(t, ok)
104+
105+
data, err := os.ReadFile(filepath.Join(cachedPath, "first"))
106+
require.NoError(t, err)
107+
assert.Equal(t, "1", string(data))
108+
}
109+
110+
func TestLayerCache_TempDir_SameFilesystem(t *testing.T) {
111+
t.Parallel()
112+
113+
cacheDir := t.TempDir()
114+
lc := NewLayerCache(cacheDir)
115+
116+
tmpDir, err := lc.TempDir()
117+
require.NoError(t, err)
118+
defer func() { _ = os.RemoveAll(tmpDir) }()
119+
120+
// The temp dir should be under the cache's base directory.
121+
assert.True(t, strings.HasPrefix(tmpDir, cacheDir),
122+
"temp dir %q should be under cache dir %q", tmpDir, cacheDir)
123+
}
124+
125+
func TestLayerCache_Evict_RemovesOldEntries(t *testing.T) {
126+
t.Parallel()
127+
128+
cacheDir := t.TempDir()
129+
lc := NewLayerCache(cacheDir)
130+
131+
// Create a cache entry and backdate its mtime.
132+
oldDir := filepath.Join(cacheDir, "sha256-old")
133+
require.NoError(t, os.MkdirAll(oldDir, 0o700))
134+
oldTime := time.Now().Add(-10 * 24 * time.Hour)
135+
require.NoError(t, os.Chtimes(oldDir, oldTime, oldTime))
136+
137+
removed, err := lc.Evict(7 * 24 * time.Hour)
138+
require.NoError(t, err)
139+
assert.Equal(t, 1, removed)
140+
141+
// Old entry should be gone.
142+
_, err = os.Stat(oldDir)
143+
assert.True(t, os.IsNotExist(err))
144+
}
145+
146+
func TestLayerCache_Evict_PreservesFreshEntries(t *testing.T) {
147+
t.Parallel()
148+
149+
cacheDir := t.TempDir()
150+
lc := NewLayerCache(cacheDir)
151+
152+
// Create a fresh cache entry.
153+
freshDir := filepath.Join(cacheDir, "sha256-fresh")
154+
require.NoError(t, os.MkdirAll(freshDir, 0o700))
155+
156+
removed, err := lc.Evict(7 * 24 * time.Hour)
157+
require.NoError(t, err)
158+
assert.Equal(t, 0, removed)
159+
160+
// Fresh entry should still exist.
161+
_, err = os.Stat(freshDir)
162+
assert.NoError(t, err)
163+
}
164+
165+
func TestLayerCache_Get_TouchesMtime(t *testing.T) {
166+
t.Parallel()
167+
168+
cacheDir := t.TempDir()
169+
lc := NewLayerCache(cacheDir)
170+
171+
// Create a cache entry and backdate it.
172+
entryDir := lc.pathFor(testDiffID)
173+
require.NoError(t, os.MkdirAll(entryDir, 0o700))
174+
oldTime := time.Now().Add(-10 * 24 * time.Hour)
175+
require.NoError(t, os.Chtimes(entryDir, oldTime, oldTime))
176+
177+
// Get should update the mtime.
178+
before := time.Now()
179+
path, ok := lc.Get(testDiffID)
180+
require.True(t, ok)
181+
assert.Equal(t, entryDir, path)
182+
183+
info, err := os.Stat(entryDir)
184+
require.NoError(t, err)
185+
assert.True(t, info.ModTime().After(before) || info.ModTime().Equal(before),
186+
"Get should update mtime to prevent eviction")
187+
}
188+
189+
func TestLayerCache_Evict_NonExistentDir(t *testing.T) {
190+
t.Parallel()
191+
192+
lc := NewLayerCache(filepath.Join(t.TempDir(), "does-not-exist"))
193+
194+
removed, err := lc.Evict(7 * 24 * time.Hour)
195+
require.NoError(t, err)
196+
assert.Equal(t, 0, removed)
197+
}

0 commit comments

Comments
 (0)