Skip to content

Commit 98e51b5

Browse files
JAORMXclaude
andcommitted
Add COW rootfs isolation and ref-indexed image cache
Externalize OCI image caching so extracted rootfs directories persist across VM restarts. On warm cache hits a ref-based index skips the daemon/registry fetch entirely, and a platform-native COW clone (FICLONE on Linux, clonefile on macOS) gives each VM an isolated working copy without modifying the shared cache. New rootfs/ package provides CloneDir with symlink boundary validation, read-only directory permission restoration, and best-effort ownership preservation. Cache gains Evict() for time-based cleanup, LookupRef/StoreRef for the ref→digest index, and mtime touch on Get() to prevent evicting active entries. WithImageCache now sets an externalCache flag so WithDataDir ordering is irrelevant. Warm-start improvement: ~30s → ~3s on btrfs (measured e2e). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 70f4137 commit 98e51b5

File tree

12 files changed

+845
-10
lines changed

12 files changed

+845
-10
lines changed

image/cache.go

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,14 @@
44
package image
55

66
import (
7+
"crypto/sha256"
8+
"encoding/hex"
9+
"encoding/json"
710
"fmt"
811
"os"
912
"path/filepath"
1013
"strings"
14+
"time"
1115
)
1216

1317
// Cache provides content-addressable caching of extracted rootfs directories.
@@ -33,6 +37,8 @@ func (c *Cache) BaseDir() string {
3337

3438
// Get returns the path to a cached rootfs for the given digest, and true
3539
// if it exists and appears valid. Returns ("", false) on a cache miss.
40+
// On a hit, the entry's modification time is updated so that Evict does
41+
// not remove frequently used entries.
3642
func (c *Cache) Get(digest string) (string, bool) {
3743
dir := c.pathFor(digest)
3844

@@ -41,6 +47,10 @@ func (c *Cache) Get(digest string) (string, bool) {
4147
return "", false
4248
}
4349

50+
// Touch mtime so Evict treats this as recently used.
51+
now := time.Now()
52+
_ = os.Chtimes(dir, now, now)
53+
4454
return dir, true
4555
}
4656

@@ -85,10 +95,152 @@ func (c *Cache) TempDir() (string, error) {
8595
return os.MkdirTemp(c.baseDir, "tmp-rootfs-*")
8696
}
8797

98+
// Evict removes cached entries whose modification time is older than maxAge.
99+
// Stale temporary directories (tmp-*) from interrupted extractions are also
100+
// cleaned if they are older than maxAge. Returns the number of entries removed.
101+
func (c *Cache) Evict(maxAge time.Duration) (int, error) {
102+
entries, err := os.ReadDir(c.baseDir)
103+
if err != nil {
104+
if os.IsNotExist(err) {
105+
return 0, nil
106+
}
107+
return 0, fmt.Errorf("read cache dir: %w", err)
108+
}
109+
110+
cutoff := time.Now().Add(-maxAge)
111+
removed := 0
112+
113+
for _, entry := range entries {
114+
info, err := entry.Info()
115+
if err != nil {
116+
continue
117+
}
118+
119+
if info.ModTime().Before(cutoff) {
120+
entryPath := filepath.Join(c.baseDir, entry.Name())
121+
if err := os.RemoveAll(entryPath); err != nil {
122+
continue
123+
}
124+
removed++
125+
}
126+
}
127+
128+
return removed, nil
129+
}
130+
88131
// pathFor converts a digest like "sha256:abc123..." into a filesystem path
89132
// inside the cache directory. The colon is replaced to avoid filesystem issues.
90133
func (c *Cache) pathFor(digest string) string {
91134
// Replace "sha256:" prefix with "sha256-" for filesystem safety.
92135
safe := strings.ReplaceAll(digest, ":", "-")
93136
return filepath.Join(c.baseDir, safe)
94137
}
138+
139+
// --- Ref-based index ---
140+
//
141+
// The ref index maps image references (e.g. "ghcr.io/org/image:tag") to their
142+
// manifest digest, allowing cache lookups without contacting a registry or
143+
// daemon. This is critical for performance: the daemon fetcher (docker save)
144+
// exports the entire image just to compute the digest.
145+
146+
const (
147+
refDir = "refs"
148+
configFile = ".oci-config.json"
149+
)
150+
151+
// LookupRef checks whether the cache has a valid entry for the given image
152+
// reference. On a hit it returns a fully populated RootFS without any
153+
// network or daemon I/O. Returns nil on any miss or error.
154+
func (c *Cache) LookupRef(imageRef string) *RootFS {
155+
if c == nil {
156+
return nil
157+
}
158+
159+
digest, ok := c.getRef(imageRef)
160+
if !ok {
161+
return nil
162+
}
163+
164+
rootfsPath, ok := c.Get(digest)
165+
if !ok {
166+
return nil
167+
}
168+
169+
cfg, err := c.getConfig(digest)
170+
if err != nil {
171+
return nil
172+
}
173+
174+
return &RootFS{Path: rootfsPath, Config: cfg, FromCache: true}
175+
}
176+
177+
// StoreRef records the ref→digest mapping and persists the OCI config
178+
// alongside the cached rootfs entry. Both operations are best-effort.
179+
func (c *Cache) StoreRef(imageRef, digest string, cfg *OCIConfig) {
180+
if c == nil {
181+
return
182+
}
183+
c.putRef(imageRef, digest)
184+
c.putConfig(digest, cfg)
185+
}
186+
187+
// getRef returns the cached digest for an image reference.
188+
func (c *Cache) getRef(imageRef string) (string, bool) {
189+
p := c.refPath(imageRef)
190+
data, err := os.ReadFile(p)
191+
if err != nil {
192+
return "", false
193+
}
194+
digest := strings.TrimSpace(string(data))
195+
if digest == "" {
196+
return "", false
197+
}
198+
return digest, true
199+
}
200+
201+
// putRef stores the ref→digest mapping as a small file.
202+
func (c *Cache) putRef(imageRef, digest string) {
203+
dir := filepath.Join(c.baseDir, refDir)
204+
if err := os.MkdirAll(dir, 0o700); err != nil {
205+
return
206+
}
207+
p := c.refPath(imageRef)
208+
_ = os.WriteFile(p, []byte(digest+"\n"), 0o600)
209+
}
210+
211+
// refPath returns the filesystem path for a ref index entry. The image
212+
// reference is hashed to avoid filesystem issues with slashes and colons.
213+
func (c *Cache) refPath(imageRef string) string {
214+
h := sha256.Sum256([]byte(imageRef))
215+
return filepath.Join(c.baseDir, refDir, hex.EncodeToString(h[:]))
216+
}
217+
218+
// getConfig reads the cached OCI config for a digest.
219+
func (c *Cache) getConfig(digest string) (*OCIConfig, error) {
220+
p := filepath.Join(c.pathFor(digest), configFile)
221+
data, err := os.ReadFile(p)
222+
if err != nil {
223+
return nil, err
224+
}
225+
var cfg OCIConfig
226+
if err := json.Unmarshal(data, &cfg); err != nil {
227+
return nil, err
228+
}
229+
return &cfg, nil
230+
}
231+
232+
// putConfig writes the OCI config as JSON inside the cache entry.
233+
func (c *Cache) putConfig(digest string, cfg *OCIConfig) {
234+
if cfg == nil {
235+
return
236+
}
237+
dir := c.pathFor(digest)
238+
if _, err := os.Stat(dir); err != nil {
239+
return
240+
}
241+
data, err := json.Marshal(cfg)
242+
if err != nil {
243+
return
244+
}
245+
_ = os.WriteFile(filepath.Join(dir, configFile), data, 0o600)
246+
}

image/cache_test.go

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"os"
88
"path/filepath"
99
"testing"
10+
"time"
1011

1112
"github.com/stretchr/testify/assert"
1213
"github.com/stretchr/testify/require"
@@ -146,3 +147,104 @@ func TestCache_DoublePut_NoConcurrencyError(t *testing.T) {
146147
require.NoError(t, err)
147148
assert.Equal(t, "1", string(data))
148149
}
150+
151+
func TestCache_Evict_RemovesOldEntries(t *testing.T) {
152+
t.Parallel()
153+
154+
cacheDir := t.TempDir()
155+
c := NewCache(cacheDir)
156+
157+
// Create a cache entry and backdate its mtime.
158+
oldDir := filepath.Join(cacheDir, "sha256-old")
159+
require.NoError(t, os.MkdirAll(oldDir, 0o700))
160+
oldTime := time.Now().Add(-10 * 24 * time.Hour)
161+
require.NoError(t, os.Chtimes(oldDir, oldTime, oldTime))
162+
163+
// Create a fresh entry.
164+
newDir := filepath.Join(cacheDir, "sha256-new")
165+
require.NoError(t, os.MkdirAll(newDir, 0o700))
166+
167+
removed, err := c.Evict(7 * 24 * time.Hour)
168+
require.NoError(t, err)
169+
assert.Equal(t, 1, removed)
170+
171+
// Old entry should be gone, new entry should remain.
172+
_, err = os.Stat(oldDir)
173+
assert.True(t, os.IsNotExist(err))
174+
175+
_, err = os.Stat(newDir)
176+
assert.NoError(t, err)
177+
}
178+
179+
func TestCache_Evict_CleansOldTmpDirs(t *testing.T) {
180+
t.Parallel()
181+
182+
cacheDir := t.TempDir()
183+
c := NewCache(cacheDir)
184+
185+
// Create a stale tmp directory (simulates interrupted extraction).
186+
tmpDir := filepath.Join(cacheDir, "tmp-rootfs-stale")
187+
require.NoError(t, os.MkdirAll(tmpDir, 0o700))
188+
oldTime := time.Now().Add(-10 * 24 * time.Hour)
189+
require.NoError(t, os.Chtimes(tmpDir, oldTime, oldTime))
190+
191+
removed, err := c.Evict(7 * 24 * time.Hour)
192+
require.NoError(t, err)
193+
assert.Equal(t, 1, removed)
194+
195+
_, err = os.Stat(tmpDir)
196+
assert.True(t, os.IsNotExist(err))
197+
}
198+
199+
func TestCache_Evict_PreservesFreshTmpDirs(t *testing.T) {
200+
t.Parallel()
201+
202+
cacheDir := t.TempDir()
203+
c := NewCache(cacheDir)
204+
205+
// Create a fresh tmp directory (simulates in-flight extraction).
206+
tmpDir := filepath.Join(cacheDir, "tmp-rootfs-fresh")
207+
require.NoError(t, os.MkdirAll(tmpDir, 0o700))
208+
209+
removed, err := c.Evict(7 * 24 * time.Hour)
210+
require.NoError(t, err)
211+
assert.Equal(t, 0, removed)
212+
213+
_, err = os.Stat(tmpDir)
214+
assert.NoError(t, err)
215+
}
216+
217+
func TestCache_Evict_NonExistentDir(t *testing.T) {
218+
t.Parallel()
219+
220+
c := NewCache(filepath.Join(t.TempDir(), "does-not-exist"))
221+
222+
removed, err := c.Evict(7 * 24 * time.Hour)
223+
require.NoError(t, err)
224+
assert.Equal(t, 0, removed)
225+
}
226+
227+
func TestCache_Get_TouchesMtime(t *testing.T) {
228+
t.Parallel()
229+
230+
cacheDir := t.TempDir()
231+
c := NewCache(cacheDir)
232+
233+
// Create a cache entry and backdate it.
234+
digest := "sha256:touchtest"
235+
entryDir := filepath.Join(cacheDir, "sha256-touchtest")
236+
require.NoError(t, os.MkdirAll(entryDir, 0o700))
237+
oldTime := time.Now().Add(-10 * 24 * time.Hour)
238+
require.NoError(t, os.Chtimes(entryDir, oldTime, oldTime))
239+
240+
// Get should update the mtime.
241+
before := time.Now()
242+
path, ok := c.Get(digest)
243+
require.True(t, ok)
244+
assert.Equal(t, entryDir, path)
245+
246+
info, err := os.Stat(entryDir)
247+
require.NoError(t, err)
248+
assert.True(t, info.ModTime().After(before) || info.ModTime().Equal(before),
249+
"Get should update mtime to prevent eviction")
250+
}

image/config.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,9 @@ type KrunConfig struct {
3131

3232
// RootFS represents an extracted rootfs ready for libkrun.
3333
type RootFS struct {
34-
Path string // Filesystem path to the extracted rootfs directory
35-
Config *OCIConfig // Parsed OCI image configuration
34+
Path string // Filesystem path to the extracted rootfs directory
35+
Config *OCIConfig // Parsed OCI image configuration
36+
FromCache bool // True when the rootfs was served from cache (not freshly extracted)
3637
}
3738

3839
// krunConfigFile is the filename written inside the rootfs.

image/pull.go

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,14 @@ func PullWithFetcher(ctx context.Context, imageRef string, cache *Cache, fetcher
4444
return nil, fmt.Errorf("parse image reference %q: %w", imageRef, err)
4545
}
4646

47+
// Fast path: check if we have a cached ref→digest mapping with a valid
48+
// rootfs entry. This avoids the daemon/registry fetch entirely — critical
49+
// because daemon.Image() does a full "docker save" export.
50+
if cached := cache.LookupRef(ref.String()); cached != nil {
51+
slog.Debug("using ref-indexed cache hit", "ref", ref.String(), "path", cached.Path)
52+
return cached, nil
53+
}
54+
4755
slog.Debug("pulling image", "ref", ref.String())
4856

4957
img, err := fetcher.Pull(ctx, ref.String())
@@ -60,15 +68,17 @@ func PullWithFetcher(ctx context.Context, imageRef string, cache *Cache, fetcher
6068
digestStr := digest.String()
6169
slog.Debug("image digest", "digest", digestStr)
6270

63-
// Check cache before extracting.
71+
// Check cache before extracting (covers the case where the ref index
72+
// missed but the digest entry exists, e.g. pulled under a different tag).
6473
if cache != nil {
6574
if cachedPath, ok := cache.Get(digestStr); ok {
6675
slog.Debug("using cached rootfs", "path", cachedPath)
6776
ociCfg, err := extractOCIConfig(img)
6877
if err != nil {
6978
return nil, fmt.Errorf("extract OCI config: %w", err)
7079
}
71-
return &RootFS{Path: cachedPath, Config: ociCfg}, nil
80+
cache.StoreRef(ref.String(), digestStr, ociCfg)
81+
return &RootFS{Path: cachedPath, Config: ociCfg, FromCache: true}, nil
7282
}
7383
}
7484

@@ -97,7 +107,9 @@ func PullWithFetcher(ctx context.Context, imageRef string, cache *Cache, fetcher
97107
return nil, fmt.Errorf("extract image layers: %w", err)
98108
}
99109

100-
// Move into cache if available.
110+
// Move into cache if available. The extraction is fresh and this is
111+
// the only reference, so FromCache stays false — callers may safely
112+
// modify the rootfs in place without a COW clone.
101113
rootfsPath := tmpDir
102114
if cache != nil {
103115
if err := cache.Put(digestStr, tmpDir); err != nil {
@@ -108,6 +120,8 @@ func PullWithFetcher(ctx context.Context, imageRef string, cache *Cache, fetcher
108120
if cachedPath, ok := cache.Get(digestStr); ok {
109121
rootfsPath = cachedPath
110122
}
123+
// Record ref→digest mapping and OCI config for next-run fast path.
124+
cache.StoreRef(ref.String(), digestStr, ociCfg)
111125
}
112126

113127
return &RootFS{Path: rootfsPath, Config: ociCfg}, nil

image/pull_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,9 @@ func TestPullWithFetcher_CacheHit(t *testing.T) {
451451
markerData, err := os.ReadFile(filepath.Join(rootfs.Path, "marker"))
452452
require.NoError(t, err)
453453
assert.Equal(t, "cached", string(markerData))
454+
455+
// Cache hits must be marked so callers know to COW-clone before mutation.
456+
assert.True(t, rootfs.FromCache, "cache hit should set FromCache=true")
454457
}
455458

456459
func TestPullWithFetcher_CacheMiss(t *testing.T) {
@@ -467,6 +470,9 @@ func TestPullWithFetcher_CacheMiss(t *testing.T) {
467470
assert.NotEmpty(t, rootfs.Path)
468471
assert.DirExists(t, rootfs.Path)
469472
assert.NotNil(t, rootfs.Config)
473+
474+
// Fresh extractions are the only reference — safe to modify in place.
475+
assert.False(t, rootfs.FromCache, "cache miss should set FromCache=false")
470476
}
471477

472478
func TestPullWithFetcher_NilCache(t *testing.T) {

0 commit comments

Comments
 (0)