|
8 | 8 | "encoding/hex" |
9 | 9 | "encoding/json" |
10 | 10 | "fmt" |
| 11 | + "io/fs" |
11 | 12 | "os" |
12 | 13 | "path/filepath" |
13 | 14 | "strings" |
@@ -145,11 +146,256 @@ func (c *Cache) Evict(maxAge time.Duration) (int, error) { |
145 | 146 | return removed, nil |
146 | 147 | } |
147 | 148 |
|
// CacheEntry describes one cached rootfs together with the image
// references that resolve to it.
type CacheEntry struct {
	// Digest is the OCI manifest digest identifying the entry,
	// for example "sha256:abc123...".
	Digest string
	// Path is the location of the extracted rootfs on disk: the cache
	// base directory joined with the entry's directory name. It is
	// absolute whenever the cache base directory is absolute.
	Path string
	// Size is the best-effort total, in bytes, of the files found under
	// Path.
	Size int64
	// ModTime is the modification time of the entry's cache directory.
	// Get refreshes it on cache hits; otherwise it reflects creation time.
	ModTime time.Time
	// Refs holds the image references (such as "ghcr.io/org/image:latest")
	// that the ref index maps to Digest. It is empty for orphaned entries.
	Refs []string
}
| 164 | + |
| 165 | +// List returns metadata for all cached rootfs entries along with the image |
| 166 | +// references that point to each digest. Orphaned entries (no refs) will |
| 167 | +// have an empty Refs slice. |
| 168 | +func (c *Cache) List() ([]CacheEntry, error) { |
| 169 | + if c == nil { |
| 170 | + return nil, nil |
| 171 | + } |
| 172 | + |
| 173 | + entries, err := os.ReadDir(c.baseDir) |
| 174 | + if err != nil { |
| 175 | + if os.IsNotExist(err) { |
| 176 | + return nil, nil |
| 177 | + } |
| 178 | + return nil, fmt.Errorf("read cache dir: %w", err) |
| 179 | + } |
| 180 | + |
| 181 | + // Build reverse map: digest → []imageRef from the ref index. |
| 182 | + refMap := c.buildRefMap() |
| 183 | + |
| 184 | + var result []CacheEntry |
| 185 | + for _, entry := range entries { |
| 186 | + name := entry.Name() |
| 187 | + |
| 188 | + // Only consider rootfs entries (sha256-*), skip refs/, layers/, tmp-*. |
| 189 | + if !isRootfsEntry(name) { |
| 190 | + continue |
| 191 | + } |
| 192 | + |
| 193 | + info, err := entry.Info() |
| 194 | + if err != nil { |
| 195 | + continue |
| 196 | + } |
| 197 | + |
| 198 | + digest := dirNameToDigest(name) |
| 199 | + entryPath := filepath.Join(c.baseDir, name) |
| 200 | + size := dirSize(entryPath) |
| 201 | + |
| 202 | + result = append(result, CacheEntry{ |
| 203 | + Digest: digest, |
| 204 | + Path: entryPath, |
| 205 | + Size: size, |
| 206 | + ModTime: info.ModTime(), |
| 207 | + Refs: refMap[digest], |
| 208 | + }) |
| 209 | + } |
| 210 | + |
| 211 | + return result, nil |
| 212 | +} |
| 213 | + |
| 214 | +// GC removes rootfs entries not referenced by any ref index entry. |
| 215 | +// Unlike [Evict] (which is time-based), GC is reachability-based: an entry |
| 216 | +// survives if and only if at least one ref points to its digest. |
| 217 | +// Returns the number of entries removed. |
| 218 | +// |
| 219 | +// GC is not safe for concurrent use with [Pull]. If another process is |
| 220 | +// pulling an image while GC runs, the pulled entry may be collected before |
| 221 | +// the ref index is updated. The consequence is a cache miss on the next |
| 222 | +// run, not data corruption. |
| 223 | +func (c *Cache) GC() (int, error) { |
| 224 | + if c == nil { |
| 225 | + return 0, nil |
| 226 | + } |
| 227 | + |
| 228 | + entries, err := os.ReadDir(c.baseDir) |
| 229 | + if err != nil { |
| 230 | + if os.IsNotExist(err) { |
| 231 | + return 0, nil |
| 232 | + } |
| 233 | + return 0, fmt.Errorf("read cache dir: %w", err) |
| 234 | + } |
| 235 | + |
| 236 | + live, err := c.liveDigests() |
| 237 | + if err != nil { |
| 238 | + return 0, fmt.Errorf("enumerate live digests: %w", err) |
| 239 | + } |
| 240 | + |
| 241 | + removed := 0 |
| 242 | + |
| 243 | + for _, entry := range entries { |
| 244 | + name := entry.Name() |
| 245 | + if !isRootfsEntry(name) { |
| 246 | + continue |
| 247 | + } |
| 248 | + |
| 249 | + digest := dirNameToDigest(name) |
| 250 | + if live[digest] { |
| 251 | + continue |
| 252 | + } |
| 253 | + |
| 254 | + entryPath := filepath.Join(c.baseDir, name) |
| 255 | + if err := os.RemoveAll(entryPath); err != nil { |
| 256 | + continue |
| 257 | + } |
| 258 | + removed++ |
| 259 | + } |
| 260 | + |
| 261 | + return removed, nil |
| 262 | +} |
| 263 | + |
| 264 | +// Purge removes the entire cache directory including all rootfs entries, |
| 265 | +// the ref index, and the layer cache. |
| 266 | +func (c *Cache) Purge() error { |
| 267 | + if c == nil { |
| 268 | + return nil |
| 269 | + } |
| 270 | + if err := os.RemoveAll(c.baseDir); err != nil { |
| 271 | + return fmt.Errorf("remove cache dir: %w", err) |
| 272 | + } |
| 273 | + return nil |
| 274 | +} |
| 275 | + |
| 276 | +// liveDigests returns the set of digests referenced by at least one ref |
| 277 | +// index entry. Returns a nil map and nil error when the refs directory |
| 278 | +// does not exist (no images have been pulled yet). Returns a non-nil |
| 279 | +// error if the refs directory exists but cannot be read, so callers |
| 280 | +// can abort rather than treating all entries as orphaned. |
| 281 | +func (c *Cache) liveDigests() (map[string]bool, error) { |
| 282 | + refsDir := filepath.Join(c.baseDir, refDir) |
| 283 | + entries, err := os.ReadDir(refsDir) |
| 284 | + if err != nil { |
| 285 | + if os.IsNotExist(err) { |
| 286 | + return nil, nil |
| 287 | + } |
| 288 | + return nil, fmt.Errorf("read refs dir: %w", err) |
| 289 | + } |
| 290 | + |
| 291 | + live := make(map[string]bool, len(entries)) |
| 292 | + for _, entry := range entries { |
| 293 | + if entry.IsDir() { |
| 294 | + continue |
| 295 | + } |
| 296 | + data, err := os.ReadFile(filepath.Join(refsDir, entry.Name())) |
| 297 | + if err != nil { |
| 298 | + continue |
| 299 | + } |
| 300 | + _, digest := parseRefFile(data) |
| 301 | + if digest != "" { |
| 302 | + live[digest] = true |
| 303 | + } |
| 304 | + } |
| 305 | + return live, nil |
| 306 | +} |
| 307 | + |
| 308 | +// buildRefMap returns a map from digest to the list of image references |
| 309 | +// that point to it. |
| 310 | +func (c *Cache) buildRefMap() map[string][]string { |
| 311 | + refsDir := filepath.Join(c.baseDir, refDir) |
| 312 | + entries, err := os.ReadDir(refsDir) |
| 313 | + if err != nil { |
| 314 | + return nil |
| 315 | + } |
| 316 | + |
| 317 | + refMap := make(map[string][]string) |
| 318 | + for _, entry := range entries { |
| 319 | + if entry.IsDir() { |
| 320 | + continue |
| 321 | + } |
| 322 | + data, err := os.ReadFile(filepath.Join(refsDir, entry.Name())) |
| 323 | + if err != nil { |
| 324 | + continue |
| 325 | + } |
| 326 | + imageRef, digest := parseRefFile(data) |
| 327 | + if digest == "" { |
| 328 | + continue |
| 329 | + } |
| 330 | + // Skip empty image refs from legacy-format files. The entry still |
| 331 | + // counts as referenced for GC (via liveDigests), but we don't add |
| 332 | + // an empty string to the Refs slice. |
| 333 | + if imageRef != "" { |
| 334 | + refMap[digest] = append(refMap[digest], imageRef) |
| 335 | + } |
| 336 | + } |
| 337 | + return refMap |
| 338 | +} |
| 339 | + |
// parseRefFile decodes the content of a ref index file. Two layouts are
// understood: the extended layout "imageRef\tdigest" and the legacy layout
// that holds only the digest. Surrounding whitespace is stripped first.
// For legacy content imageRef is empty; for blank content both results are
// empty. Only the first tab separates the two fields.
func parseRefFile(data []byte) (imageRef, digest string) {
	trimmed := strings.TrimSpace(string(data))
	if trimmed == "" {
		return "", ""
	}
	fields := strings.SplitN(trimmed, "\t", 2)
	if len(fields) < 2 {
		// No tab at all: legacy digest-only file.
		return "", trimmed
	}
	return fields[0], fields[1]
}
| 355 | + |
// isRootfsEntry reports whether name has the "sha256-" prefix used for
// cached rootfs directories, as opposed to the cache's special directories.
func isRootfsEntry(name string) bool {
	const rootfsPrefix = "sha256-"
	return strings.HasPrefix(name, rootfsPrefix)
}
| 361 | + |
// dirNameToDigest turns a cache directory name back into a digest by
// replacing the first dash with a colon: "sha256-abc123" becomes
// "sha256:abc123". A name without a dash is returned unchanged.
func dirNameToDigest(name string) string {
	sep := strings.IndexByte(name, '-')
	if sep < 0 {
		return name
	}
	return name[:sep] + ":" + name[sep+1:]
}
| 367 | + |
// dirSize walks the tree rooted at path and sums the sizes of every
// non-directory entry it can stat. Symbolic links are not followed; a link
// contributes the size reported for the link itself. The result is
// best-effort: entries that cannot be visited or stat'ed are left out, and
// a missing root yields 0.
func dirSize(path string) int64 {
	var sum int64
	visit := func(_ string, d fs.DirEntry, walkErr error) error {
		// Always return nil so a single bad entry never stops the walk.
		if walkErr != nil || d.IsDir() {
			return nil
		}
		if fi, statErr := d.Info(); statErr == nil {
			sum += fi.Size()
		}
		return nil
	}
	_ = filepath.WalkDir(path, visit)
	return sum
}
| 388 | + |
148 | 389 | // pathFor converts a digest like "sha256:abc123..." into a filesystem path |
149 | | -// inside the cache directory. The colon is replaced to avoid filesystem issues. |
| 390 | +// inside the cache directory. Only the first colon is replaced so the |
| 391 | +// round-trip with [dirNameToDigest] is symmetric. |
150 | 392 | func (c *Cache) pathFor(digest string) string { |
151 | | - // Replace "sha256:" prefix with "sha256-" for filesystem safety. |
152 | | - safe := strings.ReplaceAll(digest, ":", "-") |
| 393 | + if strings.ContainsAny(digest, "/\\") || strings.Contains(digest, "..") { |
| 394 | + // Defense-in-depth: reject digests that could escape the cache dir. |
| 395 | + // Normal OCI digests are "algorithm:hex" with no path separators. |
| 396 | + return filepath.Join(c.baseDir, "invalid-digest") |
| 397 | + } |
| 398 | + safe := strings.Replace(digest, ":", "-", 1) |
153 | 399 | return filepath.Join(c.baseDir, safe) |
154 | 400 | } |
155 | 401 |
|
@@ -201,28 +447,32 @@ func (c *Cache) StoreRef(imageRef, digest string, cfg *OCIConfig) { |
201 | 447 | c.putConfig(digest, cfg) |
202 | 448 | } |
203 | 449 |
|
204 | | -// getRef returns the cached digest for an image reference. |
| 450 | +// getRef returns the cached digest for an image reference. It handles |
| 451 | +// both the legacy format (digest only) and the extended format |
| 452 | +// (imageRef\tdigest). |
205 | 453 | func (c *Cache) getRef(imageRef string) (string, bool) { |
206 | 454 | p := c.refPath(imageRef) |
207 | 455 | data, err := os.ReadFile(p) |
208 | 456 | if err != nil { |
209 | 457 | return "", false |
210 | 458 | } |
211 | | - digest := strings.TrimSpace(string(data)) |
| 459 | + _, digest := parseRefFile(data) |
212 | 460 | if digest == "" { |
213 | 461 | return "", false |
214 | 462 | } |
215 | 463 | return digest, true |
216 | 464 | } |
217 | 465 |
|
218 | | -// putRef stores the ref→digest mapping as a small file. |
| 466 | +// putRef stores the ref→digest mapping as a small file. The file uses the |
| 467 | +// extended format "imageRef\tdigest\n" so that List/GC can recover the |
| 468 | +// original image reference from the hashed filename. |
219 | 469 | func (c *Cache) putRef(imageRef, digest string) { |
220 | 470 | dir := filepath.Join(c.baseDir, refDir) |
221 | 471 | if err := os.MkdirAll(dir, 0o700); err != nil { |
222 | 472 | return |
223 | 473 | } |
224 | 474 | p := c.refPath(imageRef) |
225 | | - _ = os.WriteFile(p, []byte(digest+"\n"), 0o600) |
| 475 | + _ = os.WriteFile(p, []byte(imageRef+"\t"+digest+"\n"), 0o600) |
226 | 476 | } |
227 | 477 |
|
228 | 478 | // refPath returns the filesystem path for a ref index entry. The image |
|
0 commit comments