Skip to content

Commit 299fe89

Browse files
authored
Garbage collect OCI layout to cleanup non-referenced blobs (#740)
Signed-off-by: Valentin Delaye <jonesbusy@users.noreply.github.com>
1 parent 398da85 commit 299fe89

4 files changed

Lines changed: 316 additions & 2 deletions

File tree

src/main/java/land/oras/OCILayout.java

Lines changed: 80 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,13 @@
2626
import java.nio.file.Files;
2727
import java.nio.file.Path;
2828
import java.nio.file.StandardCopyOption;
29+
import java.util.ArrayList;
2930
import java.util.HashMap;
31+
import java.util.HashSet;
3032
import java.util.LinkedList;
3133
import java.util.List;
3234
import java.util.Map;
35+
import java.util.Set;
3336
import java.util.concurrent.ExecutorService;
3437
import java.util.concurrent.Executors;
3538
import java.util.function.Supplier;
@@ -336,7 +339,7 @@ public Layer pushBlob(LayoutRef ref, Path blob, Map<String, String> annotations)
336339
Path blobPath = getBlobPath(ref);
337340
String digest = ref.getAlgorithm().digest(blob);
338341
ensureAlgorithmPath(digest);
339-
LOG.debug("Digest: {}", digest);
342+
LOG.trace("Digest: {}", digest);
340343
try {
341344
if (Files.exists(blobPath)) {
342345
LOG.info("Blob already exists: {}", digest);
@@ -345,6 +348,7 @@ public Layer pushBlob(LayoutRef ref, Path blob, Map<String, String> annotations)
345348
Files.copy(blob, blobPath);
346349
Layer layer = Layer.fromFile(blobPath, ref.getAlgorithm()).withAnnotations(annotations);
347350
packToTar();
351+
LOG.debug("Blob pushed to OCI layout: {}", digest);
348352
return layer;
349353
} catch (IOException e) {
350354
throw new OrasException("Failed to push blob", e);
@@ -453,11 +457,85 @@ public Referrers getReferrers(LayoutRef ref, @Nullable ArtifactType artifactType
453457
return Referrers.from(manifestDescriptors);
454458
}
455459

460+
/**
461+
* Remove all blobs that are not referenced by any manifest reachable from the root {@code index.json}.
462+
* @return the list of digests (in {@code <algorithm>:<hex>} format) that were removed
463+
*/
464+
public List<String> garbageCollect() {
465+
Set<String> referencedDigests = new HashSet<>();
466+
Index rootIndex = Index.fromPath(getIndexPath());
467+
collectReferencedDigests(rootIndex, referencedDigests);
468+
469+
List<String> removed = new ArrayList<>();
470+
Path blobsRoot = getBlobPath();
471+
try {
472+
if (!Files.exists(blobsRoot)) {
473+
return removed;
474+
}
475+
// Iterate over algorithm directories (e.g. blobs/sha256/)
476+
try (var algoDirs = Files.newDirectoryStream(blobsRoot)) {
477+
for (Path algoDir : algoDirs) {
478+
String algoPrefix = algoDir.getFileName().toString();
479+
try (var blobFiles = Files.newDirectoryStream(algoDir)) {
480+
for (Path blobFile : blobFiles) {
481+
String hex = blobFile.getFileName().toString();
482+
String digest = algoPrefix + ":" + hex;
483+
if (!referencedDigests.contains(digest)) {
484+
LOG.info("Removing unreferenced blob: {}", digest);
485+
Files.delete(blobFile);
486+
removed.add(digest);
487+
}
488+
}
489+
}
490+
}
491+
}
492+
} catch (IOException e) {
493+
throw new OrasException("Failed to garbage collect OCI layout", e);
494+
}
495+
if (!removed.isEmpty()) {
496+
packToTar();
497+
}
498+
return removed;
499+
}
500+
501+
/**
502+
* Recursively collect all blob digests that are reachable from the given index.
503+
*
504+
* @param index the index to traverse
505+
* @param referencedDigests the set to populate with reachable digests
506+
*/
507+
private void collectReferencedDigests(Index index, Set<String> referencedDigests) {
508+
for (ManifestDescriptor entry : index.getManifests()) {
509+
String entryDigest = entry.getDigest();
510+
referencedDigests.add(entryDigest);
511+
Path blobPath = getBlobPath(entry);
512+
513+
// Nested index
514+
if (isIndexMediaType(entry.getMediaType())) {
515+
Index nestedIndex = Index.fromPath(blobPath);
516+
collectReferencedDigests(nestedIndex, referencedDigests);
517+
}
518+
// Manifest
519+
else {
520+
Manifest manifest = Manifest.fromPath(blobPath);
521+
Config config = manifest.getConfig();
522+
if (config != null && config.getDigest() != null) {
523+
referencedDigests.add(config.getDigest());
524+
}
525+
for (Layer layer : manifest.getLayers()) {
526+
if (layer.getDigest() != null && layer.getData() == null) {
527+
referencedDigests.add(layer.getDigest());
528+
}
529+
}
530+
}
531+
}
532+
}
533+
456534
/**
 * Assign the given value to this instance's {@code path} field.
 *
 * @param path the path to store
 */
private void setPath(Path path) {
457535
this.path = path;
458536
}
459537

460-
private void setTarPath(Path tarPath) {
538+
private void setTarPath(@Nullable Path tarPath) {
461539
this.tarPath = tarPath;
462540
}
463541

src/test/java/land/oras/OCILayoutTest.java

Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1443,4 +1443,240 @@ void testShouldCopyArtifactRecursivelyFromRegistryIntoTarBackedOciLayout() throw
14431443
assertBlobExists(workDir, SupportedAlgorithm.SHA256.digest(file3));
14441444
assertBlobContent(workDir, SupportedAlgorithm.SHA256.digest(file3), "linked-file2-tar");
14451445
}
1446+
1447+
@Test
1448+
void shouldGarbageCollectReturnEmptyWhenNoBlobsAreOrphaned() throws IOException {
1449+
Path ociLayoutPath = layoutPath.resolve("gc-no-orphan");
1450+
Path artifactFile = blobDir.resolve("gc-no-orphan.txt");
1451+
Files.writeString(artifactFile, "no-orphan");
1452+
1453+
LayoutRef layoutRef = LayoutRef.parse("%s:latest".formatted(ociLayoutPath.toString()));
1454+
OCILayout ociLayout =
1455+
OCILayout.Builder.builder().defaults(ociLayoutPath).build();
1456+
ociLayout.pushArtifact(
1457+
layoutRef, ArtifactType.from("foo/bar"), Annotations.empty(), LocalPath.of(artifactFile, "text/plain"));
1458+
1459+
List<String> removed = ociLayout.garbageCollect();
1460+
1461+
// No orphaned blobs — nothing should be removed
1462+
assertTrue(removed.isEmpty(), "Expected no blobs to be garbage collected");
1463+
1464+
// Original blobs still present
1465+
assertBlobExists(ociLayoutPath, Config.empty().getDigest());
1466+
assertBlobExists(ociLayoutPath, SupportedAlgorithm.SHA256.digest(artifactFile));
1467+
}
1468+
1469+
@Test
1470+
void shouldGarbageCollectRemoveOrphanedBlob() throws IOException {
1471+
Path ociLayoutPath = layoutPath.resolve("gc-orphan");
1472+
Path artifactFile = blobDir.resolve("gc-orphan.txt");
1473+
Files.writeString(artifactFile, "referenced");
1474+
Path orphanFile = blobDir.resolve("gc-orphan-extra.txt");
1475+
Files.writeString(orphanFile, "orphaned-blob-content");
1476+
1477+
LayoutRef layoutRef = LayoutRef.parse("%s:latest".formatted(ociLayoutPath.toString()));
1478+
OCILayout ociLayout =
1479+
OCILayout.Builder.builder().defaults(ociLayoutPath).build();
1480+
ociLayout.pushArtifact(
1481+
layoutRef, ArtifactType.from("foo/bar"), Annotations.empty(), LocalPath.of(artifactFile, "text/plain"));
1482+
1483+
// Inject an orphaned blob directly into the blobs/sha256/ directory
1484+
String orphanDigest = SupportedAlgorithm.SHA256.digest(orphanFile);
1485+
Path orphanBlobPath = ociLayoutPath
1486+
.resolve(Const.OCI_LAYOUT_BLOBS)
1487+
.resolve("sha256")
1488+
.resolve(SupportedAlgorithm.getDigest(orphanDigest));
1489+
Files.copy(orphanFile, orphanBlobPath);
1490+
assertBlobExists(ociLayoutPath, orphanDigest);
1491+
1492+
List<String> removed = ociLayout.garbageCollect();
1493+
1494+
// Exactly the orphaned blob should be removed
1495+
assertEquals(1, removed.size(), "Expected exactly one blob to be garbage collected");
1496+
assertEquals(orphanDigest, removed.get(0));
1497+
1498+
// The orphaned blob must no longer exist
1499+
assertBlobAbsent(ociLayoutPath, orphanDigest);
1500+
1501+
// Referenced blobs must still be present
1502+
assertBlobExists(ociLayoutPath, Config.empty().getDigest());
1503+
assertBlobExists(ociLayoutPath, SupportedAlgorithm.SHA256.digest(artifactFile));
1504+
}
1505+
1506+
@Test
1507+
void shouldGarbageCollectMultipleOrphanedBlobs() throws IOException {
1508+
Path ociLayoutPath = layoutPath.resolve("gc-multi-orphan");
1509+
Path artifactFile = blobDir.resolve("gc-multi-orphan.txt");
1510+
Files.writeString(artifactFile, "referenced-multi");
1511+
1512+
LayoutRef layoutRef = LayoutRef.parse("%s:latest".formatted(ociLayoutPath.toString()));
1513+
OCILayout ociLayout =
1514+
OCILayout.Builder.builder().defaults(ociLayoutPath).build();
1515+
ociLayout.pushArtifact(
1516+
layoutRef, ArtifactType.from("foo/bar"), Annotations.empty(), LocalPath.of(artifactFile, "text/plain"));
1517+
1518+
// Inject two orphaned blobs
1519+
Path orphan1 = blobDir.resolve("gc-orphan1.txt");
1520+
Path orphan2 = blobDir.resolve("gc-orphan2.txt");
1521+
Files.writeString(orphan1, "orphan-one");
1522+
Files.writeString(orphan2, "orphan-two");
1523+
1524+
String orphanDigest1 = SupportedAlgorithm.SHA256.digest(orphan1);
1525+
String orphanDigest2 = SupportedAlgorithm.SHA256.digest(orphan2);
1526+
1527+
Path algoDir = ociLayoutPath.resolve(Const.OCI_LAYOUT_BLOBS).resolve("sha256");
1528+
Files.copy(orphan1, algoDir.resolve(SupportedAlgorithm.getDigest(orphanDigest1)));
1529+
Files.copy(orphan2, algoDir.resolve(SupportedAlgorithm.getDigest(orphanDigest2)));
1530+
1531+
List<String> removed = ociLayout.garbageCollect();
1532+
1533+
assertEquals(2, removed.size(), "Expected two blobs to be garbage collected");
1534+
assertTrue(removed.contains(orphanDigest1), "orphanDigest1 should be in removed list");
1535+
assertTrue(removed.contains(orphanDigest2), "orphanDigest2 should be in removed list");
1536+
1537+
assertBlobAbsent(ociLayoutPath, orphanDigest1);
1538+
assertBlobAbsent(ociLayoutPath, orphanDigest2);
1539+
1540+
// Referenced blobs must still be present
1541+
assertBlobExists(ociLayoutPath, Config.empty().getDigest());
1542+
assertBlobExists(ociLayoutPath, SupportedAlgorithm.SHA256.digest(artifactFile));
1543+
}
1544+
1545+
@Test
1546+
void shouldGarbageCollectKeepAllBlobsAfterMultipleManifests() throws IOException {
1547+
Path ociLayoutPath = layoutPath.resolve("gc-multi-manifest");
1548+
Path file1 = blobDir.resolve("gc-multi-manifest-1.txt");
1549+
Path file2 = blobDir.resolve("gc-multi-manifest-2.txt");
1550+
Files.writeString(file1, "first-artifact");
1551+
Files.writeString(file2, "second-artifact");
1552+
1553+
OCILayout ociLayout =
1554+
OCILayout.Builder.builder().defaults(ociLayoutPath).build();
1555+
1556+
LayoutRef ref1 = LayoutRef.parse("%s:v1".formatted(ociLayoutPath.toString()));
1557+
LayoutRef ref2 = LayoutRef.parse("%s:v2".formatted(ociLayoutPath.toString()));
1558+
1559+
ociLayout.pushArtifact(ref1, ArtifactType.from("foo/bar"), Annotations.empty(), LocalPath.of(file1));
1560+
ociLayout.pushArtifact(ref2, ArtifactType.from("foo/bar"), Annotations.empty(), LocalPath.of(file2));
1561+
1562+
List<String> removed = ociLayout.garbageCollect();
1563+
1564+
// Nothing should be removed — both artifacts are fully referenced
1565+
assertTrue(removed.isEmpty(), "Expected no blobs to be garbage collected with two valid manifests");
1566+
1567+
assertBlobExists(ociLayoutPath, SupportedAlgorithm.SHA256.digest(file1));
1568+
assertBlobExists(ociLayoutPath, SupportedAlgorithm.SHA256.digest(file2));
1569+
}
1570+
1571+
@Test
1572+
void shouldGarbageCollectKeepReferrerBlobsWhenCopiedDeep() throws IOException {
1573+
Path ociLayoutPath = layoutPath.resolve("gc-referrer");
1574+
Path mainFile = blobDir.resolve("gc-referrer-main.txt");
1575+
Path attachFile = blobDir.resolve("gc-referrer-attach.txt");
1576+
Files.writeString(mainFile, "main-artifact");
1577+
Files.writeString(attachFile, "attached-artifact");
1578+
1579+
LayoutRef layoutRef = LayoutRef.parse("%s:latest".formatted(ociLayoutPath.toString()));
1580+
OCILayout ociLayout =
1581+
OCILayout.Builder.builder().defaults(ociLayoutPath).build();
1582+
1583+
// Push main artifact
1584+
ociLayout.pushArtifact(
1585+
layoutRef, ArtifactType.from("foo/bar"), Annotations.empty(), LocalPath.of(mainFile, "text/plain"));
1586+
1587+
// Attach a referrer to the main artifact
1588+
ociLayout.attachArtifact(
1589+
layoutRef,
1590+
ArtifactType.from("application/referrer"),
1591+
Annotations.empty(),
1592+
LocalPath.of(attachFile, "text/plain"));
1593+
1594+
// No orphans — both main and referrer blobs are valid
1595+
List<String> removed = ociLayout.garbageCollect();
1596+
1597+
assertTrue(removed.isEmpty(), "Expected no blobs to be removed when referrers are properly referenced");
1598+
1599+
assertBlobExists(ociLayoutPath, SupportedAlgorithm.SHA256.digest(mainFile));
1600+
assertBlobExists(ociLayoutPath, SupportedAlgorithm.SHA256.digest(attachFile));
1601+
}
1602+
1603+
@Test
1604+
void shouldGarbageCollectOnEmptyLayout() {
1605+
Path ociLayoutPath = layoutPath.resolve("gc-empty");
1606+
OCILayout ociLayout =
1607+
OCILayout.Builder.builder().defaults(ociLayoutPath).build();
1608+
1609+
// An empty layout has no blobs at all — GC must return empty list without error
1610+
List<String> removed = ociLayout.garbageCollect();
1611+
1612+
assertTrue(removed.isEmpty(), "Expected no blobs to be removed from an empty layout");
1613+
}
1614+
1615+
@Test
1616+
void shouldGarbageCollectWithNestedIndex() throws IOException {
1617+
1618+
// Build a layout
1619+
Path ociLayoutPath = layoutPath.resolve("gc-nested-index");
1620+
Path file1 = blobDir.resolve("gc-nested-index-1.txt");
1621+
Path file2 = blobDir.resolve("gc-nested-index-2.txt");
1622+
Path orphanFile = blobDir.resolve("gc-nested-index-orphan.txt");
1623+
Files.writeString(file1, "nested-index-artifact-one");
1624+
Files.writeString(file2, "nested-index-artifact-two");
1625+
Files.writeString(orphanFile, "nested-index-orphan-content");
1626+
1627+
OCILayout ociLayout =
1628+
OCILayout.Builder.builder().defaults(ociLayoutPath).build();
1629+
1630+
// Push two independent manifests (without a top-level tag so they get digest-only entries)
1631+
LayoutRef ref1 = LayoutRef.parse("%s".formatted(ociLayoutPath.toString()));
1632+
LayoutRef ref2 = LayoutRef.parse("%s".formatted(ociLayoutPath.toString()));
1633+
Manifest manifest1 = ociLayout.pushArtifact(
1634+
ref1, ArtifactType.from("foo/bar"), Annotations.empty(), LocalPath.of(file1, "text/plain"));
1635+
Manifest manifest2 = ociLayout.pushArtifact(
1636+
ref2, ArtifactType.from("foo/bar"), Annotations.empty(), LocalPath.of(file2, "text/plain"));
1637+
1638+
// Group the two manifests into a nested index and push it.
1639+
assertNotNull(manifest1.getDescriptor(), "Manifest 1 descriptor should not be null");
1640+
assertNotNull(manifest2.getDescriptor(), "Manifest 2 descriptor should not be null");
1641+
Index nestedIndex = Index.fromManifests(List.of(manifest1.getDescriptor(), manifest2.getDescriptor()));
1642+
LayoutRef indexRef = LayoutRef.parse("%s:multi".formatted(ociLayoutPath.toString()));
1643+
Index pushedIndex = ociLayout.pushIndex(indexRef, nestedIndex);
1644+
1645+
// Collect the digests that must survive GC
1646+
assertNotNull(pushedIndex.getDescriptor(), "Pushed index descriptor should not be null");
1647+
String nestedIndexDigest = pushedIndex.getDescriptor().getDigest();
1648+
String manifest1Digest = manifest1.getDescriptor().getDigest();
1649+
String manifest2Digest = manifest2.getDescriptor().getDigest();
1650+
String layer1Digest = SupportedAlgorithm.SHA256.digest(file1);
1651+
String layer2Digest = SupportedAlgorithm.SHA256.digest(file2);
1652+
String configDigest = Config.empty().getDigest();
1653+
1654+
// Inject an orphaned blob directly on disk
1655+
Layer orphanedLayer =
1656+
ociLayout.pushBlob(indexRef.withDigest(SupportedAlgorithm.SHA256.digest(orphanFile)), orphanFile);
1657+
String orphanDigest = orphanedLayer.getDigest();
1658+
assertBlobExists(ociLayoutPath, orphanDigest);
1659+
1660+
// Run GC
1661+
List<String> removed = ociLayout.garbageCollect();
1662+
1663+
// Only the orphan must have been removed
1664+
assertEquals(1, removed.size(), "Expected exactly one blob to be garbage collected");
1665+
assertEquals(orphanDigest, removed.get(0));
1666+
assertBlobAbsent(ociLayoutPath, orphanDigest);
1667+
1668+
// The nested index blob itself must be kept (it is referenced from root index.json)
1669+
assertBlobExists(ociLayoutPath, nestedIndexDigest);
1670+
1671+
// Both manifests reachable via the nested index must be kept
1672+
assertBlobExists(ociLayoutPath, manifest1Digest);
1673+
assertBlobExists(ociLayoutPath, manifest2Digest);
1674+
1675+
// All layer blobs reached by recursing into the nested index must be kept
1676+
assertBlobExists(ociLayoutPath, layer1Digest);
1677+
assertBlobExists(ociLayoutPath, layer2Digest);
1678+
1679+
// Shared config blob must be kept
1680+
assertBlobExists(ociLayoutPath, configDigest);
1681+
}
14461682
}
0 Bytes
Binary file not shown.

src/test/resources/oci/subject.tar

0 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)