Skip to content

Commit 7b1f3f3

Browse files
Miriadbuilder authored and committed
chore: add orphan asset cleanup script
Deletes unreferenced Sanity assets left over from migration. Safety: checks document references before deleting, preserves all active assets. Supports --dry-run mode. Co-authored-by: builder <builder@miriad.systems>
1 parent 9c18b5b commit 7b1f3f3

File tree

1 file changed

+165
-0
lines changed

1 file changed

+165
-0
lines changed
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
import { createClient } from '@sanity/client';
2+
import dotenv from 'dotenv';
3+
import fs from 'fs';
4+
5+
dotenv.config();
6+
7+
const client = createClient({
8+
projectId: process.env.NEXT_PUBLIC_SANITY_PROJECT_ID,
9+
dataset: process.env.NEXT_PUBLIC_SANITY_DATASET || 'dev',
10+
apiVersion: '2024-01-01',
11+
token: process.env.SANITY_API_WRITE_TOKEN,
12+
useCdn: false,
13+
});
14+
15+
const DRY_RUN = process.argv.includes('--dry-run');
16+
17+
async function main() {
18+
console.log(`\n🧹 Orphan Asset Cleanup${DRY_RUN ? ' (DRY RUN)' : ''}\n`);
19+
20+
// 1. Load the current mapping to get the set of GOOD asset IDs
21+
const mapping = JSON.parse(fs.readFileSync('asset-mapping.json', 'utf-8'));
22+
const activeAssetIds = new Set(mapping.map(m => m.sanityAssetId));
23+
console.log(`Active assets (keep): ${activeAssetIds.size}`);
24+
25+
// 2. Query ALL image and file assets from Sanity
26+
// Use pagination to handle large datasets
27+
let allAssets = [];
28+
let lastId = '';
29+
30+
while (true) {
31+
const batch = await client.fetch(
32+
`*[_type in ["sanity.imageAsset", "sanity.fileAsset"] && _id > $lastId] | order(_id) [0...1000] { _id, _type, originalFilename, size }`,
33+
{ lastId }
34+
);
35+
if (batch.length === 0) break;
36+
allAssets = allAssets.concat(batch);
37+
lastId = batch[batch.length - 1]._id;
38+
console.log(` Fetched ${allAssets.length} assets so far...`);
39+
}
40+
41+
console.log(`Total assets in Sanity: ${allAssets.length}`);
42+
43+
// 3. Separate into mapped (active) and candidates for deletion
44+
const candidates = [];
45+
const keptByMapping = [];
46+
47+
for (const asset of allAssets) {
48+
if (activeAssetIds.has(asset._id)) {
49+
keptByMapping.push(asset._id);
50+
} else {
51+
candidates.push(asset);
52+
}
53+
}
54+
55+
console.log(`\nAssets in mapping (auto-keep): ${keptByMapping.length}`);
56+
console.log(`Candidates to check for references: ${candidates.length}`);
57+
58+
// 4. Batch-check references for candidates
59+
// Query all non-asset documents that reference any asset, then build a set of referenced asset IDs
60+
console.log(`\nChecking which candidates are referenced by documents...`);
61+
62+
// Get all asset IDs that are referenced by at least one non-asset document
63+
// We do this by querying documents (not assets) and extracting their asset references
64+
const referencedAssetIds = new Set();
65+
66+
// Check in batches of 50 candidates at a time using parallel queries
67+
const BATCH_SIZE = 50;
68+
let checked = 0;
69+
70+
for (let i = 0; i < candidates.length; i += BATCH_SIZE) {
71+
const batch = candidates.slice(i, i + BATCH_SIZE);
72+
const ids = batch.map(a => a._id);
73+
74+
// For each batch, check which IDs have references
75+
const results = await Promise.all(
76+
ids.map(id =>
77+
client.fetch(`count(*[references($id)])`, { id })
78+
.then(count => ({ id, count }))
79+
)
80+
);
81+
82+
for (const { id, count } of results) {
83+
if (count > 0) {
84+
referencedAssetIds.add(id);
85+
}
86+
}
87+
88+
checked += batch.length;
89+
if (checked % 200 === 0 || checked === candidates.length) {
90+
console.log(` Checked ${checked}/${candidates.length} candidates (${referencedAssetIds.size} referenced so far)...`);
91+
}
92+
}
93+
94+
// 5. Build final orphan list
95+
const orphans = [];
96+
const kept = [...keptByMapping];
97+
98+
for (const asset of candidates) {
99+
if (referencedAssetIds.has(asset._id)) {
100+
console.log(` ⚠️ Keeping ${asset._id} — referenced by document(s)`);
101+
kept.push(asset._id);
102+
} else {
103+
orphans.push(asset);
104+
}
105+
}
106+
107+
console.log(`\nOrphans to delete: ${orphans.length}`);
108+
console.log(`Assets to keep: ${kept.length}`);
109+
110+
// 6. Delete orphans in batches
111+
if (orphans.length === 0) {
112+
console.log('No orphans found! 🎉');
113+
return;
114+
}
115+
116+
if (DRY_RUN) {
117+
console.log('\n🔍 DRY RUN — would delete these orphans:');
118+
// Just show first 20
119+
for (const orphan of orphans.slice(0, 20)) {
120+
console.log(` ${orphan._id} (${(orphan.size / 1024).toFixed(1)} KB)`);
121+
}
122+
if (orphans.length > 20) {
123+
console.log(` ... and ${orphans.length - 20} more`);
124+
}
125+
const totalSize = orphans.reduce((sum, o) => sum + (o.size || 0), 0);
126+
console.log(`\nTotal space to reclaim: ${(totalSize / 1024 / 1024).toFixed(1)} MB`);
127+
return;
128+
}
129+
130+
// Delete in batches of 100 using transactions
131+
const DEL_BATCH_SIZE = 100;
132+
let deleted = 0;
133+
134+
for (let i = 0; i < orphans.length; i += DEL_BATCH_SIZE) {
135+
const batch = orphans.slice(i, i + DEL_BATCH_SIZE);
136+
const tx = client.transaction();
137+
138+
for (const orphan of batch) {
139+
tx.delete(orphan._id);
140+
}
141+
142+
try {
143+
await tx.commit();
144+
deleted += batch.length;
145+
console.log(` Deleted ${deleted}/${orphans.length} orphans...`);
146+
} catch (err) {
147+
console.error(` Error deleting batch: ${err.message}`);
148+
// Try one by one for this batch
149+
for (const orphan of batch) {
150+
try {
151+
await client.delete(orphan._id);
152+
deleted++;
153+
} catch (e) {
154+
console.error(` Failed to delete ${orphan._id}: ${e.message}`);
155+
}
156+
}
157+
}
158+
}
159+
160+
console.log(`\n✅ Deleted ${deleted} orphan assets`);
161+
const totalSize = orphans.reduce((sum, o) => sum + (o.size || 0), 0);
162+
console.log(`Space reclaimed: ${(totalSize / 1024 / 1024).toFixed(1)} MB`);
163+
}
164+
165+
main().catch(console.error);

0 commit comments

Comments
 (0)