|
import { createClient } from '@sanity/client';
import dotenv from 'dotenv';
import fs from 'fs';

// Populate process.env from a local .env file before any setting is read.
dotenv.config();

// Sanity client used for every query and mutation in this script.
// The CDN is bypassed so reads go to the live API rather than a cached copy.
const client = createClient({
  apiVersion: '2024-01-01',
  useCdn: false,
  projectId: process.env.NEXT_PUBLIC_SANITY_PROJECT_ID,
  // An unset or empty dataset variable falls back to 'dev'.
  dataset: process.env.NEXT_PUBLIC_SANITY_DATASET || 'dev',
  token: process.env.SANITY_API_WRITE_TOKEN,
});

// True when the script was started with `--dry-run`: report only, delete nothing.
const DRY_RUN = process.argv.some((arg) => arg === '--dry-run');
| 16 | + |
/**
 * Read `asset-mapping.json` (relative to the current working directory) and
 * collect the asset IDs that must always be kept.
 *
 * Entries without a `sanityAssetId` are skipped so they neither pollute the
 * set nor inflate the reported count.
 *
 * @returns {Set<string>} IDs listed in the mapping file.
 * @throws {TypeError} If the file does not contain a JSON array.
 * @throws {Error} If the file cannot be read or is not valid JSON.
 */
function loadActiveAssetIds() {
  const mapping = JSON.parse(fs.readFileSync('asset-mapping.json', 'utf-8'));
  if (!Array.isArray(mapping)) {
    throw new TypeError('asset-mapping.json must contain a JSON array of mapping entries');
  }

  const ids = new Set();
  for (const entry of mapping) {
    if (entry && entry.sanityAssetId) {
      ids.add(entry.sanityAssetId);
    }
  }
  return ids;
}

/**
 * Fetch every image and file asset document, 1000 at a time.
 *
 * Pages are keyed on `_id` (each query asks for IDs greater than the last one
 * seen, ordered by `_id`) rather than on a numeric offset.
 *
 * @returns {Promise<Array<{_id: string, _type: string, originalFilename?: string, size?: number}>>}
 */
async function fetchAllAssets() {
  const assets = [];
  let lastId = '';

  while (true) {
    const page = await client.fetch(
      `*[_type in ["sanity.imageAsset", "sanity.fileAsset"] && _id > $lastId] | order(_id) [0...1000] { _id, _type, originalFilename, size }`,
      { lastId }
    );
    if (page.length === 0) break;

    assets.push(...page);
    lastId = page[page.length - 1]._id;
    console.log(`  Fetched ${assets.length} assets so far...`);
  }

  return assets;
}

/**
 * Determine which candidate assets are referenced by at least one document.
 *
 * One `count(*[references($id)])` query is issued per candidate; queries run
 * in parallel groups of 50 so the number of in-flight requests stays bounded.
 * The query counts every referencing document, whatever its type.
 *
 * @param {Array<{_id: string}>} candidates Assets not covered by the mapping file.
 * @returns {Promise<Set<string>>} IDs of candidates with one or more references.
 */
async function findReferencedAssetIds(candidates) {
  const BATCH_SIZE = 50;
  const referenced = new Set();

  for (let i = 0; i < candidates.length; i += BATCH_SIZE) {
    const ids = candidates.slice(i, i + BATCH_SIZE).map((asset) => asset._id);
    const counts = await Promise.all(
      ids.map((id) => client.fetch(`count(*[references($id)])`, { id }))
    );

    ids.forEach((id, idx) => {
      if (counts[idx] > 0) {
        referenced.add(id);
      }
    });

    const checked = i + ids.length;
    if (checked % 200 === 0 || checked === candidates.length) {
      console.log(`  Checked ${checked}/${candidates.length} candidates (${referenced.size} referenced so far)...`);
    }
  }

  return referenced;
}

/**
 * Delete the given assets, 100 per transaction.
 *
 * When a transaction fails, the same batch is retried one document at a time
 * so that a single undeletable asset does not block the rest of its batch.
 * Individual failures are logged and skipped.
 *
 * @param {Array<{_id: string, size?: number}>} orphans Assets to delete.
 * @returns {Promise<Array<{_id: string, size?: number}>>} The assets that were actually deleted.
 */
async function deleteOrphans(orphans) {
  const DEL_BATCH_SIZE = 100;
  const deleted = [];

  for (let i = 0; i < orphans.length; i += DEL_BATCH_SIZE) {
    const batch = orphans.slice(i, i + DEL_BATCH_SIZE);
    const tx = client.transaction();
    for (const orphan of batch) {
      tx.delete(orphan._id);
    }

    try {
      await tx.commit();
      deleted.push(...batch);
      console.log(`  Deleted ${deleted.length}/${orphans.length} orphans...`);
    } catch (err) {
      console.error(`  Error deleting batch: ${err.message}`);
      // Try one by one for this batch
      for (const orphan of batch) {
        try {
          await client.delete(orphan._id);
          deleted.push(orphan);
        } catch (e) {
          console.error(`  Failed to delete ${orphan._id}: ${e.message}`);
        }
      }
    }
  }

  return deleted;
}

/**
 * Find and delete orphaned Sanity assets.
 *
 * An asset is kept when its ID appears in `asset-mapping.json` or when at
 * least one document references it; every other image/file asset is treated
 * as an orphan. With `--dry-run` the orphans are only listed (first 20) along
 * with the total size that would be reclaimed.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log(`\n🧹 Orphan Asset Cleanup${DRY_RUN ? ' (DRY RUN)' : ''}\n`);

  // 1. Load the current mapping to get the set of GOOD asset IDs
  const activeAssetIds = loadActiveAssetIds();
  console.log(`Active assets (keep): ${activeAssetIds.size}`);

  // 2. Query ALL image and file assets from Sanity
  const allAssets = await fetchAllAssets();
  console.log(`Total assets in Sanity: ${allAssets.length}`);

  // 3. Separate into mapped (active) and candidates for deletion
  const keptByMapping = [];
  const candidates = [];
  for (const asset of allAssets) {
    if (activeAssetIds.has(asset._id)) {
      keptByMapping.push(asset._id);
    } else {
      candidates.push(asset);
    }
  }

  console.log(`\nAssets in mapping (auto-keep): ${keptByMapping.length}`);
  console.log(`Candidates to check for references: ${candidates.length}`);

  // 4. Check which candidates are still referenced
  console.log(`\nChecking which candidates are referenced by documents...`);
  const referencedAssetIds = await findReferencedAssetIds(candidates);

  // 5. Build final orphan list
  const orphans = [];
  const kept = [...keptByMapping];
  for (const asset of candidates) {
    if (referencedAssetIds.has(asset._id)) {
      console.log(`  ⚠️  Keeping ${asset._id} — referenced by document(s)`);
      kept.push(asset._id);
    } else {
      orphans.push(asset);
    }
  }

  console.log(`\nOrphans to delete: ${orphans.length}`);
  console.log(`Assets to keep: ${kept.length}`);

  if (orphans.length === 0) {
    console.log('No orphans found! 🎉');
    return;
  }

  const sumSizes = (assets) => assets.reduce((sum, asset) => sum + (asset.size || 0), 0);
  const toMegabytes = (bytes) => (bytes / 1024 / 1024).toFixed(1);

  if (DRY_RUN) {
    console.log('\n🔍 DRY RUN — would delete these orphans:');
    // Just show first 20
    for (const orphan of orphans.slice(0, 20)) {
      // Guard against a missing size so the listing never prints "NaN KB".
      console.log(`  ${orphan._id} (${((orphan.size || 0) / 1024).toFixed(1)} KB)`);
    }
    if (orphans.length > 20) {
      console.log(`  ... and ${orphans.length - 20} more`);
    }
    console.log(`\nTotal space to reclaim: ${toMegabytes(sumSizes(orphans))} MB`);
    return;
  }

  // 6. Delete orphans in batches
  const deletedAssets = await deleteOrphans(orphans);

  console.log(`\n✅ Deleted ${deletedAssets.length} orphan assets`);
  const failedCount = orphans.length - deletedAssets.length;
  if (failedCount > 0) {
    console.error(`⚠️  ${failedCount} orphan asset(s) could not be deleted — see errors above`);
  }
  // Only count assets that were really deleted; failed deletions reclaim nothing.
  console.log(`Space reclaimed: ${toMegabytes(sumSizes(deletedAssets))} MB`);
}
| 164 | + |
// Run the cleanup. On failure, log the error and exit non-zero so that shells
// and CI pipelines can tell the run did not succeed.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
0 commit comments