Skip to content

Commit 9c18b5b

Browse files
Miriadbuilder
andcommitted
fix: migration script downloads only originals, strips transformation params
- Add raw Cloudinary object detection (old-format docs without _type)
- Add stripTransformations() to remove Cloudinary URL params
- Add getOriginalUrl() to construct canonical URLs from public_id
- Prevents uploading derived variants (avif, webp, resized copies)

Re-run results: 433 clean originals uploaded (down from 6,970 with variants)

Co-authored-by: builder <builder@miriad.systems>
1 parent 7e5c775 commit 9c18b5b

File tree

3 files changed

+1068
-13
lines changed

3 files changed

+1068
-13
lines changed

scripts/migration/migrate.mjs

Lines changed: 74 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -309,11 +309,29 @@ function findCloudinaryRefs(obj, currentPath = '') {
309309
url: resolvedUrl,
310310
publicId: publicId,
311311
resourceType: obj.resource_type || 'image',
312+
format: obj.format || null,
312313
});
313314
}
314315
return results; // Don't recurse into cloudinary.asset children
315316
}
316317

318+
// Check for raw Cloudinary objects (old format without _type)
319+
if (typeof obj === 'object' && !Array.isArray(obj) && obj.public_id && (obj.secure_url || obj.url) && !obj._type) {
320+
const url = obj.secure_url || obj.url || null;
321+
const publicId = obj.public_id;
322+
if (url) {
323+
results.push({
324+
path: currentPath,
325+
type: 'raw-cloudinary-object',
326+
url: url,
327+
publicId: publicId,
328+
resourceType: obj.resource_type || 'image',
329+
format: obj.format || null,
330+
});
331+
}
332+
return results; // Don't recurse into raw Cloudinary object children (derived[], etc.)
333+
}
334+
317335
if (typeof obj === 'string') {
318336
if (containsCloudinaryRef(obj)) {
319337
// Skip URLs that are inside cloudinary.asset sub-fields (derived, url, secure_url)
@@ -426,6 +444,7 @@ async function phase1_discoverReferences(sanityClient) {
426444
log(1, `Found ${docsWithRefs.length} documents with Cloudinary references`);
427445

428446
let cloudinaryAssetCount = 0;
447+
let rawCloudinaryCount = 0;
429448
let urlCount = 0;
430449
let embeddedCount = 0;
431450

@@ -434,12 +453,13 @@ async function phase1_discoverReferences(sanityClient) {
434453
for (const r of d.refs) {
435454
log(1, ` ${r.path} [${r.type}] → ${r.url || r.publicId || '(no url)'}`);
436455
if (r.type === 'cloudinary.asset') cloudinaryAssetCount++;
456+
else if (r.type === 'raw-cloudinary-object') rawCloudinaryCount++;
437457
else if (r.type === 'url') urlCount++;
438458
else if (r.type === 'embedded') embeddedCount++;
439459
}
440460
}
441461

442-
log(1, `\n Breakdown: ${cloudinaryAssetCount} cloudinary.asset objects, ${urlCount} URL fields, ${embeddedCount} embedded URLs`);
462+
log(1, `\n Breakdown: ${cloudinaryAssetCount} cloudinary.asset objects, ${rawCloudinaryCount} raw Cloudinary objects, ${urlCount} URL fields, ${embeddedCount} embedded URLs`);
443463

444464
// Save to disk for resume
445465
if (!DRY_RUN) {
@@ -450,6 +470,26 @@ async function phase1_discoverReferences(sanityClient) {
450470
return docsWithRefs;
451471
}
452472

473+
// ─── Utility: strip Cloudinary transformations from URL ──────────────────────
474+
/**
 * Remove Cloudinary transformation segments from a delivery URL.
 *
 * Cloudinary URL format: .../upload/[transformations/]v{version}/{public_id}.{ext}
 *
 * Everything between `/upload/` and the `v{version}/` segment is dropped, but
 * only when every segment in between looks like a transformation: a
 * comma-separated list of `param_value` components such as `w_500`,
 * `q_auto:best`, `dpr_2.0`, `x_-10` or `$size_300`. Requiring the
 * `param_value` shape keeps a leading version (`v1/`) or a plain folder name
 * from being mistaken for a transformation and removed.
 *
 * URLs without a `v{version}/` segment are returned unchanged: without the
 * version marker a transformation cannot be told apart from a folder name.
 *
 * @param {string} url - Cloudinary delivery URL.
 * @returns {string} The URL with transformation segments removed; non-string
 *   input is returned as-is instead of throwing.
 */
function stripTransformations(url) {
  if (typeof url !== 'string') return url;

  // One or more "param_value[,param_value...]/" segments directly after
  // /upload/, followed by the version segment. Values may contain any
  // character except "/" and "," (decimals, negatives, colons, %-escapes).
  const transformationsBeforeVersion =
    /(\/upload\/)(?:(?:\$[a-z0-9]+|[a-z]{1,4})_[^\/,]+(?:,(?:\$[a-z0-9]+|[a-z]{1,4})_[^\/,]+)*\/)+(v\d+\/)/i;

  return url.replace(transformationsBeforeVersion, '$1$2');
}
482+
483+
// ─── Utility: get canonical original URL for a Cloudinary reference ──────────
484+
/**
 * Build the canonical "original" URL for a Cloudinary reference.
 *
 * When the reference carries both a public ID and a resource type, the URL is
 * constructed directly on the media.codingcat.dev host so that no
 * transformation parameters are involved. Otherwise the reference's own URL
 * is used with its transformations stripped.
 *
 * @param {{publicId?: string, resourceType?: string, format?: string|null, url?: string}} ref
 *   A discovered Cloudinary reference.
 * @returns {string} URL of the untransformed asset.
 */
function getOriginalUrl(ref) {
  if (ref.publicId && ref.resourceType) {
    const base = `https://media.codingcat.dev/${ref.resourceType}/upload/${ref.publicId}`;

    if (ref.resourceType === 'raw') {
      // Raw assets keep their file extension inside the public_id, so guessing
      // one would produce e.g. "notes.txt.png". Only append an explicitly
      // provided format that is not already present.
      const hasFormat = Boolean(ref.format) && !ref.publicId.endsWith(`.${ref.format}`);
      return hasFormat ? `${base}.${ref.format}` : base;
    }

    // Image/video public IDs carry no extension; fall back to a default one
    // when the reference did not record its format.
    const ext = ref.format || (ref.resourceType === 'video' ? 'mp4' : 'png');
    return `${base}.${ext}`;
  }
  // Fallback: strip transformations from the URL
  return stripTransformations(ref.url);
}
492+
453493
// ═══════════════════════════════════════════════════════════════════════════════
454494
// PHASE 2: Extract Unique Cloudinary URLs
455495
// ═══════════════════════════════════════════════════════════════════════════════
@@ -469,20 +509,22 @@ async function phase2_extractUniqueUrls(docsWithRefs) {
469509

470510
for (const doc of docsWithRefs) {
471511
for (const ref of doc.refs) {
472-
const url = ref.url;
473-
if (!url) continue;
512+
if (!ref.url) continue;
474513

475-
if (urlMap.has(url)) {
514+
// Get the canonical original URL (strips transformations, uses CNAME)
515+
const originalUrl = getOriginalUrl(ref);
516+
517+
if (urlMap.has(originalUrl)) {
476518
// Add this doc as another source
477-
const entry = urlMap.get(url);
519+
const entry = urlMap.get(originalUrl);
478520
if (!entry.sourceDocIds.includes(doc._id)) {
479521
entry.sourceDocIds.push(doc._id);
480522
}
481523
} else {
482-
urlMap.set(url, {
483-
cloudinaryUrl: url,
484-
cloudinaryPublicId: ref.publicId || extractPublicIdFromUrl(url),
485-
resourceType: ref.resourceType || guessResourceType(url),
524+
urlMap.set(originalUrl, {
525+
cloudinaryUrl: originalUrl,
526+
cloudinaryPublicId: ref.publicId || extractPublicIdFromUrl(originalUrl),
527+
resourceType: ref.resourceType || guessResourceType(originalUrl),
486528
sourceDocIds: [doc._id],
487529
});
488530
}
@@ -634,11 +676,24 @@ async function phase3_downloadAndUpload(uniqueUrls) {
634676
/**
635677
* Given a Cloudinary URL, find the matching Sanity asset in the mapping.
636678
*/
637-
function findMappingForUrl(url, mapping) {
679+
function findMappingForUrl(url, mapping, refPublicId) {
638680
// Try exact URL match first
639681
let entry = mapping.find((m) => m.cloudinaryUrl === url);
640682
if (entry) return entry;
641683

684+
// Try matching by the ref's own publicId (from the Cloudinary object)
685+
if (refPublicId) {
686+
entry = mapping.find((m) => m.cloudinaryPublicId === refPublicId);
687+
if (entry) return entry;
688+
}
689+
690+
// Try matching by stripped/canonical URL
691+
const strippedUrl = stripTransformations(url);
692+
if (strippedUrl !== url) {
693+
entry = mapping.find((m) => m.cloudinaryUrl === strippedUrl);
694+
if (entry) return entry;
695+
}
696+
642697
// Try matching by public_id extracted from the URL
643698
const publicId = extractPublicIdFromUrl(url);
644699
if (publicId) {
@@ -694,7 +749,7 @@ async function phase4_updateReferences(sanityClient, docsWithRefs, mapping) {
694749
continue;
695750
}
696751

697-
const mappingEntry = findMappingForUrl(refUrl, mapping);
752+
const mappingEntry = findMappingForUrl(refUrl, mapping, ref.publicId);
698753

699754
if (!mappingEntry) {
700755
log(4, ` ⚠ No mapping found for URL: ${refUrl} (in ${docId} at ${fieldPath})`);
@@ -705,8 +760,8 @@ async function phase4_updateReferences(sanityClient, docsWithRefs, mapping) {
705760
const sanityId = mappingEntry.sanityAssetId;
706761
const cdnUrl = mappingEntry.sanityUrl || sanityAssetUrl(sanityId);
707762

708-
if (refType === 'cloudinary.asset') {
709-
// ── Replace entire cloudinary.asset object with Sanity image/file reference ──
763+
if (refType === 'cloudinary.asset' || refType === 'raw-cloudinary-object') {
764+
// ── Replace entire cloudinary.asset or raw Cloudinary object with Sanity image/file reference ──
710765
const isImage = (ref.resourceType || 'image') === 'image';
711766
const refObj = isImage
712767
? {
@@ -830,6 +885,10 @@ async function phase5_report(docsWithRefs, uniqueUrls, mapping, changes) {
830885
(sum, d) => sum + d.refs.filter((r) => r.type === 'cloudinary.asset').length,
831886
0
832887
);
888+
const rawCloudinaryRefs = docsWithRefs.reduce(
889+
(sum, d) => sum + d.refs.filter((r) => r.type === 'raw-cloudinary-object').length,
890+
0
891+
);
833892
const urlRefs = docsWithRefs.reduce(
834893
(sum, d) => sum + d.refs.filter((r) => r.type === 'url').length,
835894
0
@@ -846,6 +905,7 @@ async function phase5_report(docsWithRefs, uniqueUrls, mapping, changes) {
846905
totalDocumentsWithRefs: docsWithRefs.length,
847906
totalReferencesFound: totalRefs,
848907
cloudinaryAssetObjects: cloudinaryAssetRefs,
908+
rawCloudinaryObjects: rawCloudinaryRefs,
849909
urlStringRefs: urlRefs,
850910
embeddedUrlRefs: embeddedRefs,
851911
uniqueCloudinaryUrls: uniqueUrls.length,
@@ -864,6 +924,7 @@ async function phase5_report(docsWithRefs, uniqueUrls, mapping, changes) {
864924
console.log(` Documents with refs: ${report.summary.totalDocumentsWithRefs}`);
865925
console.log(` Total references found: ${report.summary.totalReferencesFound}`);
866926
console.log(` cloudinary.asset objects: ${report.summary.cloudinaryAssetObjects}`);
927+
console.log(` raw Cloudinary objects: ${report.summary.rawCloudinaryObjects}`);
867928
console.log(` URL string fields: ${report.summary.urlStringRefs}`);
868929
console.log(` Embedded URLs in text: ${report.summary.embeddedUrlRefs}`);
869930
console.log(` Unique Cloudinary URLs: ${report.summary.uniqueCloudinaryUrls}`);
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
╔══════════════════════════════════════════════════════════╗
2+
║ Cloudinary → Sanity Asset Migration (Sanity-First) ║
3+
╚══════════════════════════════════════════════════════════╝
4+
5+
[Phase 1] ── Discovering Cloudinary references in Sanity documents ──
6+
(node:15211) [DEP0040] DeprecationWarning: The `punycode` module is deprecated. Please use a userland alternative instead.
7+
(Use `node --trace-deprecation ...` to show where the warning was created)
8+
[Phase 1] Found cached discovery with 451 documents. Delete /home/daytona/codingcat.dev/scripts/migration/discovered-references.json to re-scan.
9+
[Phase 2] ── Extracting unique Cloudinary URLs ──
10+
[Phase 2] Found cached URL list with 436 unique URLs. Delete /home/daytona/codingcat.dev/scripts/migration/unique-cloudinary-urls.json to re-extract.
11+
[Phase 3] ── Downloading & uploading assets to Sanity ──
12+
[Phase 3] Concurrency: 3 | Total unique URLs: 436
13+
[Phase 3] Skipping 388 already-migrated assets
14+
[Phase 3] Assets to migrate: 48
15+
✗ [1/48] Failed: b_rgb:5e1186,c_pad,w_1000,h_420/$%7Bpage — Download failed: HTTP 404 for https://media.codingcat.dev/image/upload/b_rgb:5e1186,c_pad,w_1000,h_420/${page?.coverPhoto?.public_id}`,
16+
✗ [2/48] Failed: w/_500/v1556553295/ajonp-ajonp-com/18-rxfire-svelte-cats/RxFire/_Svelt — Download failed: HTTP 400 for https://res.cloudinary.com/ajonp/image/upload/w\_500/v1556553295/ajonp-ajonp-com/18-rxfire-svelte-cats/RxFire\_Svelt.webp
17+
✗ [3/48] Failed: q/_auto/ajonp-ajonp-com/17-rxfire-react-cats/RxFire/_3 — Download failed: HTTP 404 for https://res.cloudinary.com/ajonp/image/upload/q\_auto/ajonp-ajonp-com/17-rxfire-react-cats/RxFire\_3.webp
18+
[Phase 3] [1/48] ✓ main-codingcatdev-photo/fosawiikzx30ajcilo2a → image-5febb50ae39284c22bdb43dc525ae5fdaedbaa9a-1920x1080-png
19+
[Phase 3] [2/48] ✓ main-codingcatdev-photo/q2eng4nciqybq8clwg6k → image-2d68f449c62af8ea1ac6f369c8e9c5f99777f574-1920x1080-png
20+
[Phase 3] [3/48] ✓ main-codingcatdev-photo/dz2owgunrjb8wa7vzzbu → image-7d3977f54bf16c87c46a1beef484cada62c16923-1920x1080-png
21+
[Phase 3] [4/48] ✓ main-codingcatdev-photo/ome6ihlaksocf2rtzfhe → image-c010f3b5b43aaa88556f5fb6123d61f2ffa01b01-1920x1080-png
22+
[Phase 3] [5/48] ✓ main-codingcatdev-photo/x8ncnxweooiat7vzpwke → image-2e3a02b625146e6099fde4c9dff438a1933e1b4d-1920x1080-png
23+
[Phase 3] [6/48] ✓ main-codingcatdev-photo/veso6actvfkxmpkcid8f → image-9e5538fac8ed46b66e3e3e50d1347669c826a7d9-1920x1080-png
24+
[Phase 3] [7/48] ✓ main-codingcatdev-photo/ni4t43qfcdab90gojuve → image-2fea4cd44d8d8fd09f3f3ef0af1a01072103f69d-1920x1080-png
25+
[Phase 3] [8/48] ✓ main-codingcatdev-photo/zbahldu0x4ihuimeczfq → image-212028e3fef8f64e9471806d7d26d0999a93063e-1920x1080-png
26+
[Phase 3] [9/48] ✓ main-codingcatdev-photo/zrgetssjgnguqj4yclfp → image-da2928acb25d72f7023b4d120c5c1daf6aca3e34-1920x1080-png
27+
[Phase 3] [10/48] ✓ main-codingcatdev-photo/u8at848k5o9mdgnpxv5k → image-3afec45da64654865d96bd2673880ca6a2e16a74-1920x1080-png
28+
[Phase 3] [11/48] ✓ main-codingcatdev-photo/vbex1zxomeoo0wjzyhhr → image-97e9213b6bff5259b7f12fc69908f300fcee99c5-1920x1080-png
29+
[Phase 3] [12/48] ✓ main-codingcatdev-photo/ogluu84watt3zu63gbf8 → image-f3e1a12bddee913446b424e66fbbd75220fdb5a6-1920x1080-png
30+
[Phase 3] [13/48] ✓ main-codingcatdev-photo/vz7ramuqpbyhcu3azajy → image-9be71c92a88bc5fb120fa8dcc3ef8f1b6581ecb7-1920x1080-png
31+
[Phase 3] [14/48] ✓ main-codingcatdev-photo/tvksoc43u6exibz6fmzv → image-eb3c49cfe38d7a95e1d8a47289387b571f697e02-1920x1080-png
32+
[Phase 3] [15/48] ✓ main-codingcatdev-photo/csxroq0lxevn4zbqdqks → image-ecc37088c7cd9ce37c1790a8fc6393c956271cf6-1920x1080-png
33+
[Phase 3] [16/48] ✓ main-codingcatdev-photo/pvjydzcbs39pwocebmsd → image-ee6b49a7ff8b7966607a7e397a1b0bb91a3b742c-1920x1080-png
34+
[Phase 3] [17/48] ✓ main-codingcatdev-photo/eeyfwyuldrn87o1i59ji → image-ba7df7dd2d5e5ad0a058f96583bcfa2664b21d44-1920x1080-png
35+
[Phase 3] [18/48] ✓ main-codingcatdev-photo/stdloblfbnlgf4pm3ze5 → image-0f0c46e79995ee69acf9a45fde9165e8319642d3-1920x1080-png
36+
[Phase 3] [19/48] ✓ main-codingcatdev-photo/hyopbplzvjnobn4nxe3v → image-135fd1bd7295a7173ec25208317befeb9fda3ab6-1920x1080-png
37+
[Phase 3] [20/48] ✓ main-codingcatdev-photo/inhxfdhdrfjmnvcfzwyi → image-3933e331d455eb073f0105de1faf0588ff478a3f-1920x1080-png
38+
[Phase 3] [21/48] ✓ main-codingcatdev-photo/vwb6zojoyln2d6oxudz8 → image-368eb23766409dbbf6025172f5e7178875833875-1920x1080-png
39+
[Phase 3] [22/48] ✓ main-codingcatdev-photo/skkbgf5bix76a04zhzqp → image-c0ceae6b08b279593092c23b3de9ca33c50cbcfc-1920x1080-png
40+
[Phase 3] [23/48] ✓ main-codingcatdev-photo/ecqrelydm7ykl8xup5xg → image-628c0895869e7f3e49a9253a5c5f72f2b8ffecb9-1920x1080-png
41+
[Phase 3] [24/48] ✓ main-codingcatdev-photo/wveq4jasspmsywiqnglu → image-ca193facbbc516ab21b53ab5bf94d8a48c2d007d-1920x1080-png
42+
[Phase 3] [25/48] ✓ main-codingcatdev-photo/pjkucvrzkdfkjyqa1nwu → image-53e9ff3d86e5cefe91650bcaaf1bdb04232aa619-1920x1080-png
43+
[Phase 3] [26/48] ✓ main-codingcatdev-photo/hn8dumtsubllbz9xyqh6 → image-a153842b3a5b54b7f72326ea0027885ec4a1493e-1920x1080-png
44+
[Phase 3] [27/48] ✓ main-codingcatdev-photo/cpg2clvczhzzvetwruul → image-9723fa4063e2cdf122862c1bdbd95225bc7ce5cd-1920x1080-png
45+
[Phase 3] [28/48] ✓ main-codingcatdev-photo/omzc5ridsuuxgxgtvt7o → image-995ecf3d8e7e286b1d3650c4ef34c8f1a87df7f7-1920x1080-png
46+
[Phase 3] [29/48] ✓ main-codingcatdev-photo/i6qzbmbxegit9nebc44s → image-933224e9d3b7fd471896c99dcdf340de2d6a42fc-1920x1080-png
47+
[Phase 3] [30/48] ✓ main-codingcatdev-photo/b2ryikx5b9x5dq27anok → image-2691a24b52e6a655f49338a9602998b9d2e2d9c1-1920x1080-png
48+
[Phase 3] [31/48] ✓ main-codingcatdev-photo/oogq3stsiqvbzsswaatr → image-149aa0d3ba55935a4bfb8c84913643403d424458-1920x1080-png
49+
[Phase 3] [32/48] ✓ main-codingcatdev-photo/ilnwzjko76hr0lddwxk5 → image-a2e8a72e363dcb5866adebe2b62d47cb09425321-1920x1080-png
50+
[Phase 3] [33/48] ✓ main-codingcatdev-photo/scjp26pt4hdxicpvsebs → image-f74d827f531e688c8ac022cdf695bdebb067ab34-1920x1080-png
51+
[Phase 3] [34/48] ✓ main-codingcatdev-photo/ygqhzxyhtfpfilzskglf → image-05af6c518affbc8643ef1db5e67afaa65a4a0d61-1920x1080-png
52+
[Phase 3] [35/48] ✓ main-codingcatdev-photo/ilpfshoaxdhnwemlsfld → image-0753d61f0436a4d52a78c893aaabdee468aac58c-1920x1080-png
53+
[Phase 3] [36/48] ✓ main-codingcatdev-photo/mfzmkc6ohyyuhmgfdmru → image-e43fbdceae2ad533c194fae83cbf21f99115cf2e-1920x1080-png

0 commit comments

Comments
 (0)