@@ -309,11 +309,29 @@ function findCloudinaryRefs(obj, currentPath = '') {
309309 url : resolvedUrl ,
310310 publicId : publicId ,
311311 resourceType : obj . resource_type || 'image' ,
312+ format : obj . format || null ,
312313 } ) ;
313314 }
314315 return results ; // Don't recurse into cloudinary.asset children
315316 }
316317
318+ // Check for raw Cloudinary objects (old format without _type)
319+ if ( typeof obj === 'object' && ! Array . isArray ( obj ) && obj . public_id && ( obj . secure_url || obj . url ) && ! obj . _type ) {
320+ const url = obj . secure_url || obj . url || null ;
321+ const publicId = obj . public_id ;
322+ if ( url ) {
323+ results . push ( {
324+ path : currentPath ,
325+ type : 'raw-cloudinary-object' ,
326+ url : url ,
327+ publicId : publicId ,
328+ resourceType : obj . resource_type || 'image' ,
329+ format : obj . format || null ,
330+ } ) ;
331+ }
332+ return results ; // Don't recurse into raw Cloudinary object children (derived[], etc.)
333+ }
334+
317335 if ( typeof obj === 'string' ) {
318336 if ( containsCloudinaryRef ( obj ) ) {
319337 // Skip URLs that are inside cloudinary.asset sub-fields (derived, url, secure_url)
@@ -426,6 +444,7 @@ async function phase1_discoverReferences(sanityClient) {
426444 log ( 1 , `Found ${ docsWithRefs . length } documents with Cloudinary references` ) ;
427445
428446 let cloudinaryAssetCount = 0 ;
447+ let rawCloudinaryCount = 0 ;
429448 let urlCount = 0 ;
430449 let embeddedCount = 0 ;
431450
@@ -434,12 +453,13 @@ async function phase1_discoverReferences(sanityClient) {
434453 for ( const r of d . refs ) {
435454 log ( 1 , ` ${ r . path } [${ r . type } ] → ${ r . url || r . publicId || '(no url)' } ` ) ;
436455 if ( r . type === 'cloudinary.asset' ) cloudinaryAssetCount ++ ;
456+ else if ( r . type === 'raw-cloudinary-object' ) rawCloudinaryCount ++ ;
437457 else if ( r . type === 'url' ) urlCount ++ ;
438458 else if ( r . type === 'embedded' ) embeddedCount ++ ;
439459 }
440460 }
441461
442- log ( 1 , `\n Breakdown: ${ cloudinaryAssetCount } cloudinary.asset objects, ${ urlCount } URL fields, ${ embeddedCount } embedded URLs` ) ;
462+ log ( 1 , `\n Breakdown: ${ cloudinaryAssetCount } cloudinary.asset objects, ${ rawCloudinaryCount } raw Cloudinary objects, ${ urlCount } URL fields, ${ embeddedCount } embedded URLs` ) ;
443463
444464 // Save to disk for resume
445465 if ( ! DRY_RUN ) {
@@ -450,6 +470,26 @@ async function phase1_discoverReferences(sanityClient) {
450470 return docsWithRefs ;
451471}
452472
473+ // ─── Utility: strip Cloudinary transformations from URL ──────────────────────
/**
 * Remove the transformation segments from a Cloudinary delivery URL.
 *
 * Expected URL shape: `.../upload/[transformations/]v{version}/{public_id}.{ext}`.
 * Every transformation segment between `/upload/` and the `v{version}/`
 * segment is dropped. A segment is a comma-separated list of `key_value`
 * parameters, where the key is one to three letters (`w`, `ar`, `dpr`, ...)
 * or a `$variable`, and the value may contain anything except `/` and `,`
 * (so decimals such as `w_0.5`, negatives such as `x_-10` and qualifiers
 * such as `q_auto:good` are all recognised).
 *
 * URLs with no `v{version}/` segment after the transformations are returned
 * unchanged, because without that anchor a transformation cannot be told
 * apart from a folder of the public ID.
 *
 * @param {string} url - Cloudinary delivery URL.
 * @returns {string} The URL with transformation segments removed; the input
 *   itself when nothing matches or when it is not a string.
 */
function stripTransformations(url) {
  // Nothing to strip from null/undefined; hand the value back instead of throwing.
  if (typeof url !== 'string') {
    return url;
  }

  // One `key_value` parameter: short alphabetic key or `$variable`, then a value.
  const param = String.raw`(?:\$[a-z][a-z0-9]*|[a-z]{1,3})_[^/,]+`;
  // One chained segment: comma-separated parameters terminated by a slash.
  const segment = `${param}(?:,${param})*/`;
  const transformationPrefix = new RegExp(`(/upload/)(?:${segment})+(v\\d+/)`, 'i');

  return url.replace(transformationPrefix, '$1$2');
}
482+
483+ // ─── Utility: get canonical original URL for a Cloudinary reference ──────────
/**
 * Build the canonical, transformation-free URL for a Cloudinary reference.
 *
 * When the reference carries both a public ID and a resource type, the URL
 * is rebuilt on the `media.codingcat.dev` host as
 * `/{resourceType}/upload/{publicId}[.{ext}]`:
 *   - image/video assets get `ref.format` as extension, falling back to
 *     `mp4` for video and `png` for everything else;
 *   - raw assets get no extension appended, because a raw asset's public ID
 *     already includes its file extension.
 *
 * Otherwise the reference's own URL is used with its transformation
 * segments stripped.
 *
 * @param {{publicId?: string, resourceType?: string, format?: string|null, url?: string|null}} ref
 *   Reference record.
 * @returns {string|null} Canonical URL, or null when the reference has
 *   neither a public ID/resource type pair nor a URL.
 */
function getOriginalUrl(ref) {
  const { publicId, resourceType, format, url } = ref;

  if (publicId && resourceType) {
    const assetBase = `https://media.codingcat.dev/${resourceType}/upload/${publicId}`;
    // Raw public IDs already end in the real file extension; adding one
    // (e.g. ".png") would point at an asset that does not exist.
    if (resourceType === 'raw') {
      return assetBase;
    }
    const ext = format || (resourceType === 'video' ? 'mp4' : 'png');
    return `${assetBase}.${ext}`;
  }

  // Fallback: strip transformations from the URL (nothing to do without one).
  return url ? stripTransformations(url) : null;
}
492+
453493// ═══════════════════════════════════════════════════════════════════════════════
454494// PHASE 2: Extract Unique Cloudinary URLs
455495// ═══════════════════════════════════════════════════════════════════════════════
@@ -469,20 +509,22 @@ async function phase2_extractUniqueUrls(docsWithRefs) {
469509
470510 for ( const doc of docsWithRefs ) {
471511 for ( const ref of doc . refs ) {
472- const url = ref . url ;
473- if ( ! url ) continue ;
512+ if ( ! ref . url ) continue ;
474513
475- if ( urlMap . has ( url ) ) {
514+ // Get the canonical original URL (strips transformations, uses CNAME)
515+ const originalUrl = getOriginalUrl ( ref ) ;
516+
517+ if ( urlMap . has ( originalUrl ) ) {
476518 // Add this doc as another source
477- const entry = urlMap . get ( url ) ;
519+ const entry = urlMap . get ( originalUrl ) ;
478520 if ( ! entry . sourceDocIds . includes ( doc . _id ) ) {
479521 entry . sourceDocIds . push ( doc . _id ) ;
480522 }
481523 } else {
482- urlMap . set ( url , {
483- cloudinaryUrl : url ,
484- cloudinaryPublicId : ref . publicId || extractPublicIdFromUrl ( url ) ,
485- resourceType : ref . resourceType || guessResourceType ( url ) ,
524+ urlMap . set ( originalUrl , {
525+ cloudinaryUrl : originalUrl ,
526+ cloudinaryPublicId : ref . publicId || extractPublicIdFromUrl ( originalUrl ) ,
527+ resourceType : ref . resourceType || guessResourceType ( originalUrl ) ,
486528 sourceDocIds : [ doc . _id ] ,
487529 } ) ;
488530 }
@@ -634,11 +676,24 @@ async function phase3_downloadAndUpload(uniqueUrls) {
634676/**
635677 * Given a Cloudinary URL, find the matching Sanity asset in the mapping.
636678 */
637- function findMappingForUrl ( url , mapping ) {
679+ function findMappingForUrl ( url , mapping , refPublicId ) {
638680 // Try exact URL match first
639681 let entry = mapping . find ( ( m ) => m . cloudinaryUrl === url ) ;
640682 if ( entry ) return entry ;
641683
684+ // Try matching by the ref's own publicId (from the Cloudinary object)
685+ if ( refPublicId ) {
686+ entry = mapping . find ( ( m ) => m . cloudinaryPublicId === refPublicId ) ;
687+ if ( entry ) return entry ;
688+ }
689+
690+ // Try matching by stripped/canonical URL
691+ const strippedUrl = stripTransformations ( url ) ;
692+ if ( strippedUrl !== url ) {
693+ entry = mapping . find ( ( m ) => m . cloudinaryUrl === strippedUrl ) ;
694+ if ( entry ) return entry ;
695+ }
696+
642697 // Try matching by public_id extracted from the URL
643698 const publicId = extractPublicIdFromUrl ( url ) ;
644699 if ( publicId ) {
@@ -694,7 +749,7 @@ async function phase4_updateReferences(sanityClient, docsWithRefs, mapping) {
694749 continue ;
695750 }
696751
697- const mappingEntry = findMappingForUrl ( refUrl , mapping ) ;
752+ const mappingEntry = findMappingForUrl ( refUrl , mapping , ref . publicId ) ;
698753
699754 if ( ! mappingEntry ) {
700755 log ( 4 , ` ⚠ No mapping found for URL: ${ refUrl } (in ${ docId } at ${ fieldPath } )` ) ;
@@ -705,8 +760,8 @@ async function phase4_updateReferences(sanityClient, docsWithRefs, mapping) {
705760 const sanityId = mappingEntry . sanityAssetId ;
706761 const cdnUrl = mappingEntry . sanityUrl || sanityAssetUrl ( sanityId ) ;
707762
708- if ( refType === 'cloudinary.asset' ) {
709- // ── Replace entire cloudinary.asset object with Sanity image/file reference ──
763+ if ( refType === 'cloudinary.asset' || refType === 'raw-cloudinary-object' ) {
764+ // ── Replace entire cloudinary.asset or raw Cloudinary object with Sanity image/file reference ──
710765 const isImage = ( ref . resourceType || 'image' ) === 'image' ;
711766 const refObj = isImage
712767 ? {
@@ -830,6 +885,10 @@ async function phase5_report(docsWithRefs, uniqueUrls, mapping, changes) {
830885 ( sum , d ) => sum + d . refs . filter ( ( r ) => r . type === 'cloudinary.asset' ) . length ,
831886 0
832887 ) ;
888+ const rawCloudinaryRefs = docsWithRefs . reduce (
889+ ( sum , d ) => sum + d . refs . filter ( ( r ) => r . type === 'raw-cloudinary-object' ) . length ,
890+ 0
891+ ) ;
833892 const urlRefs = docsWithRefs . reduce (
834893 ( sum , d ) => sum + d . refs . filter ( ( r ) => r . type === 'url' ) . length ,
835894 0
@@ -846,6 +905,7 @@ async function phase5_report(docsWithRefs, uniqueUrls, mapping, changes) {
846905 totalDocumentsWithRefs : docsWithRefs . length ,
847906 totalReferencesFound : totalRefs ,
848907 cloudinaryAssetObjects : cloudinaryAssetRefs ,
908+ rawCloudinaryObjects : rawCloudinaryRefs ,
849909 urlStringRefs : urlRefs ,
850910 embeddedUrlRefs : embeddedRefs ,
851911 uniqueCloudinaryUrls : uniqueUrls . length ,
@@ -864,6 +924,7 @@ async function phase5_report(docsWithRefs, uniqueUrls, mapping, changes) {
864924 console . log ( ` Documents with refs: ${ report . summary . totalDocumentsWithRefs } ` ) ;
865925 console . log ( ` Total references found: ${ report . summary . totalReferencesFound } ` ) ;
866926 console . log ( ` cloudinary.asset objects: ${ report . summary . cloudinaryAssetObjects } ` ) ;
927+ console . log ( ` raw Cloudinary objects: ${ report . summary . rawCloudinaryObjects } ` ) ;
867928 console . log ( ` URL string fields: ${ report . summary . urlStringRefs } ` ) ;
868929 console . log ( ` Embedded URLs in text: ${ report . summary . embeddedUrlRefs } ` ) ;
869930 console . log ( ` Unique Cloudinary URLs: ${ report . summary . uniqueCloudinaryUrls } ` ) ;
0 commit comments