@@ -729,74 +729,114 @@ function chunkMediaSheet(mediaData, chunkSize) {
729729}
730730
/**
 * Pick the number of media entries to store per index chunk, based on how
 * many entries the index holds in total.
 *
 * Tiers (first match wins):
 * - totalEntries < 10,000            -> 100,000 per chunk. The chunk size is
 *   larger than any index in this tier, so the index stays in a single file
 *   (no multi-chunk loading overhead, easier to inspect).
 * - 10,000 <= totalEntries < 200,000 -> 8,000 per chunk (~4-5MB files).
 *   Intended to keep each PUT under the CF Worker 128MB memory limit while
 *   limiting the number of chunks; chunk 0 stays small enough to load
 *   quickly for the default Images view.
 * - totalEntries >= 200,000          -> 6,000 per chunk (~3-4MB files).
 *   Smaller files for reliability on very large indexes, where a higher
 *   chunk count is already expected.
 *
 * Sizing assumptions behind the numbers: roughly 550 bytes per media entry
 * (URL + metadata + doc field), targeting files of about 5MB or less to
 * avoid DA Admin/S3 timeouts.
 *
 * Any value that is not below either threshold (including NaN or undefined)
 * falls through to the smallest chunk size, 6,000.
 *
 * @param {number} totalEntries - Total number of media entries in the index
 * @returns {number} Entries per chunk: 100000, 8000 or 6000
 */
export function getAdaptiveChunkSize(totalEntries) {
  // Ordered by ascending upper bound; the first tier whose bound exceeds the
  // entry count decides the chunk size.
  const tiers = [
    { below: 10_000, entriesPerChunk: 100_000 },
    { below: 200_000, entriesPerChunk: 8_000 },
  ];

  const match = tiers.find(({ below }) => totalEntries < below);

  return match ? match.entriesPerChunk : 6_000;
}
766+
/**
 * Save the media index as numbered chunk files, uploading in small batches.
 *
 * The media data is split with `chunkMediaSheet(mediaData, chunkSize)`. Each
 * chunk is serialized with `createMultiSheet({ media: chunk })` and PUT to
 * `${daOrigin}/source${basePath}/<chunk file name>`. If splitting yields no
 * chunks, a single empty chunk 0 is still written.
 *
 * Uploads run 3 at a time, with a 500ms pause between batches. Every upload
 * in a batch is allowed to settle before the batch is judged, so a single
 * network error does not hide the outcome of its siblings. Processing stops
 * at the first batch that contains a failure; later batches are not sent.
 *
 * @param {string} basePath - Base path without filename (e.g., '/site/.da/media-insights')
 * @param {Array} mediaData - Media sheet data (must be pre-sorted)
 * @param {number} chunkSize - Entries per chunk (from getAdaptiveChunkSize)
 * @param {string} daOrigin - DA origin (e.g., 'https://admin.da.live')
 * @param {string} imsToken - IMS access token
 * @param {string} indexFilesChunkPrefix - Chunk filename prefix (e.g., 'index-')
 * @returns {Promise<number>} Number of chunks written (always at least 1)
 * @throws {Error} When any upload in a batch rejects or returns a non-ok
 *   response. The message lists each failed chunk number with its HTTP
 *   status or error message; if an upload rejected, the first rejection
 *   reason is attached as `cause`.
 */
export async function saveIndexChunks(
  basePath,
  mediaData,
  chunkSize,
  daOrigin,
  imsToken,
  indexFilesChunkPrefix,
) {
  const mediaChunks = chunkMediaSheet(mediaData, chunkSize);
  // Always write chunk 0, even for an empty index.
  const chunksToSave = mediaChunks.length > 0 ? mediaChunks : [[]];

  // Throttling to avoid overloading the DA Admin endpoint:
  // - batchSize=3: at most 3 uploads in flight (intended to prevent 503s and
  //   CF Worker 128MB memory errors from many large concurrent PUTs)
  // - delayMs=500: pause between batches, capping the rate at 3 requests per
  //   500ms, i.e. no more than ~6 requests/sec
  const batchSize = 3;
  const delayMs = 500;

  for (let i = 0; i < chunksToSave.length; i += batchSize) {
    const batch = chunksToSave.slice(i, i + batchSize);
    const batchPromises = batch.map(async (chunk, idx) => {
      const chunkNum = i + idx;
      const chunkFileName = getChunkFileName(chunkNum, indexFilesChunkPrefix);
      const chunkPath = `${basePath}/${chunkFileName}`;
      const sheets = { media: chunk };

      const formData = await createMultiSheet(sheets);
      return workerDaFetch(`${daOrigin}/source${chunkPath}`, imsToken, {
        method: 'PUT',
        body: formData,
      });
    });

    // allSettled (not all): a rejected upload must not abandon the other
    // in-flight uploads of this batch or lose track of which chunk failed.
    const outcomes = await Promise.allSettled(batchPromises);

    const failed = [];
    let firstRejection;
    outcomes.forEach((outcome, idx) => {
      const chunkNum = i + idx;
      if (outcome.status === 'rejected') {
        if (firstRejection === undefined) {
          firstRejection = outcome;
        }
        const detail = outcome.reason?.message ?? String(outcome.reason);
        failed.push({ chunkNum, detail });
      } else if (!outcome.value.ok) {
        failed.push({ chunkNum, detail: outcome.value.status });
      }
    });

    if (failed.length > 0) {
      const chunkNums = failed.map((f) => `${f.chunkNum} (${f.detail})`).join(', ');
      throw new Error(
        `Failed to save ${failed.length} chunk(s): ${chunkNums}`,
        firstRejection ? { cause: firstRejection.reason } : undefined,
      );
    }

    // No pause after the final batch.
    if (i + batchSize < chunksToSave.length) {
      await new Promise((resolve) => { setTimeout(resolve, delayMs); });
    }
  }

  return chunksToSave.length;
}
793832
794833/**
795- * Worker-safe version of saveIndexMeta from admin-api.js
834+ * Save index metadata to DA storage
835+ * Must be called AFTER saveIndexChunks to ensure chunkCount is accurate
796836 *
797- * @param {object } meta - Metadata object
798- * @param {string } path - Full path to meta file
799- * @param {string } daOrigin - DA origin (e.g., https://admin.da.live)
837+ * @param {object } meta - Metadata object containing indexType, timestamp, chunkCount, etc.
838+ * @param {string } path - Full path to meta file (e.g., '/site/.da/media-insights/index-meta.json')
839+ * @param {string } daOrigin - DA origin (e.g., ' https://admin.da.live' )
800840 * @param {string } imsToken - IMS access token
801841 * @returns {Promise<Response> }
802842 */
0 commit comments