@@ -11,8 +11,25 @@ import { DEFAULT_IMAGE_URL } from '../src/constants.js';
1111const prisma = new PrismaClient ( ) ;
1212
1313async function getDiningData ( ) : Promise < RawScrapedData > {
14- const diningData = await fetch ( process . env . CORNELL_DINING_API_URL as string ) ;
15- const data = await diningData . json ( ) as RawScrapedData ;
14+ const apiUrl = process . env . CORNELL_DINING_API_URL ;
15+ if ( ! apiUrl ) {
16+ throw new Error ( 'CORNELL_DINING_API_URL environment variable is not set' ) ;
17+ }
18+
19+ const response = await fetch ( apiUrl , {
20+ signal : AbortSignal . timeout ( 30000 ) , // 30 second timeout
21+ } ) ;
22+
23+ if ( ! response . ok ) {
24+ throw new Error ( `Failed to fetch dining data: HTTP ${ response . status } ${ response . statusText } ` ) ;
25+ }
26+
27+ const data = await response . json ( ) as RawScrapedData ;
28+
29+ if ( ! data ?. data ?. eateries ) {
30+ throw new Error ( 'Invalid response format from Cornell Dining API' ) ;
31+ }
32+
1633 return data ;
1734}
1835
@@ -25,7 +42,7 @@ function loadStaticEateries(): RawStaticEatery[] {
2542 const eateries = Array . isArray ( data ) ? data : ( data . eateries || [ ] ) ;
2643 return eateries ;
2744 } catch ( error ) {
28- if ( ( error as NodeJS . ErrnoException ) . code === 'ENOENT' ) {
45+ if ( ( error as { code ?: string } ) . code === 'ENOENT' ) {
2946 console . log ( 'No static eateries file found, skipping...' ) ;
3047 return [ ] ;
3148 }
@@ -252,44 +269,41 @@ function transformEatery(rawEatery: RawEatery) {
252269async function transformEateriesConcurrently (
253270 rawEateries : RawEatery [ ] ,
254271 concurrency : number = 5
255- ) : Promise < Array < { index : number ; result : ReturnType < typeof transformEatery > } > > {
256- const results : Array < { index : number ; result : ReturnType < typeof transformEatery > } > = [ ] ;
257- const errors : Array < { index : number ; eatery : RawEatery ; error : unknown } > = [ ] ;
258-
259- const queue : Array < { index : number ; eatery : RawEatery } > = rawEateries . map ( ( eatery , index ) => ( {
260- index,
261- eatery,
262- } ) ) ;
263-
264- async function worker ( workerId : number ) : Promise < void > {
265- while ( true ) {
266- await new Promise ( ( resolve ) => setImmediate ( resolve ) ) ;
267-
268- const item = queue . shift ( ) ;
269- if ( ! item ) break ;
270-
271- const { index, eatery : rawEatery } = item ;
272-
273- try {
274- const transformed = transformEatery ( rawEatery ) ;
275- results . push ( { index, result : transformed } ) ;
276- console . log ( ` [Worker ${ workerId } ] ✓ Transformed ${ rawEatery . name } (${ transformed . events . length } events)` ) ;
277- } catch ( error ) {
278- errors . push ( { index, eatery : rawEatery , error } ) ;
279- console . error ( ` [Worker ${ workerId } ] ✗ Error transforming ${ rawEatery . name } :` , error ) ;
272+ ) : Promise < ReturnType < typeof transformEatery > [ ] > {
273+ const results : ReturnType < typeof transformEatery > [ ] = [ ] ;
274+ const errors : Array < { eatery : RawEatery ; error : unknown } > = [ ] ;
275+
276+ // Process in batches for better performance
277+ for ( let i = 0 ; i < rawEateries . length ; i += concurrency ) {
278+ const batch = rawEateries . slice ( i , i + concurrency ) ;
279+ const batchResults = await Promise . allSettled (
280+ batch . map ( async ( rawEatery ) => {
281+ try {
282+ const transformed = transformEatery ( rawEatery ) ;
283+ console . log ( ` ✓ Transformed ${ rawEatery . name } (${ transformed . events . length } events)` ) ;
284+ return transformed ;
285+ } catch ( error ) {
286+ console . error ( ` ✗ Error transforming ${ rawEatery . name } :` , error ) ;
287+ throw error ;
288+ }
289+ } )
290+ ) ;
291+
292+ for ( let j = 0 ; j < batchResults . length ; j ++ ) {
293+ const result = batchResults [ j ] ;
294+ if ( result . status === 'fulfilled' ) {
295+ results . push ( result . value ) ;
296+ } else {
297+ errors . push ( { eatery : batch [ j ] , error : result . reason } ) ;
280298 }
281299 }
282300 }
283301
284- const workers = Array . from ( { length : concurrency } , ( _ , i ) => worker ( i + 1 ) ) ;
285- await Promise . all ( workers ) ;
286-
287302 if ( errors . length > 0 ) {
288- const errorMessages = errors . map ( ( e ) => `Eatery "${ e . eatery . name } " (index ${ e . index } ) : ${ e . error } ` ) . join ( '\n' ) ;
303+ const errorMessages = errors . map ( ( e ) => `Eatery "${ e . eatery . name } ": ${ e . error } ` ) . join ( '\n' ) ;
289304 throw new Error ( `Failed to transform ${ errors . length } eatery(ies):\n${ errorMessages } ` ) ;
290305 }
291306
292- results . sort ( ( a , b ) => a . index - b . index ) ;
293307 return results ;
294308}
295309
@@ -332,33 +346,45 @@ async function processAllEateries(
332346 } >
333347) {
334348 return await prisma . $transaction ( async ( tx ) => {
349+ // Clear existing data
335350 await tx . event . deleteMany ( { } ) ;
336351 await tx . eatery . deleteMany ( { } ) ;
337352
338- for ( const { eatery, events } of transformedEateries ) {
339- await tx . eatery . create ( {
340- data : {
341- ...eatery ,
342- events : {
343- create : events . map ( ( rawEvent ) => ( {
344- type : mapEventType ( rawEvent . type ) ,
345- startTimestamp : rawEvent . startTimestamp ,
346- endTimestamp : rawEvent . endTimestamp ,
347- menu : {
348- create : rawEvent . menu . map ( ( rawCategory ) => ( {
349- name : rawCategory . category ,
350- items : {
351- create : rawCategory . items . map ( ( rawItem ) => ( {
352- name : rawItem . item ,
353+ // Process eateries in smaller batches within the transaction
354+ const BATCH_SIZE = 10 ;
355+ for ( let i = 0 ; i < transformedEateries . length ; i += BATCH_SIZE ) {
356+ const batch = transformedEateries . slice ( i , i + BATCH_SIZE ) ;
357+
358+ await Promise . all (
359+ batch . map ( ( { eatery, events } ) =>
360+ tx . eatery . create ( {
361+ data : {
362+ ...eatery ,
363+ events : {
364+ create : events . map ( ( rawEvent ) => ( {
365+ type : mapEventType ( rawEvent . type ) ,
366+ startTimestamp : rawEvent . startTimestamp ,
367+ endTimestamp : rawEvent . endTimestamp ,
368+ menu : {
369+ create : rawEvent . menu . map ( ( rawCategory ) => ( {
370+ name : rawCategory . category ,
371+ items : {
372+ create : rawCategory . items . map ( ( rawItem ) => ( {
373+ name : rawItem . item ,
374+ } ) ) ,
375+ } ,
353376 } ) ) ,
354377 } ,
355378 } ) ) ,
356379 } ,
357- } ) ) ,
358- } ,
359- } ,
360- } ) ;
380+ } ,
381+ } )
382+ )
383+ ) ;
361384 }
385+ } , {
386+ maxWait : 20000 , // Wait up to 20 seconds to start the transaction
387+ timeout : 60000 , // Allow the transaction to run for up to 60 seconds
362388 } ) ;
363389}
364390
@@ -383,6 +409,50 @@ async function getAllEateriesData() {
383409 } ) ;
384410}
385411
412+ async function updateServerCache ( ) : Promise < void > {
413+ const serverUrl = process . env . SERVER_URL ;
414+ const cacheRefreshHeader = process . env . CACHE_REFRESH_HEADER ;
415+ const cacheRefreshSecret = process . env . CACHE_REFRESH_SECRET ;
416+
417+ if ( ! serverUrl || ! cacheRefreshHeader || ! cacheRefreshSecret ) {
418+ console . log ( '⚠️ Server cache update skipped: Missing SERVER_URL, CACHE_REFRESH_HEADER, or CACHE_REFRESH_SECRET' ) ;
419+ return ;
420+ }
421+
422+ try {
423+ console . log ( 'Fetching all eatery data for server update...' ) ;
424+ const startFetchTime = Date . now ( ) ;
425+ const allEateryData = await getAllEateriesData ( ) ;
426+ const fetchDuration = ( ( Date . now ( ) - startFetchTime ) / 1000 ) . toFixed ( 2 ) ;
427+ console . log ( `Fetched ${ allEateryData . length } eateries in ${ fetchDuration } s` ) ;
428+
429+ console . log ( 'Updating server cache with new eatery data...' ) ;
430+ const startUpdateTime = Date . now ( ) ;
431+ const response = await fetch ( `${ serverUrl } /internal/cache/` , {
432+ method : 'POST' ,
433+ headers : {
434+ 'Content-Type' : 'application/json' ,
435+ [ cacheRefreshHeader ] : cacheRefreshSecret ,
436+ } ,
437+ body : JSON . stringify ( { eateries : allEateryData } ) ,
438+ signal : AbortSignal . timeout ( 30000 ) , // 30 second timeout
439+ } ) ;
440+
441+ if ( ! response . ok ) {
442+ const errorText = await response . text ( ) . catch ( ( ) => 'Unable to read error response' ) ;
443+ throw new Error (
444+ `Server responded with status ${ response . status } : ${ errorText } ` ,
445+ ) ;
446+ }
447+
448+ const updateDuration = ( ( Date . now ( ) - startUpdateTime ) / 1000 ) . toFixed ( 2 ) ;
449+ console . log ( `✓ Server cache updated successfully in ${ updateDuration } s` ) ;
450+ } catch ( error ) {
451+ console . error ( '✗ Failed to update server cache with new eatery data:' , error ) ;
452+ // Don't throw - this is not critical enough to fail the entire scraper
453+ }
454+ }
455+
386456export async function main ( ) {
387457 const startTime = Date . now ( ) ;
388458 console . log ( 'Starting scraper at' , new Date ( startTime ) . toString ( ) , '\n' ) ;
@@ -440,9 +510,9 @@ export async function main() {
440510 console . log ( `Found ${ diningData . data . eateries . length } eateries from API (${ apiFetchDuration } s)` ) ;
441511
442512 const transformStartTime = Date . now ( ) ;
443- console . log ( `Transforming API eatery data with ${ process . env . WORKERS } concurrent workers...` ) ;
444- const transformResults = await transformEateriesConcurrently ( diningData . data . eateries , parseInt ( process . env . WORKERS || '4' , 10 ) ) ;
445- const transformedApiEateries = transformResults . map ( ( r ) => r . result ) ;
513+ const workerCount = parseInt ( process . env . WORKERS || '4' , 10 ) ;
514+ console . log ( `Transforming API eatery data with ${ workerCount } concurrent workers...` ) ;
515+ const transformedApiEateries = await transformEateriesConcurrently ( diningData . data . eateries , workerCount ) ;
446516 const transformDuration = ( ( Date . now ( ) - transformStartTime ) / 1000 ) . toFixed ( 2 ) ;
447517 console . log ( `✓ Successfully transformed ${ transformedApiEateries . length } API eateries (${ transformDuration } s)\n` ) ;
448518
@@ -490,46 +560,8 @@ export async function main() {
490560 }
491561
492562 // Send newly populated data to server
493- console . log ( '[Scheduler] Scraper run finished\n' ) ;
494- try {
495- console . log ( '[Scheduler] Fetching all eatery data for server update...' ) ;
496- const startFetchTime = Date . now ( ) ;
497- const allEateryData = await getAllEateriesData ( ) ;
498- const fetchDuration = ( ( Date . now ( ) - startFetchTime ) / 1000 ) . toFixed ( 2 ) ;
499- console . log (
500- `[Scheduler] Fetched ${ allEateryData . length } eateries in ${ fetchDuration } s` ,
501- ) ;
502-
503- console . log ( '[Scheduler] Updating server cache with new eatery data...' ) ;
504- const startUpdateTime = Date . now ( ) ;
505- const response = await fetch (
506- `${ process . env . SERVER_URL } /internal/cache/` ,
507- {
508- method : 'POST' ,
509- headers : {
510- 'Content-Type' : 'application/json' ,
511- [ process . env . CACHE_REFRESH_HEADER ! ] :
512- process . env . CACHE_REFRESH_SECRET ! ,
513- } ,
514- body : JSON . stringify ( { eateries : allEateryData } ) ,
515- } ,
516- ) ;
517- if ( ! response . ok ) {
518- throw new Error (
519- `Server responded with status ${ response . status } during cache update` ,
520- ) ;
521- }
522- const updateDuration = ( ( Date . now ( ) - startUpdateTime ) / 1000 ) . toFixed ( 2 ) ;
523- console . log (
524- `[Scheduler] Server cache updated successfully in ${ updateDuration } s` ,
525- ) ;
526- }
527- catch ( error ) {
528- console . error (
529- '[Scheduler] Failed to update server cache with new eatery data:' ,
530- error ,
531- ) ;
532- }
563+ console . log ( '\nScraper run finished\n' ) ;
564+ await updateServerCache ( ) ;
533565
534566 const totalDuration = ( ( Date . now ( ) - startTime ) / 1000 ) . toFixed ( 2 ) ;
535567 console . log ( `\n✅ Dining data scraped successfully in ${ totalDuration } s` ) ;
@@ -584,8 +616,19 @@ function startScraperScheduler() {
584616}
585617
586618if ( process . env . SCHEDULED_MODE === 'true' ) {
619+ console . log ( '[Scheduler] Running initial scraper on startup...' ) ;
620+
621+ // Run scraper immediately on startup
622+ runScraperSafely ( ) . then ( ( ) => {
623+ console . log ( '[Scheduler] Initial scraper run completed' ) ;
624+ } ) . catch ( ( error ) => {
625+ console . error ( '[Scheduler] Initial scraper run failed:' , error ) ;
626+ } ) ;
627+
628+ // Start the scheduler for future runs
587629 startScraperScheduler ( ) ;
588630 console . log ( '[Scheduler] Scraper scheduler is running. Press Ctrl+C to stop.' ) ;
631+
589632 const gracefulShutdown = async ( ) => {
590633 console . log ( '[Scheduler] Shutting down gracefully...' ) ;
591634 await prisma . $disconnect ( ) ;
0 commit comments