@@ -2,6 +2,7 @@ import fetch from "node-fetch";
22import fs from "fs" ;
33import { parse } from "node-html-parser" ;
44import { setTimeout } from "timers/promises" ;
5+ import { fileURLToPath } from "url" ;
56
67// Type for a single sighting entry
78export type Sighting = {
@@ -30,9 +31,10 @@ const ENDPOINT =
3031 "https://nuforc.org/wp-admin/admin-ajax.php?action=get_wdtable&table_id=1&wdt_var1=Post&wdt_var2=-1"
3132const WDT_NONCE = process . env . WDT_NONCE ?? ""
3233// Delay between requests in milliseconds to avoid rate limiting
33- const REQUEST_DELAY = process . env . REQUEST_DELAY ? parseInt ( process . env . REQUEST_DELAY ) : 1000
34- // Maximum number of records to scrape (optional, for testing)
35- const MAX_RECORDS = process . env . MAX_RECORDS ? parseInt ( process . env . MAX_RECORDS ) : undefined
34+ const parsedDelay = Number ( process . env . REQUEST_DELAY )
35+ const REQUEST_DELAY = Number . isFinite ( parsedDelay ) && parsedDelay > 0 ? parsedDelay : 1000
36+ const parsedMaxRecords = Number ( process . env . MAX_RECORDS )
37+ const MAX_RECORDS = Number . isFinite ( parsedMaxRecords ) && parsedMaxRecords > 0 ? parsedMaxRecords : undefined
3638
3739const PAGE_SIZE = 100
3840
@@ -204,15 +206,15 @@ export function parseArgs(argv: string[]): { maxRecords?: number; force: boolean
204206
205207 if ( arg . startsWith ( '--max-records=' ) ) {
206208 const value = arg . split ( '=' ) [ 1 ] ;
207- maxRecords = parseInt ( value ) ;
208- if ( isNaN ( maxRecords ) ) {
209- console . error ( 'Invalid value for --max-records. Must be a number.' ) ;
209+ maxRecords = Number ( value ) ;
210+ if ( ! Number . isFinite ( maxRecords ) || maxRecords <= 0 ) {
211+ console . error ( 'Invalid value for --max-records. Must be a positive number.' ) ;
210212 process . exit ( 1 ) ;
211213 }
212214 } else if ( arg === '--max-records' && i + 1 < argv . length ) {
213- maxRecords = parseInt ( argv [ i + 1 ] ) ;
214- if ( isNaN ( maxRecords ) ) {
215- console . error ( 'Invalid value for --max-records. Must be a number.' ) ;
215+ maxRecords = Number ( argv [ i + 1 ] ) ;
216+ if ( ! Number . isFinite ( maxRecords ) || maxRecords <= 0 ) {
217+ console . error ( 'Invalid value for --max-records. Must be a positive number.' ) ;
216218 process . exit ( 1 ) ;
217219 }
218220 i ++ ;
@@ -251,7 +253,7 @@ async function main() {
251253 const maxRetries = 3
252254
253255 console . log ( "Starting NUFORC scraping..." )
254- console . log ( `Using WDT_NONCE: ${ WDT_NONCE . slice ( 0 , 3 ) } ...` )
256+ console . log ( " WDT_NONCE: set" )
255257 if ( maxRecords ) {
256258 console . log ( `Will scrape a maximum of ${ maxRecords } records (for testing)` )
257259 }
@@ -278,12 +280,12 @@ async function main() {
278280 } else if ( remainingSlots < sightings . length ) {
279281 // Only add up to the limit
280282 console . log ( `Adding ${ remainingSlots } more records to reach limit of ${ maxRecords } ` )
281- allSightings = allSightings . concat ( sightings . slice ( 0 , remainingSlots ) )
283+ allSightings . push ( ... sightings . slice ( 0 , remainingSlots ) )
282284 console . log ( `Reached maximum of ${ maxRecords } records, stopping scrape` )
283285 moreData = false ;
284286 } else {
285287 // Add all sightings from this page
286- allSightings = allSightings . concat ( sightings )
288+ allSightings . push ( ... sightings )
287289 console . log ( `Now have ${ allSightings . length } /${ maxRecords } records` )
288290 if ( allSightings . length === maxRecords ) {
289291 console . log ( `Exactly reached maximum of ${ maxRecords } records, stopping scrape` )
@@ -292,7 +294,7 @@ async function main() {
292294 }
293295 } else {
294296 // No max limit, add all sightings
295- allSightings = allSightings . concat ( sightings )
297+ allSightings . push ( ... sightings )
296298 }
297299
298300 // Only increment if we're continuing
@@ -351,14 +353,20 @@ async function main() {
351353 console . error ( "Error writing to file:" , error )
352354 const backupFilename = `nuforc-results-backup-${ Date . now ( ) } .json`
353355 console . log ( `Attempting to write to backup file: ${ backupFilename } ` )
354- fs . writeFileSync ( backupFilename , jsonContent )
355- console . log ( `Successfully wrote data to backup file: ${ backupFilename } ` )
356+ try {
357+ fs . writeFileSync ( backupFilename , jsonContent )
358+ console . log ( `Successfully wrote data to backup file: ${ backupFilename } ` )
359+ } catch ( backupError ) {
360+ console . error ( "Backup write also failed:" , backupError )
361+ console . error ( `${ allSightings . length } records were scraped but could not be saved.` )
362+ }
356363 }
357364
358365 console . log ( "Done!" )
359366}
360367
361- const isDirectRun = process . argv [ 1 ] ?. includes ( "nuforc_scraper" )
368+ const __filename = fileURLToPath ( import . meta. url )
369+ const isDirectRun = process . argv [ 1 ] === __filename
362370if ( isDirectRun ) {
363371 main ( ) . catch ( ( err ) => {
364372 console . error ( err )
0 commit comments