Skip to content

Commit bbb054b

Browse files
Fix critical and major review issues: nonce logging, parseInt guards, O(n^2) concat, error handling
1 parent 967e91e commit bbb054b

2 files changed

Lines changed: 25 additions & 17 deletions

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ npm install
2626

2727
The scraper can be configured using environment variables:
2828

29-
- `WDT_NONCE`: The NUFORC website's nonce value (defaults to a development value)
29+
- `WDT_NONCE`: **Required.** The NUFORC website's nonce value. Visit nuforc.org/subndx/?id=all and inspect the page source for `wdtNonceFrontendServerSide` to obtain it
3030
- `REQUEST_DELAY`: Delay between requests in milliseconds (defaults to 1000ms)
3131
- `MAX_RECORDS`: Maximum number of records to scrape (optional, for testing)
3232

nuforc_scraper.ts

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import fetch from "node-fetch";
22
import fs from "fs";
33
import { parse } from "node-html-parser";
44
import { setTimeout } from "timers/promises";
5+
import { fileURLToPath } from "url";
56

67
// Type for a single sighting entry
78
export type Sighting = {
@@ -30,9 +31,10 @@ const ENDPOINT =
3031
"https://nuforc.org/wp-admin/admin-ajax.php?action=get_wdtable&table_id=1&wdt_var1=Post&wdt_var2=-1"
3132
const WDT_NONCE = process.env.WDT_NONCE ?? ""
3233
// Delay between requests in milliseconds to avoid rate limiting
33-
const REQUEST_DELAY = process.env.REQUEST_DELAY ? parseInt(process.env.REQUEST_DELAY) : 1000
34-
// Maximum number of records to scrape (optional, for testing)
35-
const MAX_RECORDS = process.env.MAX_RECORDS ? parseInt(process.env.MAX_RECORDS) : undefined
34+
const parsedDelay = Number(process.env.REQUEST_DELAY)
35+
const REQUEST_DELAY = Number.isFinite(parsedDelay) && parsedDelay > 0 ? parsedDelay : 1000
36+
const parsedMaxRecords = Number(process.env.MAX_RECORDS)
37+
const MAX_RECORDS = Number.isFinite(parsedMaxRecords) && parsedMaxRecords > 0 ? parsedMaxRecords : undefined
3638

3739
const PAGE_SIZE = 100
3840

@@ -204,15 +206,15 @@ export function parseArgs(argv: string[]): { maxRecords?: number; force: boolean
204206

205207
if (arg.startsWith('--max-records=')) {
206208
const value = arg.split('=')[1];
207-
maxRecords = parseInt(value);
208-
if (isNaN(maxRecords)) {
209-
console.error('Invalid value for --max-records. Must be a number.');
209+
maxRecords = Number(value);
210+
if (!Number.isFinite(maxRecords) || maxRecords <= 0) {
211+
console.error('Invalid value for --max-records. Must be a positive number.');
210212
process.exit(1);
211213
}
212214
} else if (arg === '--max-records' && i + 1 < argv.length) {
213-
maxRecords = parseInt(argv[i + 1]);
214-
if (isNaN(maxRecords)) {
215-
console.error('Invalid value for --max-records. Must be a number.');
215+
maxRecords = Number(argv[i + 1]);
216+
if (!Number.isFinite(maxRecords) || maxRecords <= 0) {
217+
console.error('Invalid value for --max-records. Must be a positive number.');
216218
process.exit(1);
217219
}
218220
i++;
@@ -251,7 +253,7 @@ async function main() {
251253
const maxRetries = 3
252254

253255
console.log("Starting NUFORC scraping...")
254-
console.log(`Using WDT_NONCE: ${WDT_NONCE.slice(0, 3)}...`)
256+
console.log("WDT_NONCE: set")
255257
if (maxRecords) {
256258
console.log(`Will scrape a maximum of ${maxRecords} records (for testing)`)
257259
}
@@ -278,12 +280,12 @@ async function main() {
278280
} else if (remainingSlots < sightings.length) {
279281
// Only add up to the limit
280282
console.log(`Adding ${remainingSlots} more records to reach limit of ${maxRecords}`)
281-
allSightings = allSightings.concat(sightings.slice(0, remainingSlots))
283+
allSightings.push(...sightings.slice(0, remainingSlots))
282284
console.log(`Reached maximum of ${maxRecords} records, stopping scrape`)
283285
moreData = false;
284286
} else {
285287
// Add all sightings from this page
286-
allSightings = allSightings.concat(sightings)
288+
allSightings.push(...sightings)
287289
console.log(`Now have ${allSightings.length}/${maxRecords} records`)
288290
if (allSightings.length === maxRecords) {
289291
console.log(`Exactly reached maximum of ${maxRecords} records, stopping scrape`)
@@ -292,7 +294,7 @@ async function main() {
292294
}
293295
} else {
294296
// No max limit, add all sightings
295-
allSightings = allSightings.concat(sightings)
297+
allSightings.push(...sightings)
296298
}
297299

298300
// Only increment if we're continuing
@@ -351,14 +353,20 @@ async function main() {
351353
console.error("Error writing to file:", error)
352354
const backupFilename = `nuforc-results-backup-${Date.now()}.json`
353355
console.log(`Attempting to write to backup file: ${backupFilename}`)
354-
fs.writeFileSync(backupFilename, jsonContent)
355-
console.log(`Successfully wrote data to backup file: ${backupFilename}`)
356+
try {
357+
fs.writeFileSync(backupFilename, jsonContent)
358+
console.log(`Successfully wrote data to backup file: ${backupFilename}`)
359+
} catch (backupError) {
360+
console.error("Backup write also failed:", backupError)
361+
console.error(`${allSightings.length} records were scraped but could not be saved.`)
362+
}
356363
}
357364

358365
console.log("Done!")
359366
}
360367

361-
const isDirectRun = process.argv[1]?.includes("nuforc_scraper")
368+
const __filename = fileURLToPath(import.meta.url)
369+
const isDirectRun = process.argv[1] === __filename
362370
if (isDirectRun) {
363371
main().catch((err) => {
364372
console.error(err)

0 commit comments

Comments (0)