@@ -12,9 +12,10 @@ import {
1212 blank ,
1313 clearLines ,
1414 formatDuration ,
15+ formatProgress ,
1516 header ,
1617 keyValue ,
17- progressBar ,
18+ logError ,
1819 section ,
1920 writeMultiLineProgress ,
2021} from "./ui" ;
@@ -28,10 +29,11 @@ interface ProcessContext {
2829 stats : { saved : number ; skipped : number ; failed : number } ;
2930 rateLimiter : RateLimiter ;
3031 force ?: boolean ;
32+ onError ?: ( status : number , url : string , message : string ) => void ;
3133}
3234
3335async function processRecord ( record : CdxRecord , ctx : ProcessContext ) {
34- const { db, storage, config, crawlId, stats, rateLimiter, force } = ctx ;
36+ const { db, storage, config, crawlId, stats, rateLimiter, force, onError } = ctx ;
3537
3638 // Check if already processed (skip if --force)
3739 if ( ! force ) {
@@ -48,6 +50,7 @@ async function processRecord(record: CdxRecord, ctx: ProcessContext) {
4850 result = await fetchWarcRecord ( record , {
4951 timeoutMs : config . crawl . timeoutMs ,
5052 rateLimiter,
53+ onError,
5154 } ) ;
5255 } catch ( err ) {
5356 stats . failed ++ ;
@@ -169,40 +172,44 @@ export async function scrape(
169172 // Track throughput
170173 let lastThroughputUpdate = Date . now ( ) ;
171174 let docsAtLastUpdate = 0 ;
175+ let currentDocsPerSec = 0 ;
172176
173177 // Track line count for clearing
174- let prevLineCount = 1 ;
178+ let prevLineCount = 2 ;
179+
180+ // Error logging for verbose mode
181+ const onError = verbose
182+ ? ( _status : number , url : string , message : string ) => {
183+ // Clear progress lines, log error, then redraw progress
184+ clearLines ( prevLineCount ) ;
185+ logError ( `${ message } - ${ url } ` ) ;
186+ prevLineCount = 0 ; // Reset so next update draws fresh
187+ }
188+ : undefined ;
175189
176190 // Progress update function
177191 const updateProgress = ( ) => {
178- const lines : string [ ] = [ ] ;
179-
180192 // Calculate docs/sec
181193 const now = Date . now ( ) ;
182194 const elapsed = ( now - lastThroughputUpdate ) / 1000 ;
183- let docsPerSec = 0 ;
184195 if ( elapsed >= 1 ) {
185- docsPerSec = ( stats . saved - docsAtLastUpdate ) / elapsed ;
196+ currentDocsPerSec = ( stats . saved - docsAtLastUpdate ) / elapsed ;
186197 lastThroughputUpdate = now ;
187198 docsAtLastUpdate = stats . saved ;
188199 }
189200
190201 const { errorCount } = rateLimiter . getStats ( ) ;
191202
192- const extras : string [ ] = [ ] ;
193- if ( docsPerSec > 0 ) extras . push ( `${ docsPerSec . toFixed ( 1 ) } /s` ) ;
194- if ( stats . skipped > 0 ) extras . push ( `${ stats . skipped } dup` ) ;
195- if ( stats . failed > 0 ) extras . push ( `${ stats . failed } fail` ) ;
196- if ( errorCount > 0 ) extras . push ( `${ errorCount } retried` ) ;
197- const extrasText = extras . length > 0 ? ` (${ extras . join ( " · " ) } )` : "" ;
198-
199- if ( batchSize === Infinity ) {
200- lines . push ( ` WARC: ${ stats . saved } saved${ extrasText } ` ) ;
201- } else {
202- const savedDisplay = Math . min ( stats . saved , batchSize ) ;
203- const warcBar = progressBar ( savedDisplay , batchSize ) ;
204- lines . push ( ` WARC: ${ warcBar } ${ savedDisplay } /${ batchSize } saved${ extrasText } ` ) ;
205- }
203+ const lines = formatProgress ( {
204+ saved : Math . min ( stats . saved , batchSize ) ,
205+ total : batchSize ,
206+ docsPerSec : currentDocsPerSec ,
207+ currentRps : rateLimiter . getCurrentRps ( ) ,
208+ skipped : stats . skipped ,
209+ failed : stats . failed ,
210+ retried : errorCount ,
211+ elapsedMs : Date . now ( ) - startTime ,
212+ } ) ;
206213
207214 prevLineCount = writeMultiLineProgress ( lines , prevLineCount ) ;
208215 } ;
@@ -229,8 +236,9 @@ export async function scrape(
229236 config,
230237 crawlId,
231238 stats,
232- rateLimiter : rateLimiter ,
239+ rateLimiter,
233240 force,
241+ onError,
234242 } ) ;
235243 updateProgress ( ) ;
236244 } ) . finally ( ( ) => tasks . delete ( task ) ) ;
0 commit comments