@@ -26,6 +26,7 @@ export class FailedPodHandler {
// Informer over V1Pod resources; the error handler stops and restarts it when reconnecting.
2626 private readonly informer : Informer < V1Pod > ;
// Base reconnect delay in milliseconds. The error handler multiplies it by 2 ** reconnectAttempt
// and caps the result at 30_000 ms.
2727 private readonly reconnectIntervalMs : number ;
// Set to true while the error handler is running a reconnect and cleared in its `finally` block.
2828 private reconnecting = false ;
// Number of reconnect attempts since the last successful connect: incremented on each handled
// error and reset to 0 in onConnect.
29+ private reconnectAttempt = 0 ;
2930
3031 // Metrics
3132 private readonly register : Registry ;
@@ -271,24 +272,34 @@ export class FailedPodHandler {
271272 this . reconnecting = true ;
272273
273274 try {
274- const error = err instanceof Error ? err : undefined ;
275+ const errorDetails = this . getErrorDetails ( err ) ;
276+ const reconnectDelayMs = Math . min (
277+ this . reconnectIntervalMs * 2 ** Math . max ( this . reconnectAttempt , 0 ) ,
278+ 30_000
279+ ) ;
275280 this . logger . error ( "error event fired" , {
276281 informerName,
277- error : error ?. message ,
278- errorType : error ?. name ,
282+ reconnectAttempt : this . reconnectAttempt + 1 ,
283+ reconnectDelayMs,
284+ ...errorDetails ,
279285 } ) ;
280286 this . informerEventsTotal . inc ( { namespace : this . namespace , verb : "error" } ) ;
287+ this . reconnectAttempt ++ ;
281288
282289 // Reconnect on errors
283- await setTimeout ( this . reconnectIntervalMs ) ;
290+ await setTimeout ( reconnectDelayMs ) ;
291+ await this . informer . stop ( ) . catch ( ( stopError ) => {
292+ this . logger . warn ( "onError: informer stop before reconnect failed" , {
293+ informerName,
294+ ...this . getErrorDetails ( stopError ) ,
295+ } ) ;
296+ } ) ;
284297 await this . informer . start ( ) ;
285298 } catch ( handlerError ) {
286- const error = handlerError instanceof Error ? handlerError : undefined ;
287299 this . logger . error ( "onError: reconnection attempt failed" , {
288300 informerName,
289- error : error ?. message ,
290- errorType : error ?. name ,
291- errorStack : error ?. stack ,
301+ reconnectAttempt : this . reconnectAttempt ,
302+ ...this . getErrorDetails ( handlerError ) ,
292303 } ) ;
293304 } finally {
294305 this . reconnecting = false ;
@@ -300,10 +311,51 @@ export class FailedPodHandler {
300311 }
301312
// Records a successful informer connection for `informerName`.
// NOTE(review): presumably registered as the informer's connect callback; the registration
// is not visible in this section, so confirm against the caller.
302313 private async onConnect ( informerName : string ) {
// A successful connect ends the current failure streak: with the counter back at 0, the next
// error's reconnect delay starts again from the base reconnectIntervalMs.
314+ this . reconnectAttempt = 0 ;
303315 this . logger . info ( `informer connected: ${ informerName } ` ) ;
// Increment the informer events metric with verb "connect", labelled with this handler's namespace.
304316 this . informerEventsTotal . inc ( { namespace : this . namespace , verb : "connect" } ) ;
305317 }
306318
/**
 * Normalizes an arbitrary thrown/emitted value into a flat object that can be
 * spread into a structured log entry.
 *
 * - `Error` instances yield `error` (message), `errorType` (name), `errorStack`,
 *   `errorCause` (name/message when the cause is itself an `Error`, otherwise the
 *   raw cause), plus `errorCode` / `statusCode` when the error carries them.
 * - Other non-null objects yield `error` (string `message`, if any), `errorCode`,
 *   `statusCode` and the untouched object as `rawError`.
 * - Anything else (strings, numbers, null, undefined, ...) is returned as `rawError`.
 *
 * @param error - The value received from a `catch` clause or an error callback.
 * @returns A plain object of log fields; fields that do not apply are `undefined`.
 */
private getErrorDetails(error: unknown) {
  // Shared extraction of the optional `code` / `statusCode` fields, which may sit
  // on Error instances as well as on plain objects.
  const readCodes = (code: unknown, statusCode: unknown) => {
    let parsedStatus: number | undefined;
    if (typeof statusCode === "number") {
      parsedStatus = statusCode;
    } else if (typeof statusCode === "string" && statusCode.trim() !== "") {
      // Blank strings are skipped explicitly: Number("") is 0, which would be
      // logged as a bogus status code.
      parsedStatus = Number(statusCode);
    }

    return {
      // Accept numeric codes too, not only string codes.
      errorCode: typeof code === "string" || typeof code === "number" ? code : undefined,
      // Drop NaN / Infinity produced by unparsable or non-finite input.
      statusCode:
        parsedStatus !== undefined && Number.isFinite(parsedStatus) ? parsedStatus : undefined,
    };
  };

  if (error instanceof Error) {
    // Narrow once so the optional extras can be read without depending on the
    // configured lib version declaring `Error.cause`.
    const extended = error as Error & {
      cause?: unknown;
      code?: unknown;
      statusCode?: unknown;
    };
    const { cause } = extended;

    return {
      error: error.message,
      errorType: error.name,
      errorStack: error.stack,
      errorCause:
        cause instanceof Error
          ? {
              name: cause.name,
              message: cause.message,
            }
          : cause,
      ...readCodes(extended.code, extended.statusCode),
    };
  }

  if (typeof error === "object" && error !== null) {
    const details = error as Record<string, unknown>;

    return {
      error: typeof details.message === "string" ? details.message : undefined,
      ...readCodes(details.code, details.statusCode),
      rawError: details,
    };
  }

  return {
    rawError: error,
  };
}
358+
307359 private podSummary ( pod : V1Pod ) {
308360 return {
309361 name : pod . metadata ?. name ,
0 commit comments