@@ -5,6 +5,20 @@ import { setupMongoMetrics, withMongoMetrics } from './metrics';
const hawkDBUrl = process.env.MONGO_HAWK_DB_URL || 'mongodb://localhost:27017/hawk';
const eventsDBUrl = process.env.MONGO_EVENTS_DB_URL || 'mongodb://localhost:27017/events';

/**
 * Reads a numeric setting that must be a whole number of at least 1.
 *
 * Missing, empty, non-numeric, non-finite, zero and negative values all
 * resolve to the fallback; fractional values are rounded down first.
 *
 * @param raw - raw environment value, possibly undefined
 * @param fallback - value used when raw is not a usable positive integer
 * @returns validated positive integer or the fallback
 */
function readPositiveInt(raw: string | undefined, fallback: number): number {
  const parsed = Math.floor(Number(raw));

  return Number.isFinite(parsed) && parsed >= 1 ? parsed : fallback;
}

/** Maximum number of initial connection attempts per database */
const reconnectTries = readPositiveInt(process.env.MONGO_RECONNECT_TRIES, 60);

/** Delay between connection attempts, in milliseconds */
const reconnectInterval = readPositiveInt(process.env.MONGO_RECONNECT_INTERVAL, 1000);
10+
/** Value passed as serverSelectionTimeoutMS: how long an operation may wait for an available server */
const SERVER_SELECTION_TIMEOUT_MS = 10000;

/** Value passed as socketTimeoutMS */
const SOCKET_TIMEOUT_MS = 45000;

/**
 * Options handed to the MongoClient instances created in this file.
 *
 * Server selection is capped explicitly so operations fail within a known
 * window during an outage, and both retryable reads and writes are enabled.
 * The literal is passed through withMongoMetrics (presumably to attach
 * monitoring-related options; see ./metrics).
 */
const connectionConfig: MongoClientOptions = withMongoMetrics({
  retryReads: true,
  retryWrites: true,
  socketTimeoutMS: SOCKET_TIMEOUT_MS,
  serverSelectionTimeoutMS: SERVER_SELECTION_TIMEOUT_MS,
});
21+
822/**
923 * Connections to Hawk databases
1024 */
@@ -52,42 +66,117 @@ export const mongoClients: MongoClients = {
5266} ;
5367
5468/**
55- * Common params for all connections
69+ * Connects to the given URL, retrying with a fixed interval up to
70+ * MONGO_RECONNECT_TRIES times before giving up.
71+ *
72+ * @param name - logical name for logging
73+ * @param url - MongoDB connection string
74+ * @returns connected client
5675 */
async function connectWithRetry(name: string, url: string): Promise<MongoClient> {
  /**
   * Always make at least one attempt, and treat the limit as a whole number,
   * so a zero, negative or fractional setting cannot skip the loop or miss
   * the "last attempt" check and hide the real connection error.
   */
  const maxAttempts = Math.max(1, Math.floor(reconnectTries));
  let lastMessage = 'no connection attempt was made';

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    /** A fresh client per attempt: a client whose connect() failed is discarded */
    const client = new MongoClient(url, connectionConfig);

    try {
      await client.connect();
      console.log(`[Mongo:${name}] connected`);

      return client;
    } catch (err) {
      /** Best-effort cleanup of the failed client; a close error here is not actionable */
      await client.close().catch(() => undefined);

      lastMessage = (err as Error)?.message ?? String(err);
    }

    /** Only warn and wait when another attempt will follow */
    if (attempt < maxAttempts) {
      console.warn(`[Mongo:${name}] attempt ${attempt}/${maxAttempts} failed: ${lastMessage}`);
      await new Promise((resolve) => setTimeout(resolve, reconnectInterval));
    }
  }

  /** Every attempt failed: surface the most recent failure reason */
  throw new Error(`[Mongo:${name}] failed after ${maxAttempts} attempts: ${lastMessage}`);
}
100+
57101/**
58- * Common params for all connections
59- * Note: useNewUrlParser and useUnifiedTopology are deprecated in mongodb 6.x and removed
102+ * Logs and reports heartbeat failures / recoveries once per transition.
103+ *
104+ * @param name - logical name for logging
105+ * @param client - connected client to observe
60106 */
61- const connectionConfig : MongoClientOptions = withMongoMetrics ( { } ) ;
function watchConnection(name: string, client: MongoClient): void {
  /**
   * Addresses of servers whose last heartbeat failed.
   *
   * Heartbeat events are emitted per server, so health is tracked per
   * connectionId. With one shared flag, a healthy member of a multi-server
   * deployment would mark the outage as "recovered" and the still-failing
   * member would be reported again on every heartbeat.
   */
  const failingServers = new Set<string>();

  client.on('serverHeartbeatFailed', (event) => {
    const server = event.connectionId;

    /** Already reported for this server; stay quiet until it recovers */
    if (failingServers.has(server)) {
      return;
    }
    failingServers.add(server);

    const message = (event.failure as Error)?.message ?? 'heartbeat failed';

    console.error(`[Mongo:${name}] connection lost (${server}): ${message}`);
    HawkCatcher.send(new Error(`MongoDB ${name} connection lost (${server}): ${message}`));
  });

  client.on('serverHeartbeatSucceeded', (event) => {
    /** delete() returns false when this server was not marked as failing */
    if (!failingServers.delete(event.connectionId)) {
      return;
    }
    console.log(`[Mongo:${name}] connection recovered (${event.connectionId})`);
  });
}
62129
63130/**
64- * Setups connections to the databases (hawk api and events databases)
131+ * Connects to both databases with bounded retry. The driver auto-recovers
132+ * from transient failures on already-open clients, so retries here cover
133+ * the initial handshake only.
134+ *
135+ * @returns promise resolved when both clients are connected
65136 */
export async function setupConnections(): Promise<void> {
  try {
    /**
     * Wait for both attempts to finish instead of failing fast: with
     * Promise.all a client that connected successfully would be leaked
     * (never closed) when the other database exhausts its retries.
     */
    const settled = await Promise.allSettled([
      connectWithRetry('hawk', hawkDBUrl),
      connectWithRetry('events', eventsDBUrl),
    ]);
    const [hawkResult, eventsResult] = settled;

    if (hawkResult.status === 'rejected' || eventsResult.status === 'rejected') {
      /** Release whichever client did connect before reporting the failure */
      await Promise.allSettled(
        settled.map(async (result) => {
          if (result.status === 'fulfilled') {
            await result.value.close();
          }
        })
      );

      const failure = settled.find(
        (result): result is PromiseRejectedResult => result.status === 'rejected'
      );

      throw failure?.reason;
    }

    const hawkClient = hawkResult.value;
    const eventsClient = eventsResult.value;

    mongoClients.hawk = hawkClient;
    mongoClients.events = eventsClient;
    databases.hawk = hawkClient.db();
    databases.events = eventsClient.db();

    /**
     * Log and measure MongoDB metrics, then observe heartbeats for outage logs
     */
    setupMongoMetrics(hawkClient);
    setupMongoMetrics(eventsClient);
    watchConnection('hawk', hawkClient);
    watchConnection('events', eventsClient);
  } catch (e) {
    /** Catch start Mongo errors */
    HawkCatcher.send(e as Error);
    throw e;
  }
}
90162
/**
 * Closes both clients and clears the shared client/database references.
 * Call from SIGTERM/SIGINT for graceful shutdown.
 *
 * Closing is best-effort: a failure to close one client is logged and does
 * not prevent the other from being closed or the references from being reset.
 *
 * @returns promise resolved once both close attempts have settled
 */
export async function closeConnections(): Promise<void> {
  const labels = ['hawk', 'events'] as const;

  const results = await Promise.allSettled([
    mongoClients.hawk?.close(),
    mongoClients.events?.close(),
  ]);

  /** Report close failures instead of discarding them silently */
  results.forEach((result, index) => {
    if (result.status === 'rejected') {
      const reason: unknown = result.reason;
      const message = reason instanceof Error ? reason.message : String(reason);

      console.warn(`[Mongo:${labels[index]}] close failed: ${message}`);
    }
  });

  mongoClients.hawk = null;
  mongoClients.events = null;
  databases.hawk = null;
  databases.events = null;
}
179+
91180/**
92181 * Makes '_id' field optional on type
93182 */
0 commit comments