@@ -5,6 +5,20 @@ import { setupMongoMetrics, withMongoMetrics } from './metrics';
const hawkDBUrl = process.env.MONGO_HAWK_DB_URL || 'mongodb://localhost:27017/hawk';
const eventsDBUrl = process.env.MONGO_EVENTS_DB_URL || 'mongodb://localhost:27017/events';

/**
 * Parses a numeric setting, accepting only finite values greater than zero.
 * Anything else (unset, empty, non-numeric, zero, negative, Infinity) yields the fallback.
 *
 * @param raw - raw value, typically read from an environment variable
 * @param fallback - value used when raw is not a finite positive number
 * @returns the parsed number or the fallback
 */
function positiveNumberOr(raw: string | undefined, fallback: number): number {
  const parsed = Number(raw);

  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
}

/**
 * Maximum number of connection attempts. Floored to a whole number and kept
 * at 1 or more, so a bad MONGO_RECONNECT_TRIES value can never produce a
 * retry loop that runs zero times.
 */
const reconnectTries = Math.max(1, Math.floor(positiveNumberOr(process.env.MONGO_RECONNECT_TRIES, 60)));

/**
 * Delay between connection attempts, in milliseconds.
 */
const reconnectInterval = positiveNumberOr(process.env.MONGO_RECONNECT_INTERVAL, 1000);
10+
/**
 * Options shared by every MongoDB client created in this module,
 * passed through withMongoMetrics before use.
 */
const connectionConfig: MongoClientOptions = withMongoMetrics({
  /** Upper bound (ms) on how long an operation waits for an available server */
  serverSelectionTimeoutMS: 10_000,
  /** Socket timeout (ms) */
  socketTimeoutMS: 45_000,
  /** Ask the driver to retry writes */
  retryWrites: true,
  /** Ask the driver to retry reads */
  retryReads: true,
});
21+
822/**
923 * Connections to Hawk databases
1024 */
@@ -52,40 +66,124 @@ export const mongoClients: MongoClients = {
5266} ;
5367
/**
 * Opens a client for the given URL. A failed attempt is retried after a fixed
 * pause of reconnectInterval ms, for at most reconnectTries attempts in total.
 * Every attempt uses a fresh client; a client that failed to connect is closed
 * (best effort) before the next attempt.
 *
 * @param name - logical name for logging
 * @param url - MongoDB connection string
 * @returns connected client
 */
async function connectWithRetry(name: string, url: string): Promise<MongoClient> {
  /** Resolves after the configured pause between attempts */
  const pause = (): Promise<void> =>
    new Promise((resolve) => setTimeout(resolve, reconnectInterval));

  let lastError = 'unknown error';

  for (let attempt = 1; attempt <= reconnectTries; attempt++) {
    const candidate = new MongoClient(url, connectionConfig);

    try {
      await candidate.connect();
      console.log(`[Mongo:${name}] connected`);

      return candidate;
    } catch (err) {
      /** Errors from closing a half-open client are deliberately ignored */
      await candidate.close().catch(() => undefined);

      lastError = (err as Error)?.message ?? String(err);
      console.warn(`[Mongo:${name}] attempt ${attempt}/${reconnectTries} failed: ${lastError}`);

      /** No point in waiting after the final attempt */
      const hasAttemptsLeft = attempt < reconnectTries;

      if (hasAttemptsLeft) {
        await pause();
      }
    }
  }

  throw new Error(`[Mongo:${name}] failed after ${reconnectTries} attempts: ${lastError}`);
}
101+
/**
 * Logs and reports heartbeat failures / recoveries once per transition,
 * tracked separately for every monitored server.
 *
 * Health is keyed by the heartbeat event's connectionId. With a single shared
 * flag, one unreachable member next to a healthy one would flip the state on
 * every heartbeat and flood the logs and Hawk with duplicate reports.
 *
 * @param name - logical name for logging
 * @param client - connected client to observe
 */
function watchConnection(name: string, client: MongoClient): void {
  /** Servers whose most recent heartbeat failed */
  const unhealthyServers = new Set<string>();

  client.on('serverHeartbeatFailed', (event) => {
    const server = event.connectionId;

    /** Already reported for this server; wait for it to recover first */
    if (unhealthyServers.has(server)) {
      return;
    }
    unhealthyServers.add(server);

    const message = (event.failure as Error)?.message ?? 'heartbeat failed';

    console.error(`[Mongo:${name}] connection lost (${server}): ${message}`);
    HawkCatcher.send(new Error(`MongoDB ${name} connection lost (${server}): ${message}`));
  });

  client.on('serverHeartbeatSucceeded', (event) => {
    const server = event.connectionId;

    /** delete() returns false when the server was not marked unhealthy */
    if (!unhealthyServers.delete(server)) {
      return;
    }
    console.log(`[Mongo:${name}] connection recovered (${server})`);
  });

  client.on('serverClosed', (event) => {
    /** Forget servers that are no longer monitored so stale entries don't pile up */
    unhealthyServers.delete(event.address);
  });
}
62130
/**
 * Opens the hawk and events clients in parallel, each with bounded retry,
 * then publishes them through mongoClients / databases and attaches metrics
 * and heartbeat observers.
 *
 * If either connection fails, any client that did open is closed, the first
 * failure (hawk before events) is sent to HawkCatcher and then rethrown.
 *
 * @returns promise resolved when both clients are connected
 */
export async function setupConnections(): Promise<void> {
  const outcomes = await Promise.allSettled([
    connectWithRetry('hawk', hawkDBUrl),
    connectWithRetry('events', eventsDBUrl),
  ]);
  const [hawkOutcome, eventsOutcome] = outcomes;

  if (hawkOutcome.status === 'rejected' || eventsOutcome.status === 'rejected') {
    /** Close any clients that did connect so we don't leak sockets */
    const closing: Array<Promise<void>> = [];

    for (const outcome of outcomes) {
      closing.push(outcome.status === 'fulfilled' ? outcome.value.close() : Promise.resolve());
    }
    await Promise.allSettled(closing);

    /** Report the first failure in declaration order */
    const rejected = hawkOutcome.status === 'rejected' ? hawkOutcome : eventsOutcome;
    const reason: unknown = (rejected as PromiseRejectedResult).reason;

    HawkCatcher.send(reason as Error);
    throw reason;
  }

  const hawkClient = hawkOutcome.value;
  const eventsClient = eventsOutcome.value;

  mongoClients.hawk = hawkClient;
  mongoClients.events = eventsClient;
  databases.hawk = hawkClient.db();
  databases.events = eventsClient.db();

  /**
   * Log and measure MongoDB metrics, then observe heartbeats for outage logs
   */
  setupMongoMetrics(hawkClient);
  setupMongoMetrics(eventsClient);
  watchConnection('hawk', hawkClient);
  watchConnection('events', eventsClient);
}
171+
/**
 * Closes the hawk and events clients (if present) and resets the shared
 * mongoClients / databases references to null. Errors raised while closing
 * are not propagated. Call from SIGTERM/SIGINT for graceful shutdown.
 *
 * @returns promise resolved once both close attempts have settled
 */
export async function closeConnections(): Promise<void> {
  const { hawk, events } = mongoClients;

  /** allSettled: one client failing to close must not prevent closing the other */
  await Promise.allSettled([hawk?.close(), events?.close()]);

  mongoClients.hawk = null;
  mongoClients.events = null;
  databases.hawk = null;
  databases.events = null;
}
90188
91189/**
0 commit comments