@@ -72,8 +72,16 @@ export class MeshStore implements CommsStore {
7272 private peerInfo = new Map < string , PeerInfo > ( ) ;
7373 private staleCheckTimer : ReturnType < typeof setInterval > | undefined ;
7474 private isShutDown = false ;
75+ private initialised = false ;
7576 private pendingMarkReadTimers : ReturnType < typeof setTimeout > [ ] = [ ] ;
7677
78+ /** Whether the transport is the coordinator itself or reports a coordinator connection. */
79+ get connected ( ) : boolean {
80+ return (
81+ this . transport . isCoordinator || this . transport . hasCoordinatorConnection
82+ ) ;
83+ }
84+
7785 // -- Pending inbound connections awaiting approval --
7886 private pendingInboundConnections = new Map <
7987 string ,
@@ -146,6 +154,8 @@ export class MeshStore implements CommsStore {
146154 // -----------------------------------------------------------------------
147155
148156 async init ( ) : Promise < void > {
157+ if ( this . initialised ) return ;
158+ this . initialised = true ;
149159 await this . transport . startDataServer ( ) ;
150160
151161 // Register our own peer info
@@ -156,57 +166,49 @@ export class MeshStore implements CommsStore {
156166 } ) ;
157167
158168 // Try joining an existing mesh; fall back to becoming coordinator.
159- // If becomeCoordinator fails with EADDRINUSE (another process won the race),
160- // retry connecting — the new coordinator should be ready by now.
161- const MAX_RETRIES = 3 ;
162- const RETRY_DELAY_MS = 200 ;
169+ //
170+ // Single attempt: connect to an existing coordinator, or become one.
171+ // If the coordinator port is occupied but unresponsive (e.g. an orphan
172+ // process from a previous session), degrade gracefully instead of
173+ // retrying. Retrying tls.connect after a failed handshake to a
174+ // non-TLS endpoint can freeze the event loop (Node.js TLS session
175+ // cache bug), so we only try once.
163176 let connected = false ;
164- let lastError : Error | undefined ;
165177
166- for ( let attempt = 0 ; attempt < MAX_RETRIES ; attempt ++ ) {
178+ try {
179+ await this . transport . connectToCoordinator (
180+ COORDINATOR_HOST ,
181+ this . coordinatorPort ,
182+ this . peerId ,
183+ this . transport . dataPort ,
184+ ) ;
185+ connected = true ;
186+ } catch {
167187 try {
168- await this . transport . connectToCoordinator (
188+ await this . transport . becomeCoordinator (
169189 COORDINATOR_HOST ,
170190 this . coordinatorPort ,
171- this . peerId ,
172- this . transport . dataPort ,
173191 ) ;
192+ this . startStaleCheck ( ) ;
174193 connected = true ;
175- break ;
176- } catch ( err ) {
177- lastError = err instanceof Error ? err : new Error ( String ( err ) ) ;
178- // Only try to become coordinator on the first attempt
179- if ( attempt === 0 ) {
180- try {
181- await this . transport . becomeCoordinator (
182- COORDINATOR_HOST ,
183- this . coordinatorPort ,
184- ) ;
185- this . startStaleCheck ( ) ;
186- connected = true ;
187- break ;
188- } catch ( coordErr ) {
189- const msg =
190- coordErr instanceof Error ? coordErr . message : String ( coordErr ) ;
191- if ( ! msg . includes ( "EADDRINUSE" ) ) {
192- throw coordErr ;
193- }
194- // EADDRINUSE — another process became coordinator. Retry connect.
195- }
196- }
197- // Wait before retrying
198- if ( attempt < MAX_RETRIES - 1 ) {
199- await new Promise < void > ( ( resolve ) =>
200- setTimeout ( resolve , RETRY_DELAY_MS ) ,
201- ) ;
194+ } catch ( coordErr ) {
195+ const msg =
196+ coordErr instanceof Error ? coordErr . message : String ( coordErr ) ;
197+ if ( ! msg . includes ( "EADDRINUSE" ) ) {
198+ throw coordErr ;
202199 }
200+ // EADDRINUSE — port held by an unresponsive process. Degrade.
203201 }
204202 }
205203
206204 if ( ! connected ) {
207- throw new Error (
208- `Failed to join or create mesh on port ${ String ( this . coordinatorPort ) } : ${ lastError ?. message ?? "unknown error" } ` ,
205+ this . events . onError ?.(
206+ new Error (
207+ `MeshStore: could not join or create mesh on port ${ String ( this . coordinatorPort ) } . ` +
208+ "Running without mesh — agent-comms will be unavailable." ,
209+ ) ,
209210 ) ;
211+ return ;
210212 }
211213
212214 this . transport . unref ( ) ;
0 commit comments