@@ -25,6 +25,12 @@ export class Orchestrator {
2525 private cfg : OrchestratorConfig
2626 private log : typeof logger
2727
28+ // Mutex-style guard: ensures only one startAll pipeline is in flight at a
29+ // time. Re-entrant callers (e.g. an auto-resume after a successful
30+ // restartService while the initial startAll hasn't returned yet) await the
31+ // existing promise instead of racing a second pipeline.
32+ private startInProgress : Promise < void > | null = null
33+
2834 private proxyDownloadState : DownloadItem = {
2935 name : 'Proxy Router' ,
3036 status : 'pending' ,
@@ -63,7 +69,20 @@ export class Orchestrator {
6369 } )
6470 }
6571
66- async startAll ( ) {
/**
 * Public entry point for bringing every service up.
 *
 * At most one startup pipeline runs at a time: if a run is already in
 * flight, the caller is handed that run's promise instead of starting a
 * second one. Whoever launched the run clears the in-flight marker once it
 * settles, whether it resolved or rejected, so a later call can start a
 * fresh pipeline.
 *
 * @returns a promise that settles when the (shared) pipeline run settles;
 *          a pipeline rejection propagates to every awaiting caller.
 */
async startAll(): Promise<void> {
  const inFlight = this.startInProgress
  if (inFlight) {
    this.log.info('startAll already in progress; awaiting existing run')
    return inFlight
  }

  // Record the run before awaiting it so callers arriving while it is
  // pending see it and piggy-back on it.
  const run = this.runStartupPipeline()
  this.startInProgress = run
  try {
    await run
  } finally {
    this.startInProgress = null
  }
}
84+
85+ private async runStartupPipeline ( ) {
6786 this . log . info ( 'Orchestrator started' )
6887 await this . resetState ( )
6988 this . emitStateUpdate ( )
@@ -86,7 +105,7 @@ export class Orchestrator {
86105 this . proxyDownloadState . status = 'success'
87106 this . emitStateUpdate ( )
88107
89- if ( this . cfg . aiRuntime . downloadUrl ) {
108+ if ( this . cfg . aiRuntime . downloadUrl && this . cfg . aiRuntime . extractPath ) {
90109 if ( fs . existsSync ( resolveAppDataPath ( this . cfg . aiRuntime . extractPath ) ) ) {
91110 this . log . info (
92111 'AI runtime already exists, skipping download' ,
@@ -146,6 +165,7 @@ export class Orchestrator {
146165
147166 if (
148167 this . cfg . ipfs . downloadUrl &&
168+ this . cfg . ipfs . extractPath &&
149169 ! fs . existsSync ( resolveAppDataPath ( this . cfg . ipfs . extractPath ) )
150170 ) {
151171 await downloadFile (
@@ -225,7 +245,7 @@ export class Orchestrator {
225245 const proxyFolder = path . dirname ( resolveAppDataPath ( this . cfg . proxyRouter . runPath ) )
226246
227247 // writing local config files if they do not exist
228- await this . writeEnvFile ( path . join ( proxyFolder , '.env' ) , this . cfg . proxyRouter . env )
248+ await this . writeEnvFile ( path . join ( proxyFolder , '.env' ) , this . cfg . proxyRouter . env ?? { } )
229249 await this . writeLocalConfigFile (
230250 path . join ( proxyFolder , 'models-config.json' ) ,
231251 this . cfg . proxyRouter . modelsConfig
@@ -286,8 +306,39 @@ export class Orchestrator {
286306 await process . stop ( )
287307 this . emitStateUpdate ( )
288308
289- await process . start ( )
290- this . emitStateUpdate ( )
309+ try {
310+ await process . start ( )
311+ } finally {
312+ this . emitStateUpdate ( )
313+ }
314+
315+ // If the original startAll pipeline aborted before reaching downstream
316+ // services (e.g. IPFS failed, so containerRuntime + proxyRouter were
317+ // never started), resume the pipeline now that this service is healthy.
318+ // startAll is idempotent: downloads check fs.exists, and each process's
319+ // start() short-circuits when already running.
320+ if ( process . getState ( ) === 'running' && ! this . allServicesRunning ( ) ) {
321+ this . log . info (
322+ `Service ${ service } restarted; resuming startup pipeline for any downstream services still pending`
323+ )
324+ try {
325+ await this . startAll ( )
326+ } catch ( err ) {
327+ this . log . error ( 'Resume after restart failed' , err )
328+ // Don't rethrow — the explicit restart did succeed; downstream
329+ // failures are surfaced via the per-service state.
330+ }
331+ }
332+ }
333+
/**
 * Reports whether every managed service process is currently running.
 *
 * A process handle that has not been created (undefined/null) counts as
 * "not running", so the result is true only when all four handles exist
 * and each reports the `'running'` state.
 */
private allServicesRunning(): boolean {
  const processes = [
    this.ipfsProcess,
    this.aiRuntimeProcess,
    this.containerRuntimeProcess,
    this.proxyRouterProcess
  ]
  for (const proc of processes) {
    if (proc?.getState() !== 'running') {
      return false
    }
  }
  return true
}
292343
293344 async ping ( service : keyof OrchestratorConfig ) : Promise < boolean > {
@@ -435,21 +486,29 @@ export class Orchestrator {
435486 }
436487
/**
 * Prepares process and download state for a (re-)run of the startup
 * pipeline.
 *
 * Processes that already report `'running'` are left untouched so that a
 * resumed pipeline does not disturb healthy services; every other existing
 * process is reset, one at a time, in the order proxy router, AI runtime,
 * IPFS, container runtime.
 *
 * Download trackers always have their error cleared (it belongs to the
 * previous attempt). Their status goes back to `'pending'` unless it is
 * already `'success'`, so completed downloads stay marked as completed.
 */
private async resetState() {
  if (this.proxyRouterProcess?.getState() !== 'running') {
    await this.proxyRouterProcess?.reset()
  }
  if (this.aiRuntimeProcess?.getState() !== 'running') {
    await this.aiRuntimeProcess?.reset()
  }
  if (this.ipfsProcess?.getState() !== 'running') {
    await this.ipfsProcess?.reset()
  }
  if (this.containerRuntimeProcess?.getState() !== 'running') {
    await this.containerRuntimeProcess?.reset()
  }

  const downloads = [
    this.proxyDownloadState,
    this.aiRuntimeDownloadState,
    this.aiModelDownloadState,
    this.ipfsDownloadState
  ]
  downloads.forEach((item) => {
    item.error = undefined
    if (item.status !== 'success') {
      item.status = 'pending'
    }
  })
}
454513}
455514
0 commit comments