Skip to content

Commit fbcd886

Browse files
authored
fix(application): stop backend processes synchronously on shutdown (#10058)
application.New wires a fire-and-forget goroutine that runs StopAllGRPC + distributed.Shutdown when the app context is cancelled. Callers (tests, CLI signal handler) cancel the context and then exit immediately, so the test binary / process can terminate before that goroutine kills the spawned backend children. go-processmanager sets no Pdeathsig, so the orphans are reparented to init and survive — leaving dozens of stray mock-backend processes after an e2e run.

Add Application.Shutdown(), which runs the same cleanup synchronously on the caller's stack and is idempotent via sync.Once. The context-cancel goroutine, the CLI signal handler, and the test suites all call it, so cleanup is deterministic and the duplicated teardown logic collapses to one place. The async goroutine remains as a safety net for callers that forget; sync.Once dedupes the double call.

Wire e2e_suite_test and the two mock-backend Contexts in app_test to call Shutdown in their AfterSuite/AfterEach.

Assisted-by: Claude:claude-opus-4-8 [Claude Code]
Signed-off-by: Richard Palethorpe <io@richiejp.com>
1 parent e1a782b commit fbcd886

5 files changed

Lines changed: 64 additions & 22 deletions

File tree

core/application/application.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@ type Application struct {
9090
// LocalAI Assistant in-process MCP server. nil when DisableLocalAIAssistant
9191
// is set; otherwise initialised in start() after galleryService.
9292
localAIAssistant *mcpTools.LocalAIAssistantHolder
93+
94+
shutdownOnce sync.Once
9395
}
9496

9597
func newApplication(appConfig *config.ApplicationConfig) *Application {
@@ -320,6 +322,24 @@ func (a *Application) IsDistributed() bool {
320322
return a.distributed != nil
321323
}
322324

325+
// Shutdown stops backend gRPC processes and distributed services
326+
// synchronously on the caller's stack. The context-cancel goroutine wired
327+
// in New does the same work asynchronously, which races test-binary exit
328+
// and CLI shutdown — orphaning spawned mock-backend / llama.cpp / etc.
329+
// children to init. Callers that need a guarantee that cleanup has
330+
// finished before they proceed (AfterSuite/AfterEach, signal handlers)
331+
// must call this. Safe to call multiple times.
332+
func (a *Application) Shutdown() error {
333+
var err error
334+
a.shutdownOnce.Do(func() {
335+
a.distributed.Shutdown()
336+
if a.modelLoader != nil {
337+
err = a.modelLoader.StopAllGRPC()
338+
}
339+
})
340+
return err
341+
}
342+
323343
// waitForHealthyWorker blocks until at least one healthy backend worker is registered.
324344
// This prevents the agent pool from failing during startup when workers haven't connected yet.
325345
func (a *Application) waitForHealthyWorker() {

core/application/startup.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -449,13 +449,15 @@ func New(opts ...config.AppOption) (*Application, error) {
449449

450450
application.ModelLoader().SetBackendLoggingEnabled(options.EnableBackendLogging)
451451

452-
// turn off any process that was started by GRPC if the context is canceled
452+
// Safety-net cleanup if the application context is cancelled without
453+
// the caller invoking Shutdown directly. This is fire-and-forget — it
454+
// races binary exit and is unreliable in tests; the deterministic path
455+
// is application.Shutdown(); its sync.Once dedupes direct calls against
456+
// this goroutine.
453457
go func() {
454458
<-options.Context.Done()
455459
xlog.Debug("Context canceled, shutting down")
456-
application.distributed.Shutdown()
457-
err := application.ModelLoader().StopAllGRPC()
458-
if err != nil {
460+
if err := application.Shutdown(); err != nil {
459461
xlog.Error("error while stopping all grpc backends", "error", err)
460462
}
461463
}()

core/cli/run.go

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -577,12 +577,8 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
577577
}
578578

579579
signals.RegisterGracefulTerminationHandler(func() {
580-
if err := app.ModelLoader().StopAllGRPC(); err != nil {
581-
xlog.Error("error while stopping all grpc backends", "error", err)
582-
}
583-
// Clean up distributed services (idempotent — safe if already called)
584-
if d := app.Distributed(); d != nil {
585-
d.Shutdown()
580+
if err := app.Shutdown(); err != nil {
581+
xlog.Error("error while shutting down application", "error", err)
586582
}
587583
})
588584

core/http/app_test.go

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,11 @@ var _ = Describe("API test", func() {
308308
var cancel context.CancelFunc
309309
var tmpdir string
310310
var modelDir string
311+
// localAIApp captures the Application so AfterEach can synchronously
312+
// stop the spawned gRPC backend processes. application.New stops
313+
// them asynchronously on context cancel, which races with test-binary
314+
// exit and leaks mock-backend children to init.
315+
var localAIApp *application.Application
311316

312317
commonOpts := []config.AppOption{
313318
config.WithDebug(true),
@@ -736,14 +741,14 @@ parameters:
736741
)
737742
Expect(err).ToNot(HaveOccurred())
738743

739-
application, err := application.New(
744+
localAIApp, err = application.New(
740745
append(commonOpts,
741746
config.WithContext(c),
742747
config.WithSystemState(systemState),
743748
)...)
744749
Expect(err).ToNot(HaveOccurred())
745-
application.ModelLoader().SetExternalBackend("mock-backend", mockBackendPath)
746-
app, err = API(application)
750+
localAIApp.ModelLoader().SetExternalBackend("mock-backend", mockBackendPath)
751+
app, err = API(localAIApp)
747752
Expect(err).ToNot(HaveOccurred())
748753
go func() {
749754
if err := app.Start("127.0.0.1:9090"); err != nil && err != http.ErrServerClosed {
@@ -765,6 +770,11 @@ parameters:
765770
}, "2m").ShouldNot(HaveOccurred())
766771
})
767772
AfterEach(func() {
773+
// Synchronous shutdown — context-cancel cleanup is async and races
774+
// test-binary exit, orphaning mock-backend children to init.
775+
if localAIApp != nil {
776+
_ = localAIApp.Shutdown()
777+
}
768778
cancel()
769779
if app != nil {
770780
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
@@ -976,15 +986,15 @@ parameters:
976986
)
977987
Expect(err).ToNot(HaveOccurred())
978988

979-
application, err := application.New(
989+
localAIApp, err = application.New(
980990
append(commonOpts,
981991
config.WithContext(c),
982992
config.WithSystemState(systemState),
983993
config.WithConfigFile(configFile))...,
984994
)
985995
Expect(err).ToNot(HaveOccurred())
986-
application.ModelLoader().SetExternalBackend("mock-backend", mockBackendPath)
987-
app, err = API(application)
996+
localAIApp.ModelLoader().SetExternalBackend("mock-backend", mockBackendPath)
997+
app, err = API(localAIApp)
988998
Expect(err).ToNot(HaveOccurred())
989999

9901000
go func() {
@@ -1005,6 +1015,11 @@ parameters:
10051015
}, "2m").ShouldNot(HaveOccurred())
10061016
})
10071017
AfterEach(func() {
1018+
// Synchronous shutdown — context-cancel cleanup is async and races
1019+
// test-binary exit, orphaning mock-backend children to init.
1020+
if localAIApp != nil {
1021+
_ = localAIApp.Shutdown()
1022+
}
10081023
cancel()
10091024
if app != nil {
10101025
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)

tests/e2e/e2e_suite_test.go

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import (
1010
"time"
1111

1212
"github.com/labstack/echo/v4"
13-
"github.com/mudler/LocalAI/core/application"
13+
localaiapp "github.com/mudler/LocalAI/core/application"
1414
"github.com/mudler/LocalAI/core/config"
1515
httpapi "github.com/mudler/LocalAI/core/http"
1616
"github.com/mudler/LocalAI/pkg/system"
@@ -41,6 +41,7 @@ var (
4141
cloudProxyPath string
4242
mcpServerURL string
4343
mcpServerShutdown func()
44+
localAIApp *localaiapp.Application
4445

4546
// Cloud-proxy fake upstreams. Live for the whole suite so the four
4647
// cloud-proxy model YAMLs can point at their URLs at startup time.
@@ -390,7 +391,7 @@ var _ = BeforeSuite(func() {
390391
// Create application instance (GeneratedContentDir so sound-generation/TTS can write files the handler sends)
391392
generatedDir := filepath.Join(tmpDir, "generated")
392393
Expect(os.MkdirAll(generatedDir, 0750)).To(Succeed())
393-
application, err := application.New(
394+
localAIApp, err = localaiapp.New(
394395
config.WithContext(appCtx),
395396
config.WithSystemState(systemState),
396397
config.WithDebug(true),
@@ -399,14 +400,14 @@ var _ = BeforeSuite(func() {
399400
Expect(err).ToNot(HaveOccurred())
400401

401402
// Register mock backend (always available for non-realtime tests).
402-
application.ModelLoader().SetExternalBackend("mock-backend", mockBackendPath)
403-
application.ModelLoader().SetExternalBackend("opus", mockBackendPath)
403+
localAIApp.ModelLoader().SetExternalBackend("mock-backend", mockBackendPath)
404+
localAIApp.ModelLoader().SetExternalBackend("opus", mockBackendPath)
404405
if cloudProxyPath != "" {
405-
application.ModelLoader().SetExternalBackend("cloud-proxy", cloudProxyPath)
406+
localAIApp.ModelLoader().SetExternalBackend("cloud-proxy", cloudProxyPath)
406407
}
407408

408409
// Create HTTP app
409-
app, err = httpapi.API(application)
410+
app, err = httpapi.API(localAIApp)
410411
Expect(err).ToNot(HaveOccurred())
411412

412413
// Get free port
@@ -436,6 +437,14 @@ var _ = BeforeSuite(func() {
436437
})
437438

438439
var _ = AfterSuite(func() {
440+
// Synchronous shutdown — the context-cancel goroutine in application.New
441+
// runs the same cleanup asynchronously, which races test-binary exit and
442+
// orphans spawned mock-backend children to init.
443+
if localAIApp != nil {
444+
if err := localAIApp.Shutdown(); err != nil {
445+
xlog.Error("error shutting down application", "error", err)
446+
}
447+
}
439448
if appCancel != nil {
440449
appCancel()
441450
}

0 commit comments

Comments
 (0)