Skip to content

Commit 6f304d1

Browse files
authored
chore(refactor): use interface (#9226)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
1 parent 557d0f0 commit 6f304d1

6 files changed

Lines changed: 24 additions & 14 deletions

File tree

core/cli/worker.go

Lines changed: 5 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -512,11 +512,9 @@ func (s *backendSupervisor) stopBackend(backend string) {
512512

513513
// Network I/O outside the lock
514514
client := grpc.NewClientWithToken(bp.addr, false, nil, false, s.cmd.RegistrationToken)
515-
if freeFunc, ok := client.(interface{ Free(context.Context) error }); ok {
516-
xlog.Debug("Calling Free() before stopping backend", "backend", backend)
517-
if err := freeFunc.Free(context.Background()); err != nil {
518-
xlog.Warn("Free() failed (best-effort)", "backend", backend, "error", err)
519-
}
515+
xlog.Debug("Calling Free() before stopping backend", "backend", backend)
516+
if err := client.Free(context.Background()); err != nil {
517+
xlog.Warn("Free() failed (best-effort)", "backend", backend, "error", err)
520518
}
521519

522520
xlog.Info("Stopping backend process", "backend", backend, "addr", bp.addr)
@@ -774,10 +772,8 @@ func (s *backendSupervisor) subscribeLifecycleEvents() {
774772
if targetAddr != "" {
775773
// Best-effort gRPC Free()
776774
client := grpc.NewClientWithToken(targetAddr, false, nil, false, s.cmd.RegistrationToken)
777-
if freeFunc, ok := client.(interface{ Free(context.Context) error }); ok {
778-
if err := freeFunc.Free(context.Background()); err != nil {
779-
xlog.Warn("Free() failed during model.unload", "error", err, "addr", targetAddr)
780-
}
775+
if err := client.Free(context.Background()); err != nil {
776+
xlog.Warn("Free() failed during model.unload", "error", err, "addr", targetAddr)
781777
}
782778
}
783779

core/services/nodes/health_mock_test.go

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -231,6 +231,9 @@ func (c *fakeBackendClient) QuantizationProgress(_ context.Context, _ *pb.Quanti
231231
func (c *fakeBackendClient) StopQuantization(_ context.Context, _ *pb.QuantizationStopRequest, _ ...ggrpc.CallOption) (*pb.Result, error) {
232232
return nil, nil
233233
}
234+
func (c *fakeBackendClient) Free(_ context.Context) error {
235+
return nil
236+
}
234237

235238
// --- fakeBackendClientFactory ---
236239

core/services/nodes/inflight_test.go

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -175,6 +175,10 @@ func (f *fakeGRPCBackend) StopQuantization(_ context.Context, _ *pb.Quantization
175175
return &pb.Result{}, nil
176176
}
177177

178+
func (f *fakeGRPCBackend) Free(_ context.Context) error {
179+
return nil
180+
}
181+
178182
// --- Tests ---
179183

180184
var _ = Describe("InFlightTrackingClient", func() {

pkg/grpc/backend.go

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -85,4 +85,7 @@ type Backend interface {
8585
StartQuantization(ctx context.Context, in *pb.QuantizationRequest, opts ...grpc.CallOption) (*pb.QuantizationJobResult, error)
8686
QuantizationProgress(ctx context.Context, in *pb.QuantizationProgressRequest, f func(update *pb.QuantizationProgressUpdate), opts ...grpc.CallOption) error
8787
StopQuantization(ctx context.Context, in *pb.QuantizationStopRequest, opts ...grpc.CallOption) (*pb.Result, error)
88+
89+
// Free releases GPU/model resources (e.g. VRAM) without stopping the process.
90+
Free(ctx context.Context) error
8891
}

pkg/grpc/embed.go

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -163,6 +163,11 @@ func (e *embedBackend) StopQuantization(ctx context.Context, in *pb.Quantization
163163
return e.s.StopQuantization(ctx, in)
164164
}
165165

166+
func (e *embedBackend) Free(ctx context.Context) error {
167+
_, err := e.s.Free(ctx, &pb.HealthMessage{})
168+
return err
169+
}
170+
166171
var _ pb.Backend_FineTuneProgressServer = new(embedBackendFineTuneProgressStream)
167172

168173
type embedBackendFineTuneProgressStream struct {

pkg/model/process.go

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
package model
22

33
import (
4+
"context"
45
"errors"
56
"fmt"
67
"os"
@@ -52,11 +53,9 @@ func (ml *ModelLoader) deleteProcess(s string) error {
5253
}
5354

5455
// Free GPU resources before stopping the process to ensure VRAM is released
55-
if freeFunc, ok := model.GRPC(false, ml.wd).(interface{ Free() error }); ok {
56-
xlog.Debug("Calling Free() to release GPU resources", "model", s)
57-
if err := freeFunc.Free(); err != nil {
58-
xlog.Warn("Error freeing GPU resources", "error", err, "model", s)
59-
}
56+
xlog.Debug("Calling Free() to release GPU resources", "model", s)
57+
if err := model.GRPC(false, ml.wd).Free(context.Background()); err != nil {
58+
xlog.Warn("Error freeing GPU resources", "error", err, "model", s)
6059
}
6160

6261
process := model.Process()

0 commit comments

Comments
 (0)