Skip to content

Commit 3904f23

Browse files
committed
OpenAIRecorder: Remove records on model eviction/termination
Signed-off-by: Dorin Geman <dorin.geman@docker.com>
1 parent be8f3e6 commit 3904f23

4 files changed

Lines changed: 39 additions & 13 deletions

File tree

pkg/inference/scheduling/loader.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"github.com/docker/model-runner/pkg/inference"
1414
"github.com/docker/model-runner/pkg/inference/models"
1515
"github.com/docker/model-runner/pkg/logging"
16+
"github.com/docker/model-runner/pkg/metrics"
1617
)
1718

1819
const (
@@ -92,13 +93,16 @@ type loader struct {
9293
timestamps []time.Time
9394
// runnerConfigs maps model names to runner configurations
9495
runnerConfigs map[runnerKey]inference.BackendConfiguration
96+
// openAIRecorder is used to record OpenAI API inference requests and responses.
97+
openAIRecorder *metrics.OpenAIRecorder
9598
}
9699

97100
// newLoader creates a new loader.
98101
func newLoader(
99102
log logging.Logger,
100103
backends map[string]inference.Backend,
101104
modelManager *models.Manager,
105+
openAIRecorder *metrics.OpenAIRecorder,
102106
) *loader {
103107
// Compute the number of runner slots to allocate. Because of RAM and VRAM
104108
// limitations, it's unlikely that we'll ever be able to fully populate
@@ -153,6 +157,7 @@ func newLoader(
153157
allocations: make([]uint64, nSlots),
154158
timestamps: make([]time.Time, nSlots),
155159
runnerConfigs: make(map[runnerKey]inference.BackendConfiguration),
160+
openAIRecorder: openAIRecorder,
156161
}
157162
l.guard <- struct{}{}
158163
return l
@@ -462,7 +467,7 @@ func (l *loader) load(ctx context.Context, backendName, model string, mode infer
462467
}
463468
// Create the runner.
464469
l.log.Infof("Loading %s backend runner with model %s in %s mode", backendName, model, mode)
465-
runner, err := run(l.log, backend, model, mode, slot, runnerConfig)
470+
runner, err := run(l.log, backend, model, mode, slot, runnerConfig, l.openAIRecorder)
466471
if err != nil {
467472
l.log.Warnf("Unable to start %s backend runner with model %s in %s mode: %v",
468473
backendName, model, mode, err,

pkg/inference/scheduling/runner.go

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515

1616
"github.com/docker/model-runner/pkg/inference"
1717
"github.com/docker/model-runner/pkg/logging"
18+
"github.com/docker/model-runner/pkg/metrics"
1819
)
1920

2021
const (
@@ -63,6 +64,8 @@ type runner struct {
6364
proxy *httputil.ReverseProxy
6465
// proxyLog is the stream used for logging by proxy.
6566
proxyLog io.Closer
67+
// openAIRecorder is used to record OpenAI API inference requests and responses.
68+
openAIRecorder *metrics.OpenAIRecorder
6669
// err is the error returned by the runner's backend, only valid after done is closed.
6770
err error
6871
}
@@ -75,6 +78,7 @@ func run(
7578
mode inference.BackendMode,
7679
slot int,
7780
runnerConfig *inference.BackendConfiguration,
81+
openAIRecorder *metrics.OpenAIRecorder,
7882
) (*runner, error) {
7983
// Create a dialer / transport that target backend on the specified slot.
8084
socket, err := RunnerSocketPath(slot)
@@ -124,16 +128,17 @@ func run(
124128
runDone := make(chan struct{})
125129

126130
r := &runner{
127-
log: log,
128-
backend: backend,
129-
model: model,
130-
mode: mode,
131-
cancel: runCancel,
132-
done: runDone,
133-
transport: transport,
134-
client: client,
135-
proxy: proxy,
136-
proxyLog: proxyLog,
131+
log: log,
132+
backend: backend,
133+
model: model,
134+
mode: mode,
135+
cancel: runCancel,
136+
done: runDone,
137+
transport: transport,
138+
client: client,
139+
proxy: proxy,
140+
proxyLog: proxyLog,
141+
openAIRecorder: openAIRecorder,
137142
}
138143

139144
proxy.ErrorHandler = func(w http.ResponseWriter, req *http.Request, err error) {
@@ -236,6 +241,8 @@ func (r *runner) terminate() {
236241
if err := r.proxyLog.Close(); err != nil {
237242
r.log.Warnf("Unable to close reverse proxy log writer: %v", err)
238243
}
244+
245+
r.openAIRecorder.RemoveModel(r.model)
239246
}
240247

241248
// ServeHTTP implements net/http.Handler.ServeHTTP. It forwards requests to the

pkg/inference/scheduling/scheduler.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,17 +56,19 @@ func NewScheduler(
5656
allowedOrigins []string,
5757
tracker *metrics.Tracker,
5858
) *Scheduler {
59+
openAIRecorder := metrics.NewOpenAIRecorder(log.WithField("component", "openai-recorder"))
60+
5961
// Create the scheduler.
6062
s := &Scheduler{
6163
log: log,
6264
backends: backends,
6365
defaultBackend: defaultBackend,
6466
modelManager: modelManager,
6567
installer: newInstaller(log, backends, httpClient),
66-
loader: newLoader(log, backends, modelManager),
68+
loader: newLoader(log, backends, modelManager, openAIRecorder),
6769
router: http.NewServeMux(),
6870
tracker: tracker,
69-
openAIRecorder: metrics.NewOpenAIRecorder(log.WithField("component", "openai-recorder")),
71+
openAIRecorder: openAIRecorder,
7072
}
7173

7274
// Register routes.

pkg/metrics/openai_recorder.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,3 +226,15 @@ func (r *OpenAIRecorder) GetRecordsByModel(model string) []*RequestResponsePair
226226

227227
return nil
228228
}
229+
230+
func (r *OpenAIRecorder) RemoveModel(model string) {
231+
r.m.Lock()
232+
defer r.m.Unlock()
233+
234+
if _, exists := r.records[model]; exists {
235+
delete(r.records, model)
236+
r.log.Infof("Removed records for model: %s", model)
237+
} else {
238+
r.log.Warnf("No records found for model: %s", model)
239+
}
240+
}

0 commit comments

Comments
 (0)