@@ -13,6 +13,7 @@ import (
1313 "github.com/docker/model-runner/pkg/inference"
1414 "github.com/docker/model-runner/pkg/inference/models"
1515 "github.com/docker/model-runner/pkg/logging"
16+ "github.com/docker/model-runner/pkg/metrics"
1617)
1718
1819const (
@@ -92,13 +93,16 @@ type loader struct {
9293 timestamps []time.Time
9394 // runnerConfigs maps runner keys to backend configurations
9495 runnerConfigs map [runnerKey ]inference.BackendConfiguration
96+ // openAIRecorder is used to record OpenAI API inference requests and responses.
97+ openAIRecorder * metrics.OpenAIRecorder
9598}
9699
97100// newLoader creates a new loader.
98101func newLoader (
99102 log logging.Logger ,
100103 backends map [string ]inference.Backend ,
101104 modelManager * models.Manager ,
105+ openAIRecorder * metrics.OpenAIRecorder ,
102106) * loader {
103107 // Compute the number of runner slots to allocate. Because of RAM and VRAM
104108 // limitations, it's unlikely that we'll ever be able to fully populate
@@ -153,6 +157,7 @@ func newLoader(
153157 allocations : make ([]uint64 , nSlots ),
154158 timestamps : make ([]time.Time , nSlots ),
155159 runnerConfigs : make (map [runnerKey ]inference.BackendConfiguration ),
160+ openAIRecorder : openAIRecorder ,
156161 }
157162 l .guard <- struct {}{}
158163 return l
@@ -462,7 +467,7 @@ func (l *loader) load(ctx context.Context, backendName, model string, mode infer
462467 }
463468 // Create the runner.
464469 l .log .Infof ("Loading %s backend runner with model %s in %s mode" , backendName , model , mode )
465- runner , err := run (l .log , backend , model , mode , slot , runnerConfig )
470+ runner , err := run (l .log , backend , model , mode , slot , runnerConfig , l . openAIRecorder )
466471 if err != nil {
467472 l .log .Warnf ("Unable to start %s backend runner with model %s in %s mode: %v" ,
468473 backendName , model , mode , err ,
0 commit comments