@@ -238,14 +238,6 @@ func (s *Scheduler) handleOpenAIInference(w http.ResponseWriter, r *http.Request
238238 s .tracker .TrackModel (model )
239239 }
240240
241- // Record the request in the OpenAI recorder.
242- recordID := s .openAIRecorder .RecordRequest (request .Model , r , body )
243- w = s .openAIRecorder .NewResponseRecorder (w )
244- defer func () {
245- // Record the response in the OpenAI recorder.
246- s .openAIRecorder .RecordResponse (recordID , request .Model , w )
247- }()
248-
249241 // Request a runner to execute the request and defer its release.
250242 runner , err := s .loader .load (r .Context (), backend .Name (), request .Model , backendMode )
251243 if err != nil {
@@ -254,6 +246,14 @@ func (s *Scheduler) handleOpenAIInference(w http.ResponseWriter, r *http.Request
254246 }
255247 defer s .loader .release (runner )
256248
249+ // Record the request in the OpenAI recorder.
250+ recordID := s .openAIRecorder .RecordRequest (request .Model , r , body )
251+ w = s .openAIRecorder .NewResponseRecorder (w )
252+ defer func () {
253+ // Record the response in the OpenAI recorder.
254+ s .openAIRecorder .RecordResponse (recordID , request .Model , w )
255+ }()
256+
257257 // Create a request with the body replaced for forwarding upstream.
258258 upstreamRequest := r .Clone (r .Context ())
259259 upstreamRequest .Body = io .NopCloser (bytes .NewReader (body ))
0 commit comments