Skip to content

Commit 94c1b25

Browse files
committed
feat(executor): add TTFT tracking and reporting for enhanced performance metrics
- Introduced Time-To-First-Token (TTFT) measurement and reporting across major executors. - Added TTFT calculation to `UsageReporter`, including support for HTTP clients and WebSocket communication. - Updated tests to validate TTFT tracking in streamed and non-streamed scenarios. - Ensured integration with `usage` plugin and augmented usage records with TTFT data.
1 parent 11f0f90 commit 94c1b25

17 files changed

Lines changed: 353 additions & 0 deletions

internal/redisqueue/plugin.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ func (p *usageQueuePlugin) HandleUsage(ctx context.Context, record coreusage.Rec
7878
detail := requestDetail{
7979
Timestamp: timestamp,
8080
LatencyMs: record.Latency.Milliseconds(),
81+
TTFTMs: record.TTFT.Milliseconds(),
8182
Source: record.Source,
8283
AuthIndex: record.AuthIndex,
8384
Tokens: tokens,
@@ -118,6 +119,7 @@ type queuedUsageDetail struct {
118119
type requestDetail struct {
119120
Timestamp time.Time `json:"timestamp"`
120121
LatencyMs int64 `json:"latency_ms"`
122+
TTFTMs int64 `json:"ttft_ms"`
121123
Source string `json:"source"`
122124
AuthIndex string `json:"auth_index"`
123125
Tokens tokenStats `json:"tokens"`

internal/runtime/executor/aistudio_executor.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,13 +168,16 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
168168
AuthValue: authValue,
169169
})
170170

171+
reporter.StartResponseTTFT()
171172
wsResp, err := e.relay.NonStream(ctx, authID, wsReq)
172173
if err != nil {
173174
helps.RecordAPIResponseError(ctx, e.cfg, err)
174175
return resp, err
175176
}
176177
helps.RecordAPIResponseMetadata(ctx, e.cfg, wsResp.Status, wsResp.Headers.Clone())
178+
reporter.StartResponseTTFT()
177179
if len(wsResp.Body) > 0 {
180+
reporter.MarkFirstResponseByte()
178181
helps.AppendAPIResponseChunk(ctx, e.cfg, wsResp.Body)
179182
}
180183
if wsResp.Status < 200 || wsResp.Status >= 300 {
@@ -231,6 +234,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
231234
AuthType: authType,
232235
AuthValue: authValue,
233236
})
237+
reporter.StartResponseTTFT()
234238
wsStream, err := e.relay.Stream(ctx, authID, wsReq)
235239
if err != nil {
236240
helps.RecordAPIResponseError(ctx, e.cfg, err)
@@ -246,10 +250,12 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
246250
metadataLogged := false
247251
if firstEvent.Status > 0 {
248252
helps.RecordAPIResponseMetadata(ctx, e.cfg, firstEvent.Status, firstEvent.Headers.Clone())
253+
reporter.StartResponseTTFT()
249254
metadataLogged = true
250255
}
251256
var body bytes.Buffer
252257
if len(firstEvent.Payload) > 0 {
258+
reporter.MarkFirstResponseByte()
253259
helps.AppendAPIResponseChunk(ctx, e.cfg, firstEvent.Payload)
254260
body.Write(firstEvent.Payload)
255261
}
@@ -266,9 +272,11 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
266272
}
267273
if !metadataLogged && event.Status > 0 {
268274
helps.RecordAPIResponseMetadata(ctx, e.cfg, event.Status, event.Headers.Clone())
275+
reporter.StartResponseTTFT()
269276
metadataLogged = true
270277
}
271278
if len(event.Payload) > 0 {
279+
reporter.MarkFirstResponseByte()
272280
helps.AppendAPIResponseChunk(ctx, e.cfg, event.Payload)
273281
body.Write(event.Payload)
274282
}
@@ -297,10 +305,12 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
297305
case wsrelay.MessageTypeStreamStart:
298306
if !metadataLogged && event.Status > 0 {
299307
helps.RecordAPIResponseMetadata(ctx, e.cfg, event.Status, event.Headers.Clone())
308+
reporter.StartResponseTTFT()
300309
metadataLogged = true
301310
}
302311
case wsrelay.MessageTypeStreamChunk:
303312
if len(event.Payload) > 0 {
313+
reporter.MarkFirstResponseByte()
304314
helps.AppendAPIResponseChunk(ctx, e.cfg, event.Payload)
305315
filtered := helps.FilterSSEUsageMetadata(event.Payload)
306316
if detail, ok := helps.ParseGeminiStreamUsage(filtered); ok {
@@ -321,9 +331,11 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
321331
case wsrelay.MessageTypeHTTPResp:
322332
if !metadataLogged && event.Status > 0 {
323333
helps.RecordAPIResponseMetadata(ctx, e.cfg, event.Status, event.Headers.Clone())
334+
reporter.StartResponseTTFT()
324335
metadataLogged = true
325336
}
326337
if len(event.Payload) > 0 {
338+
reporter.MarkFirstResponseByte()
327339
helps.AppendAPIResponseChunk(ctx, e.cfg, event.Payload)
328340
}
329341
lines := sdktranslator.TranslateStream(ctx, body.toFormat, opts.SourceFormat, req.Model, opts.OriginalRequest, translatedReq, event.Payload, &param)
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
package executor
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"net/http"
7+
"net/http/httptest"
8+
"strings"
9+
"sync"
10+
"testing"
11+
"time"
12+
13+
"github.com/gorilla/websocket"
14+
"github.com/router-for-me/CLIProxyAPI/v7/internal/config"
15+
"github.com/router-for-me/CLIProxyAPI/v7/internal/wsrelay"
16+
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v7/sdk/cliproxy/auth"
17+
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v7/sdk/cliproxy/executor"
18+
"github.com/router-for-me/CLIProxyAPI/v7/sdk/cliproxy/usage"
19+
sdktranslator "github.com/router-for-me/CLIProxyAPI/v7/sdk/translator"
20+
)
21+
22+
func TestAIStudioExecutorExecuteStartsTTFTBeforeRelayWait(t *testing.T) {
23+
const authID = "aistudio-ttft-auth"
24+
delay := 40 * time.Millisecond
25+
connected := make(chan struct{})
26+
var connectedOnce sync.Once
27+
relay := wsrelay.NewManager(wsrelay.Options{
28+
ProviderFactory: func(*http.Request) (string, error) {
29+
return authID, nil
30+
},
31+
OnConnected: func(provider string) {
32+
if provider == authID {
33+
connectedOnce.Do(func() {
34+
close(connected)
35+
})
36+
}
37+
},
38+
})
39+
server := httptest.NewServer(relay.Handler())
40+
defer server.Close()
41+
defer func() {
42+
if errStop := relay.Stop(context.Background()); errStop != nil {
43+
t.Errorf("relay stop error = %v", errStop)
44+
}
45+
}()
46+
47+
wsURL := "ws" + strings.TrimPrefix(server.URL, "http") + relay.Path()
48+
conn, _, errDial := websocket.DefaultDialer.Dial(wsURL, nil)
49+
if errDial != nil {
50+
t.Fatalf("dial websocket: %v", errDial)
51+
}
52+
defer func() {
53+
if errClose := conn.Close(); errClose != nil {
54+
t.Errorf("websocket close error = %v", errClose)
55+
}
56+
}()
57+
select {
58+
case <-connected:
59+
case <-time.After(time.Second):
60+
t.Fatal("timed out waiting for relay connection")
61+
}
62+
63+
clientDone := make(chan error, 1)
64+
go func() {
65+
var msg wsrelay.Message
66+
if errReadJSON := conn.ReadJSON(&msg); errReadJSON != nil {
67+
clientDone <- fmt.Errorf("read relay request: %w", errReadJSON)
68+
return
69+
}
70+
if msg.Type != wsrelay.MessageTypeHTTPReq {
71+
clientDone <- fmt.Errorf("relay message type = %q, want %q", msg.Type, wsrelay.MessageTypeHTTPReq)
72+
return
73+
}
74+
time.Sleep(delay)
75+
response := wsrelay.Message{
76+
ID: msg.ID,
77+
Type: wsrelay.MessageTypeHTTPResp,
78+
Payload: map[string]any{
79+
"status": float64(http.StatusOK),
80+
"headers": map[string]any{"Content-Type": "application/json"},
81+
"body": `{"candidates":[{"content":{"role":"model","parts":[{"text":"ok"}]},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":1,"candidatesTokenCount":1,"totalTokenCount":2}}`,
82+
},
83+
}
84+
if errWriteJSON := conn.WriteJSON(response); errWriteJSON != nil {
85+
clientDone <- fmt.Errorf("write relay response: %w", errWriteJSON)
86+
return
87+
}
88+
clientDone <- nil
89+
}()
90+
91+
plugin := &captureAIStudioUsagePlugin{records: make(chan usage.Record, 16)}
92+
usage.RegisterPlugin(plugin)
93+
exec := NewAIStudioExecutor(&config.Config{}, "aistudio", relay)
94+
_, errExecute := exec.Execute(context.Background(), &cliproxyauth.Auth{ID: authID, Provider: "aistudio"}, cliproxyexecutor.Request{
95+
Model: "gemini-3.1-pro-preview",
96+
Payload: []byte(`{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}`),
97+
}, cliproxyexecutor.Options{SourceFormat: sdktranslator.FormatGemini})
98+
if errExecute != nil {
99+
t.Fatalf("Execute() error = %v", errExecute)
100+
}
101+
if errClient := <-clientDone; errClient != nil {
102+
t.Fatal(errClient)
103+
}
104+
105+
record := waitForAIStudioUsageRecord(t, plugin.records, "gemini-3.1-pro-preview")
106+
if record.TTFT < delay {
107+
t.Fatalf("ttft = %v, want >= %v", record.TTFT, delay)
108+
}
109+
}
110+
111+
type captureAIStudioUsagePlugin struct {
112+
records chan usage.Record
113+
}
114+
115+
func (p *captureAIStudioUsagePlugin) HandleUsage(_ context.Context, record usage.Record) {
116+
if p == nil {
117+
return
118+
}
119+
select {
120+
case p.records <- record:
121+
default:
122+
}
123+
}
124+
125+
func waitForAIStudioUsageRecord(t *testing.T, records <-chan usage.Record, model string) usage.Record {
126+
t.Helper()
127+
timeout := time.After(2 * time.Second)
128+
for {
129+
select {
130+
case record := <-records:
131+
if record.Provider == "aistudio" && record.Model == model {
132+
return record
133+
}
134+
case <-timeout:
135+
t.Fatalf("timed out waiting for AI Studio usage record")
136+
}
137+
}
138+
}

internal/runtime/executor/antigravity_executor.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -529,6 +529,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
529529

530530
baseURLs := antigravityBaseURLFallbackOrder(auth)
531531
httpClient := newAntigravityHTTPClient(ctx, e.cfg, auth, 0)
532+
httpClient = reporter.TrackHTTPClient(httpClient)
532533
attempts := antigravityRetryAttempts(auth, e.cfg)
533534

534535
attemptLoop:
@@ -728,6 +729,7 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
728729

729730
baseURLs := antigravityBaseURLFallbackOrder(auth)
730731
httpClient := newAntigravityHTTPClient(ctx, e.cfg, auth, 0)
732+
httpClient = reporter.TrackHTTPClient(httpClient)
731733

732734
attempts := antigravityRetryAttempts(auth, e.cfg)
733735

@@ -1190,6 +1192,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
11901192

11911193
baseURLs := antigravityBaseURLFallbackOrder(auth)
11921194
httpClient := newAntigravityHTTPClient(ctx, e.cfg, auth, 0)
1195+
httpClient = reporter.TrackHTTPClient(httpClient)
11931196

11941197
attempts := antigravityRetryAttempts(auth, e.cfg)
11951198

internal/runtime/executor/claude_executor.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,7 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
227227
})
228228

229229
httpClient := helps.NewUtlsHTTPClient(e.cfg, auth, 0)
230+
httpClient = reporter.TrackHTTPClient(httpClient)
230231
httpResp, err := httpClient.Do(httpReq)
231232
if err != nil {
232233
helps.RecordAPIResponseError(ctx, e.cfg, err)
@@ -402,6 +403,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
402403
})
403404

404405
httpClient := helps.NewUtlsHTTPClient(e.cfg, auth, 0)
406+
httpClient = reporter.TrackHTTPClient(httpClient)
405407
httpResp, err := httpClient.Do(httpReq)
406408
if err != nil {
407409
helps.RecordAPIResponseError(ctx, e.cfg, err)

internal/runtime/executor/codex_executor.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,7 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
311311
AuthValue: authValue,
312312
})
313313
httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
314+
httpClient = reporter.TrackHTTPClient(httpClient)
314315
httpResp, err := httpClient.Do(httpReq)
315316
if err != nil {
316317
helps.RecordAPIResponseError(ctx, e.cfg, err)
@@ -468,6 +469,7 @@ func (e *CodexExecutor) executeCompact(ctx context.Context, auth *cliproxyauth.A
468469
AuthValue: authValue,
469470
})
470471
httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
472+
httpClient = reporter.TrackHTTPClient(httpClient)
471473
httpResp, err := httpClient.Do(httpReq)
472474
if err != nil {
473475
helps.RecordAPIResponseError(ctx, e.cfg, err)
@@ -571,6 +573,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
571573
})
572574

573575
httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
576+
httpClient = reporter.TrackHTTPClient(httpClient)
574577
httpResp, err := httpClient.Do(httpReq)
575578
if err != nil {
576579
helps.RecordAPIResponseError(ctx, e.cfg, err)

internal/runtime/executor/codex_openai_images.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ func (e *CodexExecutor) executeOpenAIImage(ctx context.Context, auth *cliproxyau
107107
recordCodexOpenAIImageRequest(ctx, e.cfg, e.Identifier(), auth, url, httpReq.Header.Clone(), body)
108108

109109
httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
110+
httpClient = reporter.TrackHTTPClient(httpClient)
110111
httpResp, errDo := httpClient.Do(httpReq)
111112
if errDo != nil {
112113
helps.RecordAPIResponseError(ctx, e.cfg, errDo)
@@ -196,6 +197,7 @@ func (e *CodexExecutor) executeOpenAIImageStream(ctx context.Context, auth *clip
196197
recordCodexOpenAIImageRequest(ctx, e.cfg, e.Identifier(), auth, url, httpReq.Header.Clone(), body)
197198

198199
httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
200+
httpClient = reporter.TrackHTTPClient(httpClient)
199201
httpResp, errDo := httpClient.Do(httpReq)
200202
if errDo != nil {
201203
helps.RecordAPIResponseError(ctx, e.cfg, errDo)

internal/runtime/executor/codex_websockets_executor.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,7 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut
269269
return resp, errDial
270270
}
271271
recordAPIWebsocketHandshake(ctx, e.cfg, respHS)
272+
reporter.StartResponseTTFT()
272273
if sess == nil {
273274
logCodexWebsocketConnected(executionSessionID, authID, wsURL)
274275
defer func() {
@@ -312,6 +313,7 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut
312313
AuthValue: authValue,
313314
})
314315
recordAPIWebsocketHandshake(ctx, e.cfg, respHSRetry)
316+
reporter.StartResponseTTFT()
315317
if errSendRetry := writeCodexWebsocketMessage(sess, connRetry, wsReqBodyRetry); errSendRetry == nil {
316318
conn = connRetry
317319
wsReqBody = wsReqBodyRetry
@@ -356,6 +358,7 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut
356358
if len(payload) == 0 {
357359
continue
358360
}
361+
reporter.MarkFirstResponseByte()
359362
helps.AppendAPIWebsocketResponse(ctx, e.cfg, payload)
360363

361364
if wsErr, ok := parseCodexWebsocketError(payload); ok {
@@ -476,6 +479,7 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
476479
return nil, errDial
477480
}
478481
recordAPIWebsocketHandshake(ctx, e.cfg, respHS)
482+
reporter.StartResponseTTFT()
479483

480484
if sess == nil {
481485
logCodexWebsocketConnected(executionSessionID, authID, wsURL)
@@ -514,6 +518,7 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
514518
AuthValue: authValue,
515519
})
516520
recordAPIWebsocketHandshake(ctx, e.cfg, respHSRetry)
521+
reporter.StartResponseTTFT()
517522
if errSendRetry := writeCodexWebsocketMessage(sess, connRetry, wsReqBodyRetry); errSendRetry != nil {
518523
helps.RecordAPIWebsocketError(ctx, e.cfg, "send_retry", errSendRetry)
519524
e.invalidateUpstreamConn(sess, connRetry, "send_error", errSendRetry)
@@ -606,6 +611,7 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
606611
if len(payload) == 0 {
607612
continue
608613
}
614+
reporter.MarkFirstResponseByte()
609615
helps.AppendAPIWebsocketResponse(ctx, e.cfg, payload)
610616

611617
if wsErr, ok := parseCodexWebsocketError(payload); ok {

internal/runtime/executor/gemini_cli_executor.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
158158
}
159159

160160
httpClient := newHTTPClient(ctx, e.cfg, auth, 0)
161+
httpClient = reporter.TrackHTTPClient(httpClient)
161162
respCtx := context.WithValue(ctx, "alt", opts.Alt)
162163

163164
var authID, authLabel, authType, authValue string
@@ -310,6 +311,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
310311
}
311312

312313
httpClient := newHTTPClient(ctx, e.cfg, auth, 0)
314+
httpClient = reporter.TrackHTTPClient(httpClient)
313315
respCtx := context.WithValue(ctx, "alt", opts.Alt)
314316

315317
var authID, authLabel, authType, authValue string

internal/runtime/executor/gemini_executor.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
183183
})
184184

185185
httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
186+
httpClient = reporter.TrackHTTPClient(httpClient)
186187
httpResp, err := httpClient.Do(httpReq)
187188
if err != nil {
188189
helps.RecordAPIResponseError(ctx, e.cfg, err)
@@ -289,6 +290,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
289290
})
290291

291292
httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
293+
httpClient = reporter.TrackHTTPClient(httpClient)
292294
httpResp, err := httpClient.Do(httpReq)
293295
if err != nil {
294296
helps.RecordAPIResponseError(ctx, e.cfg, err)

0 commit comments

Comments
 (0)