Skip to content

Commit 3a4bfcf

Browse files
committed
feat(usage): expose first-byte latency in request stats
1 parent 67b4b74 commit 3a4bfcf

File tree

6 files changed

+170
-47
lines changed

6 files changed

+170
-47
lines changed

internal/runtime/executor/helps/logging_helpers.go

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -101,10 +101,13 @@ func RecordAPIRequest(ctx context.Context, cfg *config.Config, info UpstreamRequ
101101

102102
// RecordAPIResponseMetadata captures upstream response status/header information for the latest attempt.
103103
func RecordAPIResponseMetadata(ctx context.Context, cfg *config.Config, status int, headers http.Header) {
104+
ginCtx := ginContextFrom(ctx)
105+
if ginCtx != nil && !isStreamingResponseHeaders(headers) {
106+
markAPIResponseTimestamp(ginCtx)
107+
}
104108
if cfg == nil || !cfg.RequestLog {
105109
return
106110
}
107-
ginCtx := ginContextFrom(ctx)
108111
if ginCtx == nil {
109112
return
110113
}
@@ -125,7 +128,13 @@ func RecordAPIResponseMetadata(ctx context.Context, cfg *config.Config, status i
125128
updateAggregatedResponse(ginCtx, attempts)
126129
}
127130

128-
// RecordAPIResponseError adds an error entry for the latest attempt when no HTTP response is available.
131+
func isStreamingResponseHeaders(headers http.Header) bool {
132+
if len(headers) == 0 {
133+
return false
134+
}
135+
return strings.Contains(strings.ToLower(strings.TrimSpace(headers.Get("Content-Type"))), "text/event-stream")
136+
}
137+
129138
func RecordAPIResponseError(ctx context.Context, cfg *config.Config, err error) {
130139
if cfg == nil || !cfg.RequestLog || err == nil {
131140
return
@@ -152,14 +161,17 @@ func RecordAPIResponseError(ctx context.Context, cfg *config.Config, err error)
152161

153162
// AppendAPIResponseChunk appends an upstream response chunk to Gin context for request logging.
154163
func AppendAPIResponseChunk(ctx context.Context, cfg *config.Config, chunk []byte) {
155-
if cfg == nil || !cfg.RequestLog {
156-
return
157-
}
158164
data := bytes.TrimSpace(chunk)
159165
if len(data) == 0 {
160166
return
161167
}
162168
ginCtx := ginContextFrom(ctx)
169+
if ginCtx != nil {
170+
markAPIResponseTimestamp(ginCtx)
171+
}
172+
if cfg == nil || !cfg.RequestLog {
173+
return
174+
}
163175
if ginCtx == nil {
164176
return
165177
}
@@ -283,18 +295,20 @@ func WebsocketUpgradeRequestURL(rawURL string) string {
283295

284296
// AppendAPIWebsocketResponse stores an upstream websocket response frame in Gin context.
285297
func AppendAPIWebsocketResponse(ctx context.Context, cfg *config.Config, payload []byte) {
286-
if cfg == nil || !cfg.RequestLog {
287-
return
288-
}
289298
data := bytes.TrimSpace(payload)
290299
if len(data) == 0 {
291300
return
292301
}
293302
ginCtx := ginContextFrom(ctx)
303+
if ginCtx != nil {
304+
markAPIResponseTimestamp(ginCtx)
305+
}
306+
if cfg == nil || !cfg.RequestLog {
307+
return
308+
}
294309
if ginCtx == nil {
295310
return
296311
}
297-
markAPIResponseTimestamp(ginCtx)
298312

299313
builder := &strings.Builder{}
300314
builder.WriteString(fmt.Sprintf("Timestamp: %s\n", time.Now().Format(time.RFC3339Nano)))
@@ -307,14 +321,16 @@ func AppendAPIWebsocketResponse(ctx context.Context, cfg *config.Config, payload
307321

308322
// RecordAPIWebsocketError stores an upstream websocket error event in Gin context.
309323
func RecordAPIWebsocketError(ctx context.Context, cfg *config.Config, stage string, err error) {
324+
ginCtx := ginContextFrom(ctx)
325+
if ginCtx != nil {
326+
markAPIResponseTimestamp(ginCtx)
327+
}
310328
if cfg == nil || !cfg.RequestLog || err == nil {
311329
return
312330
}
313-
ginCtx := ginContextFrom(ctx)
314331
if ginCtx == nil {
315332
return
316333
}
317-
markAPIResponseTimestamp(ginCtx)
318334

319335
builder := &strings.Builder{}
320336
builder.WriteString(fmt.Sprintf("Timestamp: %s\n", time.Now().Format(time.RFC3339Nano)))
@@ -328,6 +344,9 @@ func RecordAPIWebsocketError(ctx context.Context, cfg *config.Config, stage stri
328344
}
329345

330346
func ginContextFrom(ctx context.Context) *gin.Context {
347+
if ctx == nil {
348+
return nil
349+
}
331350
ginCtx, _ := ctx.Value("gin").(*gin.Context)
332351
return ginCtx
333352
}

internal/runtime/executor/helps/usage_helpers.go

Lines changed: 48 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ func (r *UsageReporter) publishWithOutcome(ctx context.Context, detail usage.Det
7070
}
7171
}
7272
r.once.Do(func() {
73-
usage.PublishRecord(ctx, r.buildRecord(detail, failed))
73+
usage.PublishRecord(ctx, r.buildRecord(ctx, detail, failed))
7474
})
7575
}
7676

@@ -83,25 +83,26 @@ func (r *UsageReporter) EnsurePublished(ctx context.Context) {
8383
return
8484
}
8585
r.once.Do(func() {
86-
usage.PublishRecord(ctx, r.buildRecord(usage.Detail{}, false))
86+
usage.PublishRecord(ctx, r.buildRecord(ctx, usage.Detail{}, false))
8787
})
8888
}
8989

90-
func (r *UsageReporter) buildRecord(detail usage.Detail, failed bool) usage.Record {
90+
func (r *UsageReporter) buildRecord(ctx context.Context, detail usage.Detail, failed bool) usage.Record {
9191
if r == nil {
9292
return usage.Record{Detail: detail, Failed: failed}
9393
}
9494
return usage.Record{
95-
Provider: r.provider,
96-
Model: r.model,
97-
Source: r.source,
98-
APIKey: r.apiKey,
99-
AuthID: r.authID,
100-
AuthIndex: r.authIndex,
101-
RequestedAt: r.requestedAt,
102-
Latency: r.latency(),
103-
Failed: failed,
104-
Detail: detail,
95+
Provider: r.provider,
96+
Model: r.model,
97+
Source: r.source,
98+
APIKey: r.apiKey,
99+
AuthID: r.authID,
100+
AuthIndex: r.authIndex,
101+
RequestedAt: r.requestedAt,
102+
Latency: r.latency(),
103+
FirstByteLatency: r.firstByteLatency(ctx),
104+
Failed: failed,
105+
Detail: detail,
105106
}
106107
}
107108

@@ -116,6 +117,40 @@ func (r *UsageReporter) latency() time.Duration {
116117
return latency
117118
}
118119

120+
func (r *UsageReporter) firstByteLatency(ctx context.Context) time.Duration {
121+
if r == nil || r.requestedAt.IsZero() {
122+
return 0
123+
}
124+
responseTimestamp := apiResponseTimestampFromContext(ctx)
125+
if responseTimestamp.IsZero() {
126+
return 0
127+
}
128+
latency := responseTimestamp.Sub(r.requestedAt)
129+
if latency <= 0 {
130+
return 0
131+
}
132+
return latency
133+
}
134+
135+
func apiResponseTimestampFromContext(ctx context.Context) time.Time {
136+
if ctx == nil {
137+
return time.Time{}
138+
}
139+
ginCtx, ok := ctx.Value("gin").(*gin.Context)
140+
if !ok || ginCtx == nil {
141+
return time.Time{}
142+
}
143+
value, exists := ginCtx.Get("API_RESPONSE_TIMESTAMP")
144+
if !exists {
145+
return time.Time{}
146+
}
147+
timestamp, ok := value.(time.Time)
148+
if !ok {
149+
return time.Time{}
150+
}
151+
return timestamp
152+
}
153+
119154
func APIKeyFromContext(ctx context.Context) string {
120155
if ctx == nil {
121156
return ""

internal/runtime/executor/helps/usage_helpers_test.go

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
package helps
22

33
import (
4+
"context"
5+
"net/http/httptest"
46
"testing"
57
"time"
68

9+
"github.com/gin-gonic/gin"
710
"github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/usage"
811
)
912

@@ -54,11 +57,36 @@ func TestUsageReporterBuildRecordIncludesLatency(t *testing.T) {
5457
requestedAt: time.Now().Add(-1500 * time.Millisecond),
5558
}
5659

57-
record := reporter.buildRecord(usage.Detail{TotalTokens: 3}, false)
60+
record := reporter.buildRecord(context.Background(), usage.Detail{TotalTokens: 3}, false)
5861
if record.Latency < time.Second {
5962
t.Fatalf("latency = %v, want >= 1s", record.Latency)
6063
}
6164
if record.Latency > 3*time.Second {
6265
t.Fatalf("latency = %v, want <= 3s", record.Latency)
6366
}
67+
if record.FirstByteLatency != 0 {
68+
t.Fatalf("first byte latency = %v, want 0", record.FirstByteLatency)
69+
}
70+
}
71+
72+
func TestUsageReporterBuildRecordIncludesFirstByteLatency(t *testing.T) {
73+
gin.SetMode(gin.TestMode)
74+
75+
recorder := httptest.NewRecorder()
76+
ginCtx, _ := gin.CreateTestContext(recorder)
77+
requestedAt := time.Now().Add(-1500 * time.Millisecond)
78+
firstByteAt := requestedAt.Add(250 * time.Millisecond)
79+
ginCtx.Set("API_RESPONSE_TIMESTAMP", firstByteAt)
80+
81+
ctx := context.WithValue(context.Background(), "gin", ginCtx)
82+
reporter := &UsageReporter{
83+
provider: "openai",
84+
model: "gpt-5.4",
85+
requestedAt: requestedAt,
86+
}
87+
88+
record := reporter.buildRecord(ctx, usage.Detail{TotalTokens: 3}, false)
89+
if record.FirstByteLatency != 250*time.Millisecond {
90+
t.Fatalf("first byte latency = %v, want 250ms", record.FirstByteLatency)
91+
}
6492
}

internal/usage/logger_plugin.go

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -89,12 +89,13 @@ type modelStats struct {
8989

9090
// RequestDetail stores the timestamp, latency, and token usage for a single request.
9191
type RequestDetail struct {
92-
Timestamp time.Time `json:"timestamp"`
93-
LatencyMs int64 `json:"latency_ms"`
94-
Source string `json:"source"`
95-
AuthIndex string `json:"auth_index"`
96-
Tokens TokenStats `json:"tokens"`
97-
Failed bool `json:"failed"`
92+
Timestamp time.Time `json:"timestamp"`
93+
LatencyMs int64 `json:"latency_ms"`
94+
FirstByteLatencyMs *int64 `json:"first_byte_latency_ms,omitempty"`
95+
Source string `json:"source"`
96+
AuthIndex string `json:"auth_index"`
97+
Tokens TokenStats `json:"tokens"`
98+
Failed bool `json:"failed"`
9899
}
99100

100101
// TokenStats captures the token usage breakdown for a request.
@@ -198,12 +199,13 @@ func (s *RequestStatistics) Record(ctx context.Context, record coreusage.Record)
198199
s.apis[statsKey] = stats
199200
}
200201
s.updateAPIStats(stats, modelName, RequestDetail{
201-
Timestamp: timestamp,
202-
LatencyMs: normaliseLatency(record.Latency),
203-
Source: record.Source,
204-
AuthIndex: record.AuthIndex,
205-
Tokens: detail,
206-
Failed: failed,
202+
Timestamp: timestamp,
203+
LatencyMs: normaliseLatency(record.Latency),
204+
FirstByteLatencyMs: normaliseOptionalLatency(record.FirstByteLatency),
205+
Source: record.Source,
206+
AuthIndex: record.AuthIndex,
207+
Tokens: detail,
208+
Failed: failed,
207209
})
208210

209211
s.requestsByDay[dayKey]++
@@ -475,6 +477,14 @@ func normaliseLatency(latency time.Duration) int64 {
475477
return latency.Milliseconds()
476478
}
477479

480+
func normaliseOptionalLatency(latency time.Duration) *int64 {
481+
if latency <= 0 {
482+
return nil
483+
}
484+
value := latency.Milliseconds()
485+
return &value
486+
}
487+
478488
func formatHour(hour int) string {
479489
if hour < 0 {
480490
hour = 0

internal/usage/logger_plugin_test.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,36 @@ func TestRequestStatisticsRecordIncludesLatency(t *testing.T) {
3030
if details[0].LatencyMs != 1500 {
3131
t.Fatalf("latency_ms = %d, want 1500", details[0].LatencyMs)
3232
}
33+
if details[0].FirstByteLatencyMs != nil {
34+
t.Fatalf("first_byte_latency_ms = %v, want nil", *details[0].FirstByteLatencyMs)
35+
}
36+
}
37+
38+
func TestRequestStatisticsRecordIncludesFirstByteLatency(t *testing.T) {
39+
stats := NewRequestStatistics()
40+
stats.Record(context.Background(), coreusage.Record{
41+
APIKey: "test-key",
42+
Model: "gpt-5.4",
43+
RequestedAt: time.Date(2026, 3, 20, 12, 0, 0, 0, time.UTC),
44+
FirstByteLatency: 250 * time.Millisecond,
45+
Detail: coreusage.Detail{
46+
InputTokens: 10,
47+
OutputTokens: 20,
48+
TotalTokens: 30,
49+
},
50+
})
51+
52+
snapshot := stats.Snapshot()
53+
details := snapshot.APIs["test-key"].Models["gpt-5.4"].Details
54+
if len(details) != 1 {
55+
t.Fatalf("details len = %d, want 1", len(details))
56+
}
57+
if details[0].FirstByteLatencyMs == nil {
58+
t.Fatal("first_byte_latency_ms = nil, want value")
59+
}
60+
if *details[0].FirstByteLatencyMs != 250 {
61+
t.Fatalf("first_byte_latency_ms = %d, want 250", *details[0].FirstByteLatencyMs)
62+
}
3363
}
3464

3565
func TestRequestStatisticsMergeSnapshotDedupIgnoresLatency(t *testing.T) {

sdk/cliproxy/usage/manager.go

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,17 @@ import (
1010

1111
// Record contains the usage statistics captured for a single provider request.
1212
type Record struct {
13-
Provider string
14-
Model string
15-
APIKey string
16-
AuthID string
17-
AuthIndex string
18-
Source string
19-
RequestedAt time.Time
20-
Latency time.Duration
21-
Failed bool
22-
Detail Detail
13+
Provider string
14+
Model string
15+
APIKey string
16+
AuthID string
17+
AuthIndex string
18+
Source string
19+
RequestedAt time.Time
20+
Latency time.Duration
21+
FirstByteLatency time.Duration
22+
Failed bool
23+
Detail Detail
2324
}
2425

2526
// Detail holds the token usage breakdown.

0 commit comments

Comments
 (0)