Skip to content

Commit 90a2442

Browse files
committed
fix(usage): reset first-byte latency per upstream attempt
Track first-byte timestamps per upstream attempt so retries do not reuse stale response times and drop successful latency metrics.
1 parent be3aa8a commit 90a2442

File tree

3 files changed

+74
-7
lines changed

3 files changed

+74
-7
lines changed

internal/runtime/executor/helps/logging_helpers.go

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,11 @@ import (
2020
)
2121

2222
const (
23-
apiAttemptsKey = "API_UPSTREAM_ATTEMPTS"
24-
apiRequestKey = "API_REQUEST"
25-
apiResponseKey = "API_RESPONSE"
26-
apiWebsocketTimelineKey = "API_WEBSOCKET_TIMELINE"
23+
apiAttemptsKey = "API_UPSTREAM_ATTEMPTS"
24+
apiRequestKey = "API_REQUEST"
25+
apiResponseKey = "API_RESPONSE"
26+
apiWebsocketTimelineKey = "API_WEBSOCKET_TIMELINE"
27+
apiAttemptResponseTimestampKey = "API_ATTEMPT_RESPONSE_TIMESTAMP"
2728
)
2829

2930
// UpstreamRequestLog captures the outbound upstream request details for logging.
@@ -54,10 +55,13 @@ type upstreamAttempt struct {
5455

5556
// RecordAPIRequest stores the upstream request metadata in Gin context for request logging.
5657
func RecordAPIRequest(ctx context.Context, cfg *config.Config, info UpstreamRequestLog) {
58+
ginCtx := ginContextFrom(ctx)
59+
if ginCtx != nil {
60+
ginCtx.Set(apiAttemptResponseTimestampKey, time.Time{})
61+
}
5762
if cfg == nil || !cfg.RequestLog {
5863
return
5964
}
60-
ginCtx := ginContextFrom(ctx)
6165
if ginCtx == nil {
6266
return
6367
}
@@ -206,10 +210,13 @@ func AppendAPIResponseChunk(ctx context.Context, cfg *config.Config, chunk []byt
206210

207211
// RecordAPIWebsocketRequest stores an upstream websocket request event in Gin context.
208212
func RecordAPIWebsocketRequest(ctx context.Context, cfg *config.Config, info UpstreamRequestLog) {
213+
ginCtx := ginContextFrom(ctx)
214+
if ginCtx != nil {
215+
ginCtx.Set(apiAttemptResponseTimestampKey, time.Time{})
216+
}
209217
if cfg == nil || !cfg.RequestLog {
210218
return
211219
}
212-
ginCtx := ginContextFrom(ctx)
213220
if ginCtx == nil {
214221
return
215222
}
@@ -451,10 +458,16 @@ func markAPIResponseTimestamp(ginCtx *gin.Context) {
451458
if ginCtx == nil {
452459
return
453460
}
461+
now := time.Now()
462+
if value, exists := ginCtx.Get(apiAttemptResponseTimestampKey); !exists {
463+
ginCtx.Set(apiAttemptResponseTimestampKey, now)
464+
} else if timestamp, ok := value.(time.Time); !ok || timestamp.IsZero() {
465+
ginCtx.Set(apiAttemptResponseTimestampKey, now)
466+
}
454467
if _, exists := ginCtx.Get("API_RESPONSE_TIMESTAMP"); exists {
455468
return
456469
}
457-
ginCtx.Set("API_RESPONSE_TIMESTAMP", time.Now())
470+
ginCtx.Set("API_RESPONSE_TIMESTAMP", now)
458471
}
459472

460473
func writeHeaders(builder *strings.Builder, headers http.Header) {

internal/runtime/executor/helps/usage_helpers.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,13 @@ func apiResponseTimestampFromContext(ctx context.Context) time.Time {
140140
if !ok || ginCtx == nil {
141141
return time.Time{}
142142
}
143+
if value, exists := ginCtx.Get(apiAttemptResponseTimestampKey); exists {
144+
timestamp, ok := value.(time.Time)
145+
if !ok {
146+
return time.Time{}
147+
}
148+
return timestamp
149+
}
143150
value, exists := ginCtx.Get("API_RESPONSE_TIMESTAMP")
144151
if !exists {
145152
return time.Time{}

internal/runtime/executor/helps/usage_helpers_test.go

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,3 +90,50 @@ func TestUsageReporterBuildRecordIncludesFirstByteLatency(t *testing.T) {
9090
t.Fatalf("first byte latency = %v, want 250ms", record.FirstByteLatency)
9191
}
9292
}
93+
94+
func TestUsageReporterBuildRecordPrefersAttemptFirstByteLatency(t *testing.T) {
95+
gin.SetMode(gin.TestMode)
96+
97+
recorder := httptest.NewRecorder()
98+
ginCtx, _ := gin.CreateTestContext(recorder)
99+
requestedAt := time.Now().Add(-1500 * time.Millisecond)
100+
staleFirstByteAt := requestedAt.Add(-200 * time.Millisecond)
101+
attemptFirstByteAt := requestedAt.Add(250 * time.Millisecond)
102+
ginCtx.Set("API_RESPONSE_TIMESTAMP", staleFirstByteAt)
103+
ginCtx.Set(apiAttemptResponseTimestampKey, attemptFirstByteAt)
104+
105+
ctx := context.WithValue(context.Background(), "gin", ginCtx)
106+
reporter := &UsageReporter{
107+
provider: "openai",
108+
model: "gpt-5.4",
109+
requestedAt: requestedAt,
110+
}
111+
112+
record := reporter.buildRecord(ctx, usage.Detail{TotalTokens: 3}, false)
113+
if record.FirstByteLatency != 250*time.Millisecond {
114+
t.Fatalf("first byte latency = %v, want 250ms", record.FirstByteLatency)
115+
}
116+
}
117+
118+
func TestUsageReporterBuildRecordDoesNotFallbackToGlobalTimestampAfterAttemptReset(t *testing.T) {
119+
gin.SetMode(gin.TestMode)
120+
121+
recorder := httptest.NewRecorder()
122+
ginCtx, _ := gin.CreateTestContext(recorder)
123+
requestedAt := time.Now().Add(-1500 * time.Millisecond)
124+
staleFirstByteAt := requestedAt.Add(-200 * time.Millisecond)
125+
ginCtx.Set("API_RESPONSE_TIMESTAMP", staleFirstByteAt)
126+
ginCtx.Set(apiAttemptResponseTimestampKey, time.Time{})
127+
128+
ctx := context.WithValue(context.Background(), "gin", ginCtx)
129+
reporter := &UsageReporter{
130+
provider: "openai",
131+
model: "gpt-5.4",
132+
requestedAt: requestedAt,
133+
}
134+
135+
record := reporter.buildRecord(ctx, usage.Detail{TotalTokens: 3}, false)
136+
if record.FirstByteLatency != 0 {
137+
t.Fatalf("first byte latency = %v, want 0", record.FirstByteLatency)
138+
}
139+
}

0 commit comments

Comments
 (0)