Skip to content

Commit 71c185f

Browse files
committed
feat(usage): add service tier tracking and defaults in usage reporting
- Introduced `service_tier` metadata key to capture client-requested service tiers.
- Updated usage records, context propagation, and plugins to include service tier data.
- Added default handling logic for cases where `service_tier` is absent.
- Implemented tests for `service_tier` extraction, defaults, and updates across components.
1 parent 65e760a commit 71c185f

10 files changed

Lines changed: 180 additions & 7 deletions

File tree

internal/redisqueue/plugin.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@ func (p *usageQueuePlugin) HandleUsage(ctx context.Context, record coreusage.Rec
5252
if reasoningEffort == "" {
5353
reasoningEffort = coreusage.ReasoningEffortFromContext(ctx)
5454
}
55+
serviceTier := strings.TrimSpace(record.ServiceTier)
56+
if serviceTier == "" {
57+
serviceTier = coreusage.ServiceTierFromContext(ctx)
58+
}
5559

5660
tokens := tokenStats{
5761
InputTokens: record.Detail.InputTokens,
@@ -97,6 +101,7 @@ func (p *usageQueuePlugin) HandleUsage(ctx context.Context, record coreusage.Rec
97101
APIKey: apiKey,
98102
RequestID: requestID,
99103
ReasoningEffort: reasoningEffort,
104+
ServiceTier: serviceTier,
100105
})
101106
if err != nil {
102107
return
@@ -114,6 +119,7 @@ type queuedUsageDetail struct {
114119
APIKey string `json:"api_key"`
115120
RequestID string `json:"request_id"`
116121
ReasoningEffort string `json:"reasoning_effort"`
122+
ServiceTier string `json:"service_tier"`
117123
}
118124

119125
type requestDetail struct {

internal/redisqueue/plugin_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ func TestUsageQueuePluginPayloadIncludesStableFieldsAndSuccess(t *testing.T) {
3333
AuthType: "apikey",
3434
Source: "user@example.com",
3535
ReasoningEffort: "medium",
36+
ServiceTier: "priority",
3637
RequestedAt: time.Date(2026, 4, 25, 0, 0, 0, 0, time.UTC),
3738
Latency: 1500 * time.Millisecond,
3839
Detail: coreusage.Detail{
@@ -53,6 +54,7 @@ func TestUsageQueuePluginPayloadIncludesStableFieldsAndSuccess(t *testing.T) {
5354
requireMissingField(t, payload, "user_api_key")
5455
requireStringField(t, payload, "request_id", "ctx-request-id")
5556
requireStringField(t, payload, "reasoning_effort", "medium")
57+
requireStringField(t, payload, "service_tier", "priority")
5658
requireHeaderField(t, payload, "response_headers", "X-Upstream-Request-Id", []string{"upstream-req-1"})
5759
requireHeaderField(t, payload, "response_headers", "Retry-After", []string{"30"})
5860
requireBoolField(t, payload, "failed", false)

internal/runtime/executor/helps/usage_helpers.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ type UsageReporter struct {
3030
apiKey string
3131
source string
3232
reasoning string
33+
serviceTier string
3334
requestedAt time.Time
3435
ttftMu sync.RWMutex
3536
ttft time.Duration
@@ -53,6 +54,7 @@ func NewUsageReporter(ctx context.Context, provider, model string, auth *cliprox
5354
source: resolveUsageSource(auth, apiKey),
5455
authType: resolveUsageAuthType(auth),
5556
reasoning: usage.ReasoningEffortFromContext(ctx),
57+
serviceTier: usage.ServiceTierFromContext(ctx),
5658
}
5759
if auth != nil {
5860
reporter.authID = auth.ID
@@ -78,6 +80,7 @@ func (r *UsageReporter) SetTranslatedReasoningEffort(payload []byte, format stri
7880
return
7981
}
8082
r.reasoning = thinking.ExtractTranslatedReasoningEffort(payload, format)
83+
r.serviceTier = extractServiceTierFromPayload(payload)
8184
}
8285

8386
func (r *UsageReporter) TrackHTTPClient(client *http.Client) *http.Client {
@@ -239,6 +242,7 @@ func (r *UsageReporter) buildRecordForModel(model string, detail usage.Detail, f
239242
AuthIndex: r.authIndex,
240243
AuthType: r.authType,
241244
ReasoningEffort: r.reasoning,
245+
ServiceTier: r.serviceTier,
242246
RequestedAt: r.requestedAt,
243247
Latency: r.latency(),
244248
TTFT: r.ttftDuration(),
@@ -248,6 +252,19 @@ func (r *UsageReporter) buildRecordForModel(model string, detail usage.Detail, f
248252
}
249253
}
250254

255+
func extractServiceTierFromPayload(payload []byte) string {
256+
if len(payload) == 0 {
257+
return usage.DefaultServiceTier
258+
}
259+
for _, path := range []string{"service_tier", "request.service_tier", "response.service_tier"} {
260+
serviceTier := strings.TrimSpace(gjson.GetBytes(payload, path).String())
261+
if serviceTier != "" {
262+
return serviceTier
263+
}
264+
}
265+
return usage.DefaultServiceTier
266+
}
267+
251268
func failFromErrors(errs ...error) usage.Failure {
252269
for _, err := range errs {
253270
if err == nil {

internal/runtime/executor/helps/usage_helpers_test.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,39 @@ func TestUsageReporterBuildRecordIncludesReasoningEffort(t *testing.T) {
241241
}
242242
}
243243

244+
func TestUsageReporterBuildRecordIncludesServiceTier(t *testing.T) {
245+
ctx := usage.WithServiceTier(context.Background(), "priority")
246+
reporter := NewUsageReporter(ctx, "openai", "gpt-5.4", nil)
247+
248+
record := reporter.buildRecord(usage.Detail{TotalTokens: 3}, false)
249+
if record.ServiceTier != "priority" {
250+
t.Fatalf("service tier = %q, want %q", record.ServiceTier, "priority")
251+
}
252+
}
253+
254+
func TestUsageReporterSetTranslatedReasoningEffortUpdatesServiceTier(t *testing.T) {
255+
reporter := NewUsageReporter(context.Background(), "openai", "gpt-5.4", nil)
256+
257+
reporter.SetTranslatedReasoningEffort([]byte(`{"service_tier":"priority"}`), "openai")
258+
259+
record := reporter.buildRecord(usage.Detail{TotalTokens: 3}, false)
260+
if record.ServiceTier != "priority" {
261+
t.Fatalf("service tier = %q, want %q", record.ServiceTier, "priority")
262+
}
263+
}
264+
265+
func TestUsageReporterSetTranslatedReasoningEffortDefaultsServiceTierWhenRemoved(t *testing.T) {
266+
ctx := usage.WithServiceTier(context.Background(), "priority")
267+
reporter := NewUsageReporter(ctx, "openai", "gpt-5.4", nil)
268+
269+
reporter.SetTranslatedReasoningEffort([]byte(`{"model":"gpt-5.4"}`), "openai")
270+
271+
record := reporter.buildRecord(usage.Detail{TotalTokens: 3}, false)
272+
if record.ServiceTier != usage.DefaultServiceTier {
273+
t.Fatalf("service tier = %q, want %q", record.ServiceTier, usage.DefaultServiceTier)
274+
}
275+
}
276+
244277
func TestUsageReporterBuildAdditionalModelRecordSkipsZeroTokens(t *testing.T) {
245278
reporter := &UsageReporter{
246279
provider: "codex",

sdk/api/handlers/handlers.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,10 @@ import (
2020
"github.com/router-for-me/CLIProxyAPI/v7/internal/util"
2121
coreauth "github.com/router-for-me/CLIProxyAPI/v7/sdk/cliproxy/auth"
2222
coreexecutor "github.com/router-for-me/CLIProxyAPI/v7/sdk/cliproxy/executor"
23+
coreusage "github.com/router-for-me/CLIProxyAPI/v7/sdk/cliproxy/usage"
2324
"github.com/router-for-me/CLIProxyAPI/v7/sdk/config"
2425
sdktranslator "github.com/router-for-me/CLIProxyAPI/v7/sdk/translator"
26+
"github.com/tidwall/gjson"
2527
"golang.org/x/net/context"
2628
)
2729

@@ -242,6 +244,21 @@ func setReasoningEffortMetadata(meta map[string]any, handlerType, model string,
242244
meta[coreexecutor.ReasoningEffortMetadataKey] = effort
243245
}
244246

247+
func setServiceTierMetadata(meta map[string]any, rawJSON []byte) {
248+
if meta == nil {
249+
return
250+
}
251+
serviceTier := coreusage.DefaultServiceTier
252+
node := gjson.GetBytes(rawJSON, "service_tier")
253+
if node.Exists() {
254+
value := strings.TrimSpace(node.String())
255+
if value != "" {
256+
serviceTier = value
257+
}
258+
}
259+
meta[coreexecutor.ServiceTierMetadataKey] = serviceTier
260+
}
261+
245262
// headersFromContext extracts the original HTTP request headers from the gin context
246263
// embedded in the provided context. This allows session affinity selectors to read
247264
// client headers like X-Amp-Thread-Id.
@@ -562,6 +579,7 @@ func (h *BaseAPIHandler) executeWithAuthManager(ctx context.Context, handlerType
562579
reqMeta := requestExecutionMetadata(ctx)
563580
reqMeta[coreexecutor.RequestedModelMetadataKey] = modelName
564581
setReasoningEffortMetadata(reqMeta, handlerType, normalizedModel, rawJSON)
582+
setServiceTierMetadata(reqMeta, rawJSON)
565583
payload := rawJSON
566584
if len(payload) == 0 {
567585
payload = nil
@@ -611,6 +629,7 @@ func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handle
611629
reqMeta := requestExecutionMetadata(ctx)
612630
reqMeta[coreexecutor.RequestedModelMetadataKey] = modelName
613631
setReasoningEffortMetadata(reqMeta, handlerType, normalizedModel, rawJSON)
632+
setServiceTierMetadata(reqMeta, rawJSON)
614633
payload := rawJSON
615634
if len(payload) == 0 {
616635
payload = nil
@@ -673,6 +692,7 @@ func (h *BaseAPIHandler) executeStreamWithAuthManager(ctx context.Context, handl
673692
reqMeta := requestExecutionMetadata(ctx)
674693
reqMeta[coreexecutor.RequestedModelMetadataKey] = modelName
675694
setReasoningEffortMetadata(reqMeta, handlerType, normalizedModel, rawJSON)
695+
setServiceTierMetadata(reqMeta, rawJSON)
676696
payload := rawJSON
677697
if len(payload) == 0 {
678698
payload = nil

sdk/api/handlers/handlers_metadata_test.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,25 @@ func TestSetReasoningEffortMetadataSupportsOpenAIResponses(t *testing.T) {
3838
t.Fatalf("ReasoningEffortMetadataKey = %v, want %q", got, "medium")
3939
}
4040
}
41+
42+
func TestSetServiceTierMetadataExtractsValue(t *testing.T) {
43+
meta := make(map[string]any)
44+
45+
setServiceTierMetadata(meta, []byte(`{"service_tier":"priority"}`))
46+
47+
gotServiceTier := meta[coreexecutor.ServiceTierMetadataKey]
48+
if gotServiceTier != "priority" {
49+
t.Fatalf("ServiceTierMetadataKey = %v, want %q", gotServiceTier, "priority")
50+
}
51+
}
52+
53+
func TestSetServiceTierMetadataDefaultsWhenMissing(t *testing.T) {
54+
meta := make(map[string]any)
55+
56+
setServiceTierMetadata(meta, []byte(`{"model":"gpt-5.4"}`))
57+
58+
gotServiceTier := meta[coreexecutor.ServiceTierMetadataKey]
59+
if gotServiceTier != "default" {
60+
t.Fatalf("ServiceTierMetadataKey = %v, want %q", gotServiceTier, "default")
61+
}
62+
}

sdk/cliproxy/auth/conductor.go

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1731,9 +1731,14 @@ func (m *Manager) prepareRequestAuth(ctx context.Context, executor ProviderExecu
17311731
func contextWithRequestedModelAlias(ctx context.Context, opts cliproxyexecutor.Options, fallback string) context.Context {
17321732
alias := requestedModelAliasFromOptions(opts, fallback)
17331733
ctx = coreusage.WithRequestedModelAlias(ctx, alias)
1734-
if effort := reasoningEffortFromOptions(opts); effort != "" {
1734+
effort := reasoningEffortFromOptions(opts)
1735+
if effort != "" {
17351736
ctx = coreusage.WithReasoningEffort(ctx, effort)
17361737
}
1738+
serviceTier := serviceTierFromOptions(opts)
1739+
if serviceTier != "" {
1740+
ctx = coreusage.WithServiceTier(ctx, serviceTier)
1741+
}
17371742
return ctx
17381743
}
17391744

@@ -1780,6 +1785,24 @@ func reasoningEffortFromOptions(opts cliproxyexecutor.Options) string {
17801785
}
17811786
}
17821787

1788+
func serviceTierFromOptions(opts cliproxyexecutor.Options) string {
1789+
if len(opts.Metadata) == 0 {
1790+
return ""
1791+
}
1792+
raw, ok := opts.Metadata[cliproxyexecutor.ServiceTierMetadataKey]
1793+
if !ok || raw == nil {
1794+
return ""
1795+
}
1796+
switch value := raw.(type) {
1797+
case string:
1798+
return strings.TrimSpace(value)
1799+
case []byte:
1800+
return strings.TrimSpace(string(value))
1801+
default:
1802+
return ""
1803+
}
1804+
}
1805+
17831806
func pinnedAuthIDFromMetadata(meta map[string]any) string {
17841807
if len(meta) == 0 {
17851808
return ""

sdk/cliproxy/auth/conductor_usage_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ func TestContextWithRequestedModelAliasIncludesReasoningEffort(t *testing.T) {
1313
Metadata: map[string]any{
1414
cliproxyexecutor.RequestedModelMetadataKey: "client-model",
1515
cliproxyexecutor.ReasoningEffortMetadataKey: "medium",
16+
cliproxyexecutor.ServiceTierMetadataKey: "priority",
1617
},
1718
}, "fallback-model")
1819

@@ -22,4 +23,8 @@ func TestContextWithRequestedModelAliasIncludesReasoningEffort(t *testing.T) {
2223
if got := coreusage.ReasoningEffortFromContext(ctx); got != "medium" {
2324
t.Fatalf("reasoning effort = %q, want %q", got, "medium")
2425
}
26+
gotServiceTier := coreusage.ServiceTierFromContext(ctx)
27+
if gotServiceTier != "priority" {
28+
t.Fatalf("service tier = %q, want %q", gotServiceTier, "priority")
29+
}
2530
}

sdk/cliproxy/executor/types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ const DisallowFreeAuthMetadataKey = "disallow_free_auth"
2020
// ReasoningEffortMetadataKey stores the client-requested reasoning effort for usage logs.
2121
const ReasoningEffortMetadataKey = "reasoning_effort"
2222

23+
// ServiceTierMetadataKey stores the client-requested service tier for usage logs.
24+
const ServiceTierMetadataKey = "service_tier"
25+
2326
const (
2427
// PinnedAuthMetadataKey locks execution to a specific auth ID.
2528
PinnedAuthMetadataKey = "pinned_auth_id"

sdk/cliproxy/usage/manager.go

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ import (
1010
log "github.com/sirupsen/logrus"
1111
)
1212

13+
// DefaultServiceTier is used when a request does not specify service_tier.
14+
const DefaultServiceTier = "default"
15+
1316
// Record contains the usage statistics captured for a single provider request.
1417
type Record struct {
1518
Provider string
@@ -22,12 +25,14 @@ type Record struct {
2225
Source string
2326
// ReasoningEffort stores the translated upstream thinking level for request event logs.
2427
ReasoningEffort string
25-
RequestedAt time.Time
26-
Latency time.Duration
27-
TTFT time.Duration
28-
Failed bool
29-
Fail Failure
30-
Detail Detail
28+
// ServiceTier stores the client-requested service tier for request event logs.
29+
ServiceTier string
30+
RequestedAt time.Time
31+
Latency time.Duration
32+
TTFT time.Duration
33+
Failed bool
34+
Fail Failure
35+
Detail Detail
3136
// ResponseHeaders stores a snapshot of upstream response headers for usage sinks.
3237
ResponseHeaders http.Header
3338
}
@@ -51,6 +56,7 @@ type Detail struct {
5156

5257
type requestedModelAliasContextKey struct{}
5358
type reasoningEffortContextKey struct{}
59+
type serviceTierContextKey struct{}
5460

5561
// WithRequestedModelAlias stores the client-requested model name for usage sinks.
5662
func WithRequestedModelAlias(ctx context.Context, alias string) context.Context {
@@ -108,6 +114,42 @@ func ReasoningEffortFromContext(ctx context.Context) string {
108114
}
109115
}
110116

117+
// WithServiceTier stores the client-requested service tier for usage sinks.
118+
func WithServiceTier(ctx context.Context, tier string) context.Context {
119+
if ctx == nil {
120+
ctx = context.Background()
121+
}
122+
tier = strings.TrimSpace(tier)
123+
if tier == "" {
124+
tier = DefaultServiceTier
125+
}
126+
return context.WithValue(ctx, serviceTierContextKey{}, tier)
127+
}
128+
129+
// ServiceTierFromContext returns the client-requested service tier stored in ctx.
130+
func ServiceTierFromContext(ctx context.Context) string {
131+
if ctx == nil {
132+
return DefaultServiceTier
133+
}
134+
raw := ctx.Value(serviceTierContextKey{})
135+
switch value := raw.(type) {
136+
case string:
137+
tier := strings.TrimSpace(value)
138+
if tier == "" {
139+
return DefaultServiceTier
140+
}
141+
return tier
142+
case []byte:
143+
tier := strings.TrimSpace(string(value))
144+
if tier == "" {
145+
return DefaultServiceTier
146+
}
147+
return tier
148+
default:
149+
return DefaultServiceTier
150+
}
151+
}
152+
111153
// Plugin consumes usage records emitted by the proxy runtime.
112154
type Plugin interface {
113155
HandleUsage(ctx context.Context, record Record)

0 commit comments

Comments
 (0)