Skip to content

Commit 0e77bee

Browse files
committed
feat(proxy): forward Anthropic speed:fast to Codex priority tier
Map official speed:"fast" to upstream service_tier priority, omit priority when absent, and log resolved service_tier on /v1/messages for Usage admin fast stats.
1 parent 1ca1d67 commit 0e77bee

5 files changed

Lines changed: 102 additions & 0 deletions

File tree

docs/API.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333

3434
Codex2API 提供兼容 OpenAI 风格的 API 接口,同时包含完整的管理后台 API。
3535

36+
Anthropic `/v1/messages` 仅将官方 `speed:"fast"` 映射为上游 Codex `service_tier:"priority"`;Anthropic 请求侧 `service_tier`(Priority Tier)不在此映射范围内。用量日志的 `service_tier` / `fast` 过滤反映该解析结果。
37+
3638
**Base URL:** `http://localhost:8080` (默认端口)
3739

3840
**请求格式:**

docs/ARCHITECTURE.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,7 @@ func TranslateStreamChunk(data []byte, model, chunkID string) ([]byte, bool)
239239
- `messages` → `input`
240240
- `max_tokens/temperature` → 删除(Codex 不支持)
241241
- `reasoning_effort` → `reasoning.effort`
242+
- Anthropic `/v1/messages` 的 `speed:"fast"` → Codex `service_tier:"priority"`(Anthropic 入参 `service_tier` 为 Priority Tier,不参与 fast mode 映射)
242243
- SSE 事件类型转换
243244

244245
---

proxy/anthropic.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ type anthropicRequest struct {
2626
OutputConfig *anthropicOutputConfig `json:"output_config,omitempty"`
2727
ToolChoice json.RawMessage `json:"tool_choice,omitempty"`
2828
Metadata json.RawMessage `json:"metadata,omitempty"`
29+
Speed string `json:"speed,omitempty"`
2930
}
3031

3132
type anthropicThinking struct {
@@ -240,6 +241,10 @@ func TranslateAnthropicToCodex(rawJSON []byte, modelMappingJSON string) ([]byte,
240241
return TranslateAnthropicToCodexWithModels(rawJSON, modelMappingJSON, SupportedModels)
241242
}
242243

244+
// shouldUseCodexPriorityForAnthropicSpeed reports whether the Anthropic
// request's speed value asks for fast mode.
//
// The value is compared after trimming surrounding whitespace and lowercasing,
// so "fast", "FAST" and " Fast " all match. Every other value, including the
// empty string (field absent), returns false.
func shouldUseCodexPriorityForAnthropicSpeed(speed string) bool {
	normalized := strings.ToLower(strings.TrimSpace(speed))
	return normalized == "fast"
}
247+
243248
// TranslateAnthropicToCodexWithModels 将 Anthropic Messages 请求转换为 Codex Responses 格式
244249
// 返回: (codex 请求体, 原始 Anthropic model 名, error)
245250
func TranslateAnthropicToCodexWithModels(rawJSON []byte, modelMappingJSON string, supportedModels []string) ([]byte, string, error) {
@@ -271,6 +276,12 @@ func TranslateAnthropicToCodexWithModels(rawJSON []byte, modelMappingJSON string
271276
"summary": "auto",
272277
}
273278

279+
if shouldUseCodexPriorityForAnthropicSpeed(req.Speed) {
280+
if upstreamTier, ok := upstreamServiceTier("priority"); ok {
281+
out["service_tier"] = upstreamTier
282+
}
283+
}
284+
274285
// tools
275286
if len(req.Tools) > 0 {
276287
out["tools"] = convertAnthropicTools(req.Tools)

proxy/anthropic_test.go

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@ func TestTranslateAnthropicToCodex_DefaultsReasoningHighWithSummary(t *testing.T
6262
if summary := gjson.GetBytes(got, "reasoning.summary").String(); summary != "auto" {
6363
t.Fatalf("reasoning.summary = %q, want auto; body=%s", summary, got)
6464
}
65+
if tier := gjson.GetBytes(got, "service_tier"); tier.Exists() {
66+
t.Fatalf("service_tier should be omitted when speed is absent; body=%s", got)
67+
}
6568
}
6669

6770
func TestTranslateAnthropicToCodex_ThinkingBudgetDoesNotControlEffort(t *testing.T) {
@@ -81,6 +84,78 @@ func TestTranslateAnthropicToCodex_ThinkingBudgetDoesNotControlEffort(t *testing
8184
}
8285
}
8386

87+
func TestTranslateAnthropicToCodex_SpeedFastMapsToCodexPriority(t *testing.T) {
88+
cases := []struct {
89+
name string
90+
field string
91+
wantTier bool
92+
}{
93+
{"absent omits priority", "", false},
94+
{"speed fast maps to priority", `,"speed":"fast"`, true},
95+
{"speed standard omits priority", `,"speed":"standard"`, false},
96+
}
97+
for _, tc := range cases {
98+
t.Run(tc.name, func(t *testing.T) {
99+
raw := []byte(`{
100+
"model":"claude-sonnet-4-5",
101+
"messages":[{"role":"user","content":"hello"}]` + tc.field + `
102+
}`)
103+
104+
got, _, err := TranslateAnthropicToCodexWithModels(raw, "", []string{"gpt-5.4"})
105+
if err != nil {
106+
t.Fatalf("TranslateAnthropicToCodexWithModels returned error: %v", err)
107+
}
108+
109+
tier := gjson.GetBytes(got, "service_tier")
110+
if tc.wantTier {
111+
if tier.String() != "priority" {
112+
t.Fatalf("service_tier = %q, want priority; body=%s", tier.String(), got)
113+
}
114+
if speed := gjson.GetBytes(got, "speed"); speed.Exists() {
115+
t.Fatalf("speed should not be forwarded to Codex body; body=%s", got)
116+
}
117+
return
118+
}
119+
if tier.Exists() {
120+
t.Fatalf("service_tier should be omitted; body=%s", got)
121+
}
122+
if speed := gjson.GetBytes(got, "speed"); speed.Exists() {
123+
t.Fatalf("speed should not be forwarded to Codex body; body=%s", got)
124+
}
125+
})
126+
}
127+
}
128+
129+
func TestAnthropicUsageServiceTierResolution(t *testing.T) {
130+
cases := []struct {
131+
name string
132+
speed string
133+
actual string
134+
want string
135+
}{
136+
{"no fast intent", "", "default", "default"},
137+
{"fast intent upstream default", "fast", "default", "fast"},
138+
{"fast intent upstream priority", "fast", "priority", "fast"},
139+
}
140+
for _, tc := range cases {
141+
t.Run(tc.name, func(t *testing.T) {
142+
field := ""
143+
if tc.speed != "" {
144+
field = `,"speed":"` + tc.speed + `"`
145+
}
146+
raw := []byte(`{"model":"claude-opus-4-7","messages":[{"role":"user","content":"hi"}]` + field + `}`)
147+
codexBody, _, err := TranslateAnthropicToCodexWithModels(raw, "", []string{"gpt-5.5"})
148+
if err != nil {
149+
t.Fatalf("TranslateAnthropicToCodexWithModels returned error: %v", err)
150+
}
151+
got := resolveServiceTier(tc.actual, extractServiceTier(codexBody))
152+
if got != tc.want {
153+
t.Fatalf("resolveServiceTier(%q, %q) = %q, want %q", tc.actual, extractServiceTier(codexBody), got, tc.want)
154+
}
155+
})
156+
}
157+
}
158+
84159
func TestTranslateAnthropicToCodexCanonicalizesDynamicMappedModelAlias(t *testing.T) {
85160
raw := []byte(`{
86161
"model":"claude-haiku-4-5-20251001",

proxy/handler_anthropic.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ func (h *Handler) Messages(c *gin.Context) {
121121

122122
// 提取 reasoning effort(从翻译后的 codex body 中)
123123
reasoningEffort := extractReasoningEffort(codexBody)
124+
serviceTier := extractServiceTier(codexBody)
124125
sessionID := ResolveSessionID(c.Request.Header, codexBody)
125126
apiKeyID := requestAPIKeyID(c)
126127
affinityKey := sessionAffinityKey(sessionID, apiKeyID)
@@ -225,6 +226,7 @@ func (h *Handler) Messages(c *gin.Context) {
225226
InboundEndpoint: "/v1/messages",
226227
UpstreamEndpoint: "/v1/responses",
227228
Stream: isStream,
229+
ServiceTier: resolveServiceTier("", serviceTier),
228230
IsRetryAttempt: shouldRetry,
229231
AttemptIndex: attempt + 1,
230232
UpstreamErrorKind: upstreamErrorKind(resp.StatusCode, errBody, decision),
@@ -257,6 +259,7 @@ func (h *Handler) Messages(c *gin.Context) {
257259

258260
var firstTokenMs int
259261
var usage *UsageInfo
262+
var actualServiceTier string
260263
ttftRecorded := false
261264
gotTerminal := false
262265
deltaCharCount := 0
@@ -301,6 +304,9 @@ func (h *Handler) Messages(c *gin.Context) {
301304
// 提取 usage
302305
if eventType == "response.completed" {
303306
usage = extractUsageFromResult(parsed.Get("response.usage"))
307+
if tier := parsed.Get("response.service_tier").String(); tier != "" {
308+
actualServiceTier = tier
309+
}
304310
gotTerminal = true
305311
}
306312
if eventType == "response.failed" {
@@ -359,6 +365,9 @@ func (h *Handler) Messages(c *gin.Context) {
359365
}
360366
if eventType == "response.completed" {
361367
usage = extractUsageFromResult(parsed.Get("response.usage"))
368+
if tier := parsed.Get("response.service_tier").String(); tier != "" {
369+
actualServiceTier = tier
370+
}
362371
lastCompletedData = data
363372
gotTerminal = true
364373
return false
@@ -418,6 +427,9 @@ func (h *Handler) Messages(c *gin.Context) {
418427
}
419428
}
420429

430+
resolvedServiceTier := resolveServiceTier(actualServiceTier, serviceTier)
431+
c.Set("x-service-tier", resolvedServiceTier)
432+
421433
logInput := &database.UsageLogInput{
422434
AccountID: account.ID(),
423435
Endpoint: "/v1/messages",
@@ -430,6 +442,7 @@ func (h *Handler) Messages(c *gin.Context) {
430442
InboundEndpoint: "/v1/messages",
431443
UpstreamEndpoint: "/v1/responses",
432444
Stream: isStream,
445+
ServiceTier: resolvedServiceTier,
433446
}
434447
if logStatusCode != http.StatusOK {
435448
logInput.ErrorMessage = usageLogErrorMessage(logStatusCode, []byte(outcome.failureMessage))

0 commit comments

Comments
 (0)