Skip to content

Commit bd19194

Browse files
committed
feat(proxy): forward Anthropic speed:fast to Codex priority tier
Map official speed:"fast" to upstream service_tier priority, omit priority when absent, document Anthropic service_tier is out of scope, and log resolved service_tier on /v1/messages for Usage admin fast stats.
1 parent 1ca1d67 commit bd19194

5 files changed

Lines changed: 124 additions & 2 deletions

File tree

docs/API.md

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@
1111
- [Responses](#2-responses)
1212
- [Images](#3-images)
1313
- [List Models](#4-list-models)
14-
- [Health Check](#5-health-check)
14+
- [Anthropic Messages](#5-anthropic-messages)
15+
- [Health Check](#6-health-check)
1516
- [管理 API](#管理-api)
1617
- [统计接口](#统计接口)
1718
- [账号管理](#账号管理) — 添加 RT / AT 账号、批量导入、导出、迁移
@@ -298,7 +299,27 @@ data: [DONE]
298299
}
299300
```
300301

301-
### 5. Health Check
302+
### 5. Anthropic Messages
303+
304+
**端点:** `POST /v1/messages`
305+
306+
**说明:** Anthropic Messages API 兼容层(Claude Code 等)。请求体会被译为 **Codex `POST .../responses`** 形状,并带 `reasoning.summary=auto`;官方 Anthropic `speed:"fast"` 会映射为上游 Codex `service_tier:"priority"`。Anthropic 请求侧的 `service_tier` 属于 Priority Tier,和 fast mode 是不同能力,当前兼容层不解析 Anthropic 入参 `service_tier`。
307+
308+
**行为摘要:**
309+
310+
| 主题 | 行为 |
311+
|------|------|
312+
| **模型** | 经管理台模型映射转换为 Codex 模型名(如 Claude → `gpt-5.5`) |
313+
| **`speed`** | Anthropic 入参仅识别官方 `speed:"fast"`,并映射为 Codex `service_tier:"priority"`;其它 `speed` 值不触发 priority |
314+
| **`service_tier`** | Anthropic 请求侧 `service_tier` 不在当前兼容范围内,不从该字段解析 fast mode |
315+
| **`reasoning.effort`** | 优先使用 `output_config.effort`(经归一为 `low` / `medium` / `high` / `xhigh`);否则若存在 Anthropic **`thinking.type=enabled`**,按 `budget_tokens` 粗映射档位;否则默认 **`medium`**(避免未带 `output_config` 的客户端始终吃满 `high` 推理成本) |
316+
| **前缀缓存** | `prompt_cache_key` 与会话 ID 解析见 `ResolveSessionID` / 连续性注入;WebSocket 上游会在 `prepareWebsocketBody` 中固定 `stream: true` 与 `prompt_cache_key` |
317+
318+
**请求头(常见):** `anthropic-version`、`Authorization` / `x-api-key` / `anthropic-auth-token`(与其它 Anthropic 客户端一致)。
319+
320+
**响应:** Anthropic SSE(`text/event-stream`)或聚合 JSON(`stream: false`)。
321+
322+
### 6. Health Check
302323

303324
**端点:** `GET /health`
304325

docs/ARCHITECTURE.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,8 @@ func TranslateStreamChunk(data []byte, model, chunkID string) ([]byte, bool)
241241
- `reasoning_effort` → `reasoning.effort`
242242
- SSE 事件类型转换
243243

244+
**Anthropic Messages (`/v1/messages`):** 独立翻译路径(`TranslateAnthropicToCodex*`),产出 Codex `responses` 请求体;官方 `speed:"fast"` 会映射为上游 Codex `service_tier:"priority"`;Anthropic 请求侧 `service_tier` 属于 Priority Tier,和 fast mode 是不同能力,当前兼容层不解析该字段(effort 规则见 [API.md](API.md) 中 Anthropic Messages 小节)。上游可为 HTTP `/responses` 或 WebSocket(由 `CODEX_UPSTREAM_TRANSPORT` / `USE_WEBSOCKET` 决定)。
245+
244246
---
245247

246248
## 数据流

proxy/anthropic.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ type anthropicRequest struct {
2626
OutputConfig *anthropicOutputConfig `json:"output_config,omitempty"`
2727
ToolChoice json.RawMessage `json:"tool_choice,omitempty"`
2828
Metadata json.RawMessage `json:"metadata,omitempty"`
29+
Speed string `json:"speed,omitempty"`
2930
}
3031

3132
type anthropicThinking struct {
@@ -240,6 +241,10 @@ func TranslateAnthropicToCodex(rawJSON []byte, modelMappingJSON string) ([]byte,
240241
return TranslateAnthropicToCodexWithModels(rawJSON, modelMappingJSON, SupportedModels)
241242
}
242243

244+
// shouldUseCodexPriorityForAnthropicSpeed reports whether the given Anthropic
// speed value asks for fast mode. Surrounding whitespace and letter case are
// ignored; any value other than "fast", including the empty string, yields
// false.
func shouldUseCodexPriorityForAnthropicSpeed(speed string) bool {
	switch strings.ToLower(strings.TrimSpace(speed)) {
	case "fast":
		return true
	default:
		return false
	}
}
247+
243248
// TranslateAnthropicToCodexWithModels 将 Anthropic Messages 请求转换为 Codex Responses 格式
244249
// 返回: (codex 请求体, 原始 Anthropic model 名, error)
245250
func TranslateAnthropicToCodexWithModels(rawJSON []byte, modelMappingJSON string, supportedModels []string) ([]byte, string, error) {
@@ -271,6 +276,12 @@ func TranslateAnthropicToCodexWithModels(rawJSON []byte, modelMappingJSON string
271276
"summary": "auto",
272277
}
273278

279+
if shouldUseCodexPriorityForAnthropicSpeed(req.Speed) {
280+
if upstreamTier, ok := upstreamServiceTier("priority"); ok {
281+
out["service_tier"] = upstreamTier
282+
}
283+
}
284+
274285
// tools
275286
if len(req.Tools) > 0 {
276287
out["tools"] = convertAnthropicTools(req.Tools)

proxy/anthropic_test.go

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@ func TestTranslateAnthropicToCodex_DefaultsReasoningHighWithSummary(t *testing.T
6262
if summary := gjson.GetBytes(got, "reasoning.summary").String(); summary != "auto" {
6363
t.Fatalf("reasoning.summary = %q, want auto; body=%s", summary, got)
6464
}
65+
if tier := gjson.GetBytes(got, "service_tier"); tier.Exists() {
66+
t.Fatalf("service_tier should be omitted when speed is absent; body=%s", got)
67+
}
6568
}
6669

6770
func TestTranslateAnthropicToCodex_ThinkingBudgetDoesNotControlEffort(t *testing.T) {
@@ -81,6 +84,78 @@ func TestTranslateAnthropicToCodex_ThinkingBudgetDoesNotControlEffort(t *testing
8184
}
8285
}
8386

87+
func TestTranslateAnthropicToCodex_SpeedFastMapsToCodexPriority(t *testing.T) {
88+
cases := []struct {
89+
name string
90+
field string
91+
wantTier bool
92+
}{
93+
{"absent omits priority", "", false},
94+
{"speed fast maps to priority", `,"speed":"fast"`, true},
95+
{"speed standard omits priority", `,"speed":"standard"`, false},
96+
}
97+
for _, tc := range cases {
98+
t.Run(tc.name, func(t *testing.T) {
99+
raw := []byte(`{
100+
"model":"claude-sonnet-4-5",
101+
"messages":[{"role":"user","content":"hello"}]` + tc.field + `
102+
}`)
103+
104+
got, _, err := TranslateAnthropicToCodexWithModels(raw, "", []string{"gpt-5.4"})
105+
if err != nil {
106+
t.Fatalf("TranslateAnthropicToCodexWithModels returned error: %v", err)
107+
}
108+
109+
tier := gjson.GetBytes(got, "service_tier")
110+
if tc.wantTier {
111+
if tier.String() != "priority" {
112+
t.Fatalf("service_tier = %q, want priority; body=%s", tier.String(), got)
113+
}
114+
if speed := gjson.GetBytes(got, "speed"); speed.Exists() {
115+
t.Fatalf("speed should not be forwarded to Codex body; body=%s", got)
116+
}
117+
return
118+
}
119+
if tier.Exists() {
120+
t.Fatalf("service_tier should be omitted; body=%s", got)
121+
}
122+
if speed := gjson.GetBytes(got, "speed"); speed.Exists() {
123+
t.Fatalf("speed should not be forwarded to Codex body; body=%s", got)
124+
}
125+
})
126+
}
127+
}
128+
129+
func TestAnthropicUsageServiceTierResolution(t *testing.T) {
130+
cases := []struct {
131+
name string
132+
speed string
133+
actual string
134+
want string
135+
}{
136+
{"no fast intent", "", "default", "default"},
137+
{"fast intent upstream default", "fast", "default", "fast"},
138+
{"fast intent upstream priority", "fast", "priority", "fast"},
139+
}
140+
for _, tc := range cases {
141+
t.Run(tc.name, func(t *testing.T) {
142+
field := ""
143+
if tc.speed != "" {
144+
field = `,"speed":"` + tc.speed + `"`
145+
}
146+
raw := []byte(`{"model":"claude-opus-4-7","messages":[{"role":"user","content":"hi"}]` + field + `}`)
147+
codexBody, _, err := TranslateAnthropicToCodexWithModels(raw, "", []string{"gpt-5.5"})
148+
if err != nil {
149+
t.Fatalf("TranslateAnthropicToCodexWithModels returned error: %v", err)
150+
}
151+
got := resolveServiceTier(tc.actual, extractServiceTier(codexBody))
152+
if got != tc.want {
153+
t.Fatalf("resolveServiceTier(%q, %q) = %q, want %q", tc.actual, extractServiceTier(codexBody), got, tc.want)
154+
}
155+
})
156+
}
157+
}
158+
84159
func TestTranslateAnthropicToCodexCanonicalizesDynamicMappedModelAlias(t *testing.T) {
85160
raw := []byte(`{
86161
"model":"claude-haiku-4-5-20251001",

proxy/handler_anthropic.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ func (h *Handler) Messages(c *gin.Context) {
121121

122122
// 提取 reasoning effort(从翻译后的 codex body 中)
123123
reasoningEffort := extractReasoningEffort(codexBody)
124+
serviceTier := extractServiceTier(codexBody)
124125
sessionID := ResolveSessionID(c.Request.Header, codexBody)
125126
apiKeyID := requestAPIKeyID(c)
126127
affinityKey := sessionAffinityKey(sessionID, apiKeyID)
@@ -225,6 +226,7 @@ func (h *Handler) Messages(c *gin.Context) {
225226
InboundEndpoint: "/v1/messages",
226227
UpstreamEndpoint: "/v1/responses",
227228
Stream: isStream,
229+
ServiceTier: resolveServiceTier("", serviceTier),
228230
IsRetryAttempt: shouldRetry,
229231
AttemptIndex: attempt + 1,
230232
UpstreamErrorKind: upstreamErrorKind(resp.StatusCode, errBody, decision),
@@ -257,6 +259,7 @@ func (h *Handler) Messages(c *gin.Context) {
257259

258260
var firstTokenMs int
259261
var usage *UsageInfo
262+
var actualServiceTier string
260263
ttftRecorded := false
261264
gotTerminal := false
262265
deltaCharCount := 0
@@ -301,6 +304,9 @@ func (h *Handler) Messages(c *gin.Context) {
301304
// 提取 usage
302305
if eventType == "response.completed" {
303306
usage = extractUsageFromResult(parsed.Get("response.usage"))
307+
if tier := parsed.Get("response.service_tier").String(); tier != "" {
308+
actualServiceTier = tier
309+
}
304310
gotTerminal = true
305311
}
306312
if eventType == "response.failed" {
@@ -359,6 +365,9 @@ func (h *Handler) Messages(c *gin.Context) {
359365
}
360366
if eventType == "response.completed" {
361367
usage = extractUsageFromResult(parsed.Get("response.usage"))
368+
if tier := parsed.Get("response.service_tier").String(); tier != "" {
369+
actualServiceTier = tier
370+
}
362371
lastCompletedData = data
363372
gotTerminal = true
364373
return false
@@ -418,6 +427,9 @@ func (h *Handler) Messages(c *gin.Context) {
418427
}
419428
}
420429

430+
resolvedServiceTier := resolveServiceTier(actualServiceTier, serviceTier)
431+
c.Set("x-service-tier", resolvedServiceTier)
432+
421433
logInput := &database.UsageLogInput{
422434
AccountID: account.ID(),
423435
Endpoint: "/v1/messages",
@@ -430,6 +442,7 @@ func (h *Handler) Messages(c *gin.Context) {
430442
InboundEndpoint: "/v1/messages",
431443
UpstreamEndpoint: "/v1/responses",
432444
Stream: isStream,
445+
ServiceTier: resolvedServiceTier,
433446
}
434447
if logStatusCode != http.StatusOK {
435448
logInput.ErrorMessage = usageLogErrorMessage(logStatusCode, []byte(outcome.failureMessage))

0 commit comments

Comments
 (0)