Skip to content

Commit 6875571

Browse files
DouDOU-start and claude committed
fix(plugin): classify exhausted route failures
Return differentiated OpenAI-compatible statuses when route failover is exhausted, while preserving Retry-After for true rate limits. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 09f12dc commit 6875571

3 files changed

Lines changed: 243 additions & 31 deletions

File tree

backend/internal/plugin/forwarder.go

Lines changed: 141 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package plugin
22

33
import (
4+
"context"
5+
"errors"
46
"net/http"
57
"strings"
68
"time"
@@ -126,11 +128,7 @@ func (f *Forwarder) Forward(c *gin.Context) {
126128
startedAt := state.startedAt
127129
totalAttempts := 0
128130

129-
// rateLimited* 跟踪本次请求最近一次被上游限流时的退避建议。最终走到 all_routes_failed
130-
// 时用来给客户端回 429 + Retry-After,而不是无信号的 503,让 SDK 能正确退避。
131-
// 多次命中限流时取最小值(最早恢复的账号决定何时重试最合理)。
132-
rateLimitedSeen := false
133-
rateLimitedRetryAfter := time.Duration(0)
131+
failureSummary := allRoutesFailureSummary{}
134132

135133
for _, route := range routes {
136134
state.selectedRoute = route
@@ -146,6 +144,7 @@ func (f *Forwarder) Forward(c *gin.Context) {
146144
exclude = append(exclude, softExclude...)
147145

148146
if err := f.pickAccount(c, state, exclude...); err != nil {
147+
failureSummary.recordPickAccountError(err)
149148
if len(softExclude) > 0 && time.Now().Before(queueDeadline) {
150149
softExclude = nil
151150
select {
@@ -171,6 +170,7 @@ func (f *Forwarder) Forward(c *gin.Context) {
171170
attemptLogger := logger.With(sdk.LogFieldAccountID, accountID)
172171
releaseAccountSlot, ok := f.acquireAccountSlot(c, state)
173172
if !ok {
173+
failureSummary.recordLocalCapacityFailure()
174174
softExclude = append(softExclude, accountID)
175175
continue
176176
}
@@ -191,6 +191,7 @@ func (f *Forwarder) Forward(c *gin.Context) {
191191
totalAttempts++
192192

193193
if f.canFailover(c, state, execution) {
194+
failureSummary.recordExecution(execution)
194195
attrs := []any{
195196
"attempt", attempt,
196197
"kind", execution.outcome.Kind,
@@ -207,14 +208,6 @@ func (f *Forwarder) Forward(c *gin.Context) {
207208
releaseAccountSlot()
208209
f.applyOutcome(ctx, state, execution)
209210

210-
if execution.outcome.Kind == sdk.OutcomeAccountRateLimited {
211-
ra := execution.outcome.RetryAfter
212-
if !rateLimitedSeen || (ra > 0 && (rateLimitedRetryAfter == 0 || ra < rateLimitedRetryAfter)) {
213-
rateLimitedSeen = true
214-
rateLimitedRetryAfter = ra
215-
}
216-
}
217-
218211
if execution.outcome.Kind.IsAccountFault() {
219212
hardExclude = append(hardExclude, accountID)
220213
} else {
@@ -257,21 +250,146 @@ func (f *Forwarder) Forward(c *gin.Context) {
257250
if len(hardExclude) > 0 {
258251
failAttrs = append(failAttrs, "tried_accounts", hardExclude)
259252
}
260-
if rateLimitedSeen {
261-
failAttrs = append(failAttrs, "rate_limited_retry_after_ms", rateLimitedRetryAfter.Milliseconds())
253+
if failureSummary.rateLimitedSeen {
254+
failAttrs = append(failAttrs, "rate_limited_retry_after_ms", failureSummary.rateLimitedRetryAfter.Milliseconds())
262255
}
263256
logger.Error("forward_request_failed", failAttrs...)
264257

265-
// 走到这里都是"上游容量不足"——上游 429、家族冷却中、并发槽满 + 排队超时,
266-
// 客户端视角统一归为可重试的限流,回 429 + Retry-After 让 SDK 自动退避。
267-
// 真正的"无候选分组 / 配置错"已经在最前面 routes 为空时回了 no_available_route,
268-
// 不会走到这里;这里再回 503 只会让客户端拿到无信号的失败,触发更猛的重试。
269-
retryAfter := rateLimitedRetryAfter
258+
writeAllRoutesFailed(c, failureSummary)
259+
}
260+
261+
// allRoutesFailureSummary accumulates which kinds of failure were observed
// while Forward walked through routes and accounts. It is the input from
// which selectAllRoutesFailureResponse picks the final client-facing error.
//
//   - rateLimitedSeen / rateLimitedRetryAfter: set by recordExecution for
//     OutcomeAccountRateLimited; the retry-after keeps the smallest positive
//     value recorded (see recordRetryAfter).
//   - localCapacitySeen: set by recordLocalCapacityFailure.
//   - accountUnavailable: set by recordPickAccountError.
//   - accountDeadSeen: set by recordExecution for OutcomeAccountDead.
//   - upstreamTimeoutSeen / upstreamFailureSeen: set by recordExecution for
//     transient or unknown upstream failures, split by isTimeoutFailure.
type allRoutesFailureSummary struct {
262+
rateLimitedSeen bool
263+
rateLimitedRetryAfter time.Duration
264+
localCapacitySeen bool
265+
accountUnavailable bool
266+
accountDeadSeen bool
267+
upstreamTimeoutSeen bool
268+
upstreamFailureSeen bool
269+
}
270+
271+
// recordExecution folds one execution into the summary according to its
// outcome kind:
//
//   - OutcomeAccountRateLimited sets rateLimitedSeen and passes the outcome's
//     RetryAfter to recordRetryAfter.
//   - OutcomeAccountDead sets accountDeadSeen.
//   - OutcomeUpstreamTransient sets upstreamTimeoutSeen when isTimeoutFailure
//     reports a timeout, otherwise upstreamFailureSeen (never both).
//   - OutcomeUnknown sets upstreamFailureSeen only when execution.err is
//     non-nil.
//
// Any other outcome kind leaves the summary unchanged.
func (s *allRoutesFailureSummary) recordExecution(execution forwardExecution) {
272+
switch execution.outcome.Kind {
273+
case sdk.OutcomeAccountRateLimited:
274+
s.rateLimitedSeen = true
275+
s.recordRetryAfter(execution.outcome.RetryAfter)
276+
case sdk.OutcomeAccountDead:
277+
s.accountDeadSeen = true
278+
case sdk.OutcomeUpstreamTransient:
279+
if isTimeoutFailure(execution) {
280+
s.upstreamTimeoutSeen = true
281+
return
282+
}
283+
s.upstreamFailureSeen = true
284+
case sdk.OutcomeUnknown:
285+
if execution.err != nil {
286+
s.upstreamFailureSeen = true
287+
}
288+
}
289+
}
290+
291+
// recordRetryAfter remembers the smallest positive retry-after seen so far.
// Non-positive values are ignored, so rateLimitedRetryAfter stays zero until
// some rate-limited outcome carries a usable hint.
//
// NOTE(review): in this diff rendering the line under the "271-" gutter is
// removed-side text from the previous implementation and is not part of this
// function after the commit.
func (s *allRoutesFailureSummary) recordRetryAfter(retryAfter time.Duration) {
270292
if retryAfter <= 0 {
271-
retryAfter = allRoutesFailedDefaultRetryAfter
293+
return
294+
}
295+
if s.rateLimitedRetryAfter == 0 || retryAfter < s.rateLimitedRetryAfter {
296+
s.rateLimitedRetryAfter = retryAfter
297+
}
298+
}
299+
300+
// recordPickAccountError marks that account selection failed at least once.
// The error argument is accepted but not inspected; every call sets
// accountUnavailable regardless of the error's value.
func (s *allRoutesFailureSummary) recordPickAccountError(error) {
301+
s.accountUnavailable = true
302+
}
303+
304+
// recordLocalCapacityFailure marks that a local capacity failure occurred.
// Forward calls it when acquireAccountSlot reports !ok for a picked account.
func (s *allRoutesFailureSummary) recordLocalCapacityFailure() {
305+
s.localCapacitySeen = true
306+
}
307+
308+
// allRoutesFailureResponse describes the error that writeAllRoutesFailed
// sends to the client: the HTTP status, the error type and code strings, the
// human-readable message, and a retry-after duration. Only the 429 responses
// built by selectAllRoutesFailureResponse populate retryAfter.
type allRoutesFailureResponse struct {
309+
status int
310+
errType string
311+
code string
312+
message string
313+
retryAfter time.Duration
314+
}
315+
316+
// writeAllRoutesFailed writes the final error for a request whose routes were
// all exhausted. The response is chosen by selectAllRoutesFailureResponse.
// A 429 is written through openAIRateLimitError, which also receives the
// retry-after duration; every other status goes through openAIError with the
// selected error type.
func writeAllRoutesFailed(c *gin.Context, summary allRoutesFailureSummary) {
317+
response := selectAllRoutesFailureResponse(summary)
318+
if response.status == http.StatusTooManyRequests {
319+
openAIRateLimitError(c, response.status, response.code, response.message, response.retryAfter)
320+
return
321+
}
322+
openAIError(c, response.status, response.errType, response.code, response.message)
323+
}
324+
325+
// selectAllRoutesFailureResponse maps a failure summary to exactly one
// response. The checks run in a fixed priority order and the first match wins:
//
//  1. rateLimitedSeen                       -> 429 all_routes_rate_limited
//  2. localCapacitySeen                     -> 429 all_routes_capacity_exhausted
//  3. upstreamTimeoutSeen                   -> 504 upstream_timeout
//  4. upstreamFailureSeen                   -> 502 upstream_error
//  5. accountDeadSeen || accountUnavailable -> 503 no_available_account
//  6. nothing recorded                      -> 503 all_routes_failed
//
// For case 1 the recorded retry-after is used, falling back to
// allRoutesFailedDefaultRetryAfter when no positive value was recorded.
// Case 2 always uses allRoutesFailedDefaultRetryAfter.
func selectAllRoutesFailureResponse(summary allRoutesFailureSummary) allRoutesFailureResponse {
326+
if summary.rateLimitedSeen {
327+
retryAfter := summary.rateLimitedRetryAfter
328+
if retryAfter <= 0 {
329+
retryAfter = allRoutesFailedDefaultRetryAfter
330+
}
331+
return allRoutesFailureResponse{
332+
status: http.StatusTooManyRequests,
333+
errType: "rate_limit_error",
334+
code: "all_routes_rate_limited",
335+
message: "上游账号当前被限流,请稍后重试",
336+
retryAfter: retryAfter,
337+
}
338+
}
339+
if summary.localCapacitySeen {
340+
return allRoutesFailureResponse{
341+
status: http.StatusTooManyRequests,
342+
errType: "rate_limit_error",
343+
code: "all_routes_capacity_exhausted",
344+
message: "上游容量暂时不足,请稍后重试",
345+
retryAfter: allRoutesFailedDefaultRetryAfter,
346+
}
347+
}
348+
if summary.upstreamTimeoutSeen {
349+
return allRoutesFailureResponse{
350+
status: http.StatusGatewayTimeout,
351+
errType: "server_error",
352+
code: "upstream_timeout",
353+
message: "上游请求超时,请稍后重试",
354+
}
355+
}
356+
if summary.upstreamFailureSeen {
357+
return allRoutesFailureResponse{
358+
status: http.StatusBadGateway,
359+
errType: "server_error",
360+
code: "upstream_error",
361+
message: "上游服务暂不可用,请稍后重试",
362+
}
363+
}
364+
if summary.accountDeadSeen || summary.accountUnavailable {
365+
return allRoutesFailureResponse{
366+
status: http.StatusServiceUnavailable,
367+
errType: "server_error",
368+
code: "no_available_account",
369+
message: "暂无可用上游账号,请稍后重试",
370+
}
371+
}
372+
return allRoutesFailureResponse{
373+
status: http.StatusServiceUnavailable,
374+
errType: "server_error",
375+
code: "all_routes_failed",
376+
message: "请求暂时无法完成,请稍后重试",
377+
}
378+
}
379+
380+
// isTimeoutFailure reports whether an execution looks like a timeout. It
// returns true when any of the following holds:
//
//   - the upstream status code is 504 Gateway Timeout;
//   - execution.err wraps context.DeadlineExceeded;
//   - execution.err wraps an error whose Timeout() method returns true;
//   - the lower-cased judgmentReason text contains "timeout", "timed out" or
//     "deadline exceeded".
//
// NOTE(review): in this diff rendering the lines under the "273-" and "274-"
// gutters are removed-side text from the previous Forward tail and are not
// part of this function after the commit.
func isTimeoutFailure(execution forwardExecution) bool {
381+
if execution.outcome.Upstream.StatusCode == http.StatusGatewayTimeout {
382+
return true
383+
}
384+
if errors.Is(execution.err, context.DeadlineExceeded) {
385+
return true
386+
}
387+
var timeoutErr interface{ Timeout() bool }
388+
if errors.As(execution.err, &timeoutErr) && timeoutErr.Timeout() {
389+
return true
272390
}
273-
openAIRateLimitError(c, http.StatusTooManyRequests, "all_routes_failed",
274-
"上游容量暂时不足,请稍后重试", retryAfter)
391+
reason := strings.ToLower(judgmentReason(execution))
392+
return strings.Contains(reason, "timeout") || strings.Contains(reason, "timed out") || strings.Contains(reason, "deadline exceeded")
275393
}
276394

277395
func routesForAPIKey(state *forwardState, requirements routing.Requirements) []routing.Candidate {

backend/internal/plugin/forwarder_test.go

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,18 @@
11
package plugin
22

33
import (
4+
"context"
45
"net/http"
56
"net/http/httptest"
67
"testing"
8+
"time"
79

810
"github.com/gin-gonic/gin"
911

1012
"github.com/DouDOU-start/airgate-core/ent"
1113
"github.com/DouDOU-start/airgate-core/internal/auth"
1214
"github.com/DouDOU-start/airgate-core/internal/routing"
15+
sdk "github.com/DouDOU-start/airgate-sdk"
1316
)
1417

1518
func TestParseBody(t *testing.T) {
@@ -161,3 +164,92 @@ func TestRoutesForAPIKeyRejectsImageWhenBoundGroupDisabled(t *testing.T) {
161164
t.Fatalf("len(routes) = %d, want 0", len(routes))
162165
}
163166
}
167+
168+
// TestSelectAllRoutesFailureResponse is a table-driven check of the priority
// order used by selectAllRoutesFailureResponse. Most cases set a
// lower-priority flag alongside the one under test to show the
// higher-priority classification wins. Only status and code are asserted;
// retryAfter and the nothing-recorded fallback (all_routes_failed) are not
// covered here.
func TestSelectAllRoutesFailureResponse(t *testing.T) {
169+
t.Parallel()
170+
171+
tests := []struct {
172+
name string
173+
summary allRoutesFailureSummary
174+
wantStatus int
175+
wantCode string
176+
}{
177+
{
178+
name: "upstream rate limited",
179+
summary: allRoutesFailureSummary{
180+
rateLimitedSeen: true,
181+
rateLimitedRetryAfter: 3 * time.Second,
182+
upstreamFailureSeen: true,
183+
},
184+
wantStatus: http.StatusTooManyRequests,
185+
wantCode: "all_routes_rate_limited",
186+
},
187+
{
188+
name: "local capacity exhausted",
189+
summary: allRoutesFailureSummary{
190+
localCapacitySeen: true,
191+
upstreamFailureSeen: true,
192+
},
193+
wantStatus: http.StatusTooManyRequests,
194+
wantCode: "all_routes_capacity_exhausted",
195+
},
196+
{
197+
name: "upstream timeout",
198+
summary: allRoutesFailureSummary{
199+
upstreamTimeoutSeen: true,
200+
upstreamFailureSeen: true,
201+
},
202+
wantStatus: http.StatusGatewayTimeout,
203+
wantCode: "upstream_timeout",
204+
},
205+
{
206+
name: "upstream failure",
207+
summary: allRoutesFailureSummary{
208+
upstreamFailureSeen: true,
209+
},
210+
wantStatus: http.StatusBadGateway,
211+
wantCode: "upstream_error",
212+
},
213+
{
214+
name: "no available account",
215+
summary: allRoutesFailureSummary{
216+
accountDeadSeen: true,
217+
accountUnavailable: true,
218+
},
219+
wantStatus: http.StatusServiceUnavailable,
220+
wantCode: "no_available_account",
221+
},
222+
}
223+
224+
for _, tt := range tests {
225+
tt := tt
226+
t.Run(tt.name, func(t *testing.T) {
227+
t.Parallel()
228+
229+
got := selectAllRoutesFailureResponse(tt.summary)
230+
if got.status != tt.wantStatus {
231+
t.Fatalf("status = %d, want %d", got.status, tt.wantStatus)
232+
}
233+
if got.code != tt.wantCode {
234+
t.Fatalf("code = %q, want %q", got.code, tt.wantCode)
235+
}
236+
})
237+
}
238+
}
239+
240+
// TestAllRoutesFailureSummaryRecordsTimeout verifies that a transient
// upstream outcome whose error is context.DeadlineExceeded is recorded as a
// timeout only: upstreamTimeoutSeen must be set and upstreamFailureSeen must
// stay false.
func TestAllRoutesFailureSummaryRecordsTimeout(t *testing.T) {
241+
t.Parallel()
242+
243+
summary := allRoutesFailureSummary{}
244+
summary.recordExecution(forwardExecution{
245+
outcome: sdk.ForwardOutcome{Kind: sdk.OutcomeUpstreamTransient},
246+
err: context.DeadlineExceeded,
247+
})
248+
249+
if !summary.upstreamTimeoutSeen {
250+
t.Fatalf("upstreamTimeoutSeen = false, want true")
251+
}
252+
if summary.upstreamFailureSeen {
253+
t.Fatalf("upstreamFailureSeen = true, want false")
254+
}
255+
}
}

web/src/content/default-docs.md

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,10 @@ client = OpenAI(
7474
resp = client.images.generate(
7575
model="gpt-image-2", # gpt-image-1 | gpt-image-1.5 | gpt-image-2
7676
prompt="一只可爱的柴犬坐在樱花树下,日系水彩风格",
77-
size="1024x1024", # 1024x1024 | 1024x1536 | 1536x1024 | auto
77+
size="2048x2048", # gpt-image-2 支持任意合规 WIDTHxHEIGHT,或 auto
7878
quality="medium", # low | medium | high | auto
79+
background="opaque", # opaque | transparent
80+
output_format="png", # png | jpeg | webp
7981
n=1,
8082
extra_body={"stream": True}, # AirGate 长任务分块/保活,返回值仍是 ImagesResponse
8183
)
@@ -85,8 +87,6 @@ with open("out.png", "wb") as f:
8587
f.write(base64.b64decode(img.b64_json))
8688
```
8789

88-
> 计费说明:`input_tokens`(prompt)按 `$5/1M`,`output_tokens`(图像输出)按 `$40/1M`。图像 token 数按尺寸 × 质量估算(`1024x1024 low ≈ 272 tokens`、`medium ≈ 1056`、`high ≈ 4160`)。
89-
9090
### OpenAI Images SDK(图生图)
9191

9292
```python
@@ -95,7 +95,10 @@ with open("in.png", "rb") as f:
9595
model="gpt-image-2", # gpt-image-1.5 | gpt-image-2
9696
image=f, # 也可传 [f1, f2] 列表传多张参考图
9797
prompt="把这张图变成梵高星空风格的油画",
98-
size="1024x1024",
98+
size="1536x1024",
99+
quality="medium",
100+
background="opaque",
101+
output_format="png",
99102
n=1,
100103
extra_body={"stream": True}, # AirGate 长任务分块/保活,返回值仍是 ImagesResponse
101104
)
@@ -105,8 +108,6 @@ with open("out.png", "wb") as f:
105108
f.write(base64.b64decode(img.b64_json))
106109
```
107110

108-
> 额外计费项:每张参考图按尺寸估一份 image input tokens(与 low 质量输出同量级,`1024x1024 ≈ 272 tokens`),并入 `input_tokens`(`$5/1M`)。
109-
110111
### Anthropic Python SDK
111112

112113
```python
@@ -179,10 +180,11 @@ openclaw gateway
179180

180181
参数:
181182

182-
- `size`:`1024x1024`、`1024x1536`、`1536x1024`、`auto`
183+
- `size`:`auto`、`WIDTHxHEIGHT`;`gpt-image-2` 要求宽高均为 16 的倍数、单边不超过 3840、长短边比例不超过 3:1、总像素在 `655360`~`8294400` 之间;常用值如 `1024x1024`、`1536x1024`、`1024x1536`、`2048x2048`、`3840x2160`。
183184
- `quality`:`low`、`medium`、`high`、`auto`
184-
- `n`:目前仅支持 `1`(多图请多次调用)
185+
- `n`:OAuth 模式目前仅支持 `1`;API Key 直通模式按上游能力处理。
185186
- `background`:`opaque` / `transparent`
186187
- `output_format`:`png` / `jpeg` / `webp`
188+
- `input_fidelity`:仅图生图可用,`gpt-image-1` / `gpt-image-1.5` 可传 `low` / `high`;`gpt-image-2` 默认高保真处理参考图,无需传。
187189

188190
响应使用标准 OpenAI Images API schema(`data[].b64_json` + `usage`),官方 SDK 能直接解析。

0 commit comments

Comments
 (0)