Skip to content

Commit 941334d

Browse files
committed
fix(auth): handle OAuth model alias in retry logic and refine Qwen quota handling
1 parent d54f816 commit 941334d

4 files changed

Lines changed: 76 additions & 23 deletions

File tree

internal/runtime/executor/qwen_executor.go

Lines changed: 3 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,6 @@ const (
3232

3333
var qwenDefaultSystemMessage = []byte(`{"role":"system","content":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}]}`)
3434

35-
// qwenBeijingLoc caches the Beijing timezone to avoid repeated LoadLocation syscalls.
36-
var qwenBeijingLoc = func() *time.Location {
37-
loc, err := time.LoadLocation("Asia/Shanghai")
38-
if err != nil || loc == nil {
39-
log.Warnf("qwen: failed to load Asia/Shanghai timezone: %v, using fixed UTC+8", err)
40-
return time.FixedZone("CST", 8*3600)
41-
}
42-
return loc
43-
}()
44-
4535
// qwenQuotaCodes is a package-level set of error codes that indicate quota exhaustion.
4636
var qwenQuotaCodes = map[string]struct{}{
4737
"insufficient_quota": {},
@@ -156,22 +146,13 @@ func wrapQwenError(ctx context.Context, httpCode int, body []byte) (errCode int,
156146
// Qwen returns 403 for quota errors, 429 for rate limits
157147
if (httpCode == http.StatusForbidden || httpCode == http.StatusTooManyRequests) && isQwenQuotaError(body) {
158148
errCode = http.StatusTooManyRequests // Map to 429 to trigger quota logic
159-
cooldown := timeUntilNextDay()
160-
retryAfter = &cooldown
161-
helps.LogWithRequestID(ctx).Warnf("qwen quota exceeded (http %d -> %d), cooling down until tomorrow (%v)", httpCode, errCode, cooldown)
149+
// Do not force an excessively long retry-after (e.g. until tomorrow), otherwise
150+
// the global request-retry scheduler may skip retries due to max-retry-interval.
151+
helps.LogWithRequestID(ctx).Warnf("qwen quota exceeded (http %d -> %d)", httpCode, errCode)
162152
}
163153
return errCode, retryAfter
164154
}
165155

166-
// timeUntilNextDay returns duration until midnight Beijing time (UTC+8).
167-
// Qwen's daily quota resets at 00:00 Beijing time.
168-
func timeUntilNextDay() time.Duration {
169-
now := time.Now()
170-
nowLocal := now.In(qwenBeijingLoc)
171-
tomorrow := time.Date(nowLocal.Year(), nowLocal.Month(), nowLocal.Day()+1, 0, 0, 0, 0, qwenBeijingLoc)
172-
return tomorrow.Sub(now)
173-
}
174-
175156
// ensureQwenSystemMessage ensures the request has a single system message at the beginning.
176157
// It always injects the default system prompt and merges any user-provided system messages
177158
// into the injected system message content to satisfy Qwen's strict message ordering rules.

internal/runtime/executor/qwen_executor_test.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package executor
22

33
import (
4+
"context"
5+
"net/http"
46
"testing"
57

68
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
@@ -152,3 +154,25 @@ func TestEnsureQwenSystemMessage_MergesMultipleSystemMessages(t *testing.T) {
152154
t.Fatalf("messages[0].content[2].text = %q, want %q", parts[2].Get("text").String(), "B")
153155
}
154156
}
157+
158+
func TestWrapQwenError_InsufficientQuotaDoesNotSetRetryAfter(t *testing.T) {
159+
body := []byte(`{"error":{"code":"insufficient_quota","message":"You exceeded your current quota","type":"insufficient_quota"}}`)
160+
code, retryAfter := wrapQwenError(context.Background(), http.StatusTooManyRequests, body)
161+
if code != http.StatusTooManyRequests {
162+
t.Fatalf("wrapQwenError status = %d, want %d", code, http.StatusTooManyRequests)
163+
}
164+
if retryAfter != nil {
165+
t.Fatalf("wrapQwenError retryAfter = %v, want nil", *retryAfter)
166+
}
167+
}
168+
169+
func TestWrapQwenError_Maps403QuotaTo429WithoutRetryAfter(t *testing.T) {
170+
body := []byte(`{"error":{"code":"insufficient_quota","message":"You exceeded your current quota","type":"insufficient_quota"}}`)
171+
code, retryAfter := wrapQwenError(context.Background(), http.StatusForbidden, body)
172+
if code != http.StatusTooManyRequests {
173+
t.Fatalf("wrapQwenError status = %d, want %d", code, http.StatusTooManyRequests)
174+
}
175+
if retryAfter != nil {
176+
t.Fatalf("wrapQwenError retryAfter = %v, want nil", *retryAfter)
177+
}
178+
}

sdk/cliproxy/auth/conductor.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1830,7 +1830,11 @@ func (m *Manager) closestCooldownWait(providers []string, model string, attempt
18301830
if attempt >= effectiveRetry {
18311831
continue
18321832
}
1833-
blocked, reason, next := isAuthBlockedForModel(auth, model, now)
1833+
checkModel := model
1834+
if strings.TrimSpace(model) != "" {
1835+
checkModel = m.selectionModelForAuth(auth, model)
1836+
}
1837+
blocked, reason, next := isAuthBlockedForModel(auth, checkModel, now)
18341838
if !blocked || next.IsZero() || reason == blockReasonDisabled {
18351839
continue
18361840
}

sdk/cliproxy/auth/conductor_overrides_test.go

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"time"
99

1010
"github.com/google/uuid"
11+
internalconfig "github.com/router-for-me/CLIProxyAPI/v6/internal/config"
1112
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
1213
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
1314
)
@@ -64,6 +65,49 @@ func TestManager_ShouldRetryAfterError_RespectsAuthRequestRetryOverride(t *testi
6465
}
6566
}
6667

68+
func TestManager_ShouldRetryAfterError_UsesOAuthModelAliasForCooldown(t *testing.T) {
69+
m := NewManager(nil, nil, nil)
70+
m.SetRetryConfig(3, 30*time.Second, 0)
71+
m.SetOAuthModelAlias(map[string][]internalconfig.OAuthModelAlias{
72+
"qwen": {
73+
{Name: "qwen3.6-plus", Alias: "coder-model"},
74+
},
75+
})
76+
77+
routeModel := "coder-model"
78+
upstreamModel := "qwen3.6-plus"
79+
next := time.Now().Add(5 * time.Second)
80+
81+
auth := &Auth{
82+
ID: "auth-1",
83+
Provider: "qwen",
84+
ModelStates: map[string]*ModelState{
85+
upstreamModel: {
86+
Unavailable: true,
87+
Status: StatusError,
88+
NextRetryAfter: next,
89+
Quota: QuotaState{
90+
Exceeded: true,
91+
Reason: "quota",
92+
NextRecoverAt: next,
93+
},
94+
},
95+
},
96+
}
97+
if _, errRegister := m.Register(context.Background(), auth); errRegister != nil {
98+
t.Fatalf("register auth: %v", errRegister)
99+
}
100+
101+
_, _, maxWait := m.retrySettings()
102+
wait, shouldRetry := m.shouldRetryAfterError(&Error{HTTPStatus: 429, Message: "quota"}, 0, []string{"qwen"}, routeModel, maxWait)
103+
if !shouldRetry {
104+
t.Fatalf("expected shouldRetry=true, got false (wait=%v)", wait)
105+
}
106+
if wait <= 0 {
107+
t.Fatalf("expected wait > 0, got %v", wait)
108+
}
109+
}
110+
67111
type credentialRetryLimitExecutor struct {
68112
id string
69113

0 commit comments

Comments
 (0)