@@ -32,16 +32,6 @@ const (
3232
3333var qwenDefaultSystemMessage = []byte (`{"role":"system","content":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}]}` )
3434
35- // qwenBeijingLoc caches the Beijing timezone to avoid repeated LoadLocation syscalls.
36- var qwenBeijingLoc = func () * time.Location {
37- loc , err := time .LoadLocation ("Asia/Shanghai" )
38- if err != nil || loc == nil {
39- log .Warnf ("qwen: failed to load Asia/Shanghai timezone: %v, using fixed UTC+8" , err )
40- return time .FixedZone ("CST" , 8 * 3600 )
41- }
42- return loc
43- }()
44-
4535// qwenQuotaCodes is a package-level set of error codes that indicate quota exhaustion.
4636var qwenQuotaCodes = map [string ]struct {}{
4737 "insufficient_quota" : {},
@@ -156,22 +146,13 @@ func wrapQwenError(ctx context.Context, httpCode int, body []byte) (errCode int,
156146 // Qwen returns 403 for quota errors, 429 for rate limits
157147 if (httpCode == http .StatusForbidden || httpCode == http .StatusTooManyRequests ) && isQwenQuotaError (body ) {
158148 errCode = http .StatusTooManyRequests // Map to 429 to trigger quota logic
159- cooldown := timeUntilNextDay ()
160- retryAfter = & cooldown
161- helps .LogWithRequestID (ctx ).Warnf ("qwen quota exceeded (http %d -> %d), cooling down until tomorrow (%v) " , httpCode , errCode , cooldown )
149+ // Do not force an excessively long retry-after (e.g. until tomorrow), otherwise
150+ // the global request-retry scheduler may skip retries due to max-retry-interval.
151+ helps .LogWithRequestID (ctx ).Warnf ("qwen quota exceeded (http %d -> %d)" , httpCode , errCode )
162152 }
163153 return errCode , retryAfter
164154}
165155
166- // timeUntilNextDay returns duration until midnight Beijing time (UTC+8).
167- // Qwen's daily quota resets at 00:00 Beijing time.
168- func timeUntilNextDay () time.Duration {
169- now := time .Now ()
170- nowLocal := now .In (qwenBeijingLoc )
171- tomorrow := time .Date (nowLocal .Year (), nowLocal .Month (), nowLocal .Day ()+ 1 , 0 , 0 , 0 , 0 , qwenBeijingLoc )
172- return tomorrow .Sub (now )
173- }
174-
175156// ensureQwenSystemMessage ensures the request has a single system message at the beginning.
176157// It always injects the default system prompt and merges any user-provided system messages
177158// into the injected system message content to satisfy Qwen's strict message ordering rules.
0 commit comments