diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go index 781778aefc..97da9f1f03 100644 --- a/sdk/cliproxy/auth/conductor.go +++ b/sdk/cliproxy/auth/conductor.go @@ -2319,11 +2319,16 @@ func (m *Manager) shouldRetryAfterError(err error, attempt int, providers []stri if isRequestInvalidError(err) { return 0, false } - // Treat transient upstream stream interruptions (e.g. "unexpected EOF") - // like a normal retryable API error instead of surfacing them to the - // client. These carry no HTTP status, so they would otherwise fall through - // the 429-only retry path below and abort the request. - if isUnexpectedEOFError(err) { + // Treat transient, statusless upstream stream interruptions (e.g. + // "unexpected EOF") like a normal retryable API error instead of surfacing + // them to the client. These carry no HTTP status, so they would otherwise + // fall through the 429-only retry path below and abort the request. The + // status==0 guard is important: a status-bearing error (e.g. a 5xx whose + // body merely contains "unexpected EOF") has already had a cooldown applied + // by MarkResult, so it must fall through to the cooldown-aware path below + // rather than force an immediate retry that selection would reject with a + // model-cooldown error. + if status == 0 && isUnexpectedEOFError(err) { if !m.retryAllowed(attempt, providers) { return 0, false } diff --git a/sdk/cliproxy/auth/conductor_overrides_test.go b/sdk/cliproxy/auth/conductor_overrides_test.go index 7115f35f3e..3bf9a6a529 100644 --- a/sdk/cliproxy/auth/conductor_overrides_test.go +++ b/sdk/cliproxy/auth/conductor_overrides_test.go @@ -144,6 +144,38 @@ func TestManager_ShouldRetryAfterError_RetriesUnexpectedEOF(t *testing.T) { } } +func TestManager_ShouldRetryAfterError_StatusBearingEOFUsesCooldownPath(t *testing.T) { + m := NewManager(nil, nil, nil) + m.SetRetryConfig(3, 30*time.Second, 0) + + model := "test-model" + // MarkResult applies a one-minute cooldown for transient 5xx responses. + next := time.Now().Add(time.Minute) + auth := &Auth{ + ID: "auth-1", + Provider: "claude", + ModelStates: map[string]*ModelState{ + model: { + Unavailable: true, + Status: StatusError, + NextRetryAfter: next, + }, + }, + } + if _, errRegister := m.Register(context.Background(), auth); errRegister != nil { + t.Fatalf("register auth: %v", errRegister) + } + + _, _, maxWait := m.retrySettings() + // A 5xx whose body merely contains "unexpected EOF" must NOT take the + // immediate fast path; with a single cooled-down credential and a cooldown + // exceeding max-retry-interval, the cooldown-aware path declines the retry. + statusErr := &Error{HTTPStatus: 500, Message: "internal error: unexpected EOF"} + if wait, shouldRetry := m.shouldRetryAfterError(statusErr, 0, []string{"claude"}, model, maxWait); shouldRetry { + t.Fatalf("expected shouldRetry=false for status-bearing EOF beyond cooldown, got true (wait=%v)", wait) + } +} + func TestManager_ShouldRetryAfterError_UnexpectedEOFRespectsRetryDisabled(t *testing.T) { m := NewManager(nil, nil, nil) m.SetRetryConfig(0, 30*time.Second, 0)