@@ -99,6 +99,19 @@ async function internalErrorBackoff(retryIdx) {
9999 return ms ;
100100}
101101
// Message fragments that mark an upstream deadline/timeout failure.
// The `i` flag makes the match case-insensitive, so capitalised variants
// such as "Client.Timeout" are recognised too. The pattern carries no
// `g`/`y` flag, so repeated `.test()` calls do not depend on `lastIndex`.
const UPSTREAM_DEADLINE_RE = /context deadline exceeded|context cancellation while reading body|client\.timeout/i;

/**
 * Report whether an error (or a raw error message) describes an upstream
 * deadline/timeout failure.
 *
 * @param {unknown} errOrMessage - Either the message string itself, or an
 *   object whose `message` property holds it. `null`, `undefined` and
 *   objects without a usable `message` are treated as an empty message.
 * @returns {boolean} `true` when the message contains one of the known
 *   deadline fragments, otherwise `false`.
 */
export function isUpstreamDeadlineExceeded(errOrMessage) {
  // Strings are tested as-is; anything else contributes only its `.message`,
  // coerced to a string so non-string messages cannot throw in `.test()`.
  const msg = typeof errOrMessage === 'string'
    ? errOrMessage
    : String(errOrMessage?.message || '');
  return UPSTREAM_DEADLINE_RE.test(msg);
}
110+
/**
 * Build the client-facing explanation for an upstream deadline failure.
 *
 * @param {string} model - Model name interpolated at the start of the message.
 * @returns {string} A single-sentence-group message naming the model, the
 *   approximate deadline, and the suggested remedies.
 */
function upstreamDeadlineExceededMessage(model) {
  return `${model} hit the upstream Windsurf provider deadline (~240s): model thinking/output ran longer than the single Cascade stream window. This is not controlled by WindsurfAPI timeout env vars. Split the task, lower reasoning/max output, or use a faster model.`;
}
114+
102115function upstreamTransientErrorMessage ( model , triedCount , reason = 'internal_error' ) {
103116 const detail = reason === 'cascade_transport'
104117 ? 'Cascade/语言服务器 HTTP/2 流被取消'
@@ -2100,7 +2113,7 @@ async function _handleChatCompletionsInner(body, context = {}) {
21002113 // rationale (cascade trajectory left half-broken, next reuse hits
21012114 // it and the model "loses" the prior conversation).
21022115 const _resultMsg = String ( result . body ?. error ?. message || '' ) ;
2103- if ( / c o n t e x t d e a d l i n e e x c e e d e d | c o n t e x t c a n c e l l a t i o n w h i l e r e a d i n g b o d y | c l i e n t \. t i m e o u t / i . test ( _resultMsg ) ) {
2116+ if ( isUpstreamDeadlineExceeded ( _resultMsg ) ) {
21042117 reuseEntryDead = true ;
21052118 }
21062119 lastErr = result ;
@@ -2150,6 +2163,9 @@ async function _handleChatCompletionsInner(body, context = {}) {
21502163 continue ;
21512164 }
21522165 // Cascade transient 错误通常是上游或本地 LS 短暂抖动,先退避再切账号,避免连续打爆同一热窗口。
2166+ if ( errType === 'upstream_deadline_exceeded' ) {
2167+ break ;
2168+ }
21532169 if ( errType === 'upstream_internal_error' || errType === 'upstream_transient_error' ) {
21542170 if ( acct ?. _sticky && isExperimentalEnabled ( 'stickyNoFallback' ) ) {
21552171 log . warn ( `Chat[${ reqId } ]: ${ acct . email } (sticky-bound) upstream transient error, stickyNoFallback enabled — not trying other accounts` ) ;
@@ -2663,8 +2679,9 @@ async function nonStreamResponse(client, id, created, model, modelKey, messages,
26632679 const isAuthFail = / u n a u t h e n t i c a t e d | i n v a l i d a p i k e y | i n v a l i d _ g r a n t | p e r m i s s i o n _ d e n i e d .* a c c o u n t / i. test ( err . message ) ;
26642680 const isRateLimit = / r a t e l i m i t | r a t e _ l i m i t | t o o m a n y r e q u e s t s | q u o t a / i. test ( err . message ) ;
26652681 const isInternal = / i n t e r n a l e r r o r o c c u r r e d .* e r r o r i d / i. test ( err . message ) ;
2682+ const isDeadline = isUpstreamDeadlineExceeded ( err ) ;
26662683 const isTransport = isCascadeTransportError ( err ) ;
2667- const isTransient = isUpstreamTransientError ( err , isInternal ) ;
2684+ const isTransient = ! isDeadline && isUpstreamTransientError ( err , isInternal ) ;
26682685 // v2.0.61 (#113): Anthropic / OpenAI content-policy / verification
26692686 // challenges are NOT transient — rotating accounts won't help and
26702687 // wastes quota. Detect and short-circuit with a clean 451 + clear
@@ -2732,6 +2749,20 @@ async function nonStreamResponse(client, id, created, model, modelKey, messages,
27322749 } ;
27332750 }
27342751 }
2752+ if ( isDeadline ) {
2753+ return {
2754+ status : 504 ,
2755+ reuseEntryInvalid : ! ! err . reuseEntryInvalid ,
2756+ body : {
2757+ error : {
2758+ message : upstreamDeadlineExceededMessage ( model ) ,
2759+ type : 'upstream_deadline_exceeded' ,
2760+ code : 'windsurf_provider_deadline' ,
2761+ upstream_message : sanitizeText ( err . message ) . slice ( 0 , 240 ) ,
2762+ } ,
2763+ } ,
2764+ } ;
2765+ }
27352766 return {
27362767 status : isTransient ? 502 : ( err . isModelError ? 403 : 502 ) ,
27372768 reuseEntryInvalid : ! ! err . reuseEntryInvalid ,
@@ -3452,14 +3483,15 @@ function streamResponse(id, created, model, modelKey, provider, messages, cascad
34523483 // result with no earlier user prompts ("I can see the
34533484 // content from a previous tool call ... but I don't have
34543485 // the earlier conversation context").
3455- if ( / c o n t e x t d e a d l i n e e x c e e d e d | c o n t e x t c a n c e l l a t i o n w h i l e r e a d i n g b o d y | c l i e n t \. t i m e o u t / i. test ( err . message || '' ) ) {
3486+ const isDeadline = isUpstreamDeadlineExceeded ( err ) ;
3487+ if ( isDeadline ) {
34563488 reuseEntryDead = true ;
34573489 }
34583490 const isAuthFail = / u n a u t h e n t i c a t e d | i n v a l i d a p i k e y | i n v a l i d _ g r a n t | p e r m i s s i o n _ d e n i e d .* a c c o u n t / i. test ( err . message ) ;
34593491 const isRateLimit = / r a t e l i m i t | r a t e _ l i m i t | t o o m a n y r e q u e s t s | q u o t a / i. test ( err . message ) ;
34603492 const isInternal = / i n t e r n a l e r r o r o c c u r r e d .* e r r o r i d / i. test ( err . message ) ;
34613493 const isTransport = isCascadeTransportError ( err ) ;
3462- const isTransient = isUpstreamTransientError ( err , isInternal ) ;
3494+ const isTransient = ! isDeadline && isUpstreamTransientError ( err , isInternal ) ;
34633495 // v2.0.61 (#113) — same policy detection as nonStreamResponse.
34643496 const isPolicyBlocked = / c y b e r \s * v e r i f i c a t i o n | c o n t e n t [ \s _ - ] + p o l i c y | p o l i c y [ \s _ - ] + (?: v i o l a t i o n | b l o c k e d | d e n i e d ) | s a f e t y [ \s _ - ] + (?: p o l i c y | b l o c k e d ) | p r o m p t [ \s _ - ] + (?: r e j e c t e d | b l o c k e d ) \s + b y [ \s _ - ] + p o l i c y | u s a g e [ \s _ - ] + p o l i c y [ \s _ - ] + v i o l a t i o n / i. test ( err . message ) ;
34653497 if ( isAuthFail ) reportError ( currentApiKey ) ;
@@ -3511,6 +3543,11 @@ function streamResponse(id, created, model, modelKey, provider, messages, cascad
35113543 log . warn ( `Chat[${ reqId } ] stream: policy_blocked on ${ currentApiKey ?. slice ( 0 , 12 ) } ..., not retrying` ) ;
35123544 break ;
35133545 }
3546+ if ( isDeadline ) {
3547+ err . type = 'upstream_deadline_exceeded' ;
3548+ err . code = 'windsurf_provider_deadline' ;
3549+ break ;
3550+ }
35143551 // Retry only if nothing has been streamed yet AND it's a retryable error
35153552 if ( ! hadSuccess && ( err . isModelError || isRateLimit ) ) {
35163553 if ( acct ?. _sticky && isExperimentalEnabled ( 'stickyNoFallback' ) ) {
@@ -3546,10 +3583,13 @@ function streamResponse(id, created, model, modelKey, provider, messages, cascad
35463583 const rl = isAllRateLimited ( modelKey ) ;
35473584 const allInternal = streamInternalCount > 0 && tried . length > 0 && streamInternalCount >= tried . length ;
35483585 const poolExhausted = isLsPoolExhausted ( lastErr ) ;
3586+ const deadlineExceeded = isUpstreamDeadlineExceeded ( lastErr ) || lastErr ?. type === 'upstream_deadline_exceeded' ;
35493587 // 优先暴露 upstream_transient,避免把 Cascade transport 抖动误报成账号限流。
35503588 const lastIsTransport = isCascadeTransportError ( lastErr ) ;
35513589 const errMsg = allInternal
35523590 ? upstreamTransientErrorMessage ( model , tried . length , lastIsTransport ? 'cascade_transport' : 'internal_error' )
3591+ : deadlineExceeded
3592+ ? upstreamDeadlineExceededMessage ( model )
35533593 : poolExhausted
35543594 ? sanitizeText ( lastErr ?. message || 'language server pool exhausted' )
35553595 : temporaryUnavailable . allUnavailable
@@ -3576,22 +3616,26 @@ function streamResponse(id, created, model, modelKey, provider, messages, cascad
35763616 // go to the server log.
35773617 const errType = allInternal
35783618 ? 'upstream_transient_error'
3619+ : deadlineExceeded
3620+ ? 'upstream_deadline_exceeded'
35793621 : poolExhausted
35803622 ? 'ls_pool_exhausted'
35813623 : ( temporaryUnavailable . allUnavailable || lastErr ?. type === 'rate_limit_exceeded' )
35823624 ? 'rate_limit_exceeded'
35833625 : 'upstream_error' ;
3584- send ( chatStreamError ( errMsg , errType ) ) ;
3626+ send ( chatStreamError ( errMsg , errType , deadlineExceeded ? 'windsurf_provider_deadline' : null ) ) ;
35853627 log . warn ( `Stream: partial response delivered then failed (${ errMsg } )` ) ;
35863628 } else {
35873629 const errType = allInternal
35883630 ? 'upstream_transient_error'
3631+ : deadlineExceeded
3632+ ? 'upstream_deadline_exceeded'
35893633 : poolExhausted
35903634 ? 'ls_pool_exhausted'
35913635 : ( temporaryUnavailable . allUnavailable || lastErr ?. type === 'rate_limit_exceeded' )
35923636 ? 'rate_limit_exceeded'
35933637 : 'upstream_error' ;
3594- send ( chatStreamError ( errMsg , errType ) ) ;
3638+ send ( chatStreamError ( errMsg , errType , deadlineExceeded ? 'windsurf_provider_deadline' : null ) ) ;
35953639 }
35963640 res . write ( 'data: [DONE]\n\n' ) ;
35973641 } catch { }
0 commit comments