@@ -660,6 +660,16 @@ func (h *Handler) Responses(c *gin.Context) {
660660 }
661661 }()
662662
663+ // 上游 ctx 生命周期:每次 attempt 开始前用新的 drainable ctx 替换,
664+ // defer 兜底确保函数退出时上游被释放。
665+ // 目的:客户端断连后仍给上游 upstreamDrainTimeout 时间捞 response.completed 的 usage。
666+ var lastUpstreamCancel context.CancelFunc
667+ defer func () {
668+ if lastUpstreamCancel != nil {
669+ lastUpstreamCancel ()
670+ }
671+ }()
672+
663673 for attempt := 0 ; attempt <= maxRetries ; attempt ++ {
664674 account , stickyProxyURL := h .nextAccountForSessionWithPreference (sessionID , excludeAccounts , preferPlan )
665675 if account == nil {
@@ -698,7 +708,15 @@ func (h *Handler) Responses(c *gin.Context) {
698708 // 透传下游请求头用于指纹学习
699709 downstreamHeaders := c .Request .Header .Clone ()
700710
701- resp , reqErr := ExecuteRequest (c .Request .Context (), account , codexBody , sessionID , proxyURL , apiKey , deviceCfg , downstreamHeaders , useWebsocket )
711+ // 上游使用与客户端解耦的 context:客户端中途断开时仍能继续读完
712+ // response.completed 拿到 usage(流式计费的关键)。
713+ // 重试前先 cancel 上一轮的上游 ctx。
714+ if lastUpstreamCancel != nil {
715+ lastUpstreamCancel ()
716+ }
717+ upstreamCtx , upstreamCancel := newDrainableUpstreamContext (c .Request .Context (), upstreamDrainTimeout )
718+ lastUpstreamCancel = upstreamCancel
719+ resp , reqErr := ExecuteRequest (upstreamCtx , account , codexBody , sessionID , proxyURL , apiKey , deviceCfg , downstreamHeaders , useWebsocket )
702720 durationMs := int (time .Since (start ).Milliseconds ())
703721
704722 if reqErr != nil {
@@ -779,6 +797,9 @@ func (h *Handler) Responses(c *gin.Context) {
779797 var lastFailedErrMsg string // 上游 response.failed 的 error.message(debug 用,不论是否 capacity)
780798
781799 if isStream {
800+ // clientGone:客户端写失败后置位,后续事件不再写客户端,
801+ // 但继续读上游直到 response.completed/failed,以拿到准确 usage。
802+ clientGone := false
782803 // 流式透传 + TTFT 跟踪(headers 已在 SetupKeepalive 里设置)
783804 readErr = ReadSSEStream (resp .Body , func (data []byte ) bool {
784805 parsed := gjson .ParseBytes (data )
@@ -794,8 +815,8 @@ func (h *Handler) Responses(c *gin.Context) {
794815 }
795816 }
796817
797- // TTFT: 记录第一个 output_text.delta 事件的时间
798- if ! ttftRecorded && eventType == "response.output_text.delta" {
818+ // TTFT: 黑名单策略 —— 排除控制/终止事件,其余均视为首字(覆盖纯工具调用/图像/推理流)
819+ if ! ttftRecorded && isFirstTokenEvent ( eventType ) {
799820 firstTokenMs = int (time .Since (start ).Milliseconds ())
800821 ttftRecorded = true
801822 }
@@ -821,11 +842,15 @@ func (h *Handler) Responses(c *gin.Context) {
821842
822843 // 画图场景下将 SSE 事件里的 response.model 改为 gpt-5.4
823844 dataToWrite := rewriteResponseModelIfDrawing (data , virtualHit , "response.model" )
824- if err := sseW .WriteEvent (dataToWrite ); err != nil {
825- writeErr = err
826- return false
845+ if ! clientGone {
846+ if err := sseW .WriteEvent (dataToWrite ); err != nil {
847+ writeErr = err
848+ clientGone = true
849+ } else {
850+ wroteAnyBody = true
851+ }
827852 }
828- wroteAnyBody = true
853+ // 客户端断开后仍继续读上游直到 terminal 事件,确保拿到 usage
829854 return eventType != "response.completed" && eventType != "response.failed"
830855 })
831856 } else {
@@ -838,7 +863,7 @@ func (h *Handler) Responses(c *gin.Context) {
838863 readErr = ReadSSEStream (resp .Body , func (data []byte ) bool {
839864 parsed := gjson .ParseBytes (data )
840865 eventType := parsed .Get ("type" ).String ()
841- if ! ttftRecorded && eventType == "response.output_text.delta" {
866+ if ! ttftRecorded && isFirstTokenEvent ( eventType ) {
842867 firstTokenMs = int (time .Since (start ).Milliseconds ())
843868 ttftRecorded = true
844869 }
@@ -1303,6 +1328,16 @@ func (h *Handler) ChatCompletions(c *gin.Context) {
13031328 }
13041329 }()
13051330
1331+ // 上游 ctx 生命周期:每次 attempt 开始前用新的 drainable ctx 替换,
1332+ // defer 兜底确保函数退出时上游被释放。
1333+ // 目的:客户端断连后仍给上游 upstreamDrainTimeout 时间捞 response.completed 的 usage。
1334+ var lastUpstreamCancel context.CancelFunc
1335+ defer func () {
1336+ if lastUpstreamCancel != nil {
1337+ lastUpstreamCancel ()
1338+ }
1339+ }()
1340+
13061341 for attempt := 0 ; attempt <= maxRetries ; attempt ++ {
13071342 account , stickyProxyURL := h .nextAccountForSessionWithPreference (sessionID , excludeAccounts , preferPlan )
13081343 if account == nil {
@@ -1341,7 +1376,15 @@ func (h *Handler) ChatCompletions(c *gin.Context) {
13411376 // 透传下游请求头用于指纹学习
13421377 downstreamHeaders := c .Request .Header .Clone ()
13431378
1344- resp , reqErr := ExecuteRequest (c .Request .Context (), account , codexBody , sessionID , proxyURL , apiKey , deviceCfg , downstreamHeaders , useWebsocket )
1379+ // 上游使用与客户端解耦的 context:客户端中途断开时仍能继续读完
1380+ // response.completed 拿到 usage(流式计费的关键)。
1381+ // 重试前先 cancel 上一轮的上游 ctx。
1382+ if lastUpstreamCancel != nil {
1383+ lastUpstreamCancel ()
1384+ }
1385+ upstreamCtx , upstreamCancel := newDrainableUpstreamContext (c .Request .Context (), upstreamDrainTimeout )
1386+ lastUpstreamCancel = upstreamCancel
1387+ resp , reqErr := ExecuteRequest (upstreamCtx , account , codexBody , sessionID , proxyURL , apiKey , deviceCfg , downstreamHeaders , useWebsocket )
13451388 durationMs := int (time .Since (start ).Milliseconds ())
13461389
13471390 if reqErr != nil {
@@ -1426,6 +1469,9 @@ func (h *Handler) ChatCompletions(c *gin.Context) {
14261469 if isStream {
14271470 streamTranslator := NewStreamTranslator (chunkID , responseModel , created )
14281471
1472+ // clientGone:客户端写失败后置位,后续事件不再写客户端,
1473+ // 但继续读上游直到 response.completed/failed,以拿到准确 usage。
1474+ clientGone := false
14291475 readErr = ReadSSEStream (resp .Body , func (data []byte ) bool {
14301476 parsed := gjson .ParseBytes (data )
14311477 eventType := parsed .Get ("type" ).String ()
@@ -1444,7 +1490,7 @@ func (h *Handler) ChatCompletions(c *gin.Context) {
14441490
14451491 chunk , done := streamTranslator .Translate (data )
14461492
1447- if ! ttftRecorded && strings . Contains (eventType , ".delta" ) {
1493+ if ! ttftRecorded && isFirstTokenEvent (eventType ) {
14481494 firstTokenMs = int (time .Since (start ).Milliseconds ())
14491495 ttftRecorded = true
14501496 }
@@ -1463,19 +1509,27 @@ func (h *Handler) ChatCompletions(c *gin.Context) {
14631509 gotTerminal = true
14641510 }
14651511
1466- if chunk != nil {
1512+ if ! clientGone && chunk != nil {
14671513 if err := sseW .WriteEvent (chunk ); err != nil {
14681514 writeErr = err
1469- return false
1515+ clientGone = true
1516+ } else {
1517+ wroteAnyBody = true
14701518 }
1471- wroteAnyBody = true
14721519 }
1473- if done {
1520+ if ! clientGone && done {
14741521 if err := sseW .WriteRaw ("data: [DONE]\n \n " ); err != nil {
14751522 writeErr = err
1523+ clientGone = true
1524+ } else {
1525+ wroteAnyBody = true
1526+ }
1527+ if ! clientGone {
14761528 return false
14771529 }
1478- wroteAnyBody = true
1530+ }
1531+ // 客户端断开后,要等到 terminal 事件才退出,确保拿到 usage。
1532+ if gotTerminal {
14791533 return false
14801534 }
14811535 return true
@@ -1487,7 +1541,7 @@ func (h *Handler) ChatCompletions(c *gin.Context) {
14871541 readErr = ReadSSEStream (resp .Body , func (data []byte ) bool {
14881542 parsed := gjson .ParseBytes (data )
14891543 eventType := parsed .Get ("type" ).String ()
1490- if ! ttftRecorded && strings . Contains (eventType , ".delta" ) {
1544+ if ! ttftRecorded && isFirstTokenEvent (eventType ) {
14911545 firstTokenMs = int (time .Since (start ).Milliseconds ())
14921546 ttftRecorded = true
14931547 }
0 commit comments