fix: clean partial stream error tails

dwgx · dwgx · commit 72e1b9cf079e · 2026-06-07T02:01:26.000+09:00
diff --git a/docs/releases/RELEASE_NOTES_2.0.142.md b/docs/releases/RELEASE_NOTES_2.0.142.md
@@ -0,0 +1,18 @@
+## v2.0.142 - partial stream cleanup
+
+This release keeps native bridge defaults unchanged.
+
+### Cursor / streaming compatibility
+
+- Fixed the OpenAI streaming error tail after partial assistant content was
+  already delivered. The stream now finishes with a normal
+  `finish_reason: "stop"` chunk and `[DONE]` instead of appending a structured
+  `{"error": ...}` frame after user-visible content.
+- Empty streams that fail before any real content/tool/thinking payload is sent
+  still return the structured stream error frame, so clients keep actionable
+  diagnostics when no answer was delivered.
+
+### Validation
+
+- Added regression coverage for partial upstream deadline failures after content
+  is emitted, plus the opposite case where only an empty role chunk was emitted.
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "windsurf-api",
-  "version": "2.0.141",
+  "version": "2.0.142",
   "description": "Windsurf to OpenAI + Anthropic compatible API proxy. Turns Windsurf's 107 AI models (Claude, GPT, Gemini, DeepSeek, Grok, Qwen, Kimi, GLM, SWE) into dual-protocol API endpoints. Zero npm deps.",
   "type": "module",
   "main": "src/index.js",
diff --git a/src/handlers/chat.js b/src/handlers/chat.js
@@ -292,6 +292,19 @@ export function chatStreamError(message, type = 'upstream_error', code = null) {
   return { error: { message: sanitizeText(message || 'Upstream stream error'), type, code } };
 }
 
+export function finishPartialStreamAfterError({ id, created, model, send, res }) { // Closes a stream without an error frame: one final stop chunk, then the [DONE] terminator.
+  if (typeof send === 'function') { // the stop chunk is skipped when no callable send is supplied
+    send({
+      id,
+      object: 'chat.completion.chunk',
+      created,
+      model,
+      choices: [{ index: 0, delta: {}, finish_reason: 'stop' }], // empty delta: carries no new content, only the finish marker
+    });
+  }
+  if (res && !res.writableEnded) res.write('data: [DONE]\n\n'); // written directly to res, and only while res has not been ended
+}
+
 /**
  * v2.0.71 (#115 server-side fabricate detection): when a tool-emulation
  * request comes back with `markers=none` AND the model output looks like
@@ -3119,6 +3132,7 @@ function streamResponse(id, created, model, modelKey, provider, messages, cascad
       // Accumulate chunks so we can cache a successful response at the end.
       let accText = '';
       let accThinking = '';
+      let emittedClientPayload = false;
 
       // Cascade conversation pool (stream path). Opus 4.7 tool-emulated
       // requests opt in even when the global experiment toggle is off, because
@@ -3172,17 +3186,20 @@ function streamResponse(id, created, model, modelKey, provider, messages, cascad
         // middle of a stream (fence might straddle a chunk, and we'd need
         // lookahead). On finish we'll emit one clean JSON payload.
         if (wantJson) return;
+        emittedClientPayload = true;
         send({ id, object: 'chat.completion.chunk', created, model,
           choices: [{ index: 0, delta: { content: clean }, finish_reason: null }] });
       };
       const emitThinking = (clean) => {
         if (!clean) return;
         accThinking += clean;
+        emittedClientPayload = true;
         send({ id, object: 'chat.completion.chunk', created, model,
           choices: [{ index: 0, delta: { reasoning_content: clean }, finish_reason: null }] });
       };
 
       const emitToolCallDelta = (tc, idx) => {
+        emittedClientPayload = true;
         send({ id, object: 'chat.completion.chunk', created, model,
           choices: [{ index: 0, delta: {
             tool_calls: [{
@@ -3840,23 +3857,14 @@ function streamResponse(id, created, model, modelKey, provider, messages, cascad
             log.info(`Chat[${reqId}]: stream reuse entry was invalidated (cascade not_found upstream); not restoring to pool`);
           }
 
-          if (hadSuccess) {
+          if (emittedClientPayload) {
             // We already streamed real assistant content. Injecting
             // "[Error: ...]" as a content delta here would corrupt the
             // assistant message (clients display it verbatim as model
             // output). Close cleanly with a plain stop — the caller saw
             // whatever partial content we produced. Error details only
             // go to the server log.
-            const errType = allInternal
-              ? 'upstream_transient_error'
-              : deadlineExceeded
-                ? 'upstream_deadline_exceeded'
-              : poolExhausted
-                ? 'ls_pool_exhausted'
-              : (temporaryUnavailable.allUnavailable || lastErr?.type === 'rate_limit_exceeded')
-                ? 'rate_limit_exceeded'
-                : 'upstream_error';
-            send(chatStreamError(errMsg, errType, deadlineExceeded ? 'windsurf_provider_deadline' : null));
+            finishPartialStreamAfterError({ id, created, model, send, res });
             log.warn(`Stream: partial response delivered then failed (${errMsg})`);
           } else {
             const errType = allInternal
@@ -3870,7 +3878,7 @@ function streamResponse(id, created, model, modelKey, provider, messages, cascad
                 : 'upstream_error';
             send(chatStreamError(errMsg, errType, deadlineExceeded ? 'windsurf_provider_deadline' : null));
           }
-          res.write('data: [DONE]\n\n');
+          if (!emittedClientPayload) res.write('data: [DONE]\n\n');
         } catch {}
         if (!res.writableEnded) res.end();
       } finally {
diff --git a/test/stream-error.test.js b/test/stream-error.test.js
@@ -1,10 +1,26 @@
-import { describe, it } from 'node:test';
+import { afterEach, describe, it } from 'node:test';
 import assert from 'node:assert/strict';
 import http2 from 'http2';
 import { isCascadeTransportError } from '../src/client.js';
-import { chatStreamError, isUpstreamDeadlineExceeded, isUpstreamTransientError, redactRequestLogText } from '../src/handlers/chat.js';
+import { addAccountByKey, getApiKey, removeAccount } from '../src/auth.js';
+import {
+  chatStreamError,
+  finishPartialStreamAfterError,
+  handleChatCompletions,
+  isUpstreamDeadlineExceeded,
+  isUpstreamTransientError,
+  redactRequestLogText,
+} from '../src/handlers/chat.js';
 import { handleMessages } from '../src/handlers/messages.js';
 
+const createdAccountIds = [];
+
+afterEach(() => {
+  while (createdAccountIds.length) {
+    removeAccount(createdAccountIds.pop());
+  }
+});
+
 function parseEvents(raw) {
   return raw.trim().split('\n\n').filter(Boolean).map(frame => {
     const lines = frame.split('\n');
@@ -102,6 +118,124 @@ describe('stream error protocol', () => {
     assert.equal(events[0].data.error.type, 'upstream_transient_error');
   });
 
+  it('closes partial OpenAI streams without appending an error JSON frame', () => {
+    const res = fakeRes();
+    const send = (data) => res.write(`data: ${JSON.stringify(data)}\n\n`);
+
+    send({
+      id: 'chatcmpl_partial',
+      object: 'chat.completion.chunk',
+      created: 1,
+      model: 'claude-sonnet-4.6',
+      choices: [{ index: 0, delta: { role: 'assistant', content: '' }, finish_reason: null }],
+    });
+    send({
+      id: 'chatcmpl_partial',
+      object: 'chat.completion.chunk',
+      created: 1,
+      model: 'claude-sonnet-4.6',
+      choices: [{ index: 0, delta: { content: 'partial answer' }, finish_reason: null }],
+    });
+
+    finishPartialStreamAfterError({
+      id: 'chatcmpl_partial',
+      created: 1,
+      model: 'claude-sonnet-4.6',
+      send,
+      res,
+    });
+    res.end();
+
+    assert.equal(res.body.includes('"error"'), false);
+    const frames = res.body
+      .split('\n\n')
+      .filter(Boolean)
+      .map(frame => frame.split('\n').find(line => line.startsWith('data: '))?.slice(6))
+      .filter(Boolean);
+    assert.equal(frames.at(-1), '[DONE]');
+    const finish = JSON.parse(frames.at(-2));
+    assert.equal(finish.choices[0].finish_reason, 'stop');
+  });
+
+  it('does not append stream error JSON after content already reached the client', async () => {
+    const account = addAccountByKey(`partial-deadline-${Date.now()}-${Math.random().toString(36).slice(2)}`, 'partial-deadline');
+    createdAccountIds.push(account.id);
+
+    class PartialDeadlineClient {
+      async cascadeChat(_messages, _modelEnum, _modelUid, opts = {}) {
+        opts.onChunk({ text: 'partial answer' });
+        throw new Error('Encountered retryable error from model provider: context deadline exceeded (Client.Timeout or context cancellation while reading body)');
+      }
+    }
+
+    const result = await handleChatCompletions({
+      model: 'gemini-2.5-flash',
+      stream: true,
+      messages: [{ role: 'user', content: 'write a long answer' }],
+    }, {
+      async waitForAccount(tried, _signal, _maxWaitMs, modelKey) {
+        return tried.length === 0 ? getApiKey(tried, modelKey) : null;
+      },
+      async ensureLs() {},
+      getLsFor() {
+        return { port: 17777, csrfToken: 'csrf', generation: 1 };
+      },
+      WindsurfClient: PartialDeadlineClient,
+    });
+
+    assert.equal(result.status, 200);
+    assert.equal(result.stream, true);
+
+    const res = fakeRes();
+    await result.handler(res);
+
+    assert.match(res.body, /partial answer/);
+    assert.equal(res.body.includes('"error"'), false);
+    const frames = res.body
+      .split('\n\n')
+      .filter(Boolean)
+      .filter(frame => !frame.startsWith(':'))
+      .map(frame => frame.split('\n').find(line => line.startsWith('data: '))?.slice(6))
+      .filter(Boolean);
+    assert.equal(frames.at(-1), '[DONE]');
+    const finish = JSON.parse(frames.at(-2));
+    assert.equal(finish.choices[0].finish_reason, 'stop');
+  });
+
+  it('still sends a structured stream error when only an empty role chunk was emitted', async () => {
+    const account = addAccountByKey(`empty-deadline-${Date.now()}-${Math.random().toString(36).slice(2)}`, 'empty-deadline');
+    createdAccountIds.push(account.id);
+
+    class EmptyThenDeadlineClient {
+      async cascadeChat(_messages, _modelEnum, _modelUid, opts = {}) {
+        opts.onChunk({ text: '' });
+        throw new Error('Encountered retryable error from model provider: context deadline exceeded (Client.Timeout or context cancellation while reading body)');
+      }
+    }
+
+    const result = await handleChatCompletions({
+      model: 'gemini-2.5-flash',
+      stream: true,
+      messages: [{ role: 'user', content: 'hi' }],
+    }, {
+      async waitForAccount(tried, _signal, _maxWaitMs, modelKey) {
+        return tried.length === 0 ? getApiKey(tried, modelKey) : null;
+      },
+      async ensureLs() {},
+      getLsFor() {
+        return { port: 17777, csrfToken: 'csrf', generation: 1 };
+      },
+      WindsurfClient: EmptyThenDeadlineClient,
+    });
+
+    const res = fakeRes();
+    await result.handler(res);
+
+    assert.match(res.body, /"error"/);
+    assert.match(res.body, /"type":"upstream_deadline_exceeded"/);
+    assert.match(res.body, /data: \[DONE\]/);
+  });
+
   it('routes oversized Connect frame parser errors to onError without throwing from data handlers', async () => {
     const previousProtocol = process.env.GRPC_PROTOCOL;
     process.env.GRPC_PROTOCOL = 'connect';

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "windsurf-api",`
`3`		`- "version": "2.0.141",`
	`3`	`+ "version": "2.0.142",`
`4`	`4`	`"description": "Windsurf to OpenAI + Anthropic compatible API proxy. Turns Windsurf's 107 AI models (Claude, GPT, Gemini, DeepSeek, Grok, Qwen, Kimi, GLM, SWE) into dual-protocol API endpoints. Zero npm deps.",`
`5`	`5`	`"type": "module",`
`6`	`6`	`"main": "src/index.js",`