Skip to content

Commit 72e1b9c

Browse files
committed
fix: clean partial stream error tails
1 parent 81f6156 commit 72e1b9c

4 files changed

Lines changed: 175 additions & 15 deletions

File tree

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
## v2.0.142 - partial stream cleanup
2+
3+
This release keeps native bridge defaults unchanged.
4+
5+
### Cursor / streaming compatibility
6+
7+
- Fixed the OpenAI streaming error tail after partial assistant content was
8+
already delivered. The stream now finishes with a normal `finish_reason:
9+
"stop"` chunk and `[DONE]` instead of appending a structured `{"error": ...}`
10+
frame after user-visible content.
11+
- Empty streams that fail before any real content/tool/thinking payload is sent
12+
still return the structured stream error frame, so clients keep actionable
13+
diagnostics when no answer was delivered.
14+
15+
### Validation
16+
17+
- Added regression coverage for partial upstream deadline failures after content
18+
is emitted, plus the opposite case where only an empty role chunk was emitted.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "windsurf-api",
3-
"version": "2.0.141",
3+
"version": "2.0.142",
44
"description": "Windsurf to OpenAI + Anthropic compatible API proxy. Turns Windsurf's 107 AI models (Claude, GPT, Gemini, DeepSeek, Grok, Qwen, Kimi, GLM, SWE) into dual-protocol API endpoints. Zero npm deps.",
55
"type": "module",
66
"main": "src/index.js",

src/handlers/chat.js

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,19 @@ export function chatStreamError(message, type = 'upstream_error', code = null) {
292292
return { error: { message: sanitizeText(message || 'Upstream stream error'), type, code } };
293293
}
294294

295+
export function finishPartialStreamAfterError({ id, created, model, send, res }) {
296+
if (typeof send === 'function') {
297+
send({
298+
id,
299+
object: 'chat.completion.chunk',
300+
created,
301+
model,
302+
choices: [{ index: 0, delta: {}, finish_reason: 'stop' }],
303+
});
304+
}
305+
if (res && !res.writableEnded) res.write('data: [DONE]\n\n');
306+
}
307+
295308
/**
296309
* v2.0.71 (#115 server-side fabricate detection): when a tool-emulation
297310
* request comes back with `markers=none` AND the model output looks like
@@ -3119,6 +3132,7 @@ function streamResponse(id, created, model, modelKey, provider, messages, cascad
31193132
// Accumulate chunks so we can cache a successful response at the end.
31203133
let accText = '';
31213134
let accThinking = '';
3135+
let emittedClientPayload = false;
31223136

31233137
// Cascade conversation pool (stream path). Opus 4.7 tool-emulated
31243138
// requests opt in even when the global experiment toggle is off, because
@@ -3172,17 +3186,20 @@ function streamResponse(id, created, model, modelKey, provider, messages, cascad
31723186
// middle of a stream (fence might straddle a chunk, and we'd need
31733187
// lookahead). On finish we'll emit one clean JSON payload.
31743188
if (wantJson) return;
3189+
emittedClientPayload = true;
31753190
send({ id, object: 'chat.completion.chunk', created, model,
31763191
choices: [{ index: 0, delta: { content: clean }, finish_reason: null }] });
31773192
};
31783193
const emitThinking = (clean) => {
31793194
if (!clean) return;
31803195
accThinking += clean;
3196+
emittedClientPayload = true;
31813197
send({ id, object: 'chat.completion.chunk', created, model,
31823198
choices: [{ index: 0, delta: { reasoning_content: clean }, finish_reason: null }] });
31833199
};
31843200

31853201
const emitToolCallDelta = (tc, idx) => {
3202+
emittedClientPayload = true;
31863203
send({ id, object: 'chat.completion.chunk', created, model,
31873204
choices: [{ index: 0, delta: {
31883205
tool_calls: [{
@@ -3840,23 +3857,14 @@ function streamResponse(id, created, model, modelKey, provider, messages, cascad
38403857
log.info(`Chat[${reqId}]: stream reuse entry was invalidated (cascade not_found upstream); not restoring to pool`);
38413858
}
38423859

3843-
if (hadSuccess) {
3860+
if (emittedClientPayload) {
38443861
// We already streamed real assistant content. Injecting
38453862
// "[Error: ...]" as a content delta here would corrupt the
38463863
// assistant message (clients display it verbatim as model
38473864
// output). Close cleanly with a plain stop — the caller saw
38483865
// whatever partial content we produced. Error details only
38493866
// go to the server log.
3850-
const errType = allInternal
3851-
? 'upstream_transient_error'
3852-
: deadlineExceeded
3853-
? 'upstream_deadline_exceeded'
3854-
: poolExhausted
3855-
? 'ls_pool_exhausted'
3856-
: (temporaryUnavailable.allUnavailable || lastErr?.type === 'rate_limit_exceeded')
3857-
? 'rate_limit_exceeded'
3858-
: 'upstream_error';
3859-
send(chatStreamError(errMsg, errType, deadlineExceeded ? 'windsurf_provider_deadline' : null));
3867+
finishPartialStreamAfterError({ id, created, model, send, res });
38603868
log.warn(`Stream: partial response delivered then failed (${errMsg})`);
38613869
} else {
38623870
const errType = allInternal
@@ -3870,7 +3878,7 @@ function streamResponse(id, created, model, modelKey, provider, messages, cascad
38703878
: 'upstream_error';
38713879
send(chatStreamError(errMsg, errType, deadlineExceeded ? 'windsurf_provider_deadline' : null));
38723880
}
3873-
res.write('data: [DONE]\n\n');
3881+
if (!emittedClientPayload) res.write('data: [DONE]\n\n');
38743882
} catch {}
38753883
if (!res.writableEnded) res.end();
38763884
} finally {

test/stream-error.test.js

Lines changed: 136 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,26 @@
1-
import { describe, it } from 'node:test';
1+
import { afterEach, describe, it } from 'node:test';
22
import assert from 'node:assert/strict';
33
import http2 from 'http2';
44
import { isCascadeTransportError } from '../src/client.js';
5-
import { chatStreamError, isUpstreamDeadlineExceeded, isUpstreamTransientError, redactRequestLogText } from '../src/handlers/chat.js';
5+
import { addAccountByKey, getApiKey, removeAccount } from '../src/auth.js';
6+
import {
7+
chatStreamError,
8+
finishPartialStreamAfterError,
9+
handleChatCompletions,
10+
isUpstreamDeadlineExceeded,
11+
isUpstreamTransientError,
12+
redactRequestLogText,
13+
} from '../src/handlers/chat.js';
614
import { handleMessages } from '../src/handlers/messages.js';
715

16+
const createdAccountIds = [];
17+
18+
afterEach(() => {
19+
while (createdAccountIds.length) {
20+
removeAccount(createdAccountIds.pop());
21+
}
22+
});
23+
824
function parseEvents(raw) {
925
return raw.trim().split('\n\n').filter(Boolean).map(frame => {
1026
const lines = frame.split('\n');
@@ -102,6 +118,124 @@ describe('stream error protocol', () => {
102118
assert.equal(events[0].data.error.type, 'upstream_transient_error');
103119
});
104120

121+
it('closes partial OpenAI streams without appending an error JSON frame', () => {
122+
const res = fakeRes();
123+
const send = (data) => res.write(`data: ${JSON.stringify(data)}\n\n`);
124+
125+
send({
126+
id: 'chatcmpl_partial',
127+
object: 'chat.completion.chunk',
128+
created: 1,
129+
model: 'claude-sonnet-4.6',
130+
choices: [{ index: 0, delta: { role: 'assistant', content: '' }, finish_reason: null }],
131+
});
132+
send({
133+
id: 'chatcmpl_partial',
134+
object: 'chat.completion.chunk',
135+
created: 1,
136+
model: 'claude-sonnet-4.6',
137+
choices: [{ index: 0, delta: { content: 'partial answer' }, finish_reason: null }],
138+
});
139+
140+
finishPartialStreamAfterError({
141+
id: 'chatcmpl_partial',
142+
created: 1,
143+
model: 'claude-sonnet-4.6',
144+
send,
145+
res,
146+
});
147+
res.end();
148+
149+
assert.equal(res.body.includes('"error"'), false);
150+
const frames = res.body
151+
.split('\n\n')
152+
.filter(Boolean)
153+
.map(frame => frame.split('\n').find(line => line.startsWith('data: '))?.slice(6))
154+
.filter(Boolean);
155+
assert.equal(frames.at(-1), '[DONE]');
156+
const finish = JSON.parse(frames.at(-2));
157+
assert.equal(finish.choices[0].finish_reason, 'stop');
158+
});
159+
160+
it('does not append stream error JSON after content already reached the client', async () => {
161+
const account = addAccountByKey(`partial-deadline-${Date.now()}-${Math.random().toString(36).slice(2)}`, 'partial-deadline');
162+
createdAccountIds.push(account.id);
163+
164+
class PartialDeadlineClient {
165+
async cascadeChat(_messages, _modelEnum, _modelUid, opts = {}) {
166+
opts.onChunk({ text: 'partial answer' });
167+
throw new Error('Encountered retryable error from model provider: context deadline exceeded (Client.Timeout or context cancellation while reading body)');
168+
}
169+
}
170+
171+
const result = await handleChatCompletions({
172+
model: 'gemini-2.5-flash',
173+
stream: true,
174+
messages: [{ role: 'user', content: 'write a long answer' }],
175+
}, {
176+
async waitForAccount(tried, _signal, _maxWaitMs, modelKey) {
177+
return tried.length === 0 ? getApiKey(tried, modelKey) : null;
178+
},
179+
async ensureLs() {},
180+
getLsFor() {
181+
return { port: 17777, csrfToken: 'csrf', generation: 1 };
182+
},
183+
WindsurfClient: PartialDeadlineClient,
184+
});
185+
186+
assert.equal(result.status, 200);
187+
assert.equal(result.stream, true);
188+
189+
const res = fakeRes();
190+
await result.handler(res);
191+
192+
assert.match(res.body, /partial answer/);
193+
assert.equal(res.body.includes('"error"'), false);
194+
const frames = res.body
195+
.split('\n\n')
196+
.filter(Boolean)
197+
.filter(frame => !frame.startsWith(':'))
198+
.map(frame => frame.split('\n').find(line => line.startsWith('data: '))?.slice(6))
199+
.filter(Boolean);
200+
assert.equal(frames.at(-1), '[DONE]');
201+
const finish = JSON.parse(frames.at(-2));
202+
assert.equal(finish.choices[0].finish_reason, 'stop');
203+
});
204+
205+
it('still sends a structured stream error when only an empty role chunk was emitted', async () => {
206+
const account = addAccountByKey(`empty-deadline-${Date.now()}-${Math.random().toString(36).slice(2)}`, 'empty-deadline');
207+
createdAccountIds.push(account.id);
208+
209+
class EmptyThenDeadlineClient {
210+
async cascadeChat(_messages, _modelEnum, _modelUid, opts = {}) {
211+
opts.onChunk({ text: '' });
212+
throw new Error('Encountered retryable error from model provider: context deadline exceeded (Client.Timeout or context cancellation while reading body)');
213+
}
214+
}
215+
216+
const result = await handleChatCompletions({
217+
model: 'gemini-2.5-flash',
218+
stream: true,
219+
messages: [{ role: 'user', content: 'hi' }],
220+
}, {
221+
async waitForAccount(tried, _signal, _maxWaitMs, modelKey) {
222+
return tried.length === 0 ? getApiKey(tried, modelKey) : null;
223+
},
224+
async ensureLs() {},
225+
getLsFor() {
226+
return { port: 17777, csrfToken: 'csrf', generation: 1 };
227+
},
228+
WindsurfClient: EmptyThenDeadlineClient,
229+
});
230+
231+
const res = fakeRes();
232+
await result.handler(res);
233+
234+
assert.match(res.body, /"error"/);
235+
assert.match(res.body, /"type":"upstream_deadline_exceeded"/);
236+
assert.match(res.body, /data: \[DONE\]/);
237+
});
238+
105239
it('routes oversized Connect frame parser errors to onError without throwing from data handlers', async () => {
106240
const previousProtocol = process.env.GRPC_PROTOCOL;
107241
process.env.GRPC_PROTOCOL = 'connect';

0 commit comments

Comments
 (0)