Skip to content

Commit fc57ae1

Browse files
committed
fix(native): narrow bridge canary defaults
1 parent faeee57 commit fc57ae1

9 files changed

Lines changed: 201 additions & 45 deletions

docs/native-bridge-protocol-notes.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,17 @@
33
Status: reverse-engineering notes for the opt-in native bridge. Nothing here
44
constitutes a decision to enable the bridge by default in production.
55

6+
## Production Gate Status
7+
8+
Default production canary scope is intentionally limited to
9+
`Bash` / `shell_command` / `run_command`.
10+
11+
`Read`, `Grep`, and `Glob` stay in `TOOL_MAP` for protocol matrix testing, but
12+
they are not in the default native bridge tool allowlist. To test them, set
13+
`WINDSURFAPI_NATIVE_TOOL_BRIDGE_TOOLS=Read,Bash,Grep,Glob` or a narrower list
14+
for a gated account/API key/model. Do not treat successful protobuf
15+
encode/decode round-trips as production readiness.
16+
617
## Confirmed Tool Config Fields
718

819
`CascadeToolConfig`:
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
## v2.0.123 - Native bridge gate hardening and upstream deadline diagnostics
2+
3+
- Native bridge default production canary scope is now limited to
4+
`Bash` / `shell_command` / `run_command`. `Read`, `Grep`, and `Glob` remain
5+
available for protocol matrix work, but require an explicit
6+
`WINDSURFAPI_NATIVE_TOOL_BRIDGE_TOOLS=...` allowlist before they route through
7+
native bridge.
8+
- Documented the distinction between protobuf encode/decode coverage and
9+
production readiness in `docs/native-bridge-protocol-notes.md`.
10+
- `context deadline exceeded` and `Client.Timeout or context cancellation while
11+
reading body` errors are now classified as `upstream_deadline_exceeded` with code
12+
`windsurf_provider_deadline`, instead of being folded into generic transient
13+
upstream errors.
14+
- Stream and non-stream paths both keep invalidating half-finished cascade reuse
15+
entries after provider deadline failures.
16+
17+
Verification:
18+
19+
- `node --test test\*.test.js` passes: 1014/1014.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "windsurf-api",
3-
"version": "2.0.122",
3+
"version": "2.0.123",
44
"description": "Windsurf to OpenAI + Anthropic compatible API proxy. Turns Windsurf's 107 AI models (Claude, GPT, Gemini, DeepSeek, Grok, Qwen, Kimi, GLM, SWE) into dual-protocol API endpoints. Zero npm deps.",
55
"type": "module",
66
"main": "src/index.js",

src/cascade-native-bridge.js

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,12 @@ export const CASCADE_STEP = {
7676
export const CASCADE_STEP_STATUS_DONE = 3;
7777

7878
const DEFAULT_NATIVE_BRIDGE_TOOLS = new Set([
79-
'Read', 'read_file', 'view_file',
79+
// Default scope is intentionally narrow: real smoke has only proven the
80+
// command path stable enough for opt-in production canaries. Read/Grep/Glob
81+
// translators remain in TOOL_MAP for protocol matrix work, but must be
82+
// explicitly allowlisted with WINDSURFAPI_NATIVE_TOOL_BRIDGE_TOOLS before
83+
// they are routed through the native bridge.
8084
'Bash', 'shell_command', 'run_command',
81-
'Grep', 'grep_v2', 'grep_search', 'grep_search_v2',
82-
'Glob', 'find', 'list_dir', 'list_directory',
8385
]);
8486

8587
// ─── argument translators ─────────────────────────────────────────

src/handlers/chat.js

Lines changed: 50 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,19 @@ async function internalErrorBackoff(retryIdx) {
9999
return ms;
100100
}
101101

102+
// Case-insensitive matcher for the upstream error texts that this file
// classifies as a deadline failure. It carries no `g`/`y` flag, so `.test()`
// keeps no `lastIndex` state between calls and one shared instance is safe.
const UPSTREAM_DEADLINE_RE = /context deadline exceeded|context cancellation while reading body|client\.timeout/i;
103+
104+
/**
 * Reports whether an error, or a raw message string, matches
 * UPSTREAM_DEADLINE_RE.
 *
 * @param {string|{message?: string}|null|undefined} errOrMessage - Either the
 *   message text itself, or an object whose `message` property is tested. A
 *   null/undefined argument, or a missing or falsy `message`, is treated as
 *   the empty string.
 * @returns {boolean} True when the resolved message matches the pattern.
 */
export function isUpstreamDeadlineExceeded(errOrMessage) {
105+
const msg = typeof errOrMessage === 'string'
106+
? errOrMessage
107+
: String(errOrMessage?.message || '');
108+
return UPSTREAM_DEADLINE_RE.test(msg);
109+
}
110+
111+
/**
 * Builds the explanatory message used when a request is classified as an
 * upstream deadline failure.
 *
 * @param {string} model - Value interpolated at the start of the message
 *   (presumably the model name shown to the client; confirm against callers).
 * @returns {string} The formatted message.
 *
 * NOTE(review): the returned text does not itself match UPSTREAM_DEADLINE_RE
 * (unless the interpolated model value happens to). Code that re-tests a
 * response's `error.message` with isUpstreamDeadlineExceeded() will therefore
 * not recognise a response built from this message. Confirm such callers key
 * off `error.type`, `error.upstream_message` or `reuseEntryInvalid` instead.
 */
function upstreamDeadlineExceededMessage(model) {
112+
return `${model} hit the upstream Windsurf provider deadline (~240s): model thinking/output ran longer than the single Cascade stream window. This is not controlled by WindsurfAPI timeout env vars. Split the task, lower reasoning/max output, or use a faster model.`;
113+
}
114+
102115
function upstreamTransientErrorMessage(model, triedCount, reason = 'internal_error') {
103116
const detail = reason === 'cascade_transport'
104117
? 'Cascade/语言服务器 HTTP/2 流被取消'
@@ -2100,7 +2113,7 @@ async function _handleChatCompletionsInner(body, context = {}) {
21002113
// rationale (cascade trajectory left half-broken, next reuse hits
21012114
// it and the model "loses" the prior conversation).
21022115
const _resultMsg = String(result.body?.error?.message || '');
2103-
if (/context deadline exceeded|context cancellation while reading body|client\.timeout/i.test(_resultMsg)) {
2116+
if (isUpstreamDeadlineExceeded(_resultMsg)) {
21042117
reuseEntryDead = true;
21052118
}
21062119
lastErr = result;
@@ -2150,6 +2163,9 @@ async function _handleChatCompletionsInner(body, context = {}) {
21502163
continue;
21512164
}
21522165
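// Cascade transient errors are usually brief jitter in the upstream or the local LS: back off first, then switch accounts, so the same hot window is not hammered repeatedly. (This describes the transient-error branch; the upstream_deadline_exceeded check just below breaks out before it.)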
2166+
if (errType === 'upstream_deadline_exceeded') {
2167+
break;
2168+
}
21532169
if (errType === 'upstream_internal_error' || errType === 'upstream_transient_error') {
21542170
if (acct?._sticky && isExperimentalEnabled('stickyNoFallback')) {
21552171
log.warn(`Chat[${reqId}]: ${acct.email} (sticky-bound) upstream transient error, stickyNoFallback enabled — not trying other accounts`);
@@ -2663,8 +2679,9 @@ async function nonStreamResponse(client, id, created, model, modelKey, messages,
26632679
const isAuthFail = /unauthenticated|invalid api key|invalid_grant|permission_denied.*account/i.test(err.message);
26642680
const isRateLimit = /rate limit|rate_limit|too many requests|quota/i.test(err.message);
26652681
const isInternal = /internal error occurred.*error id/i.test(err.message);
2682+
const isDeadline = isUpstreamDeadlineExceeded(err);
26662683
const isTransport = isCascadeTransportError(err);
2667-
const isTransient = isUpstreamTransientError(err, isInternal);
2684+
const isTransient = !isDeadline && isUpstreamTransientError(err, isInternal);
26682685
// v2.0.61 (#113): Anthropic / OpenAI content-policy / verification
26692686
// challenges are NOT transient — rotating accounts won't help and
26702687
// wastes quota. Detect and short-circuit with a clean 451 + clear
@@ -2732,6 +2749,20 @@ async function nonStreamResponse(client, id, created, model, modelKey, messages,
27322749
};
27332750
}
27342751
}
2752+
if (isDeadline) {
2753+
return {
2754+
status: 504,
2755+
reuseEntryInvalid: !!err.reuseEntryInvalid,
2756+
body: {
2757+
error: {
2758+
message: upstreamDeadlineExceededMessage(model),
2759+
type: 'upstream_deadline_exceeded',
2760+
code: 'windsurf_provider_deadline',
2761+
upstream_message: sanitizeText(err.message).slice(0, 240),
2762+
},
2763+
},
2764+
};
2765+
}
27352766
return {
27362767
status: isTransient ? 502 : (err.isModelError ? 403 : 502),
27372768
reuseEntryInvalid: !!err.reuseEntryInvalid,
@@ -3452,14 +3483,15 @@ function streamResponse(id, created, model, modelKey, provider, messages, cascad
34523483
// result with no earlier user prompts ("I can see the
34533484
// content from a previous tool call ... but I don't have
34543485
// the earlier conversation context").
3455-
if (/context deadline exceeded|context cancellation while reading body|client\.timeout/i.test(err.message || '')) {
3486+
const isDeadline = isUpstreamDeadlineExceeded(err);
3487+
if (isDeadline) {
34563488
reuseEntryDead = true;
34573489
}
34583490
const isAuthFail = /unauthenticated|invalid api key|invalid_grant|permission_denied.*account/i.test(err.message);
34593491
const isRateLimit = /rate limit|rate_limit|too many requests|quota/i.test(err.message);
34603492
const isInternal = /internal error occurred.*error id/i.test(err.message);
34613493
const isTransport = isCascadeTransportError(err);
3462-
const isTransient = isUpstreamTransientError(err, isInternal);
3494+
const isTransient = !isDeadline && isUpstreamTransientError(err, isInternal);
34633495
// v2.0.61 (#113) — same policy detection as nonStreamResponse.
34643496
const isPolicyBlocked = /cyber\s*verification|content[\s_-]+policy|policy[\s_-]+(?:violation|blocked|denied)|safety[\s_-]+(?:policy|blocked)|prompt[\s_-]+(?:rejected|blocked)\s+by[\s_-]+policy|usage[\s_-]+policy[\s_-]+violation/i.test(err.message);
34653497
if (isAuthFail) reportError(currentApiKey);
@@ -3511,6 +3543,11 @@ function streamResponse(id, created, model, modelKey, provider, messages, cascad
35113543
log.warn(`Chat[${reqId}] stream: policy_blocked on ${currentApiKey?.slice(0, 12)}..., not retrying`);
35123544
break;
35133545
}
3546+
if (isDeadline) {
3547+
err.type = 'upstream_deadline_exceeded';
3548+
err.code = 'windsurf_provider_deadline';
3549+
break;
3550+
}
35143551
// Retry only if nothing has been streamed yet AND it's a retryable error
35153552
if (!hadSuccess && (err.isModelError || isRateLimit)) {
35163553
if (acct?._sticky && isExperimentalEnabled('stickyNoFallback')) {
@@ -3546,10 +3583,13 @@ function streamResponse(id, created, model, modelKey, provider, messages, cascad
35463583
const rl = isAllRateLimited(modelKey);
35473584
const allInternal = streamInternalCount > 0 && tried.length > 0 && streamInternalCount >= tried.length;
35483585
const poolExhausted = isLsPoolExhausted(lastErr);
3586+
const deadlineExceeded = isUpstreamDeadlineExceeded(lastErr) || lastErr?.type === 'upstream_deadline_exceeded';
35493587
// Prefer surfacing upstream_transient so that Cascade transport jitter is not misreported as account rate limiting.
35503588
const lastIsTransport = isCascadeTransportError(lastErr);
35513589
const errMsg = allInternal
35523590
? upstreamTransientErrorMessage(model, tried.length, lastIsTransport ? 'cascade_transport' : 'internal_error')
3591+
: deadlineExceeded
3592+
? upstreamDeadlineExceededMessage(model)
35533593
: poolExhausted
35543594
? sanitizeText(lastErr?.message || 'language server pool exhausted')
35553595
: temporaryUnavailable.allUnavailable
@@ -3576,22 +3616,26 @@ function streamResponse(id, created, model, modelKey, provider, messages, cascad
35763616
// go to the server log.
35773617
const errType = allInternal
35783618
? 'upstream_transient_error'
3619+
: deadlineExceeded
3620+
? 'upstream_deadline_exceeded'
35793621
: poolExhausted
35803622
? 'ls_pool_exhausted'
35813623
: (temporaryUnavailable.allUnavailable || lastErr?.type === 'rate_limit_exceeded')
35823624
? 'rate_limit_exceeded'
35833625
: 'upstream_error';
3584-
send(chatStreamError(errMsg, errType));
3626+
send(chatStreamError(errMsg, errType, deadlineExceeded ? 'windsurf_provider_deadline' : null));
35853627
log.warn(`Stream: partial response delivered then failed (${errMsg})`);
35863628
} else {
35873629
const errType = allInternal
35883630
? 'upstream_transient_error'
3631+
: deadlineExceeded
3632+
? 'upstream_deadline_exceeded'
35893633
: poolExhausted
35903634
? 'ls_pool_exhausted'
35913635
: (temporaryUnavailable.allUnavailable || lastErr?.type === 'rate_limit_exceeded')
35923636
? 'rate_limit_exceeded'
35933637
: 'upstream_error';
3594-
send(chatStreamError(errMsg, errType));
3638+
send(chatStreamError(errMsg, errType, deadlineExceeded ? 'windsurf_provider_deadline' : null));
35953639
}
35963640
res.write('data: [DONE]\n\n');
35973641
} catch {}

test/cascade-native-bridge.test.js

Lines changed: 45 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,16 @@ import {
3434
const fnTool = (name) => ({ type: 'function', function: { name, parameters: { type: 'object' } } });
3535

3636
describe('canMapAllTools', () => {
37-
it('admits a homogeneous mapped set', () => {
38-
assert.equal(canMapAllTools([fnTool('Read'), fnTool('Bash'), fnTool('Glob')]), true);
37+
it('admits only mature default-native tools without an explicit tool allowlist', () => {
38+
const prev = process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE_TOOLS;
39+
delete process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE_TOOLS;
40+
try {
41+
assert.equal(canMapAllTools([fnTool('Bash'), fnTool('shell_command'), fnTool('run_command')]), true);
42+
assert.equal(canMapAllTools([fnTool('Read'), fnTool('Bash'), fnTool('Glob')]), false);
43+
} finally {
44+
if (prev === undefined) delete process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE_TOOLS;
45+
else process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE_TOOLS = prev;
46+
}
3947
});
4048

4149
it('rejects when ANY tool is unmapped', () => {
@@ -48,12 +56,29 @@ describe('canMapAllTools', () => {
4856
assert.equal(canMapAllTools(undefined), false);
4957
});
5058

51-
it('admits Codex-style cascade-native names', () => {
52-
assert.equal(canMapAllTools([fnTool('view_file'), fnTool('run_command'), fnTool('find')]), true);
59+
it('admits Codex-style command names by default and other native names only when allowlisted', () => {
60+
const prev = process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE_TOOLS;
61+
delete process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE_TOOLS;
62+
try {
63+
assert.equal(canMapAllTools([fnTool('run_command')]), true);
64+
assert.equal(canMapAllTools([fnTool('view_file'), fnTool('run_command'), fnTool('find')]), false);
65+
process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE_TOOLS = 'view_file,run_command,find';
66+
assert.equal(canMapAllTools([fnTool('view_file'), fnTool('run_command'), fnTool('find')]), true);
67+
} finally {
68+
if (prev === undefined) delete process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE_TOOLS;
69+
else process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE_TOOLS = prev;
70+
}
5371
});
5472

55-
it('admits mixed Claude Code + Codex names', () => {
56-
assert.equal(canMapAllTools([fnTool('Read'), fnTool('run_command'), fnTool('Grep')]), true);
73+
it('admits mixed Claude Code + Codex names when explicitly allowlisted', () => {
74+
const prev = process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE_TOOLS;
75+
process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE_TOOLS = 'Read,run_command,Grep';
76+
try {
77+
assert.equal(canMapAllTools([fnTool('Read'), fnTool('run_command'), fnTool('Grep')]), true);
78+
} finally {
79+
if (prev === undefined) delete process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE_TOOLS;
80+
else process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE_TOOLS = prev;
81+
}
5782
});
5883
});
5984

@@ -119,7 +144,9 @@ describe('shouldUseNativeBridge — auto-on heuristic', () => {
119144

120145
it('explicit env override forces on for any mapped tool set (deployer opting into remote execution)', () => {
121146
const orig = process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE;
147+
const toolsOrig = process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE_TOOLS;
122148
process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE = '1';
149+
process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE_TOOLS = 'Read,Bash';
123150
try {
124151
assert.equal(
125152
shouldUseNativeBridge(tools, { modelKey: 'claude-sonnet-4-6', provider: 'anthropic', route: 'chat' }),
@@ -133,12 +160,16 @@ describe('shouldUseNativeBridge — auto-on heuristic', () => {
133160
} finally {
134161
if (orig === undefined) delete process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE;
135162
else process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE = orig;
163+
if (toolsOrig === undefined) delete process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE_TOOLS;
164+
else process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE_TOOLS = toolsOrig;
136165
}
137166
});
138167

139168
it('all_mapped mode enables only when every function tool maps', () => {
140169
const orig = process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE;
170+
const toolsOrig = process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE_TOOLS;
141171
process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE = 'all_mapped';
172+
process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE_TOOLS = 'Read,Bash,Grep,Glob';
142173
try {
143174
assert.equal(
144175
shouldUseNativeBridge([fnTool('Read'), fnTool('Bash'), fnTool('Grep'), fnTool('Glob')], {
@@ -155,6 +186,8 @@ describe('shouldUseNativeBridge — auto-on heuristic', () => {
155186
} finally {
156187
if (orig === undefined) delete process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE;
157188
else process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE = orig;
189+
if (toolsOrig === undefined) delete process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE_TOOLS;
190+
else process.env.WINDSURFAPI_NATIVE_TOOL_BRIDGE_TOOLS = toolsOrig;
158191
}
159192
});
160193

@@ -639,10 +672,11 @@ describe('partitionTools — v2.0.66 mixed-mapping splitter', () => {
639672
assert.equal(part.unmapped.length, 2);
640673
});
641674

642-
it('Claude Code-style (all mapped) → unmapped is empty', () => {
675+
it('Claude Code-style tools keep Read/Glob out of the default native scope', () => {
643676
const part = partitionTools([fnTool('Read'), fnTool('Bash'), fnTool('Glob')]);
644677
assert.equal(part.hasAny, true);
645-
assert.equal(part.unmapped.length, 0);
678+
assert.deepEqual(part.mapped.map(t => t.function.name), ['Bash']);
679+
assert.deepEqual(part.unmapped.map(t => t.function.name), ['Read', 'Glob']);
646680
});
647681

648682
it('skips non-function entries gracefully', () => {
@@ -652,8 +686,8 @@ describe('partitionTools — v2.0.66 mixed-mapping splitter', () => {
652686
{ type: 'function' }, // missing function.name
653687
{ type: 'function', function: { name: '' } },
654688
]);
655-
assert.equal(part.mapped.length, 1);
656-
assert.equal(part.unmapped.length, 0);
689+
assert.equal(part.mapped.length, 0);
690+
assert.equal(part.unmapped.length, 1);
657691
});
658692
});
659693

@@ -689,7 +723,7 @@ describe('TOOL_MAP — codex CLI 0.128 shell_command mapping (v2.0.66)', () => {
689723
describe('canMapAllTools (legacy strict gate, kept for compat)', () => {
690724
it('still returns false when ANY tool is unmapped', () => {
691725
assert.equal(canMapAllTools([fnTool('Read'), fnTool('get_weather')]), false);
692-
assert.equal(canMapAllTools([fnTool('Read'), fnTool('Bash'), fnTool('Glob')]), true);
726+
assert.equal(canMapAllTools([fnTool('Read'), fnTool('Bash'), fnTool('Glob')]), false);
693727
});
694728
});
695729

test/cascade-timeout-invalidation.test.js

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,8 @@ describe('upstream-timeout cascade invalidation (#101)', () => {
3737
const m = CHAT_JS.match(/lastErr = err;\s+reuseEntry = null;[\s\S]{0,1500}?const isAuthFail = /);
3838
assert.ok(m, 'stream catch block region not found — refactor may have changed shape');
3939
const region = m[0];
40-
assert.match(region, /context deadline exceeded/i,
41-
'stream timeout regex must mention "context deadline exceeded"');
42-
assert.match(region, /context cancellation while reading body/i,
43-
'stream timeout regex must mention "context cancellation while reading body"');
44-
assert.match(region, /client\\?\.timeout/i,
45-
'stream timeout regex must include Client.Timeout fallback');
40+
assert.match(region, /isUpstreamDeadlineExceeded\(err\)/,
41+
'stream timeout branch must use the shared upstream deadline classifier');
4642
assert.match(region, /reuseEntryDead = true/,
4743
'stream timeout branch must set reuseEntryDead = true');
4844
});
@@ -56,12 +52,20 @@ describe('upstream-timeout cascade invalidation (#101)', () => {
5652
const m = CHAT_JS.match(/if \(result\.reuseEntryInvalid\) reuseEntryDead = true;[\s\S]{0,800}?lastErr = result;/);
5753
assert.ok(m, 'non-stream invalidation region not found — refactor may have changed shape');
5854
const region = m[0];
59-
assert.match(region, /context deadline exceeded/i);
60-
assert.match(region, /context cancellation while reading body/i);
61-
assert.match(region, /client\\?\.timeout/i);
55+
assert.match(region, /isUpstreamDeadlineExceeded\(_resultMsg\)/,
56+
'non-stream timeout branch must use the shared upstream deadline classifier');
6257
assert.match(region, /reuseEntryDead = true/);
6358
});
6459

60+
test('shared classifier keeps all upstream deadline patterns', () => {
61+
const m = CHAT_JS.match(/const UPSTREAM_DEADLINE_RE = ([^\n;]+);/);
62+
assert.ok(m, 'shared upstream deadline regex not found');
63+
const pattern = m[1];
64+
assert.match(pattern, /context deadline exceeded/i);
65+
assert.match(pattern, /context cancellation while reading body/i);
66+
assert.match(pattern, /client\\?\.timeout/i);
67+
});
68+
6569
test('regex actually matches the user-reported error message verbatim', () => {
6670
// Real error from #101:
6771
// "Encountered retryable error from model provider: context

0 commit comments

Comments
 (0)