fix: add tool routing diagnostics

dwgx · dwgx · commit 81f61560a22e · 2026-06-07T01:04:31.000+09:00
diff --git a/README.md b/README.md
@@ -403,6 +403,9 @@ A: 主要是 `gemini-2.5-flash`、`glm-4.7` / `5` / `5.1`、`kimi-k2` / `k2.5` /
 **Q: 免费账号调工具稳吗**
 A: 看模型。Claude family `<tool_use>` 协议训练扎实最稳（free 账号若 entitled 也是优选）；GLM-4.7 / Kimi-K2.5 走 NLU 兜底 + `WINDSURFAPI_NLU_RETRY=1` retry-with-correction 多数 case 能调；GLM-5.1 在 cascade 后端经常空回复 proxy 救不动；GPT 系列受 cascade 协议层限制（不传 OpenAI tools[] schema）也不稳。**Claude Code / Cline / Codex 调本地文件 / 跑命令优先 `claude-haiku-4.5` 或 `claude-sonnet-4.6`**。
 
+**Q: 客户端显示“没有调用工具”，怎么排查**
+A: 先看日志里的 `ToolRoute[...]`。它会列出客户端声明的工具、`tool_choice` 过滤后的有效工具、native bridge 映射/未映射工具、preamble 降级层级，以及 `tool_choice_none` / `forced_tool_not_declared` / `preamble_compacted` / `native_bridge_*` 等原因。`/v1/messages` 和 `/v1/responses` 的 server-side 工具（如 Anthropic advisor/code_execution，OpenAI file_search/mcp/computer_use）如果代理没有实现，会在翻译层丢弃；这类工具不是普通 function tool，不等于 WindsurfAPI 已经能替客户端执行。native bridge 也不是“本地 IDE 工具修复开关”：默认安全路径仍是 prompt/tool emulation，由客户端本地执行工具；native bridge 是让 Windsurf 远端 workspace 执行 Cascade 内置工具，只适合有模型/账号/API key gate 的小流量实验。
+
 **Q: 31 个 trial 账号一会儿就全 unavailable**
 A: 八成是用了周限模型 — `claude-opus-4-7-max` / `gpt-5.5-xhigh` / `claude-sonnet-4-7-thinking` 这类高 reasoning effort 变体每个账号每周只有 5 次配额，31 号 × 5 次 ≈ 150 次就到顶。换 `claude-sonnet-4.6` / `claude-haiku-4.5` daily 配额比较宽松。`docker logs windsurfapi-windsurf-api-1 | grep rate_limit` 看每个账号的 cooldown 字段验证。
 
diff --git a/docs/releases/RELEASE_NOTES_2.0.141.md b/docs/releases/RELEASE_NOTES_2.0.141.md
@@ -0,0 +1,31 @@
+## v2.0.141 - tool routing diagnostics
+
+This release does not widen native bridge production defaults.
+
+### Tool routing diagnostics
+
+- Added `ToolRoute[...]` request logs for tool-bearing chat requests. The log
+  records requested tools, `tool_choice`-filtered tools, native mapped/unmapped
+  partitions, native bridge decision reason, tool preamble tier, and compact
+  routing reasons.
+- `/v1/responses` now drops a forced `tool_choice` when that choice points at
+  an unbridged server-side tool such as `file_search`, `computer_use_preview`,
+  or `mcp`. This prevents a translated request from carrying a forced tool that
+  no longer exists after flattening.
+- README now has a short FAQ explaining how to interpret "no tool calls" and
+  why native bridge is not a general local IDE tool fix.
+
+### WebFetch trace canaries
+
+- `scripts/native-bridge-smoke.mjs` now summarizes redacted
+  `webFetchTrace.state` values from proto trace JSONL files. Gated WebFetch
+  canaries can now report whether the LS reached `pending_permission`,
+  `completed_web_document`, `error`, or another known branch without manually
+  inspecting trace records.
+- The trace summary is diagnostic only and does not change smoke pass/fail
+  criteria.
+
+### Validation
+
+- Added regression coverage for Responses server-side `tool_choice` pruning,
+  tool routing diagnostics, and smoke WebFetch trace summaries.
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "windsurf-api",
-  "version": "2.0.140",
+  "version": "2.0.141",
   "description": "Windsurf to OpenAI + Anthropic compatible API proxy. Turns Windsurf's 107 AI models (Claude, GPT, Gemini, DeepSeek, Grok, Qwen, Kimi, GLM, SWE) into dual-protocol API endpoints. Zero npm deps.",
   "type": "module",
   "main": "src/index.js",
diff --git a/scripts/native-bridge-smoke.mjs b/scripts/native-bridge-smoke.mjs
@@ -1,5 +1,8 @@
 #!/usr/bin/env node
 
+import { closeSync, existsSync, openSync, readSync, readdirSync, statSync } from 'node:fs';
+import { join } from 'node:path';
+
 const baseUrl = (process.env.BASE_URL || process.env.WINDSURFAPI_BASE_URL || 'http://127.0.0.1:3003').replace(/\/+$/, '');
 const apiKey = process.env.API_KEY || process.env.WINDSURFAPI_API_KEY || '';
 const model = process.env.MODEL || process.env.WINDSURFAPI_SMOKE_MODEL || 'claude-sonnet-4.6';
@@ -15,6 +18,10 @@ const requireNativeBridgeTool = process.env.NATIVE_BRIDGE_SMOKE_REQUIRE_NATIVE !
 const validateToolArgs = process.env.NATIVE_BRIDGE_SMOKE_VALIDATE_ARGS !== '0';
 const enforceLsBudget = process.env.NATIVE_BRIDGE_SMOKE_LS_BUDGET !== '0';
 const requireNativeBridgeEnabled = process.env.NATIVE_BRIDGE_SMOKE_REQUIRE_BRIDGE_ENABLED !== '0';
+const includeProtoTraceSummary = process.env.NATIVE_BRIDGE_SMOKE_PROTO_TRACE_SUMMARY !== '0';
+const protoTraceDir = process.env.NATIVE_BRIDGE_SMOKE_PROTO_TRACE_DIR
+  || process.env.WINDSURFAPI_PROTO_TRACE_DIR
+  || '/data/proto-trace';
 async function sha256Hex(text) {
   const bytes = new TextEncoder().encode(String(text || ''));
   const digest = await crypto.subtle.digest('SHA-256', bytes);
@@ -617,6 +624,84 @@ function nativeBridgeDecisionDelta(before, after) {
   };
 }
 
+function readTailText(file, maxBytes = 2 * 1024 * 1024) { // Read at most the last maxBytes bytes (default 2 MiB) of `file` and return them decoded as UTF-8.
+  const stat = statSync(file);
+  const size = stat.size;
+  const start = Math.max(0, size - maxBytes); // byte offset where the tail begins; 0 when the file is not larger than maxBytes
+  const length = size - start;
+  const fd = openSync(file, 'r');
+  try {
+    const buf = Buffer.alloc(length);
+    readSync(fd, buf, 0, length, start); // NOTE(review): the byte count returned by readSync is ignored, so a short read would leave the rest of buf as allocated
+    return buf.toString('utf8'); // NOTE(review): when start > 0 the text may begin mid-line or mid UTF-8 sequence; confirm callers tolerate a partial first line
+  } finally {
+    closeSync(fd); // the descriptor is closed on both the return path and the throw path
+  }
+}
+
+function summarizeWebFetchTraceDir(dir = protoTraceDir) { // Summarize step.webFetchTrace entries found in recent trace JSONL files under `dir`; returns null, an { available: false, ... } object, or the summary object.
+  try {
+    if (!includeProtoTraceSummary) return null; // summary disabled by the includeProtoTraceSummary flag
+    if (!dir || !existsSync(dir)) return { available: false, dir, reason: 'trace_dir_missing' };
+    const files = readdirSync(dir) // candidates: names containing GetCascadeTrajectorySteps and ending in .jsonl (case-insensitive)
+      .filter(name => /GetCascadeTrajectorySteps.*\.jsonl$/i.test(name))
+      .map(name => {
+        const path = join(dir, name);
+        const stat = statSync(path);
+        return { name, path, mtimeMs: stat.mtimeMs, size: stat.size };
+      })
+      .sort((a, b) => b.mtimeMs - a.mtimeMs) // most recently modified first
+      .slice(0, 6); // only the 6 newest files are scanned
+    const stateCounts = {}; // NOTE(review): plain object keyed by trace.state; a state named like an Object.prototype member would not count cleanly
+    const recent = [];
+    let records = 0; // lines that parsed as JSON
+    let parseErrors = 0; // lines that failed JSON.parse
+    for (const file of files) {
+      const lines = readTailText(file.path).split('\n').filter(Boolean); // only the tail returned by readTailText is scanned; empty lines are dropped
+      for (const line of lines) {
+        let rec;
+        try {
+          rec = JSON.parse(line);
+        } catch {
+          parseErrors++; // NOTE(review): a partial first line from a truncated tail read would presumably land here too
+          continue;
+        }
+        records++;
+        const steps = rec?.semantic?.steps || [];
+        for (const step of steps) {
+          const trace = step?.webFetchTrace;
+          if (!trace?.state) continue; // steps without a webFetchTrace.state are ignored
+          stateCounts[trace.state] = (stateCounts[trace.state] || 0) + 1;
+          recent.push({
+            file: file.name,
+            method: rec.method || '',
+            direction: rec.direction || '',
+            stepIndex: step.index,
+            state: trace.state,
+            stepType: trace.stepType,
+            status: trace.status,
+            hasRequestedInteraction: !!trace.hasRequestedInteraction,
+            hasReadUrlOneof: !!trace.hasReadUrlOneof,
+            hasWebDocument: !!trace.hasWebDocument,
+            errorClassifications: trace.errorClassifications || {},
+          });
+        }
+      }
+    }
+    return {
+      available: true,
+      dir,
+      files: files.map(f => ({ name: f.name, size: f.size })),
+      records,
+      parseErrors,
+      stateCounts,
+      recent: recent.slice(-12), // keep only the last 12 collected entries
+    };
+  } catch (error) {
+    return { available: false, dir, reason: 'trace_summary_failed', error: String(error?.message || error) }; // any error thrown inside the try block is reported in the result instead of propagating
+  }
+}
+
 const selected = expandScenarios(requestedScenarios);
 if (!selected.length) {
   console.error(`No valid scenarios selected. Use one or more of: ${Object.keys(SCENARIOS).join(',')},all`);
@@ -655,6 +740,7 @@ if (!failures.length) {
   }
 }
 const healthAfter = await fetchHealthSnapshot('after');
+const protoTraceSummary = summarizeWebFetchTraceDir();
 
 console.log(JSON.stringify({
   ok: failures.length === 0,
@@ -675,6 +761,7 @@ console.log(JSON.stringify({
   results,
   failures,
   nativeBridgeDecisionDelta: nativeBridgeDecisionDelta(healthBefore, healthAfter),
+  protoTraceSummary,
   healthBefore,
   healthAfter,
 }, null, 2));
diff --git a/src/handlers/chat.js b/src/handlers/chat.js
@@ -222,6 +222,54 @@ export function effectiveToolsForToolChoice(tools, toolChoice) {
   return tools.filter(t => (t?.function?.name || t?.name || '') === forced);
 }
 
+function toolNameList(tools) { // Extract the non-empty tool names from a tools array; returns [] for any non-array input.
+  if (!Array.isArray(tools)) return [];
+  return tools.map(t => t?.function?.name || t?.name || '').filter(Boolean); // prefers function.name, then top-level name; entries with neither are dropped
+}
+
+export function summarizeToolRoutingDiagnostics({ tools, effectiveTools, toolChoice, toolRouting, preambleBudget = null }) { // Build a plain diagnostic summary (name lists, native bridge fields, preamble fields, reason codes) from the given routing inputs.
+  const requested = toolNameList(tools);
+  const effective = toolNameList(effectiveTools);
+  const forcedName = toolChoice && typeof toolChoice === 'object' // a forced tool name exists only when toolChoice is an object
+    ? (toolChoice.function?.name || toolChoice.name || '')
+    : '';
+  const reasons = []; // reason codes in the order the checks below run
+
+  if (toolChoice === 'none') reasons.push('tool_choice_none');
+  if (forcedName && requested.length && !requested.includes(forcedName)) reasons.push('forced_tool_not_declared'); // forced name is absent from the requested tool names
+  if (requested.length && effective.length === 0 && toolChoice !== 'none') reasons.push('effective_tools_empty'); // tools were requested but the effective list is empty, and not because of 'none'
+  if (toolRouting?.nativeDecision?.reason) reasons.push(toolRouting.nativeDecision.reason); // the native decision reason string is passed through verbatim
+  if (toolRouting?.nativeBridgeOn) reasons.push('native_bridge_on');
+  if (preambleBudget?.tier) reasons.push(`preamble_${preambleBudget.tier}`);
+  if (preambleBudget?.compacted) reasons.push('preamble_compacted');
+  if (preambleBudget && preambleBudget.ok === false) reasons.push('preamble_too_large'); // only an explicit ok === false counts; a missing ok field does not
+
+  return {
+    requested,
+    effective,
+    mapped: toolNameList(toolRouting?.partition?.mapped || []),
+    unmapped: toolNameList(toolRouting?.partition?.unmapped || []),
+    nativeBridgeOn: !!toolRouting?.nativeBridgeOn,
+    nativeDecisionReason: toolRouting?.nativeDecision?.reason || '',
+    preambleTier: preambleBudget?.tier || null,
+    preambleBytes: preambleBudget?.finalBytes ?? null, // null when finalBytes is absent; 0 is preserved
+    forcedName,
+    reasons: [...new Set(reasons)], // de-duplicated, keeping first-occurrence order
+  };
+}
+
+function logToolRoutingDiagnostics(reqId, diag) { // Emit one ToolRoute[reqId] info line from a diagnostics object with the fields read below.
+  if (!diag || (!diag.requested.length && !diag.reasons.length)) return; // nothing is logged when diag is missing or has neither requested tools nor reasons
+  log.info( // empty lists and empty fields are printed as 'none'; preamble bytes are appended as rounded KB when not null
+    `ToolRoute[${reqId}]: requested=[${diag.requested.join(',') || 'none'}] ` +
+    `effective=[${diag.effective.join(',') || 'none'}] ` +
+    `mapped=[${diag.mapped.join(',') || 'none'}] unmapped=[${diag.unmapped.join(',') || 'none'}] ` +
+    `native=${diag.nativeBridgeOn ? 'on' : 'off'} nativeReason=${diag.nativeDecisionReason || 'none'} ` +
+    `preamble=${diag.preambleTier || 'none'}${diag.preambleBytes != null ? `/${Math.round(diag.preambleBytes / 1024)}KB` : ''} ` +
+    `forced=${diag.forcedName || 'none'} reasons=[${diag.reasons.join(',') || 'none'}]`,
+  );
+}
+
 export function redactRequestLogText(text) {
   return String(text || '')
     .replace(/sk-[A-Za-z0-9_-]{20,}/g, 'sk-***')
@@ -1768,6 +1816,7 @@ async function _handleChatCompletionsInner(body, context = {}) {
   const callerEnv = emulateTools ? extractCallerEnvironment(messages) : '';
   let toolPreamble = '';
   let preambleTier = null;
+  let toolPreambleBudget = null;
   // Payload budget for the proto-level tool preamble. The upstream LS
   // panel state caps total request size at ~30KB; the preamble alone can
   // approach that with 30+ tools (Claude Code, opencode, Cline). Past the
@@ -1801,6 +1850,7 @@ async function _handleChatCompletionsInner(body, context = {}) {
       // route picks the gpt_native dialect (bare-JSON anti-refusal).
       route: body.__route || 'chat',
     });
+    toolPreambleBudget = budget;
     preambleTier = budget.tier;
     if (budget.compacted) {
       log.warn(`Probe[${reqId}]: toolPreamble ${Math.round(budget.fullBytes / 1024)}KB exceeds soft cap ${Math.round(budget.softBytes / 1024)}KB; using ${budget.tier} tier (${Math.round(budget.finalBytes / 1024)}KB, ${budgetTools.length} tools)`);
@@ -1821,6 +1871,13 @@ async function _handleChatCompletionsInner(body, context = {}) {
     }
     toolPreamble = budget.preamble;
   }
+  logToolRoutingDiagnostics(reqId, summarizeToolRoutingDiagnostics({
+    tools,
+    effectiveTools,
+    toolChoice: tool_choice,
+    toolRouting,
+    preambleBudget: toolPreambleBudget,
+  }));
   // Diagnostic: surface whether environment lifting actually fired so a real
   // request log immediately tells us if Claude Code 2.x changed `<env>` block
   // wording, or if the extraction guard rejected a valid hint. Cheap to log,
diff --git a/src/handlers/responses.js b/src/handlers/responses.js
@@ -219,6 +219,40 @@ function normalizeResponseToolChoice(toolChoice) {
   return toolChoice;
 }
 
+function requestedResponseToolChoiceName(toolChoice) { // Derive a name string for an object-form tool_choice; pruneResponseToolChoice uses it in its warning text.
+  if (!toolChoice || typeof toolChoice !== 'object') return ''; // string and nullish choices have no tool name
+  if (toolChoice.type === 'function') {
+    return encodeToolName(toolChoice.function?.name || toolChoice.name || '', toolChoice.function?.namespace || toolChoice.namespace || ''); // nested function.* fields are preferred; encodeToolName is defined elsewhere in this file
+  }
+  if (toolChoice.type === 'custom' || toolChoice.type === 'namespace') {
+    return encodeToolName(toolChoice.name || toolChoice.function?.name || '', toolChoice.namespace || toolChoice.function?.namespace || ''); // here the top-level fields are preferred over function.*
+  }
+  if (toolChoice.type === 'web_search' || toolChoice.type === 'web_search_preview') return 'web_search'; // both web search variants map to the same name
+  if (toolChoice.type === 'tool_search') return 'tool_search';
+  return toolChoice.name || toolChoice.function?.name || toolChoice.type || ''; // any other type: first non-empty of name, function.name, type
+}
+
+function pruneResponseToolChoice(toolChoice, forwardedTools) { // Normalize a Responses tool_choice; return undefined (with a warning) when it forces a tool missing from forwardedTools or an unbridged server-side type.
+  const normalized = normalizeResponseToolChoice(toolChoice);
+  if (normalized == null) return undefined;
+  if (normalized === 'auto' || normalized === 'required' || normalized === 'none') return normalized; // string modes name no specific tool, so they pass through unchanged
+
+  const requested = requestedResponseToolChoiceName(toolChoice); // used only in the warning message below
+  const availableNames = new Set((forwardedTools || []).map(t => t.function?.name || t.name).filter(Boolean));
+  const forcedName = normalized.function?.name || ''; // NOTE(review): only normalized.function.name is consulted; presumably normalizeResponseToolChoice emits that shape for forced tools — confirm
+  if (forcedName) {
+    if (availableNames.has(forcedName)) return normalized;
+    log.warn(`responses: dropped forced tool_choice "${requested || forcedName}" because the matching tool was not forwarded (available=[${[...availableNames].join(',') || 'none'}])`);
+    return undefined;
+  }
+
+  if (toolChoice && typeof toolChoice === 'object' && UNBRIDGED_SERVER_SIDE_TYPES.has(toolChoice.type)) { // reached only when no forced function name was found; checks the original (un-normalized) type
+    log.warn(`responses: dropped forced server-side tool_choice "${toolChoice.type}" because this proxy does not bridge that tool type`);
+    return undefined;
+  }
+  return normalized; // anything else is forwarded as normalized
+}
+
 function normalizeResponseTextFormat(format) {
   if (!format || typeof format !== 'object') return null;
   if (format.type === 'json_object') return { type: 'json_object' };
@@ -306,6 +340,9 @@ export function responsesToChat(body) {
 
   const tools = flattenResponseTools(body.tools || []);
   const responseFormat = normalizeResponseTextFormat(body.text?.format);
+  const forwardedToolChoice = body.tool_choice != null
+    ? pruneResponseToolChoice(body.tool_choice, tools)
+    : undefined;
   return {
     model: body.model || 'claude-sonnet-4.6',
     messages,
@@ -315,7 +352,7 @@ export function responsesToChat(body) {
     ...(tools.length ? { tools } : {}),
     ...(body.temperature != null ? { temperature: body.temperature } : {}),
     ...(body.top_p != null ? { top_p: body.top_p } : {}),
-    ...(body.tool_choice != null ? { tool_choice: normalizeResponseToolChoice(body.tool_choice) } : {}),
+    ...(forwardedToolChoice != null ? { tool_choice: forwardedToolChoice } : {}),
     ...(responseFormat ? { response_format: responseFormat } : {}),
   };
 }
diff --git a/test/native-bridge-smoke.test.js b/test/native-bridge-smoke.test.js
diff --git a/test/native-tool-routing.test.js b/test/native-tool-routing.test.js
diff --git a/test/responses.test.js b/test/responses.test.js

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -1,6 +1,6 @@` |
| `1` | `1` | `{` |
| `2` | `2` | `"name": "windsurf-api",` |
| `3` | | `- "version": "2.0.140",` |
| | `3` | `+ "version": "2.0.141",` |
| `4` | `4` | `"description": "Windsurf to OpenAI + Anthropic compatible API proxy. Turns Windsurf's 107 AI models (Claude, GPT, Gemini, DeepSeek, Grok, Qwen, Kimi, GLM, SWE) into dual-protocol API endpoints. Zero npm deps.",` |
| `5` | `5` | `"type": "module",` |
| `6` | `6` | `"main": "src/index.js",` |