dwgx
diff --git a/‎.gitignore‎
Lines changed: 2 additions & 0 deletions b/‎.gitignore‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎docs/native-bridge-protocol-notes.md‎
Lines changed: 32 additions & 0 deletions b/‎docs/native-bridge-protocol-notes.md‎
Lines changed: 32 additions & 0 deletions
diff --git a/‎docs/releases/RELEASE_NOTES_2.0.127.md‎
Lines changed: 36 additions & 0 deletions b/‎docs/releases/RELEASE_NOTES_2.0.127.md‎
Lines changed: 36 additions & 0 deletions
diff --git a/‎package.json‎
Lines changed: 4 additions & 2 deletions b/‎package.json‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎scripts/lsp-capacity-matrix.mjs‎
Lines changed: 229 additions & 0 deletions b/‎scripts/lsp-capacity-matrix.mjs‎
Lines changed: 229 additions & 0 deletions
diff --git a/‎scripts/native-bridge-smoke.mjs‎
Lines changed: 34 additions & 0 deletions b/‎scripts/native-bridge-smoke.mjs‎
Lines changed: 34 additions & 0 deletions
@@ -7,6 +7,8 @@ sshpass2.sh
 deploy-us.py
 scripts/*
 !scripts/native-bridge-smoke.mjs
+!scripts/lsp-capacity-matrix.mjs
+!scripts/web-search-direct-probe.mjs
 src/get-token.js
 src/test-cascade.js
 src/runtime-config.json
 
@@ -180,6 +180,38 @@ separate first-party API bridge until we find the LS-side web executor
 precondition. The confirmed protobuf fields are useful for tracing and future
 matrix work, but not sufficient for production native bridge rollout.
 
+## Direct Web Search API
+
+`GetWebSearchResults` is confirmed independently of the LS-native tool path:
+
+```text
+POST /exa.api_server_pb.ApiServerService/GetWebSearchResults
+```
+
+Request fields from the descriptor dump:
+
+- `metadata` = field `1`
+- `query` = field `2`
+- `limit` = field `3`
+- `domain` = field `4`
+- `third_party_config` = field `5`
+- `mode` = field `6`
+
+Response fields:
+
+- `results` = repeated field `1` (`KnowledgeBaseItem`)
+- `web_search_url` = field `2`
+- `summary` = field `3`
+
+`src/windsurf-api.js` exposes `getWebSearchResults()` and
+`npm run probe:web-search` exercises it against real accounts. This is the
+preferred WebSearch investigation path for now because it avoids the LS native
+web executor that currently returns `permission_denied`.
+
+There is not yet an equivalent confirmed direct WebFetch/read-url endpoint.
+Do not implement WebFetch direct bridging from guesswork; keep it on emulation
+or native lab traces until a descriptor-backed endpoint is found.
+
 ## Experiment Hooks
 
 `WINDSURFAPI_NATIVE_TOOL_BRIDGE_CONFIG_RAW` can inject exact protobuf bytes
 
@@ -0,0 +1,36 @@
+## v2.0.127 - Native bridge observability and probe tooling
+
+- `/health?verbose=1` now exposes sanitized native bridge decision telemetry:
+  total enabled/disabled decisions, reason counters, last decision, and a small
+  recent-decision ring. It records why a request did or did not use the native
+  bridge without storing caller keys, account IDs, or upstream API keys.
+- The authenticated dashboard overview API now includes the same sanitized
+  native bridge telemetry, and the overview UI shows mode, gray gates, decision
+  totals, top disable reasons, and recent mapped/unmapped tool decisions. This
+  makes "why did this request stay on prompt emulation?" visible without
+  reading logs or exposing the server API key to the browser.
+- `npm run smoke:native-bridge` now includes native bridge decision deltas in
+  its JSON output, so Read/Grep/Glob canaries can prove both the routing path
+  and the emitted tool-call path.
+- Added `npm run smoke:lsp-matrix` for real deployment LSP capacity probes. It
+  runs configurable chat concurrency, snapshots `/health?verbose=1`, and reports
+  RSS, LS pool occupancy, memory-guard state, and admission-stat deltas.
+- Added a direct `GetWebSearchResults` helper and `npm run probe:web-search`.
+  The probe uses explicit upstream account keys or persisted `accounts.json`;
+  it intentionally does not treat the gateway `API_KEY` as a Windsurf account
+  key. This is the safe path for WebSearch investigation while LS-native
+  WebSearch/WebFetch remain outside the default native bridge allowlist.
+- Default production behavior is unchanged: the native bridge still requires
+  explicit env gates, and WebSearch/WebFetch are still lab-only until live
+  LS-native result payloads are confirmed.
+
+Verification:
+
+- `node --check src\cascade-native-bridge.js`
+- `node --check src\native-bridge-stats.js`
+- `node --check src\handlers\chat.js`
+- `node --check src\windsurf-api.js`
+- `node --check scripts\native-bridge-smoke.mjs`
+- `node --check scripts\lsp-capacity-matrix.mjs`
+- `node --check scripts\web-search-direct-probe.mjs`
+- `node --test --test-timeout=120000 --test-force-exit test\*.test.js`
@@ -1,14 +1,16 @@
 {
   "name": "windsurf-api",
-  "version": "2.0.126",
+  "version": "2.0.127",
   "description": "Windsurf to OpenAI + Anthropic compatible API proxy. Turns Windsurf's 107 AI models (Claude, GPT, Gemini, DeepSeek, Grok, Qwen, Kimi, GLM, SWE) into dual-protocol API endpoints. Zero npm deps.",
   "type": "module",
   "main": "src/index.js",
   "scripts": {
     "start": "node src/index.js",
     "dev": "node --watch src/index.js",
     "test": "node --test test/*.test.js",
-    "smoke:native-bridge": "node scripts/native-bridge-smoke.mjs"
+    "smoke:native-bridge": "node scripts/native-bridge-smoke.mjs",
+    "smoke:lsp-matrix": "node scripts/lsp-capacity-matrix.mjs",
+    "probe:web-search": "node scripts/web-search-direct-probe.mjs"
   },
   "engines": {
     "node": ">=20.0.0"
 
@@ -0,0 +1,229 @@
+#!/usr/bin/env node
+
+/**
+ * Read a numeric setting from an environment value.
+ * Unset or empty values, and anything that does not parse to a finite number,
+ * resolve to `fallback` so a typo cannot leak NaN into loop bounds or timers.
+ */
+function envNumber(raw, fallback) {
+  const parsed = Number(raw || fallback);
+  return Number.isFinite(parsed) ? parsed : fallback;
+}
+
+// Gateway under test; trailing slashes are stripped so paths can be appended directly.
+const baseUrl = (process.env.BASE_URL || process.env.WINDSURFAPI_BASE_URL || 'http://127.0.0.1:3003').replace(/\/+$/, '');
+// Bearer token sent with every health and chat request.
+const apiKey = process.env.API_KEY || process.env.WINDSURFAPI_API_KEY || '';
+const model = process.env.MODEL || process.env.WINDSURFAPI_LSP_MATRIX_MODEL || 'claude-haiku-4.5';
+// Comma-separated batch sizes. Only positive integers survive, which is what the
+// validation message below promises; blanks, fractions and junk are dropped.
+const concurrencyValues = String(process.env.LSP_MATRIX_CONCURRENCY || '1,2,4')
+  .split(',')
+  .map(s => Number(s.trim()))
+  .filter(n => Number.isInteger(n) && n > 0);
+// At least one round per concurrency level. A non-numeric value used to become NaN,
+// which skipped every round and still reported overall success.
+const rounds = Math.max(1, Math.floor(envNumber(process.env.LSP_MATRIX_ROUNDS, 1)));
+// Per-request deadline, never below 5 s. A non-numeric value used to become a NaN
+// timer delay, which fires almost immediately.
+const timeoutMs = Math.max(5_000, envNumber(process.env.LSP_MATRIX_TIMEOUT_MS, 90_000));
+const stream = process.env.LSP_MATRIX_STREAM === '1';
+const includeHealth = process.env.LSP_MATRIX_HEALTH !== '0';
+const failFast = process.env.LSP_MATRIX_FAIL_FAST === '1';
+// Per-run tag embedded in prompts and `user` ids.
+const marker = `LSP_MATRIX_${Date.now().toString(36)}`;
+
+if (!apiKey) {
+  console.error('API_KEY is required');
+  process.exit(2);
+}
+if (!concurrencyValues.length) {
+  console.error('LSP_MATRIX_CONCURRENCY must contain at least one positive integer');
+  process.exit(2);
+}
+
+/**
+ * Promise-based delay.
+ * @param {number} ms - Delay handed straight to `setTimeout`.
+ * @returns {Promise<void>} Resolves when the timer fires.
+ */
+function sleep(ms) {
+  return new Promise((done) => {
+    setTimeout(done, ms);
+  });
+}
+
+/**
+ * Collapse whitespace runs to single spaces, trim, and cap the length.
+ * @param {*} text - Value to render; falsy values become the empty string.
+ * @param {number} [max=800] - Maximum number of characters kept before truncation.
+ * @returns {string} The flattened text, with a `...<truncated N chars>` suffix when cut.
+ */
+function compactText(text, max = 800) {
+  const flattened = String(text || '').replace(/\s+/g, ' ').trim();
+  if (flattened.length <= max) return flattened;
+  const dropped = flattened.length - max;
+  return `${flattened.slice(0, max)}...<truncated ${dropped} chars>`;
+}
+
+/**
+ * Nearest-rank percentile over the finite numbers in `values`.
+ * @param {Array<*>} values - Samples; entries that are not finite numbers are ignored.
+ * @param {number} p - Percentile in the 0-100 range.
+ * @returns {number} The selected sample, or 0 when no finite sample exists.
+ */
+function percentile(values, p) {
+  const sorted = values
+    .filter(v => Number.isFinite(v))
+    .sort((lo, hi) => lo - hi);
+  const count = sorted.length;
+  if (count === 0) return 0;
+  // Rank is 1-based before the -1; clamp it into the valid index range.
+  const rank = Math.ceil((p / 100) * count) - 1;
+  const clamped = Math.min(count - 1, Math.max(0, rank));
+  return sorted[clamped];
+}
+
+/**
+ * Flatten the `lsPool` section of a verbose health payload into a fixed-shape record.
+ * Every field missing from the payload is reported as `null` so rows stay comparable.
+ * @param {object|null|undefined} health - Parsed `/health?verbose=1` body.
+ * @returns {object} Pool counters, the memory-guard view, and `admissionStats` (or `null`).
+ */
+function summarizePool(health) {
+  const lsPool = health?.lsPool;
+  const pool = lsPool?.pool || {};
+  // The guard may be nested under the pool or sit directly on lsPool.
+  const guard = pool.memoryGuard || lsPool?.memoryGuard || {};
+  const orNull = value => value ?? null;
+
+  const summary = {
+    running: Boolean(lsPool?.running),
+    maxInstances: lsPool?.maxInstances ?? pool.maxInstances ?? null,
+    totalRssBytes: orNull(lsPool?.totalRssBytes),
+  };
+
+  // Counters copied verbatim from the pool object, in report order.
+  const poolFields = [
+    'size',
+    'occupancy',
+    'effectiveOccupancy',
+    'ready',
+    'starting',
+    'pending',
+    'reservedPendingStarts',
+    'stopping',
+    'activeRequests',
+    'maintenanceRequests',
+    'nonDefaultInstances',
+    'canStartNewNonDefault',
+    'blockReason',
+  ];
+  for (const field of poolFields) summary[field] = orNull(pool[field]);
+
+  summary.memoryGuard = {
+    enabled: orNull(guard.enabled),
+    availableBytes: orNull(guard.availableBytes),
+    minAvailableBytes: orNull(guard.minAvailableBytes),
+    estimatedRssBytesPerInstance: orNull(guard.estimatedRssBytesPerInstance),
+    okToSpawn: orNull(guard.okToSpawn),
+    source: orNull(guard.minAvailableBytesSource),
+  };
+  summary.admissionStats = lsPool?.admissionStats || null;
+  return summary;
+}
+
+/**
+ * Snapshot `/health?verbose=1` and reduce it to the fields the matrix reports.
+ *
+ * Failures are returned rather than thrown: transport errors, timeouts and
+ * non-JSON bodies all come back as `{ ok: false, label, ... }` so one bad probe
+ * cannot abort the sweep.
+ *
+ * @param {string} label - Tag copied into the result to identify the snapshot.
+ * @returns {Promise<object|null>} The snapshot, or `null` when health probing is
+ *   disabled (`LSP_MATRIX_HEALTH=0`).
+ */
+async function fetchHealth(label) {
+  if (!includeHealth) return null;
+  try {
+    const res = await fetch(`${baseUrl}/health?verbose=1`, {
+      headers: { authorization: `Bearer ${apiKey}` },
+      // Bound the probe with the same deadline as the chat requests; without a
+      // signal, a stalled health endpoint would block the whole sweep. The signal
+      // also covers the body read below.
+      signal: AbortSignal.timeout(timeoutMs),
+    });
+    const text = await res.text();
+    let json;
+    try { json = JSON.parse(text); } catch {
+      return { ok: false, label, status: res.status, error: 'health returned non-JSON', rawPreview: compactText(text) };
+    }
+    return {
+      ok: res.ok,
+      label,
+      status: res.status,
+      version: json.version,
+      commit: json.commit,
+      accounts: json.accounts || null,
+      nativeBridgeConfig: json.nativeBridgeConfig || null,
+      pool: summarizePool(json),
+    };
+  } catch (error) {
+    // Covers network failures, the timeout above, and a JSON body of `null`.
+    return { ok: false, label, error: String(error?.message || error) };
+  }
+}
+
+/**
+ * Build the chat-completions payload for one probe request.
+ * @param {string} user - Value for the `user` field of the request.
+ * @param {string|number} index - Request tag interpolated into the prompt.
+ * @returns {object} Body using the module-level `model` and `stream` settings,
+ *   capped at 16 output tokens.
+ */
+function requestBody(user, index) {
+  const prompt = `Reply exactly OK. ${marker} request ${index}.`;
+  const messages = [{ role: 'user', content: prompt }];
+  return { model, stream, user, max_tokens: 16, messages };
+}
+
+/**
+ * Send a single chat completion and time it.
+ *
+ * Failures are returned rather than thrown: any error is reported as a result with
+ * `ok: false` and `status: 0`, and an abort is reported as a timeout message.
+ *
+ * @param {string} user - Value for the `user` field of the request.
+ * @param {string|number} index - Request tag passed to `requestBody`.
+ * @returns {Promise<object>} Outcome with `ok`, `status`, `latencyMs` and either
+ *   response details (`processingMs`, `accountLikeHeaders`, `preview`) or `error`.
+ */
+async function runOne(user, index) {
+  const aborter = new AbortController();
+  const deadline = setTimeout(() => aborter.abort(), timeoutMs);
+  const startedAt = Date.now();
+  const elapsed = () => Date.now() - startedAt;
+  try {
+    const payload = JSON.stringify(requestBody(user, index));
+    const res = await fetch(`${baseUrl}/v1/chat/completions`, {
+      method: 'POST',
+      headers: {
+        authorization: `Bearer ${apiKey}`,
+        'content-type': 'application/json',
+      },
+      body: payload,
+      signal: aborter.signal,
+    });
+    // Latency covers the full body read, not just the response headers.
+    const raw = await res.text();
+    const latencyMs = elapsed();
+    const succeeded = res.status >= 200 && res.status < 300;
+    const header = name => res.headers.get(name) || null;
+    return {
+      ok: succeeded,
+      status: res.status,
+      latencyMs,
+      // A missing, zero or non-numeric header is reported as null.
+      processingMs: Number(res.headers.get('openai-processing-ms') || 0) || null,
+      accountLikeHeaders: {
+        model: header('openai-model'),
+        requestId: header('x-request-id'),
+      },
+      // Failures keep a longer preview than successes.
+      preview: compactText(raw, succeeded ? 240 : 800),
+    };
+  } catch (error) {
+    const timedOut = error?.name === 'AbortError';
+    return {
+      ok: false,
+      status: 0,
+      latencyMs: elapsed(),
+      error: timedOut ? `timeout after ${timeoutMs}ms` : String(error?.message || error),
+    };
+  } finally {
+    clearTimeout(deadline);
+  }
+}
+
+/**
+ * Per-counter change in `pool.admissionStats` between two health snapshots.
+ * @param {object|null} before - Snapshot taken before the batch.
+ * @param {object|null} after - Snapshot taken after the batch.
+ * @returns {object} One entry per tracked counter; missing counters count as 0.
+ */
+function admissionDelta(before, after) {
+  const prior = before?.pool?.admissionStats || {};
+  const current = after?.pool?.admissionStats || {};
+  const counters = [
+    'startAttempts',
+    'startSuccesses',
+    'startFailures',
+    'poolWaits',
+    'memoryWaits',
+    'poolExhausted',
+    'memoryGuardBlocks',
+    'evictions',
+  ];
+  return Object.fromEntries(
+    counters.map(name => [name, Number(current[name] || 0) - Number(prior[name] || 0)]),
+  );
+}
+
+/**
+ * Build one matrix row from a finished batch.
+ * @param {number} concurrency - Number of parallel requests in the batch.
+ * @param {number} round - Round number within this concurrency level.
+ * @param {object|null} before - Health snapshot taken before the batch.
+ * @param {object|null} after - Health snapshot taken after the batch.
+ * @param {object[]} requests - Results produced by `runOne`.
+ * @returns {object} Success counts, status histogram, latency stats, RSS and
+ *   admission deltas, both pool views, and at most ten failed requests.
+ */
+function summarizeBatch(concurrency, round, before, after, requests) {
+  const total = requests.length;
+  const failed = requests.filter(r => !r.ok);
+  const succeeded = total - failed.length;
+  const durations = requests.map(r => r.latencyMs);
+
+  // Histogram keyed by the stringified HTTP status (0 means the request never completed).
+  const statuses = requests.reduce((histogram, r) => {
+    const code = String(r.status);
+    histogram[code] = (histogram[code] || 0) + 1;
+    return histogram;
+  }, {});
+
+  const poolBefore = before?.pool || {};
+  const poolAfter = after?.pool || {};
+  const rssBefore = Number(poolBefore.totalRssBytes || 0);
+  const rssAfter = Number(poolAfter.totalRssBytes || 0);
+
+  return {
+    round,
+    concurrency,
+    ok: succeeded === total,
+    success: succeeded,
+    failed: total - succeeded,
+    statuses,
+    latencyMs: {
+      min: Math.min(...durations),
+      p50: percentile(durations, 50),
+      p95: percentile(durations, 95),
+      max: Math.max(...durations),
+    },
+    rssDeltaBytes: rssAfter - rssBefore,
+    poolBefore,
+    poolAfter,
+    admissionDelta: admissionDelta(before, after),
+    failures: failed.slice(0, 10),
+  };
+}
+
+// Sweep driver: for each concurrency level and round, take a health snapshot,
+// fire the batch in parallel, wait for the server to settle, snapshot again and
+// record one matrix row. The full report is printed as JSON on stdout.
+const matrix = [];
+const failures = [];
+
+// Pause between a batch finishing and its "after" snapshot. Parsed once rather
+// than per iteration; a non-numeric or negative LSP_MATRIX_SETTLE_MS falls back
+// to the 1000 ms default instead of collapsing to a near-zero timer.
+const settleRaw = Number(process.env.LSP_MATRIX_SETTLE_MS || 1000);
+const settleMs = Number.isFinite(settleRaw) && settleRaw >= 0 ? settleRaw : 1000;
+
+const overallBefore = await fetchHealth('overall-before');
+
+sweep:
+for (const concurrency of concurrencyValues) {
+  for (let round = 1; round <= rounds; round++) {
+    const before = await fetchHealth(`c${concurrency}-r${round}-before`);
+    const tasks = [];
+    for (let i = 0; i < concurrency; i++) {
+      const user = `${marker}-c${concurrency}-r${round}-u${i}`;
+      tasks.push(runOne(user, `${concurrency}.${round}.${i}`));
+    }
+    // runOne reports errors in its result instead of rejecting, so Promise.all
+    // waits for every request in the batch.
+    const requests = await Promise.all(tasks);
+    await sleep(settleMs);
+    const after = await fetchHealth(`c${concurrency}-r${round}-after`);
+    const row = summarizeBatch(concurrency, round, before, after, requests);
+    matrix.push(row);
+    if (!row.ok) {
+      failures.push(`c=${concurrency} r=${round} failed=${row.failed}`);
+      // Fail-fast stops the whole sweep at the first failing batch.
+      if (failFast) break sweep;
+    }
+  }
+}
+
+const overallAfter = await fetchHealth('overall-after');
+
+console.log(JSON.stringify({
+  ok: failures.length === 0,
+  baseUrl,
+  model,
+  marker,
+  stream,
+  timeoutMs,
+  concurrencyValues,
+  rounds,
+  failures,
+  overallBefore,
+  overallAfter,
+  matrix,
+}, null, 2));
+
+// Set the exit code instead of calling process.exit(): stdout writes can be
+// asynchronous (e.g. pipes on Windows), and a forced exit may truncate the report.
+if (failures.length) process.exitCode = 1;
@@ -584,6 +584,39 @@ function assertLsBudgetAvailable(health) {
   });
 }
 
+/**
+ * Difference between two counter maps, keeping only counters that changed.
+ * @param {object|null} [before={}] - Earlier counters; a missing key counts as 0.
+ * @param {object|null} [after={}] - Later counters; a missing key counts as 0.
+ * @returns {object} `after - before` for every key whose difference is non-zero.
+ */
+function counterDelta(before = {}, after = {}) {
+  const prior = before || {};
+  const current = after || {};
+  const names = new Set(Object.keys(prior).concat(Object.keys(current)));
+  const changes = [...names]
+    .map(name => [name, Number(current[name] || 0) - Number(prior[name] || 0)])
+    // Drops both zero and NaN differences.
+    .filter(([, diff]) => Boolean(diff));
+  return Object.fromEntries(changes);
+}
+
+/**
+ * Summarise how the `nativeBridge` section of the health payload changed between
+ * two snapshots.
+ * @param {object|null} before - Health payload captured before the scenarios ran.
+ * @param {object|null} after - Health payload captured afterwards.
+ * @returns {object} Decision-count deltas, changed reason counters, the latest
+ *   decision, and a normalised copy of up to eight most recent decisions.
+ */
+function nativeBridgeDecisionDelta(before, after) {
+  const prior = before?.nativeBridge || {};
+  const current = after?.nativeBridge || {};
+  const countDiff = field => Number(current[field] || 0) - Number(prior[field] || 0);
+
+  // Only the tail of the ring is reported; a non-array value yields an empty list.
+  const tail = Array.isArray(current.recentDecisions)
+    ? current.recentDecisions.slice(-8)
+    : [];
+  // Copy a fixed set of fields and fill defaults so every entry has the same shape.
+  const normalise = decision => ({
+    at: decision.at,
+    enabled: Boolean(decision.enabled),
+    reason: decision.reason || '',
+    modelKey: decision.modelKey || '',
+    route: decision.route || '',
+    mappedTools: decision.mappedTools || [],
+    unmappedTools: decision.unmappedTools || [],
+    toolChoiceFiltered: Boolean(decision.toolChoiceFiltered),
+  });
+
+  return {
+    decisions: countDiff('decisions'),
+    enabledDecisions: countDiff('enabledDecisions'),
+    disabledDecisions: countDiff('disabledDecisions'),
+    reasons: counterDelta(prior.decisionReasons || {}, current.decisionReasons || {}),
+    lastDecision: current.lastDecision || null,
+    recentDecisions: tail.map(normalise),
+  };
+}
+
 const selected = expandScenarios(requestedScenarios);
 if (!selected.length) {
   console.error(`No valid scenarios selected. Use one or more of: ${Object.keys(SCENARIOS).join(',')},all`);
@@ -641,6 +674,7 @@ console.log(JSON.stringify({
   scenarios: selected,
   results,
   failures,
+  nativeBridgeDecisionDelta: nativeBridgeDecisionDelta(healthBefore, healthAfter),
   healthBefore,
   healthAfter,
 }, null, 2));