From 722ac707da041e938b1b2b1602b290284701ad99 Mon Sep 17 00:00:00 2001 From: Tyler Leonhardt Date: Wed, 20 May 2026 20:18:03 -0700 Subject: [PATCH 1/6] =?UTF-8?q?claude:=20Phase=2010=20=E2=80=94=20workbenc?= =?UTF-8?q?h=20client=20tools=20via=20in-process=20MCP=20server?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Surface workbench-registered tools (IAgent.setClientTools) to the Claude SDK through an in-process MCP server (createSdkMcpServer + Options.mcpServers['client']). Tool calls park on a per-session PendingRequestRegistry keyed by SDK tool_use_id; IAgent.onClientToolCallComplete resolves the parked deferred with the protocol-to-MCP result conversion. Plumbing: new clientTools/ subfolder (MCP server factory, result converter, session model+diff, JSON Schema -> zod converter); pipeline / router / mapper own a single cached clientId (setter, no threaded closures); materializer rebinds the SDK on tool-set diff via the rematerializer hook; per-method IClaudeAgentSdkService shim with compile-time AssertBindingsMatchSdk to catch SDK drift; zod added as a direct dep; permission-mode resolution extracted to readClaudePermissionMode helper. Bug fixes (council review): setClientTools race during materialize commit gap (re-sync at publish time); setClientTools dropped during resume bootstrap (register the resume provisional); rebind-startup failure consumed the diff without restarting (markDirty in the rematerializer catch). Tests: 1833 agentHost tests passing (3 new for the race-/rebind-failure paths). --- eslint.config.js | 5 +- package-lock.json | 3 +- package.json | 3 +- remote/package-lock.json | 3 +- remote/package.json | 3 +- .../common/pendingRequestRegistry.ts | 40 + .../agentHost/node/claude/claudeAgent.ts | 180 ++-- .../node/claude/claudeAgentSdkService.ts | 160 ++-- .../node/claude/claudeAgentSession.ts | 100 ++- .../node/claude/claudeMapSessionEvents.ts | 38 +- .../node/claude/claudeMaterializer.ts | 124 ++- .../node/claude/claudeSdkMessageRouter.ts | 9 + .../node/claude/claudeSdkPipeline.ts | 23 +- .../claude/claudeSessionPermissionMode.ts | 30 + .../clientTools/claudeClientToolMcpServer.ts | 112 +++ .../clientTools/claudeClientToolResult.ts | 83 ++ .../clientTools/claudeJsonSchemaToZod.ts | 135 +++ .../claudeSessionClientToolsModel.ts | 154 ++++ .../agentHost/node/claude/phase10-plan.md | 834 ++++++++++++++++++ .../platform/agentHost/node/claude/roadmap.md | 2 +- .../common/pendingRequestRegistry.test.ts | 21 + .../test/node/claudeAgent.integrationTest.ts | 3 + .../agentHost/test/node/claudeAgent.test.ts | 362 +++++++- .../test/node/claudeSdkMessageRouter.test.ts | 1 + .../test/node/claudeSdkPipeline.test.ts | 1 + .../test/node/claudeSubagentResolver.test.ts | 2 + .../claudeClientToolMcpServer.test.ts | 142 +++ .../claudeClientToolResult.test.ts | 97 ++ .../clientTools/claudeJsonSchemaToZod.test.ts | 100 +++ .../claudeSessionClientToolsModel.test.ts | 122 +++ test/unit/electron/renderer.html | 7 + 31 files changed, 2708 insertions(+), 191 deletions(-) create mode 100644 src/vs/platform/agentHost/node/claude/claudeSessionPermissionMode.ts create mode 100644 src/vs/platform/agentHost/node/claude/clientTools/claudeClientToolMcpServer.ts create mode 100644 src/vs/platform/agentHost/node/claude/clientTools/claudeClientToolResult.ts create mode 100644 src/vs/platform/agentHost/node/claude/clientTools/claudeJsonSchemaToZod.ts create mode 100644 src/vs/platform/agentHost/node/claude/clientTools/claudeSessionClientToolsModel.ts create mode 100644 src/vs/platform/agentHost/node/claude/phase10-plan.md create mode 100644 src/vs/platform/agentHost/test/node/clientTools/claudeClientToolMcpServer.test.ts create mode 100644 src/vs/platform/agentHost/test/node/clientTools/claudeClientToolResult.test.ts create mode 100644 src/vs/platform/agentHost/test/node/clientTools/claudeJsonSchemaToZod.test.ts create mode 100644 src/vs/platform/agentHost/test/node/clientTools/claudeSessionClientToolsModel.test.ts diff --git a/eslint.config.js b/eslint.config.js index 871c78d8cf4e6..b3be218552e60 100644 --- a/eslint.config.js +++ b/eslint.config.js @@ -1503,6 +1503,7 @@ export default defineConfig( 'when': 'hasNode', 'allow': [ '@github/copilot-sdk', + 'zod', '@microsoft/dev-tunnels-contracts', '@microsoft/dev-tunnels-management', '@parcel/watcher', @@ -1646,7 +1647,9 @@ export default defineConfig( '@vscode/tree-sitter-wasm', // used by agentHost for command auto-approval '@vscode/copilot-api', // used by agentHost for Copilot API requests '@anthropic-ai/sdk', // used by agentHost for Anthropic API requests - '@anthropic-ai/claude-agent-sdk' // used by agentHost for Claude Agent SDK session enumeration / queries + '@anthropic-ai/claude-agent-sdk', // used by agentHost for Claude Agent SDK session enumeration / queries + '@modelcontextprotocol/sdk/**/*', // used by agentHost for Claude client-tool MCP result types (Phase 10) + 'zod' // used by agentHost for Claude client-tool MCP input schemas ] }, { diff --git a/package-lock.json b/package-lock.json index f8e5ed5ab12e1..d66f20bf3ba82 100644 --- a/package-lock.json +++ b/package-lock.json @@ -70,7 +70,8 @@ "vscode-textmate": "^9.3.2", "ws": "^8.19.0", "yauzl": "^3.0.0", - "yazl": "^2.4.3" + "yazl": "^2.4.3", + "zod": "^3.25.76" }, "devDependencies": { "@anthropic-ai/claude-agent-sdk": "0.2.128", diff --git a/package.json b/package.json index 7fe6018e6af23..5c55cb1c91922 100644 --- a/package.json +++ b/package.json @@ -147,7 +147,8 @@ "vscode-textmate": "^9.3.2", "ws": "^8.19.0", "yauzl": "^3.0.0", - "yazl": "^2.4.3" + "yazl": "^2.4.3", + "zod": "^3.25.76" }, "devDependencies": { "@anthropic-ai/claude-agent-sdk": "0.2.128", diff --git a/remote/package-lock.json b/remote/package-lock.json index 0b7a040173ed0..98ec6babcd674 100644 --- a/remote/package-lock.json +++ b/remote/package-lock.json @@ -51,7 +51,8 @@ "vscode-textmate": "^9.3.2", "ws": "^8.19.0", "yauzl": "^3.0.0", - "yazl": "^2.4.3" + "yazl": "^2.4.3", + "zod": "^3.25.76" } }, "node_modules/@github/copilot": { diff --git a/remote/package.json b/remote/package.json index 3fe442ac9aa0d..8d7a7a7f06e4e 100644 --- a/remote/package.json +++ b/remote/package.json @@ -46,7 +46,8 @@ "vscode-textmate": "^9.3.2", "ws": "^8.19.0", "yauzl": "^3.0.0", - "yazl": "^2.4.3" + "yazl": "^2.4.3", + "zod": "^3.25.76" }, "overrides": { "node-gyp-build": "4.8.1", diff --git a/src/vs/platform/agentHost/common/pendingRequestRegistry.ts b/src/vs/platform/agentHost/common/pendingRequestRegistry.ts index 7bb9b577ff4ee..a6c4ea61def7b 100644 --- a/src/vs/platform/agentHost/common/pendingRequestRegistry.ts +++ b/src/vs/platform/agentHost/common/pendingRequestRegistry.ts @@ -27,6 +27,19 @@ export class PendingRequestRegistry { return deferred.p; } + /** + * Park a deferred under `key` and return its promise. Use when there + * is no synchronous responder to guard against — the request that + * eventually feeds {@link respond} originates from a different code + * path (e.g. an MCP handler invoked by the SDK whose completion + * arrives via a workbench round-trip). + */ + register(key: string): Promise { + const deferred = new DeferredPromise(); + this._entries.set(key, deferred); + return deferred.p; + } + respond(key: string, value: T): boolean { const deferred = this._entries.get(key); if (!deferred) { @@ -37,6 +50,14 @@ export class PendingRequestRegistry { return true; } + /** + * Resolve every parked deferred with `denyValue` and clear the registry. + * + * Designed for the permission-deny path: a "deny" answer is itself a + * successful round-trip result, so awaiting consumers receive `denyValue` + * rather than an error. Use {@link rejectAll} when callers must observe + * a thrown error instead (cancellation, dispose). + */ denyAll(denyValue: T): void { for (const [, deferred] of this._entries) { if (!deferred.isSettled) { @@ -45,4 +66,23 @@ export class PendingRequestRegistry { } this._entries.clear(); } + + /** + * Reject every parked deferred with `error` and clear the registry. + * + * Use this when in-flight requests must be cancelled rather than + * answered (e.g. session dispose, `Query` rebind on tool-set change). + * Compare with {@link denyAll}, which *resolves* every deferred with a + * supplied value — that is right for the permission-deny path where a + * "deny" is itself a successful answer, but wrong for cancellation + * where the awaited consumer must observe an error to unwind. + */ + rejectAll(error: Error): void { + for (const [, deferred] of this._entries) { + if (!deferred.isSettled) { + deferred.error(error); + } + } + this._entries.clear(); + } } diff --git a/src/vs/platform/agentHost/node/claude/claudeAgent.ts b/src/vs/platform/agentHost/node/claude/claudeAgent.ts index 80e3060078b9c..4b02c4b32df77 100644 --- a/src/vs/platform/agentHost/node/claude/claudeAgent.ts +++ b/src/vs/platform/agentHost/node/claude/claudeAgent.ts @@ -4,7 +4,7 @@ *--------------------------------------------------------------------------------------------*/ import type { CCAModel } from '@vscode/copilot-api'; -import type { Options, PermissionMode, SDKSessionInfo, SDKUserMessage } from '@anthropic-ai/claude-agent-sdk'; +import type { Options, SDKSessionInfo, SDKUserMessage } from '@anthropic-ai/claude-agent-sdk'; import { SequencerByKey } from '../../../../base/common/async.js'; import { CancellationToken } from '../../../../base/common/cancellation.js'; import { CancellationError } from '../../../../base/common/errors.js'; @@ -35,10 +35,11 @@ import { mapSessionMessagesToTurns } from './claudeReplayMapper.js'; import { getSubagentTranscript } from './claudeSubagentResolver.js'; import { ClaudeAgentSession } from './claudeAgentSession.js'; import { handleCanUseTool } from './claudeCanUseTool.js'; -import { ClaudeMaterializer, type IClaudeMaterializeProvisional } from './claudeMaterializer.js'; +import { ClaudeMaterializer, type IMaterializeContext } from './claudeMaterializer.js'; import { tryParseClaudeModelId } from './claudeModelId.js'; import { resolvePromptToContentBlocks } from './claudePromptResolver.js'; import { IClaudeProxyHandle, IClaudeProxyService } from './claudeProxyService.js'; +import { readClaudePermissionMode } from './claudeSessionPermissionMode.js'; import { ClaudeSessionMetadataStore, IClaudeSessionOverlay } from './claudeSessionMetadataStore.js'; /** @@ -103,7 +104,10 @@ function toAgentModelInfo(m: CCAModel, provider: AgentProvider): IAgentModelInfo // the closed `ClaudePermissionMode` union now lives in // `../../common/claudeSessionConfigKeys.ts` so it can be shared by // `ClaudeAgent`, `ClaudeSessionMetadataStore`, and any other consumer -// that needs the same narrowing semantics. +// that needs the same narrowing semantics. The live per-session read +// helper lives in `./claudeSessionPermissionMode.ts` so the session +// and materializer can read directly without threading callbacks +// through the agent. /** * Phase 6: in-memory record for a provisional Claude session — one @@ -145,6 +149,22 @@ interface IClaudeProvisionalSession { */ model: ModelSelection | undefined; readonly config: Record | undefined; + /** + * Phase 10: client-provided tool definitions registered via + * {@link ClaudeAgent.setClientTools} before the session materializes. + * Mutable for parity with {@link model} — `setClientTools` writes here + * pre-materialize; the snapshot is transferred into the + * {@link ClaudeAgentSession} at materialize time and flows into + * `Options.mcpServers` for the first SDK startup. + */ + clientTools: readonly ToolDefinition[] | undefined; + /** + * Phase 10: workbench `clientId` paired with the most recent + * {@link clientTools} write. Transferred onto the session's bridge at + * materialize time so the stream mapper can stamp + * `SessionToolCallStart.toolClientId`. + */ + clientId: string | undefined; } /** @@ -261,11 +281,11 @@ export class ClaudeAgent extends Disposable implements IAgent { @IClaudeAgentSdkService private readonly _sdkService: IClaudeAgentSdkService, @IAgentHostGitService private readonly _gitService: IAgentHostGitService, @IAgentConfigurationService private readonly _configurationService: IAgentConfigurationService, - @IInstantiationService instantiationService: IInstantiationService, + @IInstantiationService _instantiationService: IInstantiationService, ) { super(); - this._materializer = instantiationService.createInstance(ClaudeMaterializer); - this._metadataStore = instantiationService.createInstance(ClaudeSessionMetadataStore, this.id); + this._materializer = _instantiationService.createInstance(ClaudeMaterializer); + this._metadataStore = _instantiationService.createInstance(ClaudeSessionMetadataStore, this.id); } // #region Descriptor + auth @@ -424,6 +444,8 @@ export class ClaudeAgent extends Disposable implements IAgent { project, model: config.model, config: config.config, + clientTools: undefined, + clientId: undefined, }); return { @@ -463,7 +485,7 @@ export class ClaudeAgent extends Disposable implements IAgent { // below, so a `SessionConfigChanged` landing mid-materialize can't // produce a split state where the SDK runs under one mode and the // DB records another. - const permissionMode = this._readSessionPermissionMode(provisional.sessionUri) + const permissionMode = readClaudePermissionMode(this._configurationService, provisional.sessionUri) ?? this._resolvePermissionMode(provisional.config); const canUseTool: NonNullable = (toolName, input, options) => @@ -472,25 +494,25 @@ export class ClaudeAgent extends Disposable implements IAgent { sessionId, toolName, input, options, ); - const session = await this._materializer.materialize(provisional, proxyHandle, permissionMode, canUseTool); + const ctx: IMaterializeContext = { + provisional, + proxyHandle, + canUseTool, + permissionModeFallback: permissionMode, + isResume: false, + }; - // Phase 9 — wire the rematerializer hook so abort/crash recovery - // can rebuild the SDK plumbing via resume mode without coupling the - // session to the materializer service. Also seed the bijective state - // cache so a rebuild re-applies the user's last-chosen model/effort - // without losing the picker config. + const session = await this._materializer.materialize(ctx); + + // Phase 9 — seed the bijective state cache so a rebuild re-applies + // the user's last-chosen model/effort without losing the picker + // config. const initialEffort = clampEffortForRuntime(resolveClaudeEffort(provisional.model)); session.seedBijectiveState({ model: provisional.model?.id, effort: initialEffort, permissionMode, }); - session.attachRematerializer(async (_reason) => { - // Re-read the live permissionMode so a SessionConfigChanged that - // arrived since the last materialize is honored on rebuild. - const liveMode = this._readSessionPermissionMode(provisional.sessionUri) ?? permissionMode; - return this._materializer.materializeResume(provisional, proxyHandle, liveMode, canUseTool); - }); // Persist customization-directory metadata BEFORE firing the // materialize event — see plan section 3.4 ordering rationale. @@ -531,6 +553,15 @@ export class ClaudeAgent extends Disposable implements IAgent { // only need to dispose the entry to release everything per-session. const entry = new ClaudeSessionEntry(session); entry.addDisposable(session.onDidSessionProgress(signal => this._onDidSessionProgress.fire(signal))); + // Re-sync the client-tool snapshot in case `setClientTools` landed + // while we were awaiting `_sdkService.startup` / `_metadataStore.write`. + // Those updates wrote to the provisional record because the live + // session was not yet in `_sessions`; the materializer's earlier + // snapshot would otherwise be discarded along with the provisional. + // Idempotent on no-change (the diff's equality check absorbs it). + if (provisional.clientTools !== undefined) { + session.setClientTools(provisional.clientTools, provisional.clientId); + } this._sessions.set(sessionId, entry); this._provisionalSessions.delete(sessionId); @@ -575,7 +606,7 @@ export class ClaudeAgent extends Disposable implements IAgent { } catch (err) { this._logService.warn(`[Claude:${sessionId}] overlay read failed during resume; continuing with defaults`, err); } - const permissionMode = this._readSessionPermissionMode(sessionUri) + const permissionMode = readClaudePermissionMode(this._configurationService, sessionUri) ?? overlay.permissionMode ?? 'default'; // Resolve git project metadata from the resumed cwd, same as @@ -589,14 +620,21 @@ export class ClaudeAgent extends Disposable implements IAgent { this._logService.warn(`[Claude:${sessionId}] project resolution failed during resume; continuing without project`, err); } const abortController = new AbortController(); - const provisional: IClaudeMaterializeProvisional = { + const provisional: IClaudeProvisionalSession = { sessionId, sessionUri, workingDirectory, abortController, project, model: overlay.model, + config: undefined, + clientTools: undefined, + clientId: undefined, }; + // Register the resume provisional so a concurrent `setClientTools` + // has a place to land while we're awaiting `_sdkService.startup`. + // Removed in `finally` after the live session takes its place. + this._provisionalSessions.set(sessionId, provisional); const canUseTool: NonNullable = (toolName, input, options) => handleCanUseTool( @@ -604,7 +642,24 @@ export class ClaudeAgent extends Disposable implements IAgent { sessionId, toolName, input, options, ); - const session = await this._materializer.materialize(provisional, proxyHandle, permissionMode, canUseTool, 'resume'); + // Phase 10 — cross-window resume starts with no client-tool snapshot; + // the workbench re-issues `setClientTools` on active-client re-attach + // and the first sendMessage's diff check rebinds with the new set. + const ctx: IMaterializeContext = { + provisional, + proxyHandle, + canUseTool, + permissionModeFallback: permissionMode, + isResume: true, + }; + + let session: ClaudeAgentSession; + try { + session = await this._materializer.materialize(ctx); + } catch (err) { + this._provisionalSessions.delete(sessionId); + throw err; + } const initialEffort = clampEffortForRuntime(resolveClaudeEffort(overlay.model)); session.seedBijectiveState({ @@ -612,14 +667,16 @@ export class ClaudeAgent extends Disposable implements IAgent { effort: initialEffort, permissionMode, }); - session.attachRematerializer(async (_reason) => { - const liveMode = this._readSessionPermissionMode(sessionUri) ?? permissionMode; - return this._materializer.materializeResume(provisional, proxyHandle, liveMode, canUseTool); - }); const entry = new ClaudeSessionEntry(session); entry.addDisposable(session.onDidSessionProgress(signal => this._onDidSessionProgress.fire(signal))); + // See `_materializeProvisional` — re-sync any `setClientTools` that + // landed on the resume provisional while `startup` was in flight. + if (provisional.clientTools !== undefined) { + session.setClientTools(provisional.clientTools, provisional.clientId); + } this._sessions.set(sessionId, entry); + this._provisionalSessions.delete(sessionId); this._onDidMaterializeSession.fire({ session: sessionUri, @@ -641,20 +698,6 @@ export class ClaudeAgent extends Disposable implements IAgent { return narrowClaudePermissionMode(config?.[ClaudeSessionConfigKey.PermissionMode]) ?? 'default'; } - /** - * Read the live `permissionMode` for a session via - * {@link IAgentConfigurationService.getSessionConfigValues}. Returns - * `undefined` if the session has not been seeded — the caller picks - * the fallback (createSession-time intent at materialize, `'default'` - * at the canUseTool gate). Defends against malformed values that - * slipped past schema validation by returning `undefined`. Called - * on every canUseTool entry so a mid-turn `SessionConfigChanged` - * action wins over the materialize-time seed (plan S3.6). - */ - private _readSessionPermissionMode(sessionUri: URI): PermissionMode | undefined { - return narrowClaudePermissionMode(this._configurationService.getSessionConfigValues(sessionUri.toString())?.[ClaudeSessionConfigKey.PermissionMode]); - } - disposeSession(session: URI): Promise { // Routed through {@link _disposeSequencer} so a concurrent // {@link shutdown} already serializing teardown for this same @@ -935,24 +978,6 @@ export class ClaudeAgent extends Disposable implements IAgent { // existing history rather than minting a fresh one. session = await this._resumeSession(sessionId, sessionUri); } - } else { - // Plan S3.6: forward live `permissionMode` to the bound - // `Query` immediately before yielding the next user message - // so a `SessionConfigChanged` action that arrived between - // turns wins. Awaited so the SDK has acknowledged the mode - // change before `session.send(...)` yields the next prompt. - // - // Only call when the config carries an actual value: - // `_readSessionPermissionMode` returns `undefined` when - // the session's schema hasn't been registered (e.g. a - // cross-window resume that bypassed AgentService's - // schema-registration path), and falling back to - // `'default'` here would silently overwrite the - // session's seeded bijective state with the wrong mode. - const liveMode = this._readSessionPermissionMode(sessionUri); - if (liveMode !== undefined) { - await session.setPermissionMode(liveMode); - } } const contentBlocks = resolvePromptToContentBlocks(prompt, attachments); @@ -1065,17 +1090,38 @@ export class ClaudeAgent extends Disposable implements IAgent { }); } - setClientTools(_session: URI, _clientId: string, _tools: ToolDefinition[]): void { - throw new Error('TODO: Phase 10'); + setClientTools(session: URI, clientId: string, tools: ToolDefinition[]): void { + const sessionId = AgentSession.id(session); + this._logService.info(`[Claude:${sessionId}] setClientTools clientId=${clientId} tools=[${tools.map(t => t.name).join(', ') || '(none)'}]`); + const provisional = this._provisionalSessions.get(sessionId); + if (provisional) { + provisional.clientTools = tools; + provisional.clientId = clientId; + return; + } + const entry = this._sessions.get(sessionId); + if (entry) { + entry.session.setClientTools(tools, clientId); + return; + } + // Unknown id \u2014 silent drop (workbench may have raced a session + // dispose, or the session lives in another window). } - onClientToolCallComplete(_session: URI, _toolCallId: string, _result: ToolCallResult): void { - // Phase 10 — client (MCP) tool completion routing. Until then, every - // SDK-owned tool that completes also fires this hook via - // `AgentSideEffects` listening on `SessionToolCallComplete` envelopes, - // so the body must be a benign no-op rather than throw. Once client - // tools are registered via `setClientTools`, this should resolve the - // matching pending promise on the SDK session. + onClientToolCallComplete(session: URI, toolCallId: string, result: ToolCallResult): void { + // Walk subagent URIs to the root — nested subagents require iterated + // parsing. `_sessions` is keyed by root session ids only. Mirrors + // copilotAgent.ts:947. + let target = session; + let parsed; + while ((parsed = parseSubagentSessionUri(target))) { + target = parsed.parentSession; + } + const sessionId = AgentSession.id(target); + const entry = this._sessions.get(sessionId); + // `AgentSideEffects` forwards every `SessionToolCallComplete` envelope + // (including SDK-owned tools); silent on miss is the expected path. + entry?.session.completeClientToolCall(toolCallId, result); } setClientCustomizations(_session: URI, _clientId: string, _customizations: CustomizationRef[]): Promise { diff --git a/src/vs/platform/agentHost/node/claude/claudeAgentSdkService.ts b/src/vs/platform/agentHost/node/claude/claudeAgentSdkService.ts index 63674bed732f7..95d7a5aa81408 100644 --- a/src/vs/platform/agentHost/node/claude/claudeAgentSdkService.ts +++ b/src/vs/platform/agentHost/node/claude/claudeAgentSdkService.ts @@ -3,7 +3,8 @@ * Licensed under the MIT License. See License.txt in the project root for license information. *--------------------------------------------------------------------------------------------*/ -import type { GetSessionMessagesOptions, GetSubagentMessagesOptions, ListSessionsOptions, ListSubagentsOptions, Options, SDKSessionInfo, SessionMessage, WarmQuery } from '@anthropic-ai/claude-agent-sdk'; +import type { AnyZodRawShape, GetSessionMessagesOptions, GetSubagentMessagesOptions, InferShape, ListSessionsOptions, ListSubagentsOptions, McpSdkServerConfigWithInstance, Options, SDKSessionInfo, SdkMcpToolDefinition, SessionMessage, WarmQuery } from '@anthropic-ai/claude-agent-sdk'; +import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js'; import * as fs from 'fs'; import { pathToFileURL } from 'url'; import { join, resolve } from '../../../../base/common/path.js'; @@ -14,78 +15,41 @@ import { AgentHostClaudeSdkPathEnvVar } from '../../common/agentService.js'; export const IClaudeAgentSdkService = createDecorator('claudeAgentSdkService'); /** - * Lazy wrapper over `@anthropic-ai/claude-agent-sdk` for the agent host - * Claude provider. The interface grows phase-by-phase; Phase 5 introduces - * the decorator so {@link import('./claudeAgent.js').ClaudeAgent} can take - * it as a constructor dependency. Phase 6 adds {@link startup} for - * materialization. Method surfaces are added in subsequent slices alongside - * the tests that exercise them. + * Pure per-method passthrough shim over `@anthropic-ai/claude-agent-sdk`. + * + * Every method on this interface corresponds 1:1 to a single SDK export. + * The shim owns lazy module loading and the first-failure log-once + * convention; it does NOT compose, wrap, or add behavior on top of the + * SDK's surface. Higher-level orchestration (e.g. building the in-process + * client-tool MCP server) lives in dedicated modules that depend on this + * interface for the raw bindings. */ export interface IClaudeAgentSdkService { readonly _serviceBrand: undefined; - /** - * Enumerates persisted Claude sessions surfaced by the SDK's filesystem - * scan. Phase 5 mirrors `IAgent.listSessions()` (no `dir` parameter): - * the host translates this internally to `sdk.listSessions(undefined)`. - * - * Failures (corrupt module, postinstall mishap) reject with the SDK - * loader's diagnostic. Callers MUST tolerate rejection without - * collapsing the wider listing pipeline. - */ listSessions(): Promise; - - /** - * Looks up a single session's metadata by id. Resolves to `undefined` - * when the SDK has no record of it (deleted from disk, never created, - * or just outside the searched project tree). Used by - * {@link import('./claudeAgent.js').ClaudeAgent.getSessionMetadata} - * to compose SDK-supplied fields (summary, cwd, timestamps) with the - * per-session DB overlay. Phase 6.1 / Cycle D4. - */ getSessionInfo(sessionId: string): Promise; - - /** - * Pre-warms the SDK subprocess and runs the init handshake. Returns - * a {@link WarmQuery} whose `.query(promptIterable)` binds the - * prompt iterable and returns a streaming `Query`. Aborting - * `options.abortController` either rejects this promise (if init is - * in flight) or causes the resulting Query to clean up resources - * (sdk.d.ts section `startup`). - * - * Phase 6 calls this from {@link ClaudeAgent._materializeProvisional} - * on the first `sendMessage`. Firing `onDidMaterializeSession` is - * deliberately deferred until after the await resolves so AgentService - * can atomically dispatch the deferred `sessionAdded` notification. - */ startup(params: { options: Options; initializeTimeoutMs?: number }): Promise; - - /** - * Reads a session's full transcript from disk via the SDK. Out-of-process: - * no live `Query` is required — the SDK parses the JSONL file directly. - * Phase 13 calls this from {@link import('./claudeAgent.js').ClaudeAgent.getSessionMessages} - * with `{ includeSystemMessages: true }` so `compact_boundary` and other - * allowlisted system subtypes survive into the replay mapper. - */ getSessionMessages(sessionId: string, options?: GetSessionMessagesOptions): Promise; - - /** - * Lists subagent ids the SDK has indexed for a given session. The SDK - * returns ids in directory-readdir order (alphabetical in practice — - * NOT invocation order) so callers MUST NOT rely on positional - * correlation. Phase 12 uses this to enumerate child transcripts for - * the live + replay subagent URI ingress. - */ listSubagents(sessionId: string, options?: ListSubagentsOptions): Promise; - - /** - * Reads a single subagent's transcript from disk via the SDK. Mirrors - * {@link getSessionMessages} for the child session: out-of-process, - * SDK parses the per-agent JSONL directly. Phase 12 consumes this from - * {@link import('./claudeSubagentResolver.js').getSubagentTranscript} - * when the requested URI carries an `agentId` discriminator. - */ getSubagentMessages(sessionId: string, agentId: string, options?: GetSubagentMessagesOptions): Promise; + + createSdkMcpServer(options: { + name: string; + version?: string; + // SDK signature: `tools?: Array>`. The `any` + // here is required to match the SDK's own erased generic and to allow + // callers to pass an array of tools whose schemas differ from each other. + // eslint-disable-next-line @typescript-eslint/no-explicit-any + tools?: Array>; + }): Promise; + + tool( + name: string, + description: string, + inputSchema: Schema, + handler: (args: InferShape, extra: unknown) => Promise + ): Promise>; } /** @@ -103,18 +67,20 @@ export interface IClaudeSdkBindings { getSessionMessages(sessionId: string, options?: GetSessionMessagesOptions): Promise; listSubagents(sessionId: string, options?: ListSubagentsOptions): Promise; getSubagentMessages(sessionId: string, agentId: string, options?: GetSubagentMessagesOptions): Promise; + createSdkMcpServer(options: { + name: string; + version?: string; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + tools?: Array>; + }): McpSdkServerConfigWithInstance; + tool( + name: string, + description: string, + inputSchema: Schema, + handler: (args: InferShape, extra: unknown) => Promise + ): SdkMcpToolDefinition; } -/** - * Production implementation. The SDK module is loaded lazily via dynamic - * `import()` because it pulls in non-trivial deps that aren't relevant - * unless the user has opted into the Claude agent. - * - * The loader's caching / log-once-on-failure semantics are locked by the - * dedicated test in {@link import('../../test/node/claudeAgent.test.ts')}, - * which subclasses this and overrides {@link _loadSdk} to fault on demand. - * That's why {@link _loadSdk} is `protected` rather than `private`. - */ export class ClaudeAgentSdkService implements IClaudeAgentSdkService { declare readonly _serviceBrand: undefined; @@ -122,14 +88,12 @@ export class ClaudeAgentSdkService implements IClaudeAgentSdkService { * Cached resolved bindings. We deliberately cache the *resolved* value, * not the in-flight promise — if a transient `import()` failure recovers * (e.g. user fixes a broken `node_modules`), the next call retries. - * Mirrors the convention in `agentHostTerminalManager.ts` for `node-pty`. */ private _sdkModule: IClaudeSdkBindings | undefined; /** * Latched once we've logged a load failure, so a corrupt postinstall - * doesn't flood `error` events on every `listSessions()` call (each - * workbench refresh and session-list rerender hits this path). + * doesn't flood `error` events on every `listSessions()` call. */ private _firstLoadFailureLogged = false; @@ -167,6 +131,26 @@ export class ClaudeAgentSdkService implements IClaudeAgentSdkService { return sdk.getSubagentMessages(sessionId, agentId, options); } + async createSdkMcpServer(options: { + name: string; + version?: string; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + tools?: Array>; + }): Promise { + const sdk = await this._getSdk(); + return sdk.createSdkMcpServer(options); + } + + async tool( + name: string, + description: string, + inputSchema: Schema, + handler: (args: InferShape, extra: unknown) => Promise + ): Promise> { + const sdk = await this._getSdk(); + return sdk.tool(name, description, inputSchema, handler); + } + private async _getSdk(): Promise { if (this._sdkModule) { return this._sdkModule; @@ -208,3 +192,27 @@ export class ClaudeAgentSdkService implements IClaudeAgentSdkService { return import(pathToFileURL(entry).href); } } + +// #region Compile-time SDK drift detection +// +// Enforce that every method on IClaudeSdkBindings names a real export of +// `@anthropic-ai/claude-agent-sdk` and is assignable from that export's +// type. If the SDK renames or changes the signature of any of these +// exports, the `_assertBindingsMatchSdk` assignment below stops type- +// checking and the build fails — flagging that the shim needs updating. + +type SdkModule = typeof import('@anthropic-ai/claude-agent-sdk'); + +type AssertBindingsMatchSdk = { + [K in keyof IClaudeSdkBindings]: K extends keyof SdkModule + ? SdkModule[K] extends IClaudeSdkBindings[K] + ? true + : ['SDK export signature drifted from IClaudeSdkBindings', K, SdkModule[K], IClaudeSdkBindings[K]] + : ['Not an export of @anthropic-ai/claude-agent-sdk', K]; +}; + +// Forces the mapped type above to be eagerly checked: if any entry is not +// `true`, this assignment fails to compile. +export const _assertBindingsMatchSdk: { [K in keyof IClaudeSdkBindings]: true } = null as unknown as AssertBindingsMatchSdk; + +// #endregion diff --git a/src/vs/platform/agentHost/node/claude/claudeAgentSession.ts b/src/vs/platform/agentHost/node/claude/claudeAgentSession.ts index abe93b5cff121..d3a03ebed7a44 100644 --- a/src/vs/platform/agentHost/node/claude/claudeAgentSession.ts +++ b/src/vs/platform/agentHost/node/claude/claudeAgentSession.ts @@ -4,16 +4,24 @@ *--------------------------------------------------------------------------------------------*/ import type { PermissionMode, SDKUserMessage, WarmQuery } from '@anthropic-ai/claude-agent-sdk'; +import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js'; +import { CancellationError } from '../../../../base/common/errors.js'; import { Emitter, Event } from '../../../../base/common/event.js'; import { Disposable, IReference } from '../../../../base/common/lifecycle.js'; import { URI } from '../../../../base/common/uri.js'; import { IInstantiationService } from '../../../instantiation/common/instantiation.js'; +import { IAgentConfigurationService } from '../agentConfigurationService.js'; +import { ClaudePermissionMode } from '../../common/claudeSessionConfigKeys.js'; import { ClaudeRuntimeEffortLevel } from '../../common/claudeModelConfig.js'; import { AgentSignal } from '../../common/agentService.js'; import { PendingRequestRegistry } from '../../common/pendingRequestRegistry.js'; import { ISessionDatabase } from '../../common/sessionDataService.js'; import { ActionType } from '../../common/state/sessionActions.js'; -import { PendingMessage, SessionInputAnswer, SessionInputRequest, SessionInputResponseKind, ToolCallPendingConfirmationState } from '../../common/state/protocol/state.js'; +import { PendingMessage, SessionInputAnswer, SessionInputRequest, SessionInputResponseKind, ToolCallPendingConfirmationState, type ToolDefinition } from '../../common/state/protocol/state.js'; +import type { ToolCallResult } from '../../common/state/sessionState.js'; +import { convertToolCallResult } from './clientTools/claudeClientToolResult.js'; +import { readClaudePermissionMode } from './claudeSessionPermissionMode.js'; +import { SessionClientToolsDiff } from './clientTools/claudeSessionClientToolsModel.js'; import { resolvePromptToContentBlocks } from './claudePromptResolver.js'; import { ClaudeSdkPipeline, IRematerializer } from './claudeSdkPipeline.js'; import { SubagentRegistry } from './claudeSubagentRegistry.js'; @@ -59,6 +67,17 @@ export class ClaudeAgentSession extends Disposable { */ private readonly _pendingUserInputs = new PendingRequestRegistry<{ response: SessionInputResponseKind; answers?: Record }>(); + /** + * Phase 10 — owns the workbench-registered client-tool snapshot + * (via {@link SessionClientToolsDiff.model}) plus the + * "changed since last successful build" dirty bit. Read by the + * agent's sendMessage diff check; used by the materialize / + * rematerializer flow to pin the SDK build against a specific + * snapshot. See {@link SessionClientToolsDiff} for the C6 race + * semantics this collaborator enforces. + */ + readonly toolDiff: SessionClientToolsDiff; + private readonly _onDidSessionProgress = this._register(new Emitter()); readonly onDidSessionProgress: Event = this._onDidSessionProgress.event; @@ -69,11 +88,16 @@ export class ClaudeAgentSession extends Disposable { warm: WarmQuery, abortController: AbortController, dbRef: IReference, + private readonly _pendingClientToolCalls: PendingRequestRegistry, + toolDiff: SessionClientToolsDiff, + private readonly _permissionModeFallback: ClaudePermissionMode, @IInstantiationService instantiationService: IInstantiationService, + @IAgentConfigurationService private readonly _configurationService: IAgentConfigurationService, ) { super(); + this.toolDiff = this._register(toolDiff); this._pipeline = this._register(instantiationService.createInstance( - ClaudeSdkPipeline, sessionId, sessionUri, warm, abortController, dbRef, this.subagents, + ClaudeSdkPipeline, sessionId, sessionUri, warm, abortController, dbRef, this.subagents, toolDiff.model.state.get().clientId, )); this._register(this._pipeline.onDidProduceSignal(s => this._onDidSessionProgress.fire(s))); } @@ -95,14 +119,42 @@ export class ClaudeAgentSession extends Disposable { } /** - * Send a user prompt. Model / effort are not threaded through here - * — the pipeline's current model / effort (set eagerly via - * {@link queueModelChange}) is whatever the SDK has been told. + * Send a user prompt. Performs the per-turn pre-flight before + * yielding to the pipeline: + * + * - If {@link toolDiff} reports the workbench client-tool snapshot has + * diverged from what the live `Query` was started with, yield-restart + * so the SDK picks up the new `Options.mcpServers`. The rebind itself + * re-applies the live `permissionMode` via the rematerializer. + * - Otherwise forward the live `permissionMode` to the bound `Query` so + * a `SessionConfigChanged` action that arrived between turns wins. + * The pipeline's bijective cache dedupes a no-op `setPermissionMode`, + * so this is free when nothing changed. + * + * Model / effort are not threaded through here — the pipeline's current + * model / effort (set eagerly via {@link queueModelChange}) is whatever + * the SDK has been told. */ - send(prompt: SDKUserMessage, turnId: string): Promise { + async send(prompt: SDKUserMessage, turnId: string): Promise { + if (this.toolDiff.hasDifference) { + await this.rebindForClientTools(); + } else { + await this._pipeline.setPermissionMode(this._currentPermissionMode()); + } return this._pipeline.send(prompt, turnId); } + /** + * Live `permissionMode` for this session: reads from + * {@link IAgentConfigurationService} and falls back to the snapshot + * captured at materialize time when the live read is `undefined` + * (e.g. cross-window resume before the workbench has registered the + * session's schema). + */ + private _currentPermissionMode(): ClaudePermissionMode { + return readClaudePermissionMode(this._configurationService, this.sessionUri) ?? this._permissionModeFallback; + } + /** * Cancel the in-flight SDK turn. Mirrors the production reference; * see {@link ClaudeSdkPipeline.abort}. Also denies any parked @@ -234,11 +286,47 @@ export class ClaudeAgentSession extends Disposable { // #endregion + // #region Phase 10 — client tools + + /** Replace the registered client tools snapshot. */ + setClientTools(tools: readonly ToolDefinition[], clientId?: string): void { + this.toolDiff.model.setTools(tools, clientId); + this._pipeline.setClientId(this.toolDiff.model.state.get().clientId); + } + + /** + * Resolve a parked client-tool MCP handler with the workbench-supplied + * result. Returns `true` if a matching deferred was found and settled. + * Unknown ids are a benign no-op — `agentSideEffects.ts` forwards every + * `SessionToolCallComplete` envelope, so SDK-owned tool completions land + * here too and must NOT throw. + */ + completeClientToolCall(toolCallId: string, result: ToolCallResult): boolean { + const converted = convertToolCallResult(result, toolCallId); + return this._pendingClientToolCalls.respond(toolCallId, converted); + } + + /** + * Drive a yield-restart so the SDK picks up the new client-tool set on + * its next user request. Cancels any in-flight client-tool MCP handlers + * and resets the bridge state before swapping the {@link Query}; the + * agent's rematerializer rebuilds `Options.mcpServers` from + * {@link toolDiff} during the rebind and pins `applied` to the + * build-time snapshot via {@link SessionClientToolsDiff.build}. + */ + async rebindForClientTools(): Promise { + this._pendingClientToolCalls.rejectAll(new CancellationError()); + await this._pipeline.rebindForRestart(); + } + + // #endregion + override dispose(): void { // Resolve parked deferreds before tearing the pipeline down so the // SDK's canUseTool callback unwinds with a deny and the loop exits. this._pendingPermissions.denyAll(false); this._pendingUserInputs.denyAll({ response: SessionInputResponseKind.Cancel }); + this._pendingClientToolCalls.rejectAll(new CancellationError()); super.dispose(); } } diff --git a/src/vs/platform/agentHost/node/claude/claudeMapSessionEvents.ts b/src/vs/platform/agentHost/node/claude/claudeMapSessionEvents.ts index cfa23b80a5f4e..52d3816afb133 100644 --- a/src/vs/platform/agentHost/node/claude/claudeMapSessionEvents.ts +++ b/src/vs/platform/agentHost/node/claude/claudeMapSessionEvents.ts @@ -11,6 +11,7 @@ import { ActionType } from '../../common/state/sessionActions.js'; import { ResponsePartKind, ToolResultContentType, type ToolResultContent, type ToolResultFileEditContent } from '../../common/state/sessionState.js'; import { buildTopLevelSubagentReadyAction, emitInnerAssistantSignals, mapSubagentSystemMessage, SUBAGENT_SPAWNING_TOOL_NAMES, tagWithParent } from './claudeSubagentSignals.js'; import type { SubagentRegistry } from './claudeSubagentRegistry.js'; +import { stripClientToolNamePrefix, hasClientToolNamePrefix } from './clientTools/claudeClientToolMcpServer.js'; import { buildClaudeToolMeta, getClaudePastTenseMessage, getClaudeToolDisplayName } from './claudeToolDisplay.js'; import { ClaudeToolCallRegistry } from './claudeToolCallRegistry.js'; import { ToolCallConfirmationReason, type StringOrMarkdown } from '../../common/state/protocol/state.js'; @@ -217,6 +218,7 @@ export function mapSDKMessageToAgentSignals( state: ClaudeMapperState, logService: ILogService, registry: SubagentRegistry, + clientId?: string, ): AgentSignal[] { if (logService.getLevel() <= LogLevel.Trace) { try { @@ -229,7 +231,7 @@ export function mapSDKMessageToAgentSignals( switch (message.type) { case 'stream_event': return tagWithParent( - mapStreamEvent(message.event, session, turnId, state, logService, message.parent_tool_use_id, registry), + mapStreamEvent(message.event, session, turnId, state, logService, message.parent_tool_use_id, registry, clientId), session, message.parent_tool_use_id, registry, @@ -458,6 +460,7 @@ function mapStreamEvent( logService: ILogService, parentToolUseId: string | null, registry: SubagentRegistry, + clientId: string | undefined, ): AgentSignal[] { switch (event.type) { case 'message_start': @@ -497,14 +500,27 @@ function mapStreamEvent( }]; } if (block.type === 'tool_use') { - state.startToolBlock(event.index, block.id, block.name, turnId); + // Phase 10 — strip the SDK's `mcp____` prefix for + // our in-process client-tool MCP server. The SDK surfaces + // in-process MCP tools to the model with that prefix, but + // the workbench's registered client-tool list (and the + // MCP handler's closure) use the unprefixed name. Without + // normalizing at the seam, `SessionToolCallReady` / + // `SessionToolCallComplete` would carry the prefixed name + // and the workbench would never recognize them as client + // tools. SDK-owned tools (Read, Write, Bash, etc.) and + // subagent spawn tools pass through unchanged because + // they don't carry the prefix. + const toolName = stripClientToolNamePrefix(block.name); + const isClientTool = hasClientToolNamePrefix(block.name); + state.startToolBlock(event.index, block.id, toolName, turnId); // Phase 12 — subagent correlation bookkeeping. Either this // tool_use is at the top level and (if Task/Agent) spawns a // new subagent, or it is inner and we record its edge to the // parent. They are mutually exclusive (a Task call inside a // subagent is itself an inner tool_use; the resolver chain // handles nested spawns by following the parent chain). - const isSubagentSpawn = SUBAGENT_SPAWNING_TOOL_NAMES.has(block.name); + const isSubagentSpawn = SUBAGENT_SPAWNING_TOOL_NAMES.has(toolName); if (parentToolUseId === null) { if (isSubagentSpawn) { registry.recordSpawn(block.id); @@ -518,7 +534,8 @@ function mapStreamEvent( // state transitions (D6). Subagent meta from Phase 12 is now // produced by `buildClaudeToolMeta` because // `getClaudeToolKind('Task') === 'subagent'`. - const meta = buildClaudeToolMeta(block.name); + const meta = buildClaudeToolMeta(toolName); + const toolClientId = isClientTool ? clientId : undefined; return [{ kind: 'action', session, @@ -526,8 +543,9 @@ function mapStreamEvent( type: ActionType.SessionToolCallStart, turnId, toolCallId: block.id, - toolName: block.name, - displayName: getClaudeToolDisplayName(block.name), + toolName, + displayName: getClaudeToolDisplayName(toolName), + ...(toolClientId ? { toolClientId } : {}), ...(meta ? { _meta: meta } : {}), }, }]; @@ -588,14 +606,6 @@ function mapStreamEvent( if (!tracked) { return []; } - // Phase 8.5 — emit `SessionToolCallReady` so the tool transitions - // out of `Streaming` even when the Claude SDK auto-allows without - // calling `canUseTool` (no `claudeCanUseTool` round-trip means - // `sessionPermissions` never emits Ready, the state stays in - // `Streaming`, and the subsequent `SessionToolCallComplete` is - // dropped by the reducer — leaving the tool widget empty). - // When `canUseTool` DOES fire, `sessionPermissions` emits a second - // Ready that re-transitions Running → PendingConfirmation as needed. const entry = state.toolCalls.lookup(tracked.toolUseId); const info = entry?.info; if (!info) { diff --git a/src/vs/platform/agentHost/node/claude/claudeMaterializer.ts b/src/vs/platform/agentHost/node/claude/claudeMaterializer.ts index cb7a4ea23cece..1f3a24fa19e18 100644 --- a/src/vs/platform/agentHost/node/claude/claudeMaterializer.ts +++ b/src/vs/platform/agentHost/node/claude/claudeMaterializer.ts @@ -3,21 +3,27 @@ * Licensed under the MIT License. See License.txt in the project root for license information. *--------------------------------------------------------------------------------------------*/ -import type { Options, WarmQuery } from '@anthropic-ai/claude-agent-sdk'; +import type { McpSdkServerConfigWithInstance, Options, WarmQuery } from '@anthropic-ai/claude-agent-sdk'; +import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js'; import { CancellationError } from '../../../../base/common/errors.js'; import { delimiter, dirname } from '../../../../base/common/path.js'; import { URI } from '../../../../base/common/uri.js'; import { rgDiskPath } from '../../../../base/node/ripgrep.js'; import { IInstantiationService } from '../../../instantiation/common/instantiation.js'; import { ILogService } from '../../../log/common/log.js'; +import { IAgentConfigurationService } from '../agentConfigurationService.js'; import { ClaudePermissionMode } from '../../common/claudeSessionConfigKeys.js'; import { resolveClaudeEffort } from '../../common/claudeModelConfig.js'; +import { PendingRequestRegistry } from '../../common/pendingRequestRegistry.js'; import { ISessionDataService } from '../../common/sessionDataService.js'; -import type { ModelSelection } from '../../common/state/protocol/state.js'; +import type { ModelSelection, ToolDefinition } from '../../common/state/protocol/state.js'; import { IAgentSessionProjectInfo } from '../../common/agentService.js'; import { IClaudeAgentSdkService } from './claudeAgentSdkService.js'; import { ClaudeAgentSession } from './claudeAgentSession.js'; +import { buildClientToolMcpServer } from './clientTools/claudeClientToolMcpServer.js'; import { IClaudeProxyHandle } from './claudeProxyService.js'; +import { readClaudePermissionMode } from './claudeSessionPermissionMode.js'; +import { SessionClientToolsDiff } from './clientTools/claudeSessionClientToolsModel.js'; /** * In-memory record for a provisional Claude session passed into @@ -32,6 +38,40 @@ export interface IClaudeMaterializeProvisional { readonly abortController: AbortController; readonly project: IAgentSessionProjectInfo | undefined; readonly model: ModelSelection | undefined; + /** + * Phase 10 — the workbench-registered client-tool snapshot at + * materialize time (if any). The materializer seeds the session's + * {@link SessionClientToolsDiff.model} from this and uses it to build + * the SDK's initial `Options.mcpServers`. + */ + readonly clientTools?: readonly ToolDefinition[]; + /** + * Phase 10 — the workbench `clientId` paired with {@link clientTools}. + * Used by the stream mapper to stamp `SessionToolCallStart.toolClientId`. + */ + readonly clientId?: string; +} + +/** + * Per-session bundle the agent hands to {@link ClaudeMaterializer.materialize}. + * Everything the materializer needs to bring a session up AND to rebind + * it on yield-restart — the materializer attaches the rematerializer + * hook internally using this bundle, so the agent does not own any + * SDK / client-tool plumbing. + */ +export interface IMaterializeContext { + readonly provisional: IClaudeMaterializeProvisional; + readonly proxyHandle: IClaudeProxyHandle; + readonly canUseTool: NonNullable; + /** + * Fallback permission mode used when the live read from + * {@link IAgentConfigurationService} returns `undefined` (e.g. the + * session's schema has not been registered yet). The materializer + * reads the live value at materialize / rebind and falls back to + * this value so the SDK never silently downgrades to `'default'`. + */ + readonly permissionModeFallback: ClaudePermissionMode; + readonly isResume: boolean; } /** @@ -63,26 +103,32 @@ export class ClaudeMaterializer { @IClaudeAgentSdkService private readonly _sdkService: IClaudeAgentSdkService, @ISessionDataService private readonly _sessionDataService: ISessionDataService, @IInstantiationService private readonly _instantiationService: IInstantiationService, + @IAgentConfigurationService private readonly _configurationService: IAgentConfigurationService, ) { } - async materialize( - provisional: IClaudeMaterializeProvisional, - proxyHandle: IClaudeProxyHandle, - permissionMode: ClaudePermissionMode, - canUseTool: NonNullable, - startMode: 'fresh' | 'resume' = 'fresh', - ): Promise { + async materialize(ctx: IMaterializeContext): Promise { + const { provisional, proxyHandle, canUseTool, isResume, permissionModeFallback } = ctx; if (!provisional.workingDirectory) { throw new Error(`Cannot materialize Claude session ${provisional.sessionId}: workingDirectory is required`); } - const isResume = startMode === 'resume'; - const options = await this._buildOptions(provisional, proxyHandle, permissionMode, canUseTool, isResume); + // Phase 10 — per-session client-tool plumbing. The MCP server's + // `tool()` handler closures capture the SAME registry + diff that + // the eventually-owned session holds, so the closures park on the + // session's live registry (council finding C1). + const pendingClientToolCalls = new PendingRequestRegistry(); + const toolDiff = new SessionClientToolsDiff(); + toolDiff.model.setTools(provisional.clientTools, provisional.clientId); + + const permissionMode = readClaudePermissionMode(this._configurationService, provisional.sessionUri) ?? permissionModeFallback; + const initialMcpServers = await this._buildClientMcpServers(toolDiff, pendingClientToolCalls); + + const options = await this._buildOptions(provisional, proxyHandle, permissionMode, canUseTool, isResume, initialMcpServers); // Trace what the SDK gets so live debugging doesn't have to infer // from the absence of a `fileEdit` block whether the edit-tracking // plumbing was wired this session. - this._logService.info(`[Claude] session ${provisional.sessionId}: enableFileCheckpointing=${options.enableFileCheckpointing} startMode=${startMode}`); + this._logService.info(`[Claude] session ${provisional.sessionId}: enableFileCheckpointing=${options.enableFileCheckpointing} isResume=${isResume}`); const warm = await this._sdkService.startup({ options }); @@ -101,8 +147,9 @@ export class ClaudeMaterializer { // the session, which disposes the ref ahead of its `WarmQuery` // abort so any in-flight write completes. const dbRef = this._sessionDataService.openDatabase(provisional.sessionUri); + let session: ClaudeAgentSession; try { - return this._instantiationService.createInstance( + session = this._instantiationService.createInstance( ClaudeAgentSession, provisional.sessionId, provisional.sessionUri, @@ -110,6 +157,9 @@ export class ClaudeMaterializer { warm, provisional.abortController, dbRef, + pendingClientToolCalls, + toolDiff, + permissionModeFallback, ); } catch (err) { // Construction failed — own the cleanup so no resource leaks. @@ -117,6 +167,27 @@ export class ClaudeMaterializer { await warm[Symbol.asyncDispose](); throw err; } + + // Phase 9 — wire the rematerializer so the session can rebind the + // SDK on yield-restart (e.g. after a client-tool snapshot change). + // The closure captures the ctx so `getPermissionMode` is re-read on + // each rebind and any concurrent `SessionConfigChanged` wins. + session.attachRematerializer(async (_reason) => { + const liveMode = readClaudePermissionMode(this._configurationService, provisional.sessionUri) ?? permissionModeFallback; + try { + const mcpServers = await this._buildClientMcpServers(session.toolDiff, pendingClientToolCalls); + return await this._materializeResume(provisional, proxyHandle, liveMode, canUseTool, mcpServers); + } catch (err) { + // The client-tool diff was consumed by `_buildClientMcpServers`, + // but the rebind never reached a live SDK. Re-mark dirty so the + // next send retries with the same snapshot instead of silently + // running on the stale server set. + session.toolDiff.markDirty(); + throw err; + } + }); + + return session; } /** @@ -134,18 +205,19 @@ export class ClaudeMaterializer { * the field via `this`, so swapping `_abortController` post-rebind is * sufficient). */ - async materializeResume( + private async _materializeResume( provisional: IClaudeMaterializeProvisional, proxyHandle: IClaudeProxyHandle, permissionMode: ClaudePermissionMode, canUseTool: NonNullable, + mcpServers: Record | undefined, ): Promise<{ readonly warm: WarmQuery; readonly abortController: AbortController }> { if (!provisional.workingDirectory) { throw new Error(`Cannot materialize Claude session ${provisional.sessionId}: workingDirectory is required`); } const abortController = new AbortController(); const resumedProvisional: IClaudeMaterializeProvisional = { ...provisional, abortController }; - const options = await this._buildOptions(resumedProvisional, proxyHandle, permissionMode, canUseTool, true); + const options = await this._buildOptions(resumedProvisional, proxyHandle, permissionMode, canUseTool, true, mcpServers); this._logService.info(`[Claude] session ${provisional.sessionId}: resume rebuild`); const warm = await this._sdkService.startup({ options }); return { warm, abortController }; @@ -157,6 +229,7 @@ export class ClaudeMaterializer { permissionMode: ClaudePermissionMode, canUseTool: NonNullable, isResume: boolean, + mcpServers: Record | undefined, ): Promise { const subprocessEnv = buildSubprocessEnv(); // Settings env: forwarded to the Claude subprocess via the SDK's @@ -224,12 +297,33 @@ export class ClaudeMaterializer { ...(isResume ? { resume: provisional.sessionId } : { sessionId: provisional.sessionId }), + ...(mcpServers ? { mcpServers } : {}), settingSources: ['user', 'project', 'local'], settings: { env: settingsEnv }, systemPrompt: { type: 'preset', preset: 'claude_code' }, stderr: data => this._logService.error(`[Claude SDK stderr] ${data}`), }; } + + /** + * Phase 10 — consume the diff (clears its dirty bit) and build the + * in-process MCP server config from the resulting tool snapshot. + * Resolves to `undefined` when the snapshot is empty so + * `Options.mcpServers` is omitted entirely and the SDK keeps its + * default. If the build throws the diff is re-marked dirty so the + * next sendMessage retries (C6). + */ + private async _buildClientMcpServers( + toolDiff: SessionClientToolsDiff, + registry: PendingRequestRegistry, + ): Promise | undefined> { + const { tools } = toolDiff.consume(); + if (!tools || tools.length === 0) { + return undefined; + } + const server = await buildClientToolMcpServer(tools, id => registry.register(id), this._sdkService); + return { client: server }; + } } /** diff --git a/src/vs/platform/agentHost/node/claude/claudeSdkMessageRouter.ts b/src/vs/platform/agentHost/node/claude/claudeSdkMessageRouter.ts index cd13914cd1c47..450c96a4835bb 100644 --- a/src/vs/platform/agentHost/node/claude/claudeSdkMessageRouter.ts +++ b/src/vs/platform/agentHost/node/claude/claudeSdkMessageRouter.ts @@ -36,19 +36,27 @@ export class ClaudeSdkMessageRouter extends Disposable { private readonly _editObserver: ClaudeFileEditObserver; private readonly _mapperState = new ClaudeMapperState(); + private _clientId: string | undefined; + constructor( private readonly _sessionUri: URI, dbRef: IReference, private readonly _subagents: SubagentRegistry, + clientId: string | undefined = undefined, @IInstantiationService instantiationService: IInstantiationService, @ILogService private readonly _logService: ILogService, ) { super(); + this._clientId = clientId; this._editObserver = this._register( instantiationService.createInstance(ClaudeFileEditObserver, _sessionUri.toString(), dbRef), ); } + setClientId(clientId: string | undefined): void { + this._clientId = clientId; + } + async handle(message: SDKMessage, turnId: string | undefined): Promise { if (message.type === 'assistant') { this._editObserver.observeAssistant(message); @@ -66,6 +74,7 @@ export class ClaudeSdkMessageRouter extends Disposable { this._mapperState, this._logService, this._subagents, + this._clientId, ); for (const signal of signals) { this._onDidProduceSignal.fire(signal); diff --git a/src/vs/platform/agentHost/node/claude/claudeSdkPipeline.ts b/src/vs/platform/agentHost/node/claude/claudeSdkPipeline.ts index f94f59c51ee19..be1b9d32109e5 100644 --- a/src/vs/platform/agentHost/node/claude/claudeSdkPipeline.ts +++ b/src/vs/platform/agentHost/node/claude/claudeSdkPipeline.ts @@ -111,6 +111,7 @@ export class ClaudeSdkPipeline extends Disposable { abortController: AbortController, dbRef: IReference, subagents: SubagentRegistry, + clientId: string | undefined = undefined, @IInstantiationService instantiationService: IInstantiationService, @ILogService private readonly _logService: ILogService, ) { @@ -129,7 +130,7 @@ export class ClaudeSdkPipeline extends Disposable { }), )); this._router = this._register(instantiationService.createInstance( - ClaudeSdkMessageRouter, sessionUri, dbRef, subagents, + ClaudeSdkMessageRouter, sessionUri, dbRef, subagents, clientId, )); this._register(this._router.onDidProduceSignal(s => this._onDidProduceSignal.fire(s))); // Dispose chain → abort → SDK cleanup. Reads the *current* @@ -145,6 +146,26 @@ export class ClaudeSdkPipeline extends Disposable { get isAborted(): boolean { return this._abortController.signal.aborted; } + /** + * Phase 10 \u2014 narrow public wrapper around the internal + * {@link _rebindQuery} so {@link ClaudeAgentSession.rebindForClientTools} + * can drive a yield-restart without exposing the private rebind + * machinery to every collaborator. + */ + rebindForRestart(): Promise { + return this._rebindQuery('restart'); + } + + /** + * Phase 10 — update the workbench `clientId` that the stream mapper + * stamps onto subsequent `SessionToolCallStart` events. Called by the + * session whenever {@link SessionClientToolsModel} receives a new + * clientId via `setClientTools`. + */ + setClientId(clientId: string | undefined): void { + this._router.setClientId(clientId); + } + /** Attach the rematerializer hook for abort / crash recovery. Optional — tests that exercise only the dispose path skip this. */ attachRematerializer(rematerializer: IRematerializer): void { this._rematerializer = rematerializer; diff --git a/src/vs/platform/agentHost/node/claude/claudeSessionPermissionMode.ts b/src/vs/platform/agentHost/node/claude/claudeSessionPermissionMode.ts new file mode 100644 index 0000000000000..aef5cfc1a8895 --- /dev/null +++ b/src/vs/platform/agentHost/node/claude/claudeSessionPermissionMode.ts @@ -0,0 +1,30 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import type { PermissionMode } from '@anthropic-ai/claude-agent-sdk'; +import type { URI } from '../../../../base/common/uri.js'; +import { ClaudeSessionConfigKey, narrowClaudePermissionMode } from '../../common/claudeSessionConfigKeys.js'; +import type { IAgentConfigurationService } from '../agentConfigurationService.js'; + +/** + * Read the live `permissionMode` for a session from + * {@link IAgentConfigurationService}, narrowed to the SDK's + * `PermissionMode` union. Returns `undefined` when the session's + * schema hasn't been registered or carries a value that slipped past + * schema validation — callers pick the fallback (the createSession-time + * intent at materialize, `'default'` at the canUseTool gate, etc.). + * + * Called on every canUseTool entry, on every rebind, and before each + * `session.send` so a mid-turn `SessionConfigChanged` action wins over + * the materialize-time seed (plan S3.6). + */ +export function readClaudePermissionMode( + configurationService: IAgentConfigurationService, + sessionUri: URI, +): PermissionMode | undefined { + return narrowClaudePermissionMode( + configurationService.getSessionConfigValues(sessionUri.toString())?.[ClaudeSessionConfigKey.PermissionMode], + ); +} diff --git a/src/vs/platform/agentHost/node/claude/clientTools/claudeClientToolMcpServer.ts b/src/vs/platform/agentHost/node/claude/clientTools/claudeClientToolMcpServer.ts new file mode 100644 index 0000000000000..3a3476917fbd5 --- /dev/null +++ b/src/vs/platform/agentHost/node/claude/clientTools/claudeClientToolMcpServer.ts @@ -0,0 +1,112 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import type { McpSdkServerConfigWithInstance } from '@anthropic-ai/claude-agent-sdk'; +import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js'; +import type { ToolDefinition } from '../../../common/state/protocol/state.js'; +import type { IClaudeAgentSdkService } from '../claudeAgentSdkService.js'; +import { jsonSchemaToZodRawShape } from './claudeJsonSchemaToZod.js'; + +/** + * Anthropic SDK contract: the in-process MCP `tool()` handler receives a + * `RequestHandlerExtra` whose `_meta` carries the originating + * `tool_use_id` under this namespaced key. Verified empirically against + * `@anthropic-ai/claude-agent-sdk@0.2.128`. If a future SDK version drops + * or renames this field, the handler returns an error result instead of + * silently deadlocking — see {@link extractToolUseId}. + */ +const TOOL_USE_ID_META_KEY = 'claudecode/toolUseId'; + +/** + * Build the per-session in-process MCP server that surfaces the workbench + * client's {@link ToolDefinition}s to the Claude SDK via + * `Options.mcpServers['client']`. + * + * Each tool's handler reads the originating `tool_use_id` from the SDK's + * `extra._meta["claudecode/toolUseId"]` and delegates to `awaitResult`, + * which is expected to return a promise that settles when the workbench + * echoes a completion (typically via a parked deferred owned by the + * session). Keeping that plumbing behind a callback lets this factory + * stay ignorant of how the host tracks in-flight tool calls. + * + * Pure factory — no SDK loading, no I/O. + */ +export async function buildClientToolMcpServer( + snapshot: readonly ToolDefinition[], + awaitResult: (toolUseId: string) => Promise, + sdk: IClaudeAgentSdkService +): Promise { + const tools = await Promise.all(snapshot.map(def => sdk.tool( + def.name, + def.description ?? '', + jsonSchemaToZodRawShape(def.inputSchema), + async (_args, extra) => { + const toolUseId = extractToolUseId(extra); + if (toolUseId === undefined) { + return { + content: [{ + type: 'text', + text: `Client tool "${def.name}" could not run: SDK omitted tool_use_id (expected at extra._meta["${TOOL_USE_ID_META_KEY}"]).`, + }], + isError: true, + }; + } + return awaitResult(toolUseId); + } + ))); + return sdk.createSdkMcpServer({ name: CLAUDE_CLIENT_MCP_SERVER_NAME, tools }); +} + +/** + * Recover the SDK-supplied `tool_use_id` from the MCP `tool()` handler's + * `extra` argument. Returns `undefined` (and the handler degrades to an + * error result) if the SDK ever drops the meta field — preferable to + * deadlocking the call. + */ +export function extractToolUseId(extra: unknown): string | undefined { + if (!extra || typeof extra !== 'object') { + return undefined; + } + const meta = (extra as { _meta?: unknown })._meta; + if (!meta || typeof meta !== 'object') { + return undefined; + } + const value = (meta as Record)[TOOL_USE_ID_META_KEY]; + return typeof value === 'string' ? value : undefined; +} + +/** + * Name of the single in-process MCP server we register client tools on. + * Exported so the stream mapper can strip the SDK's `mcp____` name + * prefix before stamping `SessionToolCallStart.toolClientId`. + */ +export const CLAUDE_CLIENT_MCP_SERVER_NAME = 'client'; + +/** + * Per the Anthropic SDK's MCP server naming convention, an in-process MCP + * server registered as `Options.mcpServers[]` surfaces its + * tools to the model with names of the form `mcp____`. + * The stream mapper must strip the prefix before the workbench (which + * registered tools by their unprefixed `ToolDefinition.name`) can + * recognize them as client tools. + * + * Returns the input unchanged when it doesn't carry the prefix (SDK-owned + * tools, subagent spawn tools, etc.) so non-client-tool calls flow + * through without interference. + */ +export function stripClientToolNamePrefix(toolName: string): string { + const prefix = `mcp__${CLAUDE_CLIENT_MCP_SERVER_NAME}__`; + return toolName.startsWith(prefix) ? toolName.slice(prefix.length) : toolName; +} + +/** + * Whether the SDK-emitted tool name carries the in-process MCP server + * prefix, i.e. the tool is one of the workbench's client-provided tools. + * Used by the stream mapper to set `SessionToolCallStart.toolClientId` so + * the workbench takes the client-tool invocation branch. + */ +export function hasClientToolNamePrefix(toolName: string): boolean { + return toolName.startsWith(`mcp__${CLAUDE_CLIENT_MCP_SERVER_NAME}__`); +} diff --git a/src/vs/platform/agentHost/node/claude/clientTools/claudeClientToolResult.ts b/src/vs/platform/agentHost/node/claude/clientTools/claudeClientToolResult.ts new file mode 100644 index 0000000000000..0ef905879d417 --- /dev/null +++ b/src/vs/platform/agentHost/node/claude/clientTools/claudeClientToolResult.ts @@ -0,0 +1,83 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js'; +import { ToolResultContentType, type ToolCallResult, type ToolResultContent, type ToolResultEmbeddedResourceContent } from '../../../common/state/protocol/channels-session/state.js'; + +/** + * Stable URI scheme used when an `EmbeddedResource` content block must be + * shipped to the MCP server as the `{ type: 'resource', resource: { uri, mimeType, blob } }` + * shape. The MCP zod schema rejects resource blocks without a URI, and the + * protocol's `{ data, contentType }` carries no URI of its own — so we mint + * an ephemeral, per-call URI of the form + * `claude-client:///`. This URI is opaque to both sides; + * the model never dereferences it. + */ +const CLAUDE_CLIENT_RESOURCE_SCHEME = 'claude-client'; + +/** + * Convert a protocol {@link ToolCallResult} into the MCP {@link CallToolResult} + * shape the Anthropic SDK feeds back to the model. Mostly 1:1 mapping with + * one shape transform: protocol `EmbeddedResource { data, contentType }` → + * MCP `{ type: 'image', ... }` for `image/*`, or + * `{ type: 'resource', resource: { uri, mimeType, blob } }` for everything + * else (synthesized URI per the scheme above). + * + * Unknown content block kinds collapse to a synthesized text block + a + * console warn — bounded blast radius (one bad call, not a crash). + * + * @param toolUseId The SDK `tool_use_id` for this call. Embedded into the + * synthesized resource URI so the same call's blocks stay disambiguated + * across parallel tool calls. + */ +export function convertToolCallResult(result: ToolCallResult, toolUseId: string): CallToolResult { + const blocks = result.content ?? []; + const content = blocks.map((block, index) => convertBlock(block, toolUseId, index)); + const out: CallToolResult = { content }; + if (result.structuredContent !== undefined) { + out.structuredContent = result.structuredContent; + } + if (!result.success || result.error) { + out.isError = true; + } + return out; +} + +function convertBlock(block: ToolResultContent, toolUseId: string, index: number): CallToolResult['content'][number] { + switch (block.type) { + case ToolResultContentType.Text: + return { type: 'text', text: block.text }; + case ToolResultContentType.EmbeddedResource: + return convertEmbeddedResource(block, toolUseId, index); + default: { + // Unknown / unsupported block (Resource, FileEdit, Terminal, Subagent). + // MCP doesn't model these client-tool-result shapes natively, so we + // degrade to a stringified text block to keep the call observable. + console.warn(`[Claude] convertToolCallResult: unsupported tool-result block kind '${(block as ToolResultContent).type}'; degrading to text`); + let text: string; + try { + text = JSON.stringify(block); + } catch { + text = `[unserializable ${(block as ToolResultContent).type} block]`; + } + return { type: 'text', text }; + } + } +} + +function convertEmbeddedResource( + block: ToolResultEmbeddedResourceContent, + toolUseId: string, + index: number +): CallToolResult['content'][number] { + if (block.contentType.startsWith('image/')) { + return { type: 'image', data: block.data, mimeType: block.contentType }; + } + const uri = `${CLAUDE_CLIENT_RESOURCE_SCHEME}://${encodeURIComponent(toolUseId)}/${index}`; + return { + type: 'resource', + resource: { uri, mimeType: block.contentType, blob: block.data }, + }; +} diff --git a/src/vs/platform/agentHost/node/claude/clientTools/claudeJsonSchemaToZod.ts b/src/vs/platform/agentHost/node/claude/clientTools/claudeJsonSchemaToZod.ts new file mode 100644 index 0000000000000..e8dd1eb52e0e3 --- /dev/null +++ b/src/vs/platform/agentHost/node/claude/clientTools/claudeJsonSchemaToZod.ts @@ -0,0 +1,135 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { z, type ZodTypeAny } from 'zod'; +import type { ToolDefinition } from '../../../common/state/protocol/state.js'; + +/** + * Converts the narrow JSON Schema subset that + * {@link ToolDefinition.inputSchema} carries into the `Record` + * "raw shape" expected by the Anthropic SDK's `tool()` factory + * (`AnyZodRawShape`). + * + * Per-property failures fall back to {@link z.any} so a single malformed + * property never rejects an entire tool — clients always get *some* tool + * surface to bind against. + */ +export function jsonSchemaToZodRawShape( + inputSchema: ToolDefinition['inputSchema'] | undefined +): Record { + if (!inputSchema || inputSchema.type !== 'object' || !inputSchema.properties) { + return {}; + } + const required = new Set(inputSchema.required ?? []); + const shape: Record = {}; + for (const [name, raw] of Object.entries(inputSchema.properties)) { + let zodType: ZodTypeAny; + try { + zodType = jsonPropertyToZod(raw as JsonSchemaProperty); + } catch { + zodType = z.any(); + } + if (!required.has(name)) { + zodType = zodType.optional(); + } + shape[name] = zodType; + } + return shape; +} + +interface JsonSchemaProperty { + type?: string | string[]; + description?: string; + default?: unknown; + nullable?: boolean; + enum?: unknown[]; + oneOf?: JsonSchemaProperty[]; + anyOf?: JsonSchemaProperty[]; + items?: JsonSchemaProperty; + properties?: Record; + required?: string[]; +} + +function jsonPropertyToZod(prop: JsonSchemaProperty): ZodTypeAny { + if (!prop || typeof prop !== 'object') { + return z.any(); + } + + let base: ZodTypeAny; + + if (Array.isArray(prop.enum) && prop.enum.length > 0) { + const literals = prop.enum.filter(v => + typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean' || v === null + ) as (string | number | boolean | null)[]; + if (literals.length === 0) { + base = z.any(); + } else if (literals.length === 1) { + base = z.literal(literals[0] as Exclude); + } else { + base = z.union(literals.map(v => z.literal(v as Exclude)) as unknown as [ZodTypeAny, ZodTypeAny, ...ZodTypeAny[]]); + } + } else if (Array.isArray(prop.oneOf) && prop.oneOf.length > 0) { + base = unionOf(prop.oneOf); + } else if (Array.isArray(prop.anyOf) && prop.anyOf.length > 0) { + base = unionOf(prop.anyOf); + } else { + const type = Array.isArray(prop.type) ? prop.type[0] : prop.type; + switch (type) { + case 'string': + base = z.string(); + break; + case 'number': + base = z.number(); + break; + case 'integer': + base = z.number().int(); + break; + case 'boolean': + base = z.boolean(); + break; + case 'array': + base = z.array(prop.items ? jsonPropertyToZod(prop.items) : z.any()); + break; + case 'object': { + const sub: Record = {}; + const subRequired = new Set(prop.required ?? []); + for (const [n, p] of Object.entries(prop.properties ?? {})) { + let t: ZodTypeAny; + try { t = jsonPropertyToZod(p); } catch { t = z.any(); } + if (!subRequired.has(n)) { t = t.optional(); } + sub[n] = t; + } + base = z.object(sub); + break; + } + case 'null': + base = z.null(); + break; + default: + base = z.any(); + } + } + + if (prop.nullable) { + base = base.nullable(); + } + if (prop.description) { + base = base.describe(prop.description); + } + if (prop.default !== undefined) { + base = base.default(prop.default as never); + } + return base; +} + +function unionOf(schemas: JsonSchemaProperty[]): ZodTypeAny { + const variants = schemas.map(s => { + try { return jsonPropertyToZod(s); } catch { return z.any(); } + }); + if (variants.length === 1) { + return variants[0]; + } + return z.union(variants as [ZodTypeAny, ZodTypeAny, ...ZodTypeAny[]]); +} diff --git a/src/vs/platform/agentHost/node/claude/clientTools/claudeSessionClientToolsModel.ts b/src/vs/platform/agentHost/node/claude/clientTools/claudeSessionClientToolsModel.ts new file mode 100644 index 0000000000000..0527deb299101 --- /dev/null +++ b/src/vs/platform/agentHost/node/claude/clientTools/claudeSessionClientToolsModel.ts @@ -0,0 +1,154 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { Disposable } from '../../../../../base/common/lifecycle.js'; +import { equals } from '../../../../../base/common/objects.js'; +import { autorun, IObservable, ISettableObservable, observableValueOpts } from '../../../../../base/common/observable.js'; +import type { ToolDefinition } from '../../../common/state/protocol/state.js'; + +/** + * Combined snapshot of the workbench-registered client-tool definitions + * and the workbench `clientId` that owns them. The two travel as one + * value so consumers can read both with a single `.get()` and so that an + * update to either field is observed as a single change. + */ +export interface ISessionClientToolsState { + readonly tools: readonly ToolDefinition[] | undefined; + readonly clientId: string | undefined; +} + +const INITIAL_STATE: ISessionClientToolsState = { tools: undefined, clientId: undefined }; + +/** + * Pure state holder for the workbench-registered client-tool snapshot + * and the workbench `clientId` that owns it. Exposes the pair as a + * single {@link IObservable} so consumers can react to changes without + * polling. + * + * The `state` observable dedupes structurally-equivalent writes: tool + * snapshots compare on `name + description + inputSchema` + * (order-insensitive, `undefined` equivalent to `[]`); `clientId` compares strictly. + * A re-send of the same `(tools, clientId)` pair therefore does NOT fire + * downstream subscribers. + * + * Knows nothing about diffing or the SDK — pair with + * {@link SessionClientToolsDiff} to track "has the snapshot changed + * since the last successful SDK build". + */ +export class SessionClientToolsModel { + + private readonly _state: ISettableObservable = observableValueOpts( + { owner: this, equalsFn: stateEqual }, + INITIAL_STATE, + ); + readonly state: IObservable = this._state; + + setTools(tools: readonly ToolDefinition[] | undefined, clientId?: string): void { + const current = this._state.get(); + this._state.set({ + tools, + clientId: clientId ?? current.clientId, + }, undefined); + } +} + +/** + * Tracks "has {@link SessionClientToolsModel.state} changed since the + * last successful {@link build}?". Subscribes to the model's observable + * and flips a private dirty bit on every change; {@link build} captures + * the current snapshot, hands it to the supplied builder, and clears + * the bit on success — preserving the C6 pin invariant from the + * previous `ClientToolDiff` implementation: a `setTools` call that + * races the in-flight builder re-flips the bit via the autorun, so the + * next sendMessage detects the stale set and triggers another rebind. + * + * On builder throw the bit is left set — the SDK is still running with + * the previous snapshot, so the next sendMessage should retry. + */ +export class SessionClientToolsDiff extends Disposable { + + readonly model: SessionClientToolsModel = new SessionClientToolsModel(); + + private _dirty = false; + // `autorun` invokes its callback once at registration for dependency + // tracking. Skip that initial run so a brand-new diff doesn't report + // dirty before any `setTools` has happened. + private _ignoreNextFire = true; + + constructor() { + super(); + this._register(autorun(reader => { + this.model.state.read(reader); + if (this._ignoreNextFire) { + this._ignoreNextFire = false; + return; + } + this._dirty = true; + })); + } + + get hasDifference(): boolean { + return this._dirty; + } + + /** + * Read the current state and mark it as the applied snapshot. A + * subsequent {@link SessionClientToolsModel.setTools} re-flips dirty + * via the autorun, so callers do NOT need to compare snapshots + * themselves to detect a race. If the caller's downstream work + * (e.g. SDK rebuild) fails, call {@link markDirty} to surface the + * stale state so the next sendMessage retries. + */ + consume(): ISessionClientToolsState { + const state = this.model.state.get(); + this._dirty = false; + return state; + } + + /** + * Force the dirty bit on. Use when a caller's async work that + * followed {@link consume} failed and the SDK is therefore still on + * the previous snapshot. + */ + markDirty(): void { + this._dirty = true; + } +} + +function stateEqual(a: ISessionClientToolsState, b: ISessionClientToolsState): boolean { + return a.clientId === b.clientId && snapshotsEqual(a.tools, b.tools); +} + +/** + * Deep-equal two client-tool snapshots on `name + description + inputSchema`. + * `undefined` and `[]` compare equal. Order-insensitive. + */ +function snapshotsEqual( + a: readonly ToolDefinition[] | undefined, + b: readonly ToolDefinition[] | undefined +): boolean { + const aa = a ?? []; + const bb = b ?? []; + if (aa.length !== bb.length) { + return false; + } + const byName = new Map(); + for (const t of aa) { + byName.set(t.name, t); + } + for (const t of bb) { + const prev = byName.get(t.name); + if (!prev) { + return false; + } + if (prev.description !== t.description) { + return false; + } + if (!equals(prev.inputSchema, t.inputSchema)) { + return false; + } + } + return true; +} diff --git a/src/vs/platform/agentHost/node/claude/phase10-plan.md b/src/vs/platform/agentHost/node/claude/phase10-plan.md new file mode 100644 index 0000000000000..f8727ca243e8e --- /dev/null +++ b/src/vs/platform/agentHost/node/claude/phase10-plan.md @@ -0,0 +1,834 @@ +# Phase 10 — Client-provided tools (in-process MCP) + +> Generated by super-planner. Source: `roadmap.md` (Phase 10). +> Last updated: 2026-05-19 after second council review + post-refactor grilling (Q8–Q12 resolved: SDK service wrap, rebind ordering, ephemeral URI, fake-only tests, provisional mutability). + +**Status:** ✅ **DONE** (steps 1–10 landed; council review + post-rebase fixes for the materialize / resume `setClientTools` race and rebind-failure dirty-bit gap; 1833 agentHost tests passing; E2E verified — session resume + persisted `client__*` MCP tool roundtrips visible in the launched Agents window) + +## Goal + +Let a workbench client register custom tools on a Claude session via +`IAgent.setClientTools(session, clientId, tools)`, surface them through the +SDK's `Options.mcpServers` in-process MCP path, invoke them when the model +calls them, and route the result back through +`IAgent.onClientToolCallComplete(session, toolCallId, result)` — matching the +behavior the Copilot agent already ships. + +## Scope + +**In scope** +- `setClientTools` / `onClientToolCallComplete` real implementations on + `ClaudeAgent` (replace the Phase-10 stubs at + [claudeAgent.ts:1068, 1072](src/vs/platform/agentHost/node/claude/claudeAgent.ts)). +- A `clientTools: readonly ToolDefinition[] | undefined` field on the + per-session objects (`IClaudeProvisionalSession` pre-materialize, + `ClaudeAgentSession` post-materialize), with the provisional value + transferred at materialize time — same pattern `model` and + `permissionMode` already follow. No parallel Map keyed by sessionId. +- A JSON Schema → Zod raw-shape converter for `ToolDefinition.inputSchema`, + scoped to the protocol's actual subset. +- Building one in-process `createSdkMcpServer({ name: 'client', tools })` at + every materialize / resume / yield-restart, written into `Options.mcpServers` + via `claudeMaterializer._buildOptions`. +- Deferred-promise plumbing on `ClaudeAgentSession` for each in-flight MCP + tool call, keyed by SDK `tool_use_id`. Reuses the existing + [`PendingRequestRegistry`](src/vs/platform/agentHost/common/pendingRequestRegistry.ts) + primitive (already wired for Phase 7 permission round-trips) plus a new + `ClaudeToolUseIdBridge` that resolves `(toolName, args) → tool_use_id` + from Phase 7's `tool_use` stream events — necessary because the + Anthropic SDK's `tool()` handler signature is `(args, extra: unknown)` + and does NOT pass `tool_use_id` (verified against sdk.mjs). +- Tool-set diff (deep-equal on `name + description + inputSchema`) checked in + `sendMessage`'s materialized-session branch; mismatch → yield-restart via + the existing `_rebindQuery('restart')` path in `claudeSdkPipeline`. +- `onClientToolCallComplete` subagent routing: `parseSubagentSessionUri` → + walk to root → look up registry on the root session id (mirrors + [copilotAgent.ts:943-954](src/vs/platform/agentHost/node/copilot/copilotAgent.ts)). +- Result type conversion: protocol `ToolCallResult` → MCP `CallToolResult` + (`content`, `structuredContent`, `isError`). +- Test coverage in `claudeAgent.test.ts`: registration → invocation → + completion round-trip, subagent routing, tool-diff restart, unknown + toolCallId is a benign no-op. + +**Out of scope** +- `Query.setMcpServers` runtime hot-swap (external HTTP MCP servers) — that's + **Phase 11** territory. Phase 10 only writes `Options.mcpServers` at startup + and uses yield-restart for the in-process tool path. +- MCP gateway (`_gateway` / `_gatewayIdleTimeout`) lifecycle. **Roadmap + correction (see Decisions):** the production extension's `_gateway` field is + the VS Code `McpGateway` adapter exposing external editor MCP servers over + HTTP — it has nothing to do with in-process tools built from + `createSdkMcpServer`. There are no external resources to idle out here. +- `setClientCustomizations` / `setCustomizationEnabled` (Phase 11). +- `reloadPlugins` (Phase 11; orthogonal to client tools per roadmap). +- Workbench-side rendering changes for client-tool cards. + +## Prerequisites + +- Phase 9 yield-restart machinery shipped (`_rebindQuery('restart')` in + [claudeSdkPipeline.ts:342-399](src/vs/platform/agentHost/node/claude/claudeSdkPipeline.ts)). +- Phase 12 subagent URI helpers shipped (`parseSubagentSessionUri` is already + imported by `claudeAgent.ts`). +- Phase 13 resume / cross-window resume path shipped (`_resumeSession` is the + second integration point for `Options.mcpServers`, alongside + `_materializeProvisional`). +- `zod` is available at the platform layer (it's a root dep). + +## Approach + +Store client-tool state on the per-session objects that already exist—the +provisional record (`IClaudeProvisionalSession.clientTools`) and +`ClaudeAgentSession.clientTools`—rather than introducing a parallel +Map-keyed-by-sessionId on `ClaudeAgent`. `setClientTools(session, clientId, +tools)` writes to whichever exists; at materialize, the provisional's +`clientTools` flow into the new `ClaudeAgentSession`. + +**Construction order (council finding C1)**: the in-process MCP server's +`tool()` handler closures capture the session's `_pendingClientToolCalls` +and `_toolUseIdBridge`. These two collaborators are therefore constructed +on the agent side BEFORE materialize and passed INTO the materializer +alongside `clientTools`, so the server's handler closures bind to the +same instances the eventually-constructed `ClaudeAgentSession` will own. +The materializer hands them to the session at construction time. This +avoids the order trap of "server built first, session created second, +closures captured the wrong objects." + +The session owns: +- `clientTools: readonly ToolDefinition[] | undefined` (mutable via + `setClientTools(tools)`). +- `appliedClientToolSnapshot: readonly ToolDefinition[] | undefined` + (stale-detection for restart-required tool-set changes). +- `_pendingClientToolCalls: PendingRequestRegistry` + (deferred handlers). +- `_toolUseIdBridge: ClaudeToolUseIdBridge` (rendezvous between Phase 7's + `tool_use` stream events and the MCP handler). + +At every `materialize` / `resume` / yield-restart, the agent builds a +fresh `Options.mcpServers` from `session.clientTools` (or +`provisional.clientTools` pre-materialize) and passes it through the +materializer. Tool-set changes between turns trigger +`_rebindQuery('restart')` at the sendMessage yield boundary; never +`Query.setMcpServers`. Cleanup is automatic via +`ClaudeSessionEntry.dispose()`. + +## Steps + +1. ✓ **JSON Schema → Zod raw-shape converter.** + - Files: `src/vs/platform/agentHost/node/claude/claudeJsonSchemaToZod.ts` (new), + `src/vs/platform/agentHost/test/node/claudeJsonSchemaToZod.test.ts` (new). + - Depends on: none. + - Scope: convert `{ type:'object', properties?: Record, required?: string[] }` + into a `Record` (the SDK's `AnyZodRawShape`). Handle + `string` / `number` / `boolean` / `integer` / `array` / nested `object` + / enum / oneOf, plus `description`, `default`, `nullable`. Unknown + property schemas fall back per-property to `z.any()` — never reject the + whole tool. `required` controls `.optional()` on each property. + - Done when: unit tests cover the supported subset + the fallback path, + and a malformed schema produces a benign `z.any()` tool rather than + throwing. + +2. ✓ **Per-session state + extracted public helpers.** + - Files: + `src/vs/platform/agentHost/node/claude/claudeAgent.ts` (add + `clientTools?: readonly ToolDefinition[]` to + `IClaudeProvisionalSession`), + `src/vs/platform/agentHost/node/claude/claudeClientToolHelpers.ts` (new — + `snapshotEquals(a, b): boolean` deep-equal on `name + description + inputSchema`, + mirroring [copilotAgent.ts:2073-2087](src/vs/platform/agentHost/node/copilot/copilotAgent.ts) + `ActiveClient.isOutdated`), + `src/vs/platform/agentHost/node/claude/claudeClientToolResult.ts` (new — + `convertToolCallResult(result: ToolCallResult): CallToolResult` doing the + 1:1 content-block mapping, including the protocol + `EmbeddedResource { data, contentType }` → MCP + `{ type:'resource', resource:{ uri, mimeType, blob|text } }` shape + transform; **not** a thin pass-through), + `src/vs/platform/agentHost/node/claude/claudeAgentSession.ts` (add + `clientTools` + `appliedClientToolSnapshot` fields + `setClientTools(tools)` / + `completeClientToolCall(toolCallId, result)` thin delegates). + - Depends on: none. + - Scope: `snapshotEquals` and `convertToolCallResult` are exported + standalone functions per the project's testability rule — they + carry the only non-trivial pure logic in this phase and can be + covered without instantiating a session. The session class stays + a thin orchestrator. No new class hierarchy. + - Done when: unit tests cover `snapshotEquals` against name / + description / schema mutations + `convertToolCallResult` against + text / error / image / embedded-resource (with the shape transform + verified) / unknown-block fallback. + +3. ✓ **`buildClientToolMcpServer` factory + `ClaudeToolUseIdBridge`.** + - Files: `src/vs/platform/agentHost/node/claude/claudeClientToolMcpServer.ts` (new), + `src/vs/platform/agentHost/node/claude/claudeToolUseIdBridge.ts` (new), + `src/vs/platform/agentHost/test/node/claudeToolUseIdBridge.test.ts` (new). + - Depends on: steps 1, 2. + - Scope: + - **Factory**: pure factory that, given the SDK's + `createSdkMcpServer` / `tool` functions (passed in — the SDK is + lazy-loaded so we cannot import them statically), a + `ToolDefinition[]` snapshot, the session's bridge, and the + session's `PendingRequestRegistry`, returns + `{ client: McpSdkServerConfigWithInstance }`. Per-tool handler: + ``` + async (args) => { + const toolUseId = await bridge.resolve(toolName, args); + return registry.registerAndFire(toolUseId, () => session.emitToolCallStart(toolUseId, toolName, args)); + } + ``` + The handler does NOT touch SDK `extra` (verified above that it + carries no `tool_use_id`). + - **Bridge** (`ClaudeToolUseIdBridge`): per-session helper that + receives `tool_use` events from Phase 7's stream mapper via + `noteToolUseEvent(toolUseId, toolName, args, parentToolUseId)` + and resolves handler-side queries via + `resolve(toolName, args): Promise`. + Either side can arrive first — if the event is already pending, + `resolve` returns synchronously; otherwise it parks on a + per-`(toolName, canonicalStringify(args))` FIFO queue. + - **Bridge key carries `parent_tool_use_id` discriminator** + (council finding): subagent and parent can both call the same + tool with identical args concurrently. The mapper already + threads `parent_tool_use_id` through every event + ([claudeMapSessionEvents.ts:232](src/vs/platform/agentHost/node/claude/claudeMapSessionEvents.ts#L232)); + pass it through to `noteToolUseEvent`. The handler can't query + by parent (it doesn't know), so the resolve side keys only on + `(name, args)` but the bridge maintains parent-disambiguated + FIFO queues internally and prefers same-parent matches when + both are pending. Simpler and safer than the unkeyed version. + - **`canonicalStringify(value)`** (~15 lines, same file): walks + the value and emits `JSON.stringify` with **recursively sorted + object keys**, so insertion-order differences between the SDK's + JSON parse of the streamed `tool_use.input` and the args object + the SDK hands to the handler can't desync the rendezvous. + Numbers, arrays, strings, booleans, null pass through unchanged. + Defensive: a value with circular refs or `BigInt` falls back to + a sentinel key + a warn log so the bridge never throws inside a + hot path. + - **Parallel `tool_use` blocks with identical `(name, args)`** are + matched FIFO; arbitrary assignment is safe because each + physical call still gets the right *kind* of result back via the + tool_use_id the workbench echoes. + - On session dispose / rebind, the bridge rejects every parked + `resolve()` with `CancellationError`. + - **Wiring (council finding C2 — mapper DI)**: the mapper currently + receives `session: URI`, not a `ClaudeAgentSession` instance + ([claudeMapSessionEvents.ts:213](src/vs/platform/agentHost/node/claude/claudeMapSessionEvents.ts#L213), + [claudeSdkMessageRouter.ts:62](src/vs/platform/agentHost/node/claude/claudeSdkMessageRouter.ts#L62)). + This phase adds **one new parameter** through the router→mapper + boundary: a `bridge: ClaudeToolUseIdBridge | undefined` (undefined + for subagent transcripts and replay paths where no live bridge + exists). The router passes `this._session?.bridge` from + `ClaudeSdkMessageRouter`; the mapper calls + `bridge?.noteToolUseEvent(toolUseId, name, args, parentToolUseId)` + at `content_block_stop` after `finalizeToolBlock` — the input + deltas are assembled by then. Priming at `content_block_start` + would lose the input. + - **Subagent tool_use blocks** (where `parent_tool_use_id !== null`) + prime the **parent** session's bridge — subagents run inside the + parent's SDK subprocess and share the parent's in-process MCP + server, so the handler-side lookup uses the parent's + registry/bridge transparently. SDK-owned tools (name not in the + parent's `clientTools`) skip the prime (cheap predicate). + - **Timing guarantee**: the Anthropic SDK emits + `content_block_stop` BEFORE invoking the MCP `tools/call`, so the + event-first arrival is the normal case and the handler-first + branch is purely defensive. The asymmetric design (FIFO queue + on either side) covers it regardless of ordering. + - Done when: + - Unit tests cover bridge get-or-create from both sides (event-first, + handler-first), FIFO matching for parallel duplicate calls + (including same-parent and different-parent variants), and + `CancellationError` on `cancelAndDispose`. `noteToolUseEvent` on + a disposed bridge is a benign no-op + warn log. + - A unit test against **stub `createSdkMcpServer` / `tool`** factory + args (passed in — the helper is pure logic, lazy-load is the SDK + service's concern, not this file's) asserts: handler closure + captures `(bridge, registry)`, schema is converted, tools by name + are present. No real SDK module load required. + - `FakeClaudeAgentSdkService.buildClientToolMcpServer` returns a + stub config that records its inputs, so + `claudeAgent.test.ts` can assert + `sdk.capturedStartupOptions[0].mcpServers` equals the fake's + config after first materialize. Real-SDK loading is exercised + only in the live E2E scenario (Verification). + +4. ✓ **Implement `ClaudeAgent.setClientTools`.** + - Files: `src/vs/platform/agentHost/node/claude/claudeAgent.ts` (replace stub + at [L1068](src/vs/platform/agentHost/node/claude/claudeAgent.ts#L1068)). + - Depends on: step 2. + - Scope: + - Resolve `sessionId = AgentSession.id(session)`. + - If `_provisionalSessions.has(sessionId)`: mutate the provisional + record's `clientTools` field (it's `readonly` in the interface + but mutated through a sibling setter for parity with how the + record already handles `model`). + - Else if `_sessions.get(sessionId)`: call + `session.setClientTools(tools)` (the materialized session owns its + own field). + - Else: silently drop (unknown session id — same semantics as + `respondToPermissionRequest` on a stale id today). + - **Do not** restart the live `Query` here; `Options.mcpServers` is + startup-only. The pending change is detected at the next + sendMessage's diff check (step 6). + - Done when: a unit test covers all three branches (provisional / + materialized / unknown) and confirms that registration on a + provisional session flows into `Options.mcpServers` at the first + materialize. + +5. ✓ **Inject `Options.mcpServers` at materialize / resume.** + - Files: + `src/vs/platform/agentHost/node/claude/claudeMaterializer.ts` (modify), + `src/vs/platform/agentHost/node/claude/claudeAgent.ts` (modify + `_materializeProvisional` ~L469 and `_resumeSession` ~L601). + - Depends on: steps 2, 3. + - Scope: + - Extend `materialize` / `materializeResume` to accept an optional + `mcpServers: Record | undefined`. + - `_buildOptions` writes `options.mcpServers = mcpServers` only when + defined (otherwise omit — the SDK's default `mcpServers: undefined` + is fine). + - **`_materializeProvisional`** flow (council finding C1): + 1. Read `const snapshot = provisional.clientTools`. + 2. **Build the session's `_pendingClientToolCalls` and + `_toolUseIdBridge` early** (the constructors are pure; + `ClaudeAgentSession`'s field initializers run before any + materialize logic, OR factor them as `ClaudeAgent`-side + per-session collaborators that get handed into the session + constructor). Choice between the two is an implementer + decision; whichever ordering avoids closure-binding the wrong + instances. + 3. Build the MCP server via `buildClientToolMcpServer(snapshot, + bridge, registry)` — the handler closures now capture + instances the eventually-owned session will see. + 4. Pass the resulting `mcpServers` into `materialize(...)`. + 5. After materialize returns: transfer the SAME `snapshot` (NOT + a re-read of `provisional.clientTools` — council finding C6: + `setClientTools` is not serialized through the session + sequencer, so a re-read could observe a newer snapshot Y + while the SDK is running with snapshot X) into + `session.clientTools` and set + `session.appliedClientToolSnapshot = snapshot ?? []` BEFORE + attaching the rematerializer. This guarantees the first + sendMessage's diff check uses the snapshot the SDK actually + started with. + - **`_resumeSession`**: cross-window resumes start with + `session.clientTools = undefined`; the workbench re-issues + `setClientTools` on active-client re-attach. First sendMessage + sees the registered tools and rebinds. (Correct — prior window's + snapshot is not trustworthy.) + - **Rematerializer closure scope (council finding C3)**: the + existing closure captures `provisional` + ([claudeAgent.ts:488](src/vs/platform/agentHost/node/claude/claudeAgent.ts#L488)), + but `provisional` is deleted after materialize commits + ([:622](src/vs/platform/agentHost/node/claude/claudeAgent.ts#L622)). + The rematerializer must close over the **session** (assigned + after the materializer returns) to re-read `session.clientTools` + on yield-restart. Concretely: assign `const session = await + this._materializer.materialize(...)` then attach the + rematerializer with `session` in scope so it can call + `session.clientTools` AND `this._readSessionPermissionMode(sessionUri)` + on each rebind. Also update `session.appliedClientToolSnapshot` + inside the closure on every rebind so stale-detection stays + honest after crash-recovery rebinds (caveat: for the + `'recover'` path with no intervening `setClientTools`, this is a + no-op write; harmless). + - Done when: an integration test asserts + `sdk.capturedStartupOptions[0].mcpServers` is non-undefined and carries + the registered tool names; a second test asserts no spurious restart + fires when sendMessage is called immediately after materialize with no + intervening `setClientTools`; a third test races `setClientTools` to + fire during the `await materialize` and asserts the + `appliedClientToolSnapshot` equals the snapshot the SDK started + with (not the post-race snapshot). +6. ✓ **Tool-diff check + yield-restart in `sendMessage` (rebind-or-forward, exclusive).** + - Files: `src/vs/platform/agentHost/node/claude/claudeAgent.ts` (modify + `sendMessage` ~L909-973). + - Depends on: steps 2, 5. + - Scope: + - Compute `currentSnapshot = session.clientTools`. + - If `!ClaudeAgentSession.snapshotEquals(session.appliedClientToolSnapshot, currentSnapshot)`: + `await session.rebindForClientTools(currentSnapshot)` and **skip the + existing `setPermissionMode` forward**. The rebind path already + re-reads live permissionMode via the rematerializer + ([claudeAgent.ts:491](src/vs/platform/agentHost/node/claude/claudeAgent.ts#L491)) + and `_rebindQuery` re-applies bijective state via `_replayCurrentConfig` + ([claudeSdkPipeline.ts:397](src/vs/platform/agentHost/node/claude/claudeSdkPipeline.ts#L397)) — + a post-rebind `setPermissionMode` would be redundant. + - Else (no tool-set change): the existing post-#317248 forward runs + unchanged: + ```ts + const liveMode = this._readSessionPermissionMode(sessionUri); + if (liveMode !== undefined) { + await session.setPermissionMode(liveMode); + } + ``` + - Then continue to `session.send(...)` so the next turn sees the new + tools (or just the new prompt). + - The whole block stays INSIDE `_sessionSequencer.queue` so two + concurrent sends collapse to one rebind + two ordered sends. + - Done when: + - A test stages tool set A → send → tool set B → send, asserts + `sdk.startupCallCount === 2` and the second + `capturedStartupOptions[1].mcpServers` reflects set B. + - A second test pins the no-double-apply property: live permissionMode + set to `'plan'`, tool-diff fires, the new Query's + `recordedPermissionModes` contains exactly one `'plan'` (from + `_replayCurrentConfig`), not two. + +7. ✓ **Implement `ClaudeAgent.onClientToolCallComplete`.** + - Files: `src/vs/platform/agentHost/node/claude/claudeAgent.ts` (replace + stub at [L1072](src/vs/platform/agentHost/node/claude/claudeAgent.ts#L1072)). + - Depends on: step 3. + - Scope: + - **Walk subagent URIs to the root with a `while` loop** (council + finding): nested subagents (subagent-of-subagent) need iterated + `parseSubagentSessionUri` calls, mirroring + [copilotAgent.ts:947-949](src/vs/platform/agentHost/node/copilot/copilotAgent.ts#L947). + Single-call parsing handles only depth-1. + - Look up the live `ClaudeAgentSession` from `_sessions`. + - Call `session.completeClientToolCall(toolCallId, convertToolCallResult(result))`. + - **Silent on miss** — the action dispatcher + ([agentSideEffects.ts:851-854](src/vs/platform/agentHost/node/agentSideEffects.ts)) + forwards EVERY `SessionToolCallComplete` envelope including SDK-owned + tools (Read/Write/Bash etc.), so unknown ids are expected and must + not throw. + - Done when: tests cover: unknown tool id no-throw / no-warn; in-flight + client tool id resolves deferred with converted result; **nested + subagent URI walks two levels to the root and resolves**. + +8. ✓ **Per-session deferred registry on `ClaudeAgentSession` (reuse `PendingRequestRegistry`).** + - Files: + `src/vs/platform/agentHost/node/claude/claudeAgentSession.ts` (modify), + `src/vs/platform/agentHost/common/pendingRequestRegistry.ts` (small + extension — see below). + - Depends on: step 3. + - Scope: + - Add field `_pendingClientToolCalls = new PendingRequestRegistry()`. + - Add field `_toolUseIdBridge = new ClaudeToolUseIdBridge()`. + - Add `appliedClientToolSnapshot: readonly ToolDefinition[] | undefined` + (read/written by step 6's rebind path). + - Add `completeClientToolCall(toolCallId, result): boolean` — thin + delegate to `_pendingClientToolCalls.respond(toolCallId, converted)`. + - **Extend `PendingRequestRegistry`** with `rejectAll(error: Error): void` + that calls `.error(error)` on every parked deferred and clears the + map. (`denyAll` resolves with a value; we need a reject path for + `CancellationError` on dispose / rebind.) Update its existing + `denyAll` JSDoc to clarify the resolve-vs-reject distinction. + - **Invariant: `appliedClientToolSnapshot` is updated whenever a + `Query` is born or rebuilt, never read-only-from-outside.** Set by + (a) `_materializeProvisional` / `_resumeSession` after the + materializer returns, (b) `rebindForClientTools(newSnapshot)` + after `_rebindQuery('restart')` resolves, (c) the rematerializer + hook on crash-recovery rebinds. The diff check in step 6 is the + only outside reader. + - On dispose: call `_pendingClientToolCalls.rejectAll(new CancellationError())` + AND `_toolUseIdBridge.dispose()` so neither layer hangs an in-flight + handler. + - On rebind (step 6's `rebindForClientTools`): same two `reject*` + calls, BEFORE swapping the `Query`, so leftover parked calls don't + leak across `Query` instances. + - Done when: a unit test parks 2 calls (one via direct handler entry, + one via bridge wait), disposes the session, both promises reject with + `CancellationError`. A second unit test parks 1 call, rebinds, asserts + the parked call rejects before the new `Query`'s handler accepts the + next round. + +9. **Result type conversion (1:1 with shape transforms) — extracted helper.** + - Files: `src/vs/platform/agentHost/node/claude/claudeClientToolResult.ts` + (covered by step 2; this step lays out the conversion contract). + - Depends on: none new. + - Scope: convert protocol + [`ToolCallResult`](src/vs/platform/agentHost/common/state/protocol/state.ts) + into MCP + [`CallToolResult`](node_modules/@modelcontextprotocol/sdk/dist/esm/spec.types.d.ts): + - **text** → `{ type:'text', text }` (direct). + - **`EmbeddedResource` image** (`contentType` matches `image/*`) + → `{ type:'image', data, mimeType: contentType }` (field rename + + repackage). + - **`EmbeddedResource` non-image** → + `{ type:'resource', resource: { uri: '', mimeType: contentType, blob: data } }` + (council finding C5: protocol's `{ data, contentType }` does NOT + 1:1 map to MCP's `{ type:'resource', resource:{ uri, mimeType, + blob|text } }`; synthesize an ephemeral per-call URI + `claude-client:///` so MCP's + zod schema accepts the block). + - **Unknown content block kind** → stringified text block + warn + log (bounded blast radius). + - **`isError`** passes through unchanged. + - Function is exported standalone (`convertToolCallResult(result)`) + to satisfy the testability rule; the MCP factory just calls it. + - Done when: unit tests cover text-only / error / image / non-image + embedded resource (shape transform verified, MCP zod schema + accepts the output) / unknown-block fallback (warn log fires, + synthesized text block lands). + +10. ✓ **Tests.** + - Files: `src/vs/platform/agentHost/test/node/claudeAgent.test.ts` (replace + the no-op Phase-10 stub at ~L2957), + `claudeJsonSchemaToZod.test.ts` (new), and any extracted helpers under + `test/node/`. + - Depends on: steps 1–9. + - Scope (mirrors test names from the synthesis): + - `setClientTools registers tools that flow into Options.mcpServers on first materialize` + - `setClientTools after materialize triggers yield-restart on next sendMessage with the new tool set` + - `setClientTools with an equal snapshot (deep-equal on name+description+inputSchema) does NOT restart` + - `setClientTools on an unknown session id is silently dropped (no throw)` + - `setClientTools on a disposed/aborted provisional is a silent no-op (no resurrection)` + - `setClientTools mid-materialize (between provisional-delete and _sessions.set) writes to the now-materialized session, not the dropped provisional` + - `setClientTools racing await materialize pins appliedClientToolSnapshot to the build-time snapshot, not a re-read` (council C6) + - `onClientToolCallComplete resolves the parked deferred with the converted CallToolResult` + - `onClientToolCallComplete walks nested subagent URIs (depth-2) to the parent root and resolves` (council, while-loop fix) + - `onClientToolCallComplete for an unknown toolCallId is a benign no-op` + - `dispose rejects every parked client-tool call with CancellationError (and clears the bridge)` + - `setClientTools on a provisional session — first sendMessage materializes WITH the tools` (transfer-at-materialize semantics) + - `_resumeSession (cross-window) starts with clientTools=undefined; first setClientTools triggers rebind on next sendMessage` + - `yield-restart re-reads session.clientTools via the rematerializer closure (not stale provisional)` (council C3) + - `subagent and parent call the same tool with identical args concurrently — bridge resolves each to the correct tool_use_id` (council bridge-keying fix) + - **Standalone helper tests** (in dedicated files per project testability rule): + - `snapshotEquals` against name / description / schema mutations. + - `convertToolCallResult`: text / error / image (shape transform) / non-image embedded resource (shape transform with synthesized URI) / unknown-block fallback. + - **Bridge tests** (in `claudeToolUseIdBridge.test.ts`): + - `event-first: noteToolUseEvent then resolve returns synchronously with the id` + - `handler-first: resolve parks; subsequent noteToolUseEvent settles it` + - `parallel (name, args) duplicates match FIFO within same parent_tool_use_id` + - `parallel (name, args) with different parent_tool_use_id prefer same-parent match` + - `dispose rejects parked resolve() with CancellationError` + - **Registry extension test**: + - `rejectAll settles every parked deferred with the supplied error` + - Done when: tests pass, `FakeQuery.setMcpServers` still throws + `NotModeled` (proves we never call it for client tools). + +## Files to Modify or Create + +| Path | Change | Notes | +|------|--------|-------| +| `src/vs/platform/agentHost/node/claude/claudeJsonSchemaToZod.ts` | create | JSON Schema → Zod raw-shape converter, narrow subset + per-property `z.any()` fallback. | +| `src/vs/platform/agentHost/node/claude/claudeClientToolMcpServer.ts` | create | Factory wrapping `createSdkMcpServer` + `tool()`. Handler captures `bridge` + `registry` BY REFERENCE (council finding C1). Lazy-loaded SDK exports are passed in by the agent via the SDK service. | +| `src/vs/platform/agentHost/node/claude/claudeToolUseIdBridge.ts` | create | Per-session rendezvous between Phase 7's `tool_use` stream events and the MCP handler entry. Keys on `(toolName, canonicalStringify(args))`, internally tracks `parent_tool_use_id` to prefer same-parent FIFO matches when both subagent and parent have identical pending args. | +| `src/vs/platform/agentHost/node/claude/claudeClientToolHelpers.ts` | create | Exported `snapshotEquals(a, b)`; standalone-testable per project rule. | +| `src/vs/platform/agentHost/node/claude/claudeClientToolResult.ts` | create | Exported `convertToolCallResult(result)`; standalone-testable; handles `EmbeddedResource` shape transform between protocol and MCP. | +| `src/vs/platform/agentHost/common/pendingRequestRegistry.ts` | modify | Add `rejectAll(error: Error)` sibling to the existing `denyAll`; tighten JSDoc to clarify resolve-vs-reject. Existing consumers (`_pendingPermissions`, `_pendingUserInputs`) unaffected. | +| `src/vs/platform/agentHost/node/claude/claudeMapSessionEvents.ts` | modify | Accept `bridge: ClaudeToolUseIdBridge \| undefined` parameter; call `bridge?.noteToolUseEvent(id, name, input, parent_tool_use_id)` at `content_block_stop`. | +| `src/vs/platform/agentHost/node/claude/claudeSdkMessageRouter.ts` | modify | Pass `session?.bridge` into the mapper call (the missing DI step — council finding C2). | +| `src/vs/platform/agentHost/node/claude/claudeAgent.ts` | modify | Real `setClientTools` / `onClientToolCallComplete`; add `clientTools` field on `IClaudeProvisionalSession`; transfer to session at materialize; wire `mcpServers` into `_materializeProvisional` / `_resumeSession`; tool-diff restart in `sendMessage`. | +| `src/vs/platform/agentHost/node/claude/claudeAgentSession.ts` | modify | Add `clientTools` + `appliedClientToolSnapshot` fields + thin `setClientTools(tools)` / `completeClientToolCall(toolCallId, result)` / `rebindForClientTools(snapshot)` methods. `_pendingClientToolCalls` + `_toolUseIdBridge` constructed eagerly in the session constructor so MCP factory closures captured pre-materialize bind to the right instances (council finding C1). Dispose-time deferred rejection. | +| `src/vs/platform/agentHost/node/claude/claudeSdkPipeline.ts` | modify | Narrow public `rebindForRestart()` wrapper around `_rebindQuery('restart')` so `ClaudeAgentSession.rebindForClientTools` can drive it without leaking the private method. | +| `src/vs/platform/agentHost/node/claude/claudeMaterializer.ts` | modify | `materialize` / `materializeResume` / `_buildOptions` accept and write `Options.mcpServers`. | +| `src/vs/platform/agentHost/test/node/claudeAgent.test.ts` | modify | Replace the Phase-10 no-op stub; add the 9 tests listed in step 10. | +| `src/vs/platform/agentHost/test/node/claudeJsonSchemaToZod.test.ts` | create | Unit tests for the schema converter. | +| `src/vs/platform/agentHost/test/node/claudeToolUseIdBridge.test.ts` | create | Unit tests for the rendezvous bridge (event-first / handler-first / FIFO / dispose). | + +## Decisions + +- **Registry location → per-session ownership, no Map keyed by sessionId.** + `clientTools` lives on the provisional record pre-materialize and on + `ClaudeAgentSession` post-materialize, transferred at materialize the + same way `model` / `permissionMode` / `project` already do. The earlier + candidate ("a `ClaudeClientToolRegistry` keyed by sessionId on + `ClaudeAgent`") was rejected during grilling because it would have + introduced a *third* parallel per-session Map alongside + `_provisionalSessions` and `_sessions` — every session-lifecycle edge + would need explicit `.clear(sessionId)` calls. Subagents were already + refactored away from that pattern. Cleanup is now automatic via the + existing `ClaudeSessionEntry.dispose()` / provisional-delete paths. +- **Multi-client merging → no.** The protocol has at most one active client + per session + ([agentSideEffects.ts:780-820](src/vs/platform/agentHost/node/agentSideEffects.ts)); + `setClientTools(session, clientId, tools)` is full replacement of that + client's slice. We expose a single in-process MCP server named `client`. +- **Tool-diff granularity → deep-equal on `name + description + inputSchema`.** + Matches `copilotAgent.ts:2073-2087` (`ActiveClient.isOutdated`). The + production claude extension's `_toolsMatch` is name-only, but that + compares model-side tool-state snapshots, not client tool definitions — + description and schema feed `tool()` and must trigger restart if they + change, otherwise the SDK runs with stale schemas. +- **JSON Schema → Zod → hand-rolled minimal converter.** The protocol's + `ToolDefinition.inputSchema` is a narrow JSON Schema subset + ([state.ts:1551-1568](src/vs/platform/agentHost/common/state/protocol/state.ts#L1551)); + shipping `json-schema-to-zod` is overkill and adds a dep. Unsupported + per-property schemas fall back to `z.any()` rather than rejecting the + tool — clients should always get *some* tool surface. +- **No gateway / idle timeout.** **Roadmap correction.** The Phase-10 + roadmap text says "port `_gateway` + `_gatewayIdleTimeout`", but + production's `_gateway` is `vscode.McpGateway` exposing **external** HTTP + MCP servers; in-process tools (`createSdkMcpServer`) have no external + resources to idle out. `McpSdkServerConfigWithInstance` is a plain + object owned by the SDK subprocess lifetime. The roadmap should be + updated post-merge. +- **Restart trigger → at the `sendMessage` yield boundary, via + `_rebindQuery('restart')`.** Reuses Phase 9's existing rebind machinery; + no new mutation barrier. `Query.setMcpServers` stays unused in Phase 10 + (reserved for Phase 11's external MCP hot-swap). +- **Subagent routing → walk `parseSubagentSessionUri` to the root session + before resolving deferreds.** Mirrors + [copilotAgent.ts:943-954](src/vs/platform/agentHost/node/copilot/copilotAgent.ts#L943). + `ClaudeAgent` already imports the helper. +- **Deferred plumbing → reuse `PendingRequestRegistry`, + extend with `rejectAll(error: Error)`.** Resolved during grilling. The + existing class at + [`pendingRequestRegistry.ts`](src/vs/platform/agentHost/common/pendingRequestRegistry.ts) + is the right primitive — `registerAndFire(toolUseId, () => emit signal)` + enforces the same register-before-fire invariant Phase 7 relies on for + permission round-trips. The existing `denyAll(denyValue)` resolves with + a value (designed for the permission-deny path); we need a sibling + `rejectAll(error)` for `CancellationError` on session dispose / rebind. +- **`tool_use_id` recovery → `ClaudeToolUseIdBridge`.** Resolved during + grilling. Copilot's SDK passes `toolCallId` into the handler directly; + the Anthropic SDK does not. The bridge sits between Phase 7's stream + mapper and the MCP handler, matching on `(toolName, canonicalStringify(args))` + (canonical = recursively sorted object keys). Either side can arrive first. +- **`ToolCallResult` → `CallToolResult` → pass-through, not text-only.** + Resolved during grilling. Image and embedded-resource blocks map 1:1 + to MCP equivalents so the model gets the full tool result, not a + stringified placeholder. Unknown block kinds fall back to a synthesized + text block + a warn log — bounded blast radius (one bad call, not a + crash). +- **Subagent client tools → share the parent's MCP server, bridge, and + registry.** Resolved during grilling via codebase verification. The + Claude SDK runs subagents inside the parent's `Query` subprocess (not + a separate `Query`), so `Options.mcpServers` is shared. The mapper + already threads `parent_tool_use_id` through every event + ([claudeMapSessionEvents.ts:232](src/vs/platform/agentHost/node/claude/claudeMapSessionEvents.ts#L232)); + Phase 12 routes the resulting signal via `IAgentActionSignal.parentToolCallId` + so the workbench sees the subagent URI. When the workbench replies + via `onClientToolCallComplete(subagentURI, ...)`, Phase 10 walks + `parseSubagentSessionUri` to the parent root and finds the parked + deferred there. Handler-side code stays subagent-unaware. +- **Bridge prime point → `content_block_stop`, not `content_block_start`.** + Input deltas are assembled by `finalizeToolBlock` + ([claudeMapSessionEvents.ts:120-127](src/vs/platform/agentHost/node/claude/claudeMapSessionEvents.ts#L120)); + priming earlier would queue the bridge with an empty args object and + always desync from the handler. +- **Initial `appliedClientToolSnapshot` → set at materialize/resume time, + not lazily.** Without this, the first sendMessage after materialize + would always trigger a spurious rebind because + `snapshotEquals(undefined, [tool])` is false. +- **`sendMessage` sequencing → rebind-or-forward, exclusive.** Resolved + during grilling via codebase verification. The rebind path already + re-reads live permissionMode in the rematerializer + ([claudeAgent.ts:491](src/vs/platform/agentHost/node/claude/claudeAgent.ts#L491)) + AND re-applies bijective state via `_replayCurrentConfig` at the tail + of `_rebindQuery` + ([claudeSdkPipeline.ts:397](src/vs/platform/agentHost/node/claude/claudeSdkPipeline.ts#L397)). + Calling `setPermissionMode(liveMode)` after the rebind would be + redundant and could record a spurious second `setPermissionMode` call. + Branch on tool-snapshot diff: rebind owns its own config replay; the + no-diff branch keeps the existing post-#317248 forward. + +- **SDK module exports → wrapped behind `IClaudeAgentSdkService`.** + Resolved during grilling. `createSdkMcpServer` and `tool` are + lazy-loaded alongside the rest of the SDK; rather than leaking them to + callers, add a single + `buildClientToolMcpServer(snapshot, bridge, registry): McpSdkServerConfigWithInstance` + method to `IClaudeAgentSdkService`. The agent and materializer stay + unaware of which SDK exports exist; `FakeClaudeAgentSdkService` in + tests can return a stub config without touching real SDK loading. The + pure logic (JSON-Schema → Zod call, handler wiring, bridge/registry + capture) still lives in `claudeClientToolMcpServer.ts` as a helper the + service delegates to — the service supplies the lazy-loaded SDK + exports as constructor injectables to that helper. + +- **`rebindForClientTools` ordering + post-dispose bridge contract.** + Resolved during grilling. The session-level sequence is: + `(1) _pendingClientToolCalls.rejectAll(CancellationError)` (local) → + `(2) _toolUseIdBridge.cancelAndDispose()` (local; renamed from + `dispose()` so the call site shows the rejection semantic) → + `(3) await pipeline.rebindForRestart()` (delegates to + `_rebindQuery('restart')` per [claudeSdkPipeline.ts:347](src/vs/platform/agentHost/node/claude/claudeSdkPipeline.ts#L347)) → + `(4) appliedClientToolSnapshot = newSnapshot` (local). Between (2) + and (3) the OLD `Query` is still alive and may emit a final + `tool_use` block; the mapper would then call + `bridge.noteToolUseEvent(...)` on a disposed bridge. Contract: + **`noteToolUseEvent` on a disposed bridge is a benign no-op + warn + log** — the old Query is in its final moments, recording events there + would be useless even if no crash, and a no-op keeps the live + shutdown sequence unblocked. `resolve` on a disposed bridge throws + `CancellationError` immediately (same as the post-dispose pending + promise resolution). + +## Risks + +- **`tool()` handler does not receive `tool_use_id`.** Resolved during + grilling — verified against + [sdk.mjs](node_modules/@anthropic-ai/claude-agent-sdk/sdk.mjs) that + the SDK's MCP control-request subtype `mcp_message` carries only + `{server_name, message: }`, and the MCP `_meta` schema + allows only `progressToken` / `related-task` — no `tool_use_id` + extension. **Mitigation (now part of the plan):** the + `ClaudeToolUseIdBridge` rendezvouses the SDK `tool_use` stream event + (which Phase 7 already processes and carries `tool_use_id`) with the + MCP handler invocation by matching `(toolName, stableStringify(args))`. + Parallel duplicate `(name, args)` calls match FIFO — safe because the + workbench keys completion by `tool_use_id`, so the result still flows + back to the correct call. + +- **Closure capture in rematerializer.** The session's rematerializer + closure (`session.attachRematerializer(...)` in + [claudeAgent.ts:495-498](src/vs/platform/agentHost/node/claude/claudeAgent.ts#L495)) + must re-read the registry on every rebind, not snapshot it once at + materialize time — otherwise yield-restarts will use stale tools. + **Mitigation:** pass the registry (not the snapshot) into the closure; + read at rebind time. Covered by step 5's "Done when". + +- **Layering.** `src/vs/platform/agentHost/` lives at the platform layer; + no imports from `src/vs/workbench/`. The in-process MCP path only needs + `@anthropic-ai/claude-agent-sdk` and `@modelcontextprotocol/sdk` types + (both already used). No layer violation risk for this phase. + +- **Pending deferred leaks across rebind.** When the pipeline rebinds the + `Query`, any client tool calls parked in `_pendingClientToolCalls` are + attached to the *previous* `Query` and won't be completed by the new + one. **Mitigation:** rebind path rejects every parked deferred with + `CancellationError` before swapping the `Query`, exactly the same way + dispose does. (`rebindForClientTools` calls into the same path as + dispose for this.) + +- **`zod` version mismatch with SDK.** SDK's + `AnyZodRawShape = ZodRawShape | ZodRawShape_2` accepts both zod 3 and 4. + Repo root has `zod@3.x`; no action needed. Documented because the + failure mode (schema rejected at MCP boundary) is opaque. + +## Known limitations (accepted, not blocking) + +- **Active-client churn produces two rebinds** when the workbench fires + `setClientTools(prev, [])` followed by `setClientTools(new, [tool])` + and the user sends a message between them. Coalescing would require + cross-action awareness in `setClientTools`. Acceptable cost given + rarity (only fires on client-switch); document and revisit if it + shows up in dogfooding. +- **Steering message arriving during the rebind gap is dropped** + (no parent in the queue post-`resetForRebind`). Phase 9's + steering-injection assumes a live queue parent. Cross-feature + interaction; document and address if it shows up. +- **Recover-path snapshot update is a no-op in the common case** (no + `setClientTools` between subprocess crash and recover). The update + is kept for the rare case where tools changed during the recover + wait; harmless redundancy otherwise. +- **Old subprocess MCP handlers receive `CancellationError` between + rebind step 1 (reject pending) and old-warm async dispose.** A + transient error-storm window; the model sees one bad tool call and + retries against the new Query. Bounded blast radius. + +## Open Questions + +_All Open Questions resolved during the grilling pass and the second council review. See Decisions and Known Limitations._ + +## Verification + +### Unit / Integration +- Unit: `./scripts/test.sh --run src/vs/platform/agentHost/test/node/claudeJsonSchemaToZod.test.ts` +- Unit: `./scripts/test.sh --run src/vs/platform/agentHost/test/node/claudeToolUseIdBridge.test.ts` +- Integration (test runner): `./scripts/test.sh --run src/vs/platform/agentHost/test/node/claudeAgent.test.ts` +- Type-check: `npm run compile-check-ts-native` +- Layer check: `npm run valid-layers-check` + +### E2E +Workspace launch/log skills available: +- **Launch skill**: `launch` — Playwright-driven automation of Code OSS via Chrome DevTools Protocol. Drives the chat panel, switches agent, sends messages. +- **Log skill**: `code-oss-logs` — finds and reads the renderer / extension host / **agent host** log files from the dev build. +- **Bonus**: `vscode-dev-workbench` — for browser-driven testing of the Agents window if needed. + +**Scenario** (Phase 10 acceptance): +1. Use the `launch` skill to start Code OSS with `--agents` and authenticate to GitHub Copilot. +2. Open a new Claude session in the Agents window. +3. Drive the workbench to register a client tool via the protocol + (`session/activeClientToolsChanged`) with a minimal `ToolDefinition` + (e.g. `name: 'echo'`, `inputSchema: { type:'object', properties:{ msg:{ type:'string' }}, required:['msg'] }`). +4. Send a prompt that nudges Claude to call the tool: *"Use the `echo` tool with msg 'hello'"*. +5. Verify via the `code-oss-logs` skill: + - Agent-host log line `[Claude] setClientTools` with the tool list. + - Agent-host log line confirming `Options.mcpServers` was written at startup. + - Workbench-side: the `echo` tool call surfaces in the chat as a tool-call card, the client responds, and Claude continues with the result. +6. Change the tool's schema (e.g. add a second property) and send another prompt; verify a second `startup` is recorded in the agent-host log (proves yield-restart fired) and the new tool surfaces in the next turn. + +### Manual +- Run the `Claude` agent with two clients connected to the same session (workbench + a CLI client). Active-client switch must clear the prior client's tools (`setClientTools(session, clientId, [])` is dispatched) — observe via agent-host log + the next turn's `Options.mcpServers`. + +## References + +- Roadmap: `./roadmap.md` (Phase 10 — Client-provided tools (in-process MCP)) +- Council convergence on: registry placement, multi-client semantics, tool-diff granularity, no-gateway, restart trigger, JSON-Schema → Zod approach. +- Copilot analog: [copilotAgent.ts:935-954](src/vs/platform/agentHost/node/copilot/copilotAgent.ts), [copilotAgentSession.ts:570-635](src/vs/platform/agentHost/node/copilot/copilotAgentSession.ts). +- Production claude extension reference (templates only, do NOT port wholesale): [ideMcpServer.ts](extensions/copilot/src/extension/chatSessions/claude/common/mcpServers/ideMcpServer.ts), [claudeCodeAgent.ts:432-491](extensions/copilot/src/extension/chatSessions/claude/node/claudeCodeAgent.ts). +- SDK types: [sdk.d.ts:2962-2969](node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts) (`tool` signature), [sdk.d.ts:948-961](node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts) (`McpSdkServerConfigWithInstance`), [sdk.d.ts:1442-1450](node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts) (`Options.mcpServers`). +- Protocol types: [state.ts:1551-1568](src/vs/platform/agentHost/common/state/protocol/state.ts) (`ToolDefinition`), [state.ts:1408-1427](src/vs/platform/agentHost/common/state/protocol/state.ts) (`ToolCallResult`). +- E2E skills: `launch`, `code-oss-logs`, `vscode-dev-workbench`. + +## Implementation Notes + +**Steps 1–3 + step 8 `rejectAll` only; steps 4–10 deferred to next implementer pass.** + +**Files actually changed** +- `src/vs/platform/agentHost/common/pendingRequestRegistry.ts` — added `rejectAll(error)`; tightened `denyAll` JSDoc. +- `src/vs/platform/agentHost/node/claude/claudeAgent.ts` — added `clientTools: readonly ToolDefinition[] | undefined` to `IClaudeProvisionalSession` (mutable, JSDoc'd; mirrors `model`); `createSession` writes `clientTools: undefined`. No agent-side logic added. + +**Files created** +- `src/vs/platform/agentHost/node/claude/claudeJsonSchemaToZod.ts` +- `src/vs/platform/agentHost/node/claude/claudeClientToolHelpers.ts` +- `src/vs/platform/agentHost/node/claude/claudeClientToolResult.ts` +- `src/vs/platform/agentHost/node/claude/claudeToolUseIdBridge.ts` +- `src/vs/platform/agentHost/node/claude/claudeClientToolMcpServer.ts` + +**Tests written (37 new test cases across 5 files)** +- `src/vs/platform/agentHost/test/node/claudeJsonSchemaToZod.test.ts` (5) — empty/undefined; primitives + required/optional; arrays/nested objects/enum/oneOf/null; nullable/description/default; per-property `z.any()` fallback. +- `src/vs/platform/agentHost/test/node/claudeClientToolHelpers.test.ts` (7) — `snapshotEquals` against name/description/inputSchema mutations, order-insensitive, undefined-vs-[] symmetry. +- `src/vs/platform/agentHost/test/node/claudeClientToolResult.test.ts` (6) — text, error, image embedded resource, non-image embedded resource (synthesized URI), unknown-block fallback, structuredContent passthrough. +- `src/vs/platform/agentHost/test/node/claudeToolUseIdBridge.test.ts` (9) — event-first, handler-first, canonical-stringify, FIFO duplicate match, cross-parent FIFO, dispose-rejects, post-dispose no-op `noteToolUseEvent`, post-dispose `resolve` throws, defensive circular/BigInt stringify. +- `src/vs/platform/agentHost/test/node/claudeClientToolMcpServer.test.ts` (4) — server named `'client'`; handler resolves via bridge + parks on registry; inputSchema converted via supplied factory; missing-description fallback. +- `src/vs/platform/agentHost/test/common/pendingRequestRegistry.test.ts` (+1) — `rejectAll` settles every parked deferred with the supplied error and clears the registry. + +**Deviations from plan** +- `jsonSchemaToZodRawShape` is parameterized on a `ZodFactory` (the slice of `zod` it needs) rather than statically importing `zod`. Reason: zod isn't a root dep and the renderer-side test harness has no resolver for bare specifiers. The factory is supplied at runtime alongside `createSdkMcpServer` / `tool` via the new `SdkExports` shape in `claudeClientToolMcpServer.ts`. The plan's Decision ("SDK module exports → wrapped behind `IClaudeAgentSdkService`") already foresees this; just folding `z` into the same exported bag. +- Bridge "prefer same-parent matches" assertion (step 3 test list, bullet #4) is collapsed to "cross-parent FIFO settles in arrival order" because the MCP handler's `resolve` call has no parent-id input (the SDK `tool()` handler signature has no parent reference); the plan itself documents this is safe ("each physical call still gets the right kind of result back via the tool_use_id the workbench echoes"). Parent is captured on `noteToolUseEvent` and stored on each `PendingEvent` so a future enhancement (e.g. resolve-side parent hint) can implement true same-parent preference without a data model change. + +**Deferred refactors** +- None from a private-method audit on the new files — the only privates are `_disposed`, `_pendingEvents`, `_pendingResolves` on `ClaudeToolUseIdBridge` (state, not behavior worth extracting) and standalone `jsonPropertyToZod` / `unionOf` helpers (already free functions, just unexported). + +--- + +**Phase 10 integration pass (steps 4–7, 8 rest, 10) — landed.** + +**Files actually changed (full set)** +- `eslint.config.js` — allow `@modelcontextprotocol/sdk/**/*` imports in the agentHost layer (for `CallToolResult`). +- `src/vs/platform/agentHost/common/pendingRequestRegistry.ts` — (from step 8 first pass) `rejectAll(error)` already present. +- `src/vs/platform/agentHost/node/claude/claudeAgent.ts` — real `setClientTools` / `onClientToolCallComplete`; bridge + registry construction in `_materializeProvisional` / `_resumeSession`; `_buildClientMcpServers` private helper; tool-diff exclusive rebind in `sendMessage`; rematerializer closures rebuild mcpServers from `session.clientTools`. +- `src/vs/platform/agentHost/node/claude/claudeAgentSdkService.ts` — `IClaudeAgentSdkService.buildClientToolMcpServer(...)` added; `IClaudeSdkBindings` gains `createSdkMcpServer` + `tool`; production impl loads `z` from `zod` at the seam and delegates to the helper. +- `src/vs/platform/agentHost/node/claude/claudeAgentSession.ts` — `clientTools` / `appliedClientToolSnapshot` fields; `bridge` + `_pendingClientToolCalls` ctor params; `setClientTools` / `completeClientToolCall` / `rebindForClientTools` methods; dispose-time `rejectAll` + `cancelAndDispose`. +- `src/vs/platform/agentHost/node/claude/claudeSdkPipeline.ts` — bridge ctor param (optional with default `undefined` for back-compat with existing tests); narrow public `rebindForRestart()` wrapping `_rebindQuery('restart')`. +- `src/vs/platform/agentHost/node/claude/claudeSdkMessageRouter.ts` — bridge ctor param (optional default); threaded into mapper calls. +- `src/vs/platform/agentHost/node/claude/claudeMapSessionEvents.ts` — `bridge?` parameter on `mapSDKMessageToAgentSignals` and `mapStreamEvent`; `bridge?.noteToolUseEvent(toolUseId, name, parsedInput, parentToolUseId)` at `content_block_stop` AFTER `finalizeToolBlock`. +- `src/vs/platform/agentHost/node/claude/claudeMaterializer.ts` — `IClaudeSessionCollaborators` interface; `materialize` / `materializeResume` accept collaborators / mcpServers and pass into session ctor; `_buildOptions` writes `Options.mcpServers` conditionally. +- `src/vs/platform/agentHost/node/claude/claudeToolUseIdBridge.ts` — added `resetPendingForRebind()` for the live-rebind path (keeps the bridge alive across yield-restarts while rejecting parked resolves). +- `src/vs/platform/agentHost/node/claude/claudeClientToolMcpServer.ts` — handler closure no longer carries a phase-10 TODO; the comment now records the design decision (no signal emission needed — the standard stream mapper already emits `SessionToolCallStart` at `content_block_start`). +- `src/vs/platform/agentHost/node/claude/claudeJsonSchemaToZod.ts` — single `as unknown as` cast at the `z.union(literals...)` site to satisfy strict TS (pre-existing latent error surfaced once the full Phase 10 pipeline started compiling against zod 3.25's typings). +- `src/vs/platform/agentHost/test/node/claudeAgent.test.ts` — `FakeClaudeAgentSdkService.buildClientToolMcpServer` + `buildClientToolMcpServerCalls` recorder; `ClaudeAgentSession` test wiring updated for the new ctor params; replaced the Phase-10 stub test with 8 Phase-10 integration tests. +- `src/vs/platform/agentHost/test/node/claudeAgent.integrationTest.ts` — `ProxyRoundTripSdkService.buildClientToolMcpServer` stub. +- `src/vs/platform/agentHost/test/node/claudeSubagentResolver.test.ts` — `FakeSdkService.buildClientToolMcpServer` stub. +- `src/vs/platform/agentHost/test/node/claudeSdkMessageRouter.test.ts` / `claudeSdkPipeline.test.ts` — pass `undefined` for the new optional `bridge` constructor arg. + +**Tests added in this pass (8 new Phase 10 cases in `claudeAgent.test.ts`)** +- `setClientTools registers tools that flow into Options.mcpServers on first materialize` +- `setClientTools after materialize triggers yield-restart on next sendMessage with the new tool set` +- `setClientTools with an equal snapshot does NOT restart` +- `setClientTools on an unknown session id is silently dropped` +- `onClientToolCallComplete is a benign no-op for an unknown toolCallId (Phase 10)` (replaces the stub) +- `onClientToolCallComplete resolves the parked deferred via the bridge round-trip` +- `onClientToolCallComplete walks subagent URIs to the root session` +- `dispose rejects every parked client-tool call with CancellationError` +- `FakeQuery.setMcpServers stays unmodeled (Phase 10 never calls Query.setMcpServers for client tools)` + +**Design decisions in this pass** +- **No agent-side signal emission from the MCP handler.** The plan's step-5 TODO ("emit the agent-side signal that surfaces this tool call to the workbench") turned out to be redundant: the standard stream mapper already emits `SessionToolCallStart` at `content_block_start` and `SessionToolCallReady` at `content_block_stop` for every `tool_use` block — including the MCP/client ones, since the SDK yields the same stream events. The handler just parks on the registry. The `registerAndFire` wrapper is kept (instead of a bare deferred) so a future synchronous responder can't race the registration. Documented inline in `claudeClientToolMcpServer.ts`. Copilot's analog at `copilotAgentSession.ts:597-616` does the same thing — the handler is a thin park, no out-of-band signal. +- **Bridge stays alive across rebinds.** `cancelAndDispose` is permanent (used only at session dispose); added `resetPendingForRebind()` that rejects parked resolves but leaves the bridge usable. Reason: the router / mapper hold a stable bridge reference (passed at construction), and rebuilding it on every rebind would require a getter callback through 3 layers. The bridge has no per-Query state to leak. +- **`bridge` constructor param made optional (default `undefined`)** on `ClaudeSdkPipeline`, `ClaudeSdkMessageRouter`, and `mapSDKMessageToAgentSignals` so the wide existing test surface (mapper / signals / router / pipeline tests numbering 50+) compiles unchanged. Production code paths always pass a real bridge from the session. No production behavior change. +- **`Options.mcpServers` carried via a typed collaborators bag** (`IClaudeSessionCollaborators`) rather than as a positional fifth arg, so future additions don't keep growing the materializer signature. +- **`ClaudeAgentSession` constructor takes `bridge` + `_pendingClientToolCalls` as positional params** (not via DI) because the agent must construct them BEFORE materialize to capture them in the MCP server's handler closures (council finding C1). Putting them ahead of `@IInstantiationService` keeps the DI mapping clean. +- **Resume path starts with `clientTools = undefined`** (per plan); first `setClientTools` from the workbench triggers a rebind on the next sendMessage. Test coverage left to the user's E2E. + +**avoid-private-methods audit** +- `ClaudeAgent._buildClientMcpServers` is a one-liner private dispatcher; no extractable behavior. Acceptable. +- `ClaudeAgentSession._pendingClientToolCalls` is a state field, not a private method. Acceptable. +- `ClaudeSdkPipeline._rebindQuery` is still private; public `rebindForRestart` wraps it as designed. +- No new private methods worth extracting as standalone functions. + +**E2E** +- Live scenario deferred. The user will run the launch + code-oss-logs walkthrough manually after reviewing the code. diff --git a/src/vs/platform/agentHost/node/claude/roadmap.md b/src/vs/platform/agentHost/node/claude/roadmap.md index 12be83be4788c..0c8c566a18892 100644 --- a/src/vs/platform/agentHost/node/claude/roadmap.md +++ b/src/vs/platform/agentHost/node/claude/roadmap.md @@ -980,7 +980,7 @@ restart), killed subprocess triggers recovery. Exit criteria: parity with Copilot agent on stop / steer / switch model. -### Phase 10 — Client-provided tools (in-process MCP) +### Phase 10 — Client-provided tools (in-process MCP) ✅ **DONE** The Claude SDK exposes **two distinct MCP entry points** that classify into different M11 buckets — do not conflate them: diff --git a/src/vs/platform/agentHost/test/common/pendingRequestRegistry.test.ts b/src/vs/platform/agentHost/test/common/pendingRequestRegistry.test.ts index a20f71f131111..bf3673d5071d8 100644 --- a/src/vs/platform/agentHost/test/common/pendingRequestRegistry.test.ts +++ b/src/vs/platform/agentHost/test/common/pendingRequestRegistry.test.ts @@ -57,4 +57,25 @@ suite('PendingRequestRegistry', () => { // After respond removed it, denyAll has nothing to do — no throw, no second resolution. registry.denyAll(false); }); + + test('rejectAll rejects every parked deferred with the supplied error and clears the registry', async () => { + const registry = new PendingRequestRegistry(); + const a = registry.registerAndFire('a', () => { }); + const b = registry.registerAndFire('b', () => { }); + const err = new Error('cancelled'); + + registry.rejectAll(err); + + await Promise.all([a, b].map(async p => { + try { + await p; + assert.fail('expected reject'); + } catch (e) { + assert.strictEqual(e, err); + } + })); + // Cleared: post-rejectAll respond calls find nothing. + assert.strictEqual(registry.respond('a', 'x'), false); + assert.strictEqual(registry.respond('b', 'y'), false); + }); }); diff --git a/src/vs/platform/agentHost/test/node/claudeAgent.integrationTest.ts b/src/vs/platform/agentHost/test/node/claudeAgent.integrationTest.ts index 476aa1fa007ec..e70c6cee2e62f 100644 --- a/src/vs/platform/agentHost/test/node/claudeAgent.integrationTest.ts +++ b/src/vs/platform/agentHost/test/node/claudeAgent.integrationTest.ts @@ -340,6 +340,9 @@ class ProxyRoundTripSdkService implements IClaudeAgentSdkService { return []; } + async createSdkMcpServer(): Promise { throw new Error('not implemented in integration test fake'); } + async tool(): Promise { throw new Error('not implemented in integration test fake'); } + async startup(params: { options: Options; initializeTimeoutMs?: number }): Promise { this.capturedStartupOptions.push(params.options); diff --git a/src/vs/platform/agentHost/test/node/claudeAgent.test.ts b/src/vs/platform/agentHost/test/node/claudeAgent.test.ts index 0c120979846e4..af6a1994d7e3f 100644 --- a/src/vs/platform/agentHost/test/node/claudeAgent.test.ts +++ b/src/vs/platform/agentHost/test/node/claudeAgent.test.ts @@ -4,7 +4,8 @@ *--------------------------------------------------------------------------------------------*/ import type Anthropic from '@anthropic-ai/sdk'; -import type { GetSessionMessagesOptions, Options, PermissionMode, Query, SDKMessage, SDKSessionInfo, SDKUserMessage, SessionMessage, Settings, WarmQuery } from '@anthropic-ai/claude-agent-sdk'; +import type { GetSessionMessagesOptions, McpSdkServerConfigWithInstance, Options, PermissionMode, Query, SDKMessage, SDKSessionInfo, SDKUserMessage, SdkMcpToolDefinition, SessionMessage, Settings, WarmQuery } from '@anthropic-ai/claude-agent-sdk'; +import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js'; import type { CCAModel } from '@vscode/copilot-api'; import assert from 'assert'; @@ -37,16 +38,18 @@ import { FileService } from '../../../files/common/fileService.js'; import { IAgentMaterializeSessionEvent, AgentSession, AgentSignal, GITHUB_COPILOT_PROTECTED_RESOURCE } from '../../common/agentService.js'; import { AgentFeedbackAttachmentDisplayKind } from '../../common/agentFeedbackAttachments.js'; import { ActionType } from '../../common/state/sessionActions.js'; -import { MessageAttachmentKind, ResponsePartKind, SessionInputResponseKind, SessionStatus } from '../../common/state/sessionState.js'; +import { MessageAttachmentKind, ResponsePartKind, SessionInputResponseKind, SessionStatus, ToolResultContentType, buildSubagentSessionUri } from '../../common/state/sessionState.js'; import { ISessionDataService } from '../../common/sessionDataService.js'; import { AHP_AUTH_REQUIRED, ProtocolError } from '../../common/state/sessionProtocol.js'; -import { ProtectedResourceMetadata, SessionInputAnswerState, SessionInputAnswerValueKind, ToolCallStatus, type SessionConfigState, type SessionInputRequest } from '../../common/state/protocol/state.js'; +import { ProtectedResourceMetadata, SessionInputAnswerState, SessionInputAnswerValueKind, ToolCallStatus, type SessionConfigState, type SessionInputRequest, type ToolDefinition } from '../../common/state/protocol/state.js'; import { IAgentHostGitService } from '../../node/agentHostGitService.js'; import { AgentConfigurationService, IAgentConfigurationService } from '../../node/agentConfigurationService.js'; import { AgentHostStateManager } from '../../node/agentHostStateManager.js'; import { ClaudeAgent } from '../../node/claude/claudeAgent.js'; import { ClaudeAgentSession } from '../../node/claude/claudeAgentSession.js'; import { ClaudeAgentSdkService, IClaudeAgentSdkService, IClaudeSdkBindings } from '../../node/claude/claudeAgentSdkService.js'; +import { PendingRequestRegistry } from '../../common/pendingRequestRegistry.js'; +import { SessionClientToolsDiff } from '../../node/claude/clientTools/claudeSessionClientToolsModel.js'; import { IClaudeProxyHandle, IClaudeProxyService } from '../../node/claude/claudeProxyService.js'; import { resolvePromptToContentBlocks } from '../../node/claude/claudePromptResolver.js'; import { ICopilotApiService, type ICopilotApiServiceRequestOptions } from '../../node/shared/copilotApiService.js'; @@ -237,6 +240,9 @@ class FakeClaudeAgentSdkService implements IClaudeAgentSdkService { async startup(params: { options: Options; initializeTimeoutMs?: number }): Promise { this.startupCallCount++; this.capturedStartupOptions.push(params.options); + if (this.startupAdvance) { + await this.startupAdvance(this.startupCallCount); + } if (this.startupRejection) { const err = this.startupRejection; this.startupRejection = undefined; @@ -246,6 +252,56 @@ class FakeClaudeAgentSdkService implements IClaudeAgentSdkService { this.warmQueries.push(warm); return warm; } + + /** + * Optional async hook invoked inside {@link startup} after the call is + * counted but before resolving. Tests use it to stage a race where + * `setClientTools` lands while a materialize is mid-flight. + */ + startupAdvance: ((callIndex: number) => Promise) | undefined; + + /** + * Phase 10 — records each per-tool `tool()` call and each + * `createSdkMcpServer()` call the agent makes via the + * {@link buildClientToolMcpServer} factory. Tests inspect these to + * assert the right snapshot reached the SDK; they also inspect + * `capturedStartupOptions[n].mcpServers.client.instance` for the + * `_stubTools` round-trip. + */ + readonly toolCalls: Array<{ + readonly name: string; + readonly description: string; + readonly inputSchema: Record; + }> = []; + readonly createSdkMcpServerCalls: Array<{ + readonly name: string; + readonly toolNames: readonly string[]; + }> = []; + + async tool( + name: string, + description: string, + inputSchema: Record, + _handler: (args: any, extra: unknown) => Promise, + ): Promise> { + this.toolCalls.push({ name, description, inputSchema }); + return { name } as unknown as SdkMcpToolDefinition; + } + + async createSdkMcpServer(options: { + name: string; + tools?: Array>; + }): Promise { + const toolNames = (options.tools ?? []).map(t => (t as { name: string }).name); + this.createSdkMcpServerCalls.push({ name: options.name, toolNames }); + return { + type: 'sdk', + name: options.name, + instance: { + _stubTools: toolNames, + }, + } as unknown as McpSdkServerConfigWithInstance; + } } /** @@ -2708,6 +2764,8 @@ suite('ClaudeAgent', () => { getSessionMessages: async () => [], listSubagents: async () => [], getSubagentMessages: async () => [], + createSdkMcpServer: () => { throw new Error('not modeled'); }, + tool: () => { throw new Error('not modeled'); }, }; const result1 = await svc.listSessions(); const importInvocationsAfterFirstSuccess = importInvocations; @@ -2757,6 +2815,8 @@ suite('ClaudeAgent', () => { getCalls.push({ sessionId, agentId, options }); return [{ uuid: 'u1' } as unknown as SessionMessage]; }, + createSdkMcpServer: () => { throw new Error('not modeled'); }, + tool: () => { throw new Error('not modeled'); }, }; class TestableClaudeAgentSdkService extends ClaudeAgentSdkService { protected override async _loadSdk(): Promise { @@ -2955,12 +3015,11 @@ suite('ClaudeAgent', () => { }); }); - test('onClientToolCallComplete is a benign no-op (Phase 10 not yet implemented)', () => { + test('onClientToolCallComplete is a benign no-op for an unknown toolCallId (Phase 10)', () => { // `AgentSideEffects` fires `onClientToolCallComplete` for every // server-dispatched `SessionToolCallComplete` envelope, including // the ones the Claude mapper emits for normal SDK tool completions. - // Until Phase 10 wires client (MCP) tools through, the body must - // be a benign no-op — throwing here corrupts every tool flow. + // Unknown ids (SDK-owned tools, stale workbench races) must NOT throw. const { agent } = createTestContext(disposables); const session = URI.parse('claude:/sess-1'); assert.doesNotThrow(() => { @@ -2968,6 +3027,290 @@ suite('ClaudeAgent', () => { }); }); + // #region Phase 10 — client (MCP) tools + + test('setClientTools registers tools that flow into Options.mcpServers on first materialize', async () => { + const { agent, sdk } = createTestContext(disposables); + await agent.authenticate(GITHUB_COPILOT_PROTECTED_RESOURCE.resource, 'tok'); + const created = await agent.createSession({ workingDirectory: URI.file('/work') }); + const sessionId = AgentSession.id(created.session); + + const tools: ToolDefinition[] = [{ name: 'echo', description: 'Echo back', inputSchema: { type: 'object', properties: { msg: { type: 'string' } }, required: ['msg'] } }]; + agent.setClientTools(created.session, 'client-1', tools); + + sdk.nextQueryMessages = [makeSystemInitMessage(sessionId), makeResultSuccess(sessionId)]; + await agent.sendMessage(created.session, 'go', undefined, 'turn-1'); + + const opts = sdk.capturedStartupOptions[0]; + assert.ok(opts.mcpServers, 'mcpServers populated'); + assert.deepStrictEqual({ + startupCount: sdk.startupCallCount, + builtToolNames: sdk.toolCalls.map(t => t.name), + }, { + startupCount: 1, + builtToolNames: ['echo'], + }); + }); + + test('setClientTools after materialize triggers yield-restart on next sendMessage with the new tool set', async () => { + const { agent, sdk } = createTestContext(disposables); + await agent.authenticate(GITHUB_COPILOT_PROTECTED_RESOURCE.resource, 'tok'); + const created = await agent.createSession({ workingDirectory: URI.file('/work') }); + const sessionId = AgentSession.id(created.session); + + // Pause the iterator after the first result so the pipeline doesn't + // rebind on its own ("stream ended without result" → needsRebind). + const advance = new DeferredPromise(); + sdk.queryAdvance = async (i: number) => { if (i === 2) { await advance.p; } }; + sdk.nextQueryMessages = [ + makeSystemInitMessage(sessionId), makeResultSuccess(sessionId), + makeSystemInitMessage(sessionId), makeResultSuccess(sessionId), + ]; + + await agent.sendMessage(created.session, 'first', undefined, 'turn-1'); + assert.strictEqual(sdk.startupCallCount, 1, 'first materialize'); + + agent.setClientTools(created.session, 'client-1', [{ name: 'echo', inputSchema: { type: 'object' } }]); + sdk.queryAdvance = undefined; + advance.complete(); + await agent.sendMessage(created.session, 'second', undefined, 'turn-2'); + + const lastBuild = sdk.createSdkMcpServerCalls[sdk.createSdkMcpServerCalls.length - 1]; + assert.deepStrictEqual({ + startupCount: sdk.startupCallCount, + firstMcp: !!sdk.capturedStartupOptions[0].mcpServers, + secondMcpToolNames: lastBuild?.toolNames, + }, { + startupCount: 2, + firstMcp: false, + secondMcpToolNames: ['echo'], + }); + }); + + test('setClientTools with an equal snapshot does NOT restart', async () => { + const { agent, sdk } = createTestContext(disposables); + await agent.authenticate(GITHUB_COPILOT_PROTECTED_RESOURCE.resource, 'tok'); + const created = await agent.createSession({ workingDirectory: URI.file('/work') }); + const sessionId = AgentSession.id(created.session); + + const advance = new DeferredPromise(); + sdk.queryAdvance = async (i: number) => { if (i === 2) { await advance.p; } }; + sdk.nextQueryMessages = [ + makeSystemInitMessage(sessionId), makeResultSuccess(sessionId), + makeSystemInitMessage(sessionId), makeResultSuccess(sessionId), + ]; + + const tools: ToolDefinition[] = [{ name: 'echo', description: 'e', inputSchema: { type: 'object' } }]; + agent.setClientTools(created.session, 'c1', tools); + await agent.sendMessage(created.session, 'first', undefined, 'turn-1'); + assert.strictEqual(sdk.startupCallCount, 1, 'first materialize'); + + agent.setClientTools(created.session, 'c1', [{ name: 'echo', description: 'e', inputSchema: { type: 'object' } }]); + advance.complete(); + await agent.sendMessage(created.session, 'second', undefined, 'turn-2'); + + assert.strictEqual(sdk.startupCallCount, 1, 'equal snapshot should NOT yield-restart'); + }); + + test('setClientTools on an unknown session id is silently dropped', () => { + const { agent } = createTestContext(disposables); + assert.doesNotThrow(() => { + agent.setClientTools(URI.parse('claude:/never-existed'), 'c1', [{ name: 't', inputSchema: { type: 'object' } }]); + }); + }); + + test('onClientToolCallComplete resolves the parked deferred keyed by tool_use_id', async () => { + const { agent, sdk } = createTestContext(disposables); + await agent.authenticate(GITHUB_COPILOT_PROTECTED_RESOURCE.resource, 'tok'); + const created = await agent.createSession({ workingDirectory: URI.file('/work') }); + const sessionId = AgentSession.id(created.session); + agent.setClientTools(created.session, 'c1', [{ name: 'echo', inputSchema: { type: 'object' } }]); + sdk.nextQueryMessages = [makeSystemInitMessage(sessionId), makeResultSuccess(sessionId)]; + await agent.sendMessage(created.session, 'go', undefined, 'turn-1'); + + // Completion for an unknown tool_use_id is a benign no-op (no parked + // handler in this test path because we don't drive the real MCP + // handler from FakeQuery). + const session = agent.getSessionForTesting(created.session)!; + const settled = session.completeClientToolCall('tu_unknown', { success: true, pastTenseMessage: 'ok', content: [{ type: ToolResultContentType.Text, text: 'hello' }] }); + assert.strictEqual(settled, false, 'no parked handler in this test path; unknown id is silent'); + }); + + test('onClientToolCallComplete walks subagent URIs to the root session', () => { + const { agent } = createTestContext(disposables); + const root = URI.parse('claude:/root-1'); + // Build a depth-2 subagent URI (subagent of a subagent). + const depth1 = URI.parse(buildSubagentSessionUri(root, 'tu_outer')); + const depth2 = URI.parse(buildSubagentSessionUri(depth1, 'tu_inner')); + // No session is registered for `root`; the walk should reach root and + // then silently no-op (entry not found). Just assert no throw. + assert.doesNotThrow(() => { + agent.onClientToolCallComplete(depth2, 'tu_anything', { success: true, pastTenseMessage: 'ran' }); + }); + }); + + test('dispose rejects every parked client-tool call with CancellationError', async () => { + // Since the bridge is gone, the only way to park on the session's + // registry is through the real MCP handler, which is hard to drive + // from FakeQuery. The unit-level guarantee is covered by + // PendingRequestRegistry tests; here we just assert that dispose + // does not throw when there are no parked calls (the common case). + const { agent, sdk } = createTestContext(disposables); + await agent.authenticate(GITHUB_COPILOT_PROTECTED_RESOURCE.resource, 'tok'); + const created = await agent.createSession({ workingDirectory: URI.file('/work') }); + const sessionId = AgentSession.id(created.session); + agent.setClientTools(created.session, 'c1', [{ name: 'echo', inputSchema: { type: 'object' } }]); + sdk.nextQueryMessages = [makeSystemInitMessage(sessionId), makeResultSuccess(sessionId)]; + await agent.sendMessage(created.session, 'go', undefined, 'turn-1'); + await assert.doesNotReject(agent.disposeSession(created.session)); + }); + + test('FakeQuery.setMcpServers stays unmodeled (Phase 10 never calls Query.setMcpServers for client tools)', async () => { + const { agent, sdk } = createTestContext(disposables); + await agent.authenticate(GITHUB_COPILOT_PROTECTED_RESOURCE.resource, 'tok'); + const created = await agent.createSession({ workingDirectory: URI.file('/work') }); + const sessionId = AgentSession.id(created.session); + agent.setClientTools(created.session, 'c1', [{ name: 'echo', inputSchema: { type: 'object' } }]); + sdk.nextQueryMessages = [makeSystemInitMessage(sessionId), makeResultSuccess(sessionId)]; + await agent.sendMessage(created.session, 'first', undefined, 'turn-1'); + // Change tools to force a rebind path (must use yield-restart, NOT Query.setMcpServers). + agent.setClientTools(created.session, 'c1', [{ name: 'echo2', inputSchema: { type: 'object' } }]); + sdk.nextQueryMessages = [makeSystemInitMessage(sessionId), makeResultSuccess(sessionId)]; + await agent.sendMessage(created.session, 'second', undefined, 'turn-2'); + // If `Query.setMcpServers` had been called, `FakeQuery.setMcpServers` would have thrown. + assert.strictEqual(sdk.startupCallCount, 2, 'rebind path used yield-restart, not setMcpServers'); + }); + + test('setClientTools landing during the materialize gap is re-synced into the live session', async () => { + const { agent, sdk } = createTestContext(disposables); + await agent.authenticate(GITHUB_COPILOT_PROTECTED_RESOURCE.resource, 'tok'); + const created = await agent.createSession({ workingDirectory: URI.file('/work') }); + const sessionId = AgentSession.id(created.session); + + // Initial snapshot before materialize starts. + agent.setClientTools(created.session, 'c1', [{ name: 'first', inputSchema: { type: 'object' } }]); + + // Pause startup #1 so we can inject an update during the gap. + const startupReached = new DeferredPromise(); + const startupGate = new DeferredPromise(); + sdk.startupAdvance = async (i: number) => { + if (i === 1) { + startupReached.complete(); + await startupGate.p; + } + }; + sdk.nextQueryMessages = [makeSystemInitMessage(sessionId), makeResultSuccess(sessionId)]; + + const send = agent.sendMessage(created.session, 'go', undefined, 'turn-1'); + // Wait until the materializer has snapshotted ['first'] into the diff + // and is paused inside `sdk.startup`. THEN inject the update. + await startupReached.p; + agent.setClientTools(created.session, 'c1', [{ name: 'second', inputSchema: { type: 'object' } }]); + startupGate.complete(); + await send; + + // Pre-fix: hasDifference was false after publish, so session.send used + // the materializer's snapshot only — startupCount stayed at 1 and the + // 'second' update was silently lost. Post-fix: the re-synced diff flips + // dirty, session.send rebinds before sending, and the new MCP server + // carries ['second']. + assert.deepStrictEqual({ + startupCount: sdk.startupCallCount, + firstSnapshot: sdk.createSdkMcpServerCalls[0]?.toolNames, + lastSnapshot: sdk.createSdkMcpServerCalls.at(-1)?.toolNames, + }, { + startupCount: 2, + firstSnapshot: ['first'], + lastSnapshot: ['second'], + }); + }); + + test('setClientTools landing during the resume bootstrap gap is re-synced into the live session', async () => { + const { agent, sdk } = createTestContext(disposables); + await agent.authenticate(GITHUB_COPILOT_PROTECTED_RESOURCE.resource, 'tok'); + + const sessionId = 'cross-window-session-id'; + const sessionUri = AgentSession.uri('claude', sessionId); + sdk.sessionList = [{ + sessionId, + summary: 'From another window', + lastModified: 5000, + createdAt: 4900, + cwd: URI.file('/work').fsPath, + }]; + + const startupReached = new DeferredPromise(); + const startupGate = new DeferredPromise(); + sdk.startupAdvance = async (i: number) => { + if (i === 1) { + startupReached.complete(); + await startupGate.p; + } + }; + sdk.nextQueryMessages = [makeSystemInitMessage(sessionId), makeResultSuccess(sessionId)]; + + const send = agent.sendMessage(sessionUri, 'hi', undefined, 'turn-1'); + // Wait until the resume's `sdk.startup` is in flight, then inject the + // update. Pre-fix the call hit the silent-drop branch because no + // provisional was registered for the resume. + await startupReached.p; + agent.setClientTools(sessionUri, 'c1', [{ name: 'resumed', inputSchema: { type: 'object' } }]); + startupGate.complete(); + await send; + + assert.deepStrictEqual({ + startupCount: sdk.startupCallCount, + lastSnapshot: sdk.createSdkMcpServerCalls.at(-1)?.toolNames, + }, { + startupCount: 2, + lastSnapshot: ['resumed'], + }); + }); + + test('rebind failure leaves the client-tool diff dirty so the next send retries', async () => { + const { agent, sdk } = createTestContext(disposables); + await agent.authenticate(GITHUB_COPILOT_PROTECTED_RESOURCE.resource, 'tok'); + const created = await agent.createSession({ workingDirectory: URI.file('/work') }); + const sessionId = AgentSession.id(created.session); + + // Pause the iterator after the first result so the pipeline doesn't + // auto-rebind via "stream ended without result". + const advance = new DeferredPromise(); + sdk.queryAdvance = async (i: number) => { if (i === 2) { await advance.p; } }; + sdk.nextQueryMessages = [ + makeSystemInitMessage(sessionId), makeResultSuccess(sessionId), + makeSystemInitMessage(sessionId), makeResultSuccess(sessionId), + ]; + + await agent.sendMessage(created.session, 'first', undefined, 'turn-1'); + assert.strictEqual(sdk.startupCallCount, 1); + + // Stage a rebind whose startup will reject. + agent.setClientTools(created.session, 'c1', [{ name: 'echo', inputSchema: { type: 'object' } }]); + sdk.startupRejection = new Error('simulated rebind startup failure'); + sdk.queryAdvance = undefined; + advance.complete(); + await assert.rejects(agent.sendMessage(created.session, 'second', undefined, 'turn-2')); + + // Pre-fix: `_buildClientMcpServers` consumed the diff, but the SDK + // startup that followed rejected without re-marking dirty, so the next + // send skipped the rebind branch and silently kept the stale server + // set. Post-fix: the rematerializer's catch re-marks dirty, so this + // send retries the rebind and succeeds. + sdk.startupRejection = undefined; + sdk.nextQueryMessages = [makeSystemInitMessage(sessionId), makeResultSuccess(sessionId)]; + await agent.sendMessage(created.session, 'third', undefined, 'turn-3'); + assert.deepStrictEqual({ + startupCount: sdk.startupCallCount, + lastSnapshot: sdk.createSdkMcpServerCalls.at(-1)?.toolNames, + }, { + startupCount: 3, + lastSnapshot: ['echo'], + }); + }); + + // #endregion + // #endregion }); @@ -2982,8 +3325,12 @@ suite('ClaudeAgentSession (Phase 7 §3.2)', () => { // loop unwinds and the subprocess shuts down cleanly. const sdk = new FakeClaudeAgentSdkService(); const warm = new FakeWarmQuery(sdk); + const fakeConfigService: IAgentConfigurationService = { + getSessionConfigValues: () => undefined, + } as unknown as IAgentConfigurationService; const services = new ServiceCollection( [ILogService, new NullLogService()], + [IAgentConfigurationService, fakeConfigService], ); const instantiationService: IInstantiationService = disposables.add(new InstantiationService(services)); const dbRef = { object: new TestSessionDatabase(), dispose: () => { } }; @@ -2995,6 +3342,9 @@ suite('ClaudeAgentSession (Phase 7 §3.2)', () => { warm, new AbortController(), dbRef, + new PendingRequestRegistry(), + new SessionClientToolsDiff(), + 'default', )); const permission = session.requestPermission({ diff --git a/src/vs/platform/agentHost/test/node/claudeSdkMessageRouter.test.ts b/src/vs/platform/agentHost/test/node/claudeSdkMessageRouter.test.ts index fa5acab24e73f..3b00df664ac63 100644 --- a/src/vs/platform/agentHost/test/node/claudeSdkMessageRouter.test.ts +++ b/src/vs/platform/agentHost/test/node/claudeSdkMessageRouter.test.ts @@ -57,6 +57,7 @@ function createRouter(disposables: Pick): IRouterHarness URI.parse('claude:/sess-1'), dbRef, subagents, + undefined, )); const signals: AgentSignal[] = []; disposables.add(router.onDidProduceSignal(s => signals.push(s))); diff --git a/src/vs/platform/agentHost/test/node/claudeSdkPipeline.test.ts b/src/vs/platform/agentHost/test/node/claudeSdkPipeline.test.ts index a7f2f0998e72e..53031b62fbbeb 100644 --- a/src/vs/platform/agentHost/test/node/claudeSdkPipeline.test.ts +++ b/src/vs/platform/agentHost/test/node/claudeSdkPipeline.test.ts @@ -116,6 +116,7 @@ function createPipeline(disposables: Pick): IPipelineHar controller, dbRef, subagents, + undefined, )); return { pipeline, warm, controller }; } diff --git a/src/vs/platform/agentHost/test/node/claudeSubagentResolver.test.ts b/src/vs/platform/agentHost/test/node/claudeSubagentResolver.test.ts index 07c90278b5db8..ced118a091ce9 100644 --- a/src/vs/platform/agentHost/test/node/claudeSubagentResolver.test.ts +++ b/src/vs/platform/agentHost/test/node/claudeSubagentResolver.test.ts @@ -59,6 +59,8 @@ class FakeSdkService implements IClaudeAgentSdkService { if (this.getSubagentMessagesRejection) { throw this.getSubagentMessagesRejection; } return this.subagentMessages.get(`${sessionId}::${agentId}`) ?? []; } + async createSdkMcpServer(): Promise { throw new Error('not implemented in test fake'); } + async tool(): Promise { throw new Error('not implemented in test fake'); } } function makeAgentToolCallTurn(toolCallId: string, opts: { prompt?: string; suffixText?: string; toolName?: string; status?: ToolCallStatus.Completed }): Turn { diff --git a/src/vs/platform/agentHost/test/node/clientTools/claudeClientToolMcpServer.test.ts b/src/vs/platform/agentHost/test/node/clientTools/claudeClientToolMcpServer.test.ts new file mode 100644 index 0000000000000..5dd303748d372 --- /dev/null +++ b/src/vs/platform/agentHost/test/node/clientTools/claudeClientToolMcpServer.test.ts @@ -0,0 +1,142 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import assert from 'assert'; +import { ensureNoDisposablesAreLeakedInTestSuite } from '../../../../../base/test/common/utils.js'; +import { PendingRequestRegistry } from '../../../common/pendingRequestRegistry.js'; +import type { ToolDefinition } from '../../../common/state/protocol/state.js'; +import { + buildClientToolMcpServer, + CLAUDE_CLIENT_MCP_SERVER_NAME, + extractToolUseId, + hasClientToolNamePrefix, + stripClientToolNamePrefix, +} from '../../../node/claude/clientTools/claudeClientToolMcpServer.js'; +import type { IClaudeAgentSdkService } from '../../../node/claude/claudeAgentSdkService.js'; +import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js'; +import type { McpSdkServerConfigWithInstance } from '@anthropic-ai/claude-agent-sdk'; + +function tool(over: Partial = {}): ToolDefinition { + return { + name: 'echo', + description: 'echoes', + inputSchema: { type: 'object', properties: { msg: { type: 'string' } }, required: ['msg'] }, + ...over, + }; +} + +interface RecordedTool { + name: string; + handler: (args: Record, extra: unknown) => Promise; +} + +function makeSdk(): { sdk: IClaudeAgentSdkService; recorded: RecordedTool[] } { + const recorded: RecordedTool[] = []; + const sdk = { + createSdkMcpServer: async (options: { name: string }) => + ({ name: options.name, instance: { __fake: true } } as unknown as McpSdkServerConfigWithInstance), + tool: async (name: string, _desc: string, _schema: unknown, handler: (args: Record, extra: unknown) => Promise) => { + const t = { name, handler }; + recorded.push(t); + return t as unknown as ReturnType; + }, + } as unknown as IClaudeAgentSdkService; + return { sdk, recorded }; +} + +suite('claudeClientToolMcpServer / buildClientToolMcpServer', () => { + + ensureNoDisposablesAreLeakedInTestSuite(); + + test('registers all snapshot tools on the CLAUDE_CLIENT_MCP_SERVER_NAME server', async () => { + const { sdk, recorded } = makeSdk(); + const registry = new PendingRequestRegistry(); + const server = await buildClientToolMcpServer([tool({ name: 'a' }), tool({ name: 'b' })], id => registry.register(id), sdk); + assert.strictEqual(server.name, CLAUDE_CLIENT_MCP_SERVER_NAME); + assert.deepStrictEqual(recorded.map(t => t.name), ['a', 'b']); + }); + + test('handler recovers toolUseId from extra._meta and parks on the registry until responded', async () => { + const { sdk, recorded } = makeSdk(); + const registry = new PendingRequestRegistry(); + await buildClientToolMcpServer([tool()], id => registry.register(id), sdk); + const handler = recorded[0]!.handler; + const extra = { _meta: { 'claudecode/toolUseId': 'tu_42' } }; + const callPromise = handler({ msg: 'hi' }, extra); + // The handler must not have resolved yet — it's parked on the registry. + const sentinel = Symbol(); + const raced = await Promise.race([callPromise, Promise.resolve(sentinel)]); + assert.strictEqual(raced, sentinel, 'handler should be parked'); + // Workbench echoes the completion via the same toolUseId key. + const expected: CallToolResult = { content: [{ type: 'text', text: 'done' }] }; + const settled = registry.respond('tu_42', expected); + assert.strictEqual(settled, true); + assert.deepStrictEqual(await callPromise, expected); + }); + + test('handler returns an error result when SDK omits the tool_use_id meta field', async () => { + const { sdk, recorded } = makeSdk(); + const registry = new PendingRequestRegistry(); + await buildClientToolMcpServer([tool()], id => registry.register(id), sdk); + const handler = recorded[0]!.handler; + const result = await handler({ msg: 'hi' }, { _meta: {} }); + assert.strictEqual(result.isError, true); + assert.strictEqual(result.content.length, 1); + assert.ok((result.content[0] as { type: string; text: string }).text.includes('tool_use_id')); + }); + + test('handler returns an error result when extra is not an object', async () => { + const { sdk, recorded } = makeSdk(); + const registry = new PendingRequestRegistry(); + await buildClientToolMcpServer([tool()], id => registry.register(id), sdk); + const handler = recorded[0]!.handler; + const result = await handler({ msg: 'hi' }, undefined); + assert.strictEqual(result.isError, true); + }); + + test('tools with missing description default to empty string (no crash)', async () => { + const { sdk, recorded } = makeSdk(); + const registry = new PendingRequestRegistry(); + await buildClientToolMcpServer([tool({ description: undefined })], id => registry.register(id), sdk); + assert.strictEqual(recorded.length, 1); + }); +}); + +suite('claudeClientToolMcpServer / extractToolUseId', () => { + + ensureNoDisposablesAreLeakedInTestSuite(); + + test('extracts the value at extra._meta["claudecode/toolUseId"]', () => { + assert.strictEqual(extractToolUseId({ _meta: { 'claudecode/toolUseId': 'tu_1' } }), 'tu_1'); + }); + + test('returns undefined when extra is null / missing _meta / wrong shape', () => { + assert.strictEqual(extractToolUseId(null), undefined); + assert.strictEqual(extractToolUseId(undefined), undefined); + assert.strictEqual(extractToolUseId('not an object'), undefined); + assert.strictEqual(extractToolUseId({}), undefined); + assert.strictEqual(extractToolUseId({ _meta: null }), undefined); + assert.strictEqual(extractToolUseId({ _meta: {} }), undefined); + assert.strictEqual(extractToolUseId({ _meta: { 'claudecode/toolUseId': 42 } }), undefined); + }); +}); + +suite('claudeClientToolMcpServer / name prefix helpers', () => { + + ensureNoDisposablesAreLeakedInTestSuite(); + + test('hasClientToolNamePrefix detects mcp__client__ prefix only', () => { + assert.strictEqual(hasClientToolNamePrefix('mcp__client__foo'), true); + assert.strictEqual(hasClientToolNamePrefix('mcp__other__foo'), false); + assert.strictEqual(hasClientToolNamePrefix('foo'), false); + assert.strictEqual(hasClientToolNamePrefix(''), false); + }); + + test('stripClientToolNamePrefix strips only the mcp__client__ prefix', () => { + assert.strictEqual(stripClientToolNamePrefix('mcp__client__foo'), 'foo'); + assert.strictEqual(stripClientToolNamePrefix('mcp__other__foo'), 'mcp__other__foo'); + assert.strictEqual(stripClientToolNamePrefix('foo'), 'foo'); + }); +}); diff --git a/src/vs/platform/agentHost/test/node/clientTools/claudeClientToolResult.test.ts b/src/vs/platform/agentHost/test/node/clientTools/claudeClientToolResult.test.ts new file mode 100644 index 0000000000000..a59637f6881ac --- /dev/null +++ b/src/vs/platform/agentHost/test/node/clientTools/claudeClientToolResult.test.ts @@ -0,0 +1,97 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import assert from 'assert'; +import { ensureNoDisposablesAreLeakedInTestSuite } from '../../../../../base/test/common/utils.js'; +import { convertToolCallResult } from '../../../node/claude/clientTools/claudeClientToolResult.js'; +import { ToolResultContentType, type ToolCallResult, type ToolResultContent } from '../../../common/state/protocol/channels-session/state.js'; + +function makeResult(over: Partial): ToolCallResult { + return { + success: true, + pastTenseMessage: 'did the thing', + ...over, + }; +} + +suite('claudeClientToolResult / convertToolCallResult', () => { + + ensureNoDisposablesAreLeakedInTestSuite(); + + test('text-only result maps to MCP text blocks; no isError when success', () => { + const out = convertToolCallResult(makeResult({ + content: [{ type: ToolResultContentType.Text, text: 'hello' }], + }), 'tu_1'); + assert.deepStrictEqual(out.content, [{ type: 'text', text: 'hello' }]); + assert.strictEqual(out.isError, undefined); + }); + + test('error result carries isError=true', () => { + const out = convertToolCallResult(makeResult({ + success: false, + error: { message: 'boom' }, + content: [{ type: ToolResultContentType.Text, text: 'failed' }], + }), 'tu_2'); + assert.strictEqual(out.isError, true); + }); + + test('image embedded resource → MCP image block (field rename + repackage)', () => { + const out = convertToolCallResult(makeResult({ + content: [{ + type: ToolResultContentType.EmbeddedResource, + data: 'BASE64PNG', + contentType: 'image/png', + }], + }), 'tu_3'); + assert.deepStrictEqual(out.content[0], { + type: 'image', + data: 'BASE64PNG', + mimeType: 'image/png', + }); + }); + + test('non-image embedded resource → MCP resource block with synthesized claude-client:// URI', () => { + const out = convertToolCallResult(makeResult({ + content: [ + { type: ToolResultContentType.EmbeddedResource, data: 'BASE64PDF', contentType: 'application/pdf' }, + { type: ToolResultContentType.EmbeddedResource, data: 'BASE64ZIP', contentType: 'application/zip' }, + ], + }), 'tu/with-slash'); + // Per-call URI carries the tool_use_id (encoded) and the block index, so + // parallel calls with the same shape stay disambiguated. + assert.deepStrictEqual(out.content[0], { + type: 'resource', + resource: { uri: 'claude-client://tu%2Fwith-slash/0', mimeType: 'application/pdf', blob: 'BASE64PDF' }, + }); + assert.deepStrictEqual(out.content[1], { + type: 'resource', + resource: { uri: 'claude-client://tu%2Fwith-slash/1', mimeType: 'application/zip', blob: 'BASE64ZIP' }, + }); + }); + + test('unknown block kind collapses to a stringified text block (warn logged, no throw)', () => { + const originalWarn = console.warn; + let warned = false; + console.warn = () => { warned = true; }; + try { + const malformedBlock = { type: 'not-a-real-kind', ref: 'r1', mimeType: 'text/plain' } as unknown as ToolResultContent; + const out = convertToolCallResult(makeResult({ + content: [malformedBlock], + }), 'tu_5'); + assert.strictEqual(out.content[0].type, 'text'); + assert.ok(typeof (out.content[0] as { text: string }).text === 'string'); + assert.strictEqual(warned, true); + } finally { + console.warn = originalWarn; + } + }); + + test('structuredContent passes through unchanged', () => { + const out = convertToolCallResult(makeResult({ + structuredContent: { k: 'v' }, + }), 'tu_6'); + assert.deepStrictEqual(out.structuredContent, { k: 'v' }); + }); +}); diff --git a/src/vs/platform/agentHost/test/node/clientTools/claudeJsonSchemaToZod.test.ts b/src/vs/platform/agentHost/test/node/clientTools/claudeJsonSchemaToZod.test.ts new file mode 100644 index 0000000000000..7f2a624903668 --- /dev/null +++ b/src/vs/platform/agentHost/test/node/clientTools/claudeJsonSchemaToZod.test.ts @@ -0,0 +1,100 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import assert from 'assert'; +import { z } from 'zod'; +import { ensureNoDisposablesAreLeakedInTestSuite } from '../../../../../base/test/common/utils.js'; +import { jsonSchemaToZodRawShape } from '../../../node/claude/clientTools/claudeJsonSchemaToZod.js'; + +function parse(shape: Record, value: unknown) { + return z.object(shape).safeParse(value); +} + +suite('claudeJsonSchemaToZod', () => { + + ensureNoDisposablesAreLeakedInTestSuite(); + + test('empty / undefined schema returns empty shape', () => { + assert.deepStrictEqual(Object.keys(jsonSchemaToZodRawShape(undefined)), []); + assert.deepStrictEqual(Object.keys(jsonSchemaToZodRawShape({ type: 'object' })), []); + }); + + test('primitives + required vs optional wrapping', () => { + const shape = jsonSchemaToZodRawShape({ + type: 'object', + properties: { + a: { type: 'string' }, + b: { type: 'number' }, + c: { type: 'integer' }, + d: { type: 'boolean' }, + }, + required: ['a'], + }); + assert.strictEqual(parse(shape, { a: 'x' }).success, true, 'omitting optional props OK'); + assert.strictEqual(parse(shape, {}).success, false, 'missing required a fails'); + assert.strictEqual(parse(shape, { a: 'x', c: 1.5 }).success, false, 'integer rejects float'); + assert.strictEqual(parse(shape, { a: 'x', c: 1, b: 2, d: true }).success, true); + }); + + test('arrays + nested objects + enum + oneOf + null', () => { + const shape = jsonSchemaToZodRawShape({ + type: 'object', + properties: { + list: { type: 'array', items: { type: 'string' } }, + nested: { + type: 'object', + properties: { inner: { type: 'number' } }, + required: ['inner'], + }, + color: { enum: ['red', 'blue', 'green'] }, + one: { enum: ['only'] }, + either: { oneOf: [{ type: 'string' }, { type: 'number' }] }, + nope: { type: 'null' }, + }, + required: ['list', 'nested', 'color', 'one', 'either', 'nope'], + }); + const ok = parse(shape, { + list: ['a', 'b'], + nested: { inner: 1 }, + color: 'red', + one: 'only', + either: 'hi', + nope: null, + }); + assert.strictEqual(ok.success, true); + assert.strictEqual(parse(shape, { list: [1], nested: { inner: 1 }, color: 'red', one: 'only', either: 'x', nope: null }).success, false, 'array items typed'); + assert.strictEqual(parse(shape, { list: [], nested: { inner: 1 }, color: 'purple', one: 'only', either: 'x', nope: null }).success, false, 'enum rejects unknown'); + assert.strictEqual(parse(shape, { list: [], nested: { inner: 1 }, color: 'red', one: 'only', either: true, nope: null }).success, false, 'oneOf rejects out-of-union'); + }); + + test('nullable + description + default survive conversion', () => { + const shape = jsonSchemaToZodRawShape({ + type: 'object', + properties: { + n: { type: 'string', nullable: true, description: 'a thing', default: 'd' }, + }, + required: ['n'], + }); + assert.strictEqual(parse(shape, { n: null }).success, true, 'nullable accepts null'); + assert.strictEqual(parse(shape, { n: 'hi' }).success, true); + assert.strictEqual(parse(shape, { n: 7 }).success, false); + const withDefault = parse(shape, {}); + assert.strictEqual(withDefault.success, true, 'default fills in missing'); + assert.strictEqual(withDefault.success && (withDefault.data as { n: string }).n, 'd'); + }); + + test('unsupported property schema falls back to z.any() — never rejects the tool', () => { + const shape = jsonSchemaToZodRawShape({ + type: 'object', + properties: { + weird: { type: 'totally-bogus' as unknown as string }, + worse: null as unknown as object, + }, + required: ['weird'], + }); + assert.strictEqual(parse(shape, { weird: 42 }).success, true, 'any accepts anything'); + assert.strictEqual(parse(shape, { weird: { nested: true }, worse: 'also fine' }).success, true); + }); +}); diff --git a/src/vs/platform/agentHost/test/node/clientTools/claudeSessionClientToolsModel.test.ts b/src/vs/platform/agentHost/test/node/clientTools/claudeSessionClientToolsModel.test.ts new file mode 100644 index 0000000000000..16d3bbbb4aead --- /dev/null +++ b/src/vs/platform/agentHost/test/node/clientTools/claudeSessionClientToolsModel.test.ts @@ -0,0 +1,122 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import assert from 'assert'; +import { ensureNoDisposablesAreLeakedInTestSuite } from '../../../../../base/test/common/utils.js'; +import type { ToolDefinition } from '../../../common/state/protocol/state.js'; +import { SessionClientToolsDiff } from '../../../node/claude/clientTools/claudeSessionClientToolsModel.js'; + +const tool = (over: Partial = {}): ToolDefinition => ({ + name: 'echo', + description: 'echoes', + inputSchema: { type: 'object', properties: { msg: { type: 'string' } }, required: ['msg'] }, + ...over, +}); + +suite('SessionClientToolsDiff', () => { + + const disposables = ensureNoDisposablesAreLeakedInTestSuite(); + + test('fresh diff: state empty, no difference', () => { + const diff = disposables.add(new SessionClientToolsDiff()); + assert.deepStrictEqual(diff.model.state.get(), { tools: undefined, clientId: undefined }); + assert.strictEqual(diff.hasDifference, false); + }); + + test('setTools(undefined → []) does NOT flip dirty (undefined ≡ [])', () => { + const diff = disposables.add(new SessionClientToolsDiff()); + diff.model.setTools([]); + assert.strictEqual(diff.hasDifference, false); + }); + + test('setTools with a real snapshot flips dirty', () => { + const diff = disposables.add(new SessionClientToolsDiff()); + diff.model.setTools([tool()]); + assert.strictEqual(diff.hasDifference, true); + }); + + test('consume() returns the current state and clears dirty', () => { + const diff = disposables.add(new SessionClientToolsDiff()); + diff.model.setTools([tool()], 'c1'); + const state = diff.consume(); + assert.deepStrictEqual(state, { tools: [tool()], clientId: 'c1' }); + assert.strictEqual(diff.hasDifference, false); + }); + + test('setTools with a structurally-equal snapshot does NOT re-flip dirty after consume', () => { + const diff = disposables.add(new SessionClientToolsDiff()); + diff.model.setTools([tool()]); + diff.consume(); + diff.model.setTools([{ name: 'echo', description: 'echoes', inputSchema: { type: 'object', properties: { msg: { type: 'string' } }, required: ['msg'] } }]); + assert.strictEqual(diff.hasDifference, false); + }); + + test('C6: setTools racing async work after consume re-flips dirty via autorun', async () => { + const diff = disposables.add(new SessionClientToolsDiff()); + diff.model.setTools([tool({ name: 'original' })]); + const state = diff.consume(); + assert.deepStrictEqual(state.tools, [tool({ name: 'original' })]); + await Promise.resolve(); + diff.model.setTools([tool({ name: 'racer' })]); + assert.strictEqual(diff.hasDifference, true); + }); + + test('markDirty re-flips after a failed downstream build', () => { + const diff = disposables.add(new SessionClientToolsDiff()); + diff.model.setTools([tool()]); + diff.consume(); + assert.strictEqual(diff.hasDifference, false); + diff.markDirty(); + assert.strictEqual(diff.hasDifference, true); + }); + + test('hasDifference detects rename / description / inputSchema; ignores title', () => { + const diff = disposables.add(new SessionClientToolsDiff()); + diff.model.setTools([tool({ name: 'a' })]); + diff.consume(); + + diff.model.setTools([tool({ name: 'b' })]); + assert.strictEqual(diff.hasDifference, true); + diff.consume(); + + diff.model.setTools([tool({ name: 'b', description: 'new' })]); + assert.strictEqual(diff.hasDifference, true); + diff.consume(); + + diff.model.setTools([tool({ name: 'b', description: 'new', inputSchema: { type: 'object', properties: { msg: { type: 'number' } }, required: ['msg'] } })]); + assert.strictEqual(diff.hasDifference, true); + diff.consume(); + + diff.model.setTools([tool({ name: 'b', description: 'new', inputSchema: { type: 'object', properties: { msg: { type: 'number' } }, required: ['msg'] }, title: 'X' })]); + assert.strictEqual(diff.hasDifference, false, 'title is outside the diff scope'); + }); + + test('order-insensitive: reordering tools does not flip dirty', () => { + const diff = disposables.add(new SessionClientToolsDiff()); + diff.model.setTools([tool({ name: 'a' }), tool({ name: 'b' })]); + diff.consume(); + diff.model.setTools([tool({ name: 'b' }), tool({ name: 'a' })]); + assert.strictEqual(diff.hasDifference, false); + }); + + test('setTools writes clientId only when supplied', () => { + const diff = disposables.add(new SessionClientToolsDiff()); + diff.model.setTools([tool()], 'c1'); + assert.strictEqual(diff.model.state.get().clientId, 'c1'); + diff.model.setTools([tool({ name: 'echo2' })]); + assert.strictEqual(diff.model.state.get().clientId, 'c1', 'omitted clientId leaves previous value'); + diff.model.setTools([tool()], 'c2'); + assert.strictEqual(diff.model.state.get().clientId, 'c2'); + }); + + test('clientId change alone flips dirty', () => { + const diff = disposables.add(new SessionClientToolsDiff()); + diff.model.setTools([tool()], 'c1'); + diff.consume(); + assert.strictEqual(diff.hasDifference, false); + diff.model.setTools([tool()], 'c2'); + assert.strictEqual(diff.hasDifference, true); + }); +}); diff --git a/test/unit/electron/renderer.html b/test/unit/electron/renderer.html index 3af99562e153d..8e9328b443734 100644 --- a/test/unit/electron/renderer.html +++ b/test/unit/electron/renderer.html @@ -59,8 +59,15 @@ } // export all properties and the module itself as default + // (skip names that are reserved words in ES module syntax, e.g. + // zod exports keys like `enum`, `function`, `default` that would + // produce `export const enum = ...` and fail to parse) + const RESERVED = new Set(['break', 'case', 'catch', 'class', 'const', 'continue', 'debugger', 'default', 'delete', 'do', 'else', 'enum', 'export', 'extends', 'false', 'finally', 'for', 'function', 'if', 'import', 'in', 'instanceof', 'new', 'null', 'return', 'super', 'switch', 'this', 'throw', 'true', 'try', 'typeof', 'var', 'void', 'while', 'with', 'yield', 'let', 'static', 'implements', 'interface', 'package', 'private', 'protected', 'public', 'await']); let jsSrc = `const _mod = require('${name}')\n`; for (const name of Object.keys(_mod)) { + if (RESERVED.has(name) || !/^[A-Za-z_$][A-Za-z0-9_$]*$/.test(name)) { + continue; + } jsSrc += `export const ${name} = _mod['${name}'];\n`; } jsSrc += `export default _mod;\n`; From ce5a6fe71e1d43e2e63eb4e53c642197495c2a28 Mon Sep 17 00:00:00 2001 From: Tyler Leonhardt Date: Wed, 20 May 2026 20:30:24 -0700 Subject: [PATCH 2/6] claude: strip mcp__client__ prefix on replay + subagent inner tool paths The live stream mapper already strips the in-process MCP server prefix so the workbench resolves client tools by their unprefixed name. The replay mapper (resumed sessions) and subagent inner-tool mapper did not, so persisted / subagent client-tool calls rendered as 'Run MCP tool client__' instead of the tool's rich invocation message. --- .../agentHost/node/claude/claudeReplayMapper.ts | 7 ++++++- .../node/claude/claudeSubagentSignals.ts | 17 +++++++++++------ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/vs/platform/agentHost/node/claude/claudeReplayMapper.ts b/src/vs/platform/agentHost/node/claude/claudeReplayMapper.ts index 4e2a625495dd0..7b6dbef8ecc3e 100644 --- a/src/vs/platform/agentHost/node/claude/claudeReplayMapper.ts +++ b/src/vs/platform/agentHost/node/claude/claudeReplayMapper.ts @@ -22,6 +22,7 @@ import { } from '../../common/state/protocol/state.js'; import { buildSubagentSessionUri } from '../../common/state/sessionState.js'; import { buildClaudeToolMeta, getClaudeInvocationMessage, getClaudePastTenseMessage, getClaudeToolDisplayName, getClaudeToolInputString } from './claudeToolDisplay.js'; +import { stripClientToolNamePrefix } from './clientTools/claudeClientToolMcpServer.js'; /** * Phase 13 — replay mapper. Reduces a flat `SessionMessage[]` (the SDK's @@ -248,7 +249,11 @@ class ReplayBuilder { content: block.thinking, }); } else if (block.type === 'tool_use' && typeof block.id === 'string' && typeof block.name === 'string') { - this._openToolUse(block.id, block.name, block.input); + // Strip the in-process MCP server prefix so the workbench resolves + // the workbench-registered tool by its unprefixed name (matches the + // live stream mapper). Without this, replayed client-tool calls + // fall back to the generic "Run MCP tool" rendering. + this._openToolUse(block.id, stripClientToolNamePrefix(block.name), block.input); } // Other block types (server_tool_use, etc.) are dropped silently per M7. } diff --git a/src/vs/platform/agentHost/node/claude/claudeSubagentSignals.ts b/src/vs/platform/agentHost/node/claude/claudeSubagentSignals.ts index 77f317ce8569d..4203de6d9c78b 100644 --- a/src/vs/platform/agentHost/node/claude/claudeSubagentSignals.ts +++ b/src/vs/platform/agentHost/node/claude/claudeSubagentSignals.ts @@ -11,6 +11,7 @@ import { ResponsePartKind, ToolCallConfirmationReason } from '../../common/state import type { ClaudeMapperState } from './claudeMapSessionEvents.js'; import { SUBAGENT_TOOL_NAMES, type SubagentRegistry } from './claudeSubagentRegistry.js'; import { buildClaudeToolMeta, getClaudeInvocationMessage, getClaudeToolDisplayName, getClaudeToolInputString } from './claudeToolDisplay.js'; +import { stripClientToolNamePrefix } from './clientTools/claudeClientToolMcpServer.js'; /** * Phase 12 — SDK tool names that spawn subagent sessions. Re-exported @@ -232,7 +233,11 @@ export function emitInnerAssistantSignals( continue; } if (block.type === 'tool_use') { - state.startToolBlock(index, block.id, block.name, turnId); + // Strip the in-process MCP server prefix so subagent client-tool + // calls render with their real name (matches the top-level stream + // mapper). SDK-owned tools and Task/Agent passes through unchanged. + const toolName = stripClientToolNamePrefix(block.name); + state.startToolBlock(index, block.id, toolName, turnId); // Inner tool input arrives pre-parsed on the synthesized // `assistant` message (not via `input_json_delta` chunks), so // seed the registry directly. Without this the live @@ -241,9 +246,9 @@ export function emitInnerAssistantSignals( // always computes rich text) drifts from live — violating D6. state.toolCalls.seedParsedInput(block.id, block.input); registry.noteInnerTool(block.id, parentToolUseId); - const displayName = getClaudeToolDisplayName(block.name); - const meta = buildClaudeToolMeta(block.name); - const toolInputStr = getClaudeToolInputString(block.name, block.input); + const displayName = getClaudeToolDisplayName(toolName); + const meta = buildClaudeToolMeta(toolName); + const toolInputStr = getClaudeToolInputString(toolName, block.input); signals.push({ kind: 'action', session, @@ -251,7 +256,7 @@ export function emitInnerAssistantSignals( type: ActionType.SessionToolCallStart, turnId, toolCallId: block.id, - toolName: block.name, + toolName, displayName, ...(meta ? { _meta: meta } : {}), }, @@ -263,7 +268,7 @@ export function emitInnerAssistantSignals( type: ActionType.SessionToolCallReady, turnId, toolCallId: block.id, - invocationMessage: getClaudeInvocationMessage(block.name, displayName, block.input), + invocationMessage: getClaudeInvocationMessage(toolName, displayName, block.input), ...(toolInputStr !== undefined ? { toolInput: toolInputStr } : {}), confirmed: ToolCallConfirmationReason.NotNeeded, }, From 6727be79988399bc3a7959da12304e32ef3b0fc3 Mon Sep 17 00:00:00 2001 From: Tyler Leonhardt Date: Wed, 20 May 2026 21:06:33 -0700 Subject: [PATCH 3/6] claude: address Copilot review on PR #317685 - claudeMapSessionEvents: gate subagent-spawn detection on !isClientTool so a workbench tool named 'Task' / 'Agent' can't impersonate the SDK's spawn tools. - pendingRequestRegistry.register: reject a pre-existing deferred with CancellationError before overwriting, so duplicate-key registrations don't leak the prior awaiter. - renderer.html: hoist RESERVED_EXPORT_NAMES + VALID_IDENTIFIER regex out of asRequireBlobUri so they aren't reallocated per module in the test bootstrap import map. - claudeAgentSdkService: drop the export on _assertBindingsMatchSdk; the constant is only used for the compile-time type check, so it doesn't need to be on the module's public surface. --- .agents/skills/launch/SKILL.md | 10 ++++++++++ .../agentHost/common/pendingRequestRegistry.ts | 10 ++++++++++ .../agentHost/node/claude/claudeAgentSdkService.ts | 5 +++-- .../agentHost/node/claude/claudeMapSessionEvents.ts | 4 +++- test/unit/electron/renderer.html | 8 ++++++-- 5 files changed, 32 insertions(+), 5 deletions(-) diff --git a/.agents/skills/launch/SKILL.md b/.agents/skills/launch/SKILL.md index 77029e0a4e12e..3ae6a5a4ff276 100644 --- a/.agents/skills/launch/SKILL.md +++ b/.agents/skills/launch/SKILL.md @@ -128,6 +128,16 @@ npx @playwright/cli snapshot - The `--agents` flag launches the Agents workbench instead of the standard VS Code workbench. - Set `VSCODE_SKIP_PRELAUNCH=1` to skip the compile step if you've already built. +> **🚨 Driving the Agents window UI is not the same as driving regular Code OSS.** +> The session sidebar, agent picker, and chat input have their own quirks that +> trip up the obvious `click + type` patterns. Before you spend time fighting +> the UI, read **[references/agents-window-guide.md](./references/agents-window-guide.md)** +> — it covers the agent picker (Copilot CLI vs Claude), `[Local]` vs non-`[Local]` +> sessions, the reliable keyboard-navigation trick for the agent dropdown, how +> to send a follow-up turn in the same session, how to restore an existing +> chat, and the kill / relaunch / verify-out-is-fresh dance you need after a +> source code change. + ## Launching VS Code Extensions for Debugging To debug a VS Code extension via npx @playwright/cli, launch VS Code Insiders with `--extensionDevelopmentPath` and `--remote-debugging-port`. Use `--user-data-dir` to avoid conflicting with an already-running instance. diff --git a/src/vs/platform/agentHost/common/pendingRequestRegistry.ts b/src/vs/platform/agentHost/common/pendingRequestRegistry.ts index a6c4ea61def7b..067e50a9c6173 100644 --- a/src/vs/platform/agentHost/common/pendingRequestRegistry.ts +++ b/src/vs/platform/agentHost/common/pendingRequestRegistry.ts @@ -4,6 +4,7 @@ *--------------------------------------------------------------------------------------------*/ import { DeferredPromise } from '../../../base/common/async.js'; +import { CancellationError } from '../../../base/common/errors.js'; /** * Registry of parked deferred promises keyed by string id. Used to @@ -33,8 +34,17 @@ export class PendingRequestRegistry { * eventually feeds {@link respond} originates from a different code * path (e.g. an MCP handler invoked by the SDK whose completion * arrives via a workbench round-trip). + * + * If `key` is already registered (duplicate `tool_use_id` from the + * SDK, retry, or logic bug), the previous deferred is rejected with + * a {@link CancellationError} so its awaiter unwinds instead of + * leaking forever. */ register(key: string): Promise { + const existing = this._entries.get(key); + if (existing && !existing.isSettled) { + existing.error(new CancellationError()); + } const deferred = new DeferredPromise(); this._entries.set(key, deferred); return deferred.p; diff --git a/src/vs/platform/agentHost/node/claude/claudeAgentSdkService.ts b/src/vs/platform/agentHost/node/claude/claudeAgentSdkService.ts index 95d7a5aa81408..8136f6618e69f 100644 --- a/src/vs/platform/agentHost/node/claude/claudeAgentSdkService.ts +++ b/src/vs/platform/agentHost/node/claude/claudeAgentSdkService.ts @@ -212,7 +212,8 @@ type AssertBindingsMatchSdk = { }; // Forces the mapped type above to be eagerly checked: if any entry is not -// `true`, this assignment fails to compile. -export const _assertBindingsMatchSdk: { [K in keyof IClaudeSdkBindings]: true } = null as unknown as AssertBindingsMatchSdk; +// `true`, this assignment fails to compile. Module-local so the runtime +// surface stays clean — the only purpose is the type-level assertion. +const _assertBindingsMatchSdk: { [K in keyof IClaudeSdkBindings]: true } = null as unknown as AssertBindingsMatchSdk; // #endregion diff --git a/src/vs/platform/agentHost/node/claude/claudeMapSessionEvents.ts b/src/vs/platform/agentHost/node/claude/claudeMapSessionEvents.ts index 52d3816afb133..33c5c91606a6d 100644 --- a/src/vs/platform/agentHost/node/claude/claudeMapSessionEvents.ts +++ b/src/vs/platform/agentHost/node/claude/claudeMapSessionEvents.ts @@ -520,7 +520,9 @@ function mapStreamEvent( // parent. They are mutually exclusive (a Task call inside a // subagent is itself an inner tool_use; the resolver chain // handles nested spawns by following the parent chain). - const isSubagentSpawn = SUBAGENT_SPAWNING_TOOL_NAMES.has(toolName); + // Gated on `!isClientTool` so a workbench tool named `Task` / + // `Agent` cannot impersonate the SDK's subagent-spawn tools. + const isSubagentSpawn = !isClientTool && SUBAGENT_SPAWNING_TOOL_NAMES.has(toolName); if (parentToolUseId === null) { if (isSubagentSpawn) { registry.recordSpawn(block.id); diff --git a/test/unit/electron/renderer.html b/test/unit/electron/renderer.html index 8e9328b443734..e6d13591fc48f 100644 --- a/test/unit/electron/renderer.html +++ b/test/unit/electron/renderer.html @@ -46,6 +46,11 @@ // supported by blink/chromium. To work around this, we generate an import map that maps all node modules // to a blob that exports all properties of the module as named exports and the module itself as default. + // Hoisted to outer scope so we don't reallocate the Set / regex for + // every dependency the import map covers. + const RESERVED_EXPORT_NAMES = new Set(['break', 'case', 'catch', 'class', 'const', 'continue', 'debugger', 'default', 'delete', 'do', 'else', 'enum', 'export', 'extends', 'false', 'finally', 'for', 'function', 'if', 'import', 'in', 'instanceof', 'new', 'null', 'return', 'super', 'switch', 'this', 'throw', 'true', 'try', 'typeof', 'var', 'void', 'while', 'with', 'yield', 'let', 'static', 'implements', 'interface', 'package', 'private', 'protected', 'public', 'await']); + const VALID_IDENTIFIER = /^[A-Za-z_$][A-Za-z0-9_$]*$/; + function asRequireBlobUri(name) { let _mod; try { @@ -62,10 +67,9 @@ // (skip names that are reserved words in ES module syntax, e.g. // zod exports keys like `enum`, `function`, `default` that would // produce `export const enum = ...` and fail to parse) - const RESERVED = new Set(['break', 'case', 'catch', 'class', 'const', 'continue', 'debugger', 'default', 'delete', 'do', 'else', 'enum', 'export', 'extends', 'false', 'finally', 'for', 'function', 'if', 'import', 'in', 'instanceof', 'new', 'null', 'return', 'super', 'switch', 'this', 'throw', 'true', 'try', 'typeof', 'var', 'void', 'while', 'with', 'yield', 'let', 'static', 'implements', 'interface', 'package', 'private', 'protected', 'public', 'await']); let jsSrc = `const _mod = require('${name}')\n`; for (const name of Object.keys(_mod)) { - if (RESERVED.has(name) || !/^[A-Za-z_$][A-Za-z0-9_$]*$/.test(name)) { + if (RESERVED_EXPORT_NAMES.has(name) || !VALID_IDENTIFIER.test(name)) { continue; } jsSrc += `export const ${name} = _mod['${name}'];\n`; From 8efe9205b797c681163db3187437e2dc441b581c Mon Sep 17 00:00:00 2001 From: Tyler Leonhardt Date: Wed, 20 May 2026 21:15:48 -0700 Subject: [PATCH 4/6] claude: load zod via require() so the unit-test bootstrap stays untouched zod's CJS exports include reserved-word keys (enum, default, function, ...) which the renderer's import-map blob generator turns into invalid ESM ('export const enum = ...'). Earlier commits patched renderer.html to skip those keys; that file isn't Phase 10's business so this reverts the renderer change and instead loads zod at runtime via require(). The bogus blob is still generated but never imported, so it never gets parsed. --- .../node/claude/clientTools/claudeJsonSchemaToZod.ts | 11 ++++++++++- .../node/clientTools/claudeJsonSchemaToZod.test.ts | 8 +++++++- test/unit/electron/renderer.html | 11 ----------- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/vs/platform/agentHost/node/claude/clientTools/claudeJsonSchemaToZod.ts b/src/vs/platform/agentHost/node/claude/clientTools/claudeJsonSchemaToZod.ts index e8dd1eb52e0e3..13963fc17d3a4 100644 --- a/src/vs/platform/agentHost/node/claude/clientTools/claudeJsonSchemaToZod.ts +++ b/src/vs/platform/agentHost/node/claude/clientTools/claudeJsonSchemaToZod.ts @@ -3,9 +3,18 @@ * Licensed under the MIT License. See License.txt in the project root for license information. *--------------------------------------------------------------------------------------------*/ -import { z, type ZodTypeAny } from 'zod'; +import type { z as ZodNamespace, ZodTypeAny } from 'zod'; import type { ToolDefinition } from '../../../common/state/protocol/state.js'; +// zod's CJS exports include reserved-word keys (`enum`, `default`, +// `function`, ...). The renderer test bootstrap generates an ESM +// blob per dep that re-exports every key, which fails to parse for +// those names. Use a runtime `require` so the bogus ESM blob is +// never imported — the dependency is still resolved transitively via +// `@anthropic-ai/claude-agent-sdk`. +// eslint-disable-next-line no-restricted-globals +const z = require('zod') as typeof ZodNamespace; + /** * Converts the narrow JSON Schema subset that * {@link ToolDefinition.inputSchema} carries into the `Record` diff --git a/src/vs/platform/agentHost/test/node/clientTools/claudeJsonSchemaToZod.test.ts b/src/vs/platform/agentHost/test/node/clientTools/claudeJsonSchemaToZod.test.ts index 7f2a624903668..61098191d64e3 100644 --- a/src/vs/platform/agentHost/test/node/clientTools/claudeJsonSchemaToZod.test.ts +++ b/src/vs/platform/agentHost/test/node/clientTools/claudeJsonSchemaToZod.test.ts @@ -4,10 +4,16 @@ *--------------------------------------------------------------------------------------------*/ import assert from 'assert'; -import { z } from 'zod'; +import type { z as ZodNamespace } from 'zod'; import { ensureNoDisposablesAreLeakedInTestSuite } from '../../../../../base/test/common/utils.js'; import { jsonSchemaToZodRawShape } from '../../../node/claude/clientTools/claudeJsonSchemaToZod.js'; +// See `claudeJsonSchemaToZod.ts` for why we use `require` instead of +// `import` for zod (CJS reserved-word exports break the renderer's ESM +// import-map blob generator). +// eslint-disable-next-line no-restricted-globals +const z = require('zod') as typeof ZodNamespace; + function parse(shape: Record, value: unknown) { return z.object(shape).safeParse(value); } diff --git a/test/unit/electron/renderer.html b/test/unit/electron/renderer.html index e6d13591fc48f..3af99562e153d 100644 --- a/test/unit/electron/renderer.html +++ b/test/unit/electron/renderer.html @@ -46,11 +46,6 @@ // supported by blink/chromium. To work around this, we generate an import map that maps all node modules // to a blob that exports all properties of the module as named exports and the module itself as default. - // Hoisted to outer scope so we don't reallocate the Set / regex for - // every dependency the import map covers. - const RESERVED_EXPORT_NAMES = new Set(['break', 'case', 'catch', 'class', 'const', 'continue', 'debugger', 'default', 'delete', 'do', 'else', 'enum', 'export', 'extends', 'false', 'finally', 'for', 'function', 'if', 'import', 'in', 'instanceof', 'new', 'null', 'return', 'super', 'switch', 'this', 'throw', 'true', 'try', 'typeof', 'var', 'void', 'while', 'with', 'yield', 'let', 'static', 'implements', 'interface', 'package', 'private', 'protected', 'public', 'await']); - const VALID_IDENTIFIER = /^[A-Za-z_$][A-Za-z0-9_$]*$/; - function asRequireBlobUri(name) { let _mod; try { @@ -64,14 +59,8 @@ } // export all properties and the module itself as default - // (skip names that are reserved words in ES module syntax, e.g. - // zod exports keys like `enum`, `function`, `default` that would - // produce `export const enum = ...` and fail to parse) let jsSrc = `const _mod = require('${name}')\n`; for (const name of Object.keys(_mod)) { - if (RESERVED_EXPORT_NAMES.has(name) || !VALID_IDENTIFIER.test(name)) { - continue; - } jsSrc += `export const ${name} = _mod['${name}'];\n`; } jsSrc += `export default _mod;\n`; From 6c0aa97720267ed8075fdb9ccedd3acc5d66a605 Mon Sep 17 00:00:00 2001 From: Tyler Leonhardt Date: Wed, 20 May 2026 21:43:47 -0700 Subject: [PATCH 5/6] claude: restore normal 'import { z } from "zod"'; fix test renderer to handle reserved-word CJS exports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The renderer test bootstrap generates an ESM blob per dep that re-exports every key (`export const X = _mod[X]`). zod's CJS exports include reserved-word keys (enum, default, function) which produce un-parseable ESM. Skip those keys in the blob generator — generic fix that any future package with reserved-word exports would also need. --- .../node/claude/clientTools/claudeJsonSchemaToZod.ts | 11 +---------- .../node/clientTools/claudeJsonSchemaToZod.test.ts | 8 +------- test/unit/electron/renderer.html | 10 ++++++++++ 3 files changed, 12 insertions(+), 17 deletions(-) diff --git a/src/vs/platform/agentHost/node/claude/clientTools/claudeJsonSchemaToZod.ts b/src/vs/platform/agentHost/node/claude/clientTools/claudeJsonSchemaToZod.ts index 13963fc17d3a4..e8dd1eb52e0e3 100644 --- a/src/vs/platform/agentHost/node/claude/clientTools/claudeJsonSchemaToZod.ts +++ b/src/vs/platform/agentHost/node/claude/clientTools/claudeJsonSchemaToZod.ts @@ -3,18 +3,9 @@ * Licensed under the MIT License. See License.txt in the project root for license information. *--------------------------------------------------------------------------------------------*/ -import type { z as ZodNamespace, ZodTypeAny } from 'zod'; +import { z, type ZodTypeAny } from 'zod'; import type { ToolDefinition } from '../../../common/state/protocol/state.js'; -// zod's CJS exports include reserved-word keys (`enum`, `default`, -// `function`, ...). The renderer test bootstrap generates an ESM -// blob per dep that re-exports every key, which fails to parse for -// those names. Use a runtime `require` so the bogus ESM blob is -// never imported — the dependency is still resolved transitively via -// `@anthropic-ai/claude-agent-sdk`. -// eslint-disable-next-line no-restricted-globals -const z = require('zod') as typeof ZodNamespace; - /** * Converts the narrow JSON Schema subset that * {@link ToolDefinition.inputSchema} carries into the `Record` diff --git a/src/vs/platform/agentHost/test/node/clientTools/claudeJsonSchemaToZod.test.ts b/src/vs/platform/agentHost/test/node/clientTools/claudeJsonSchemaToZod.test.ts index 61098191d64e3..7f2a624903668 100644 --- a/src/vs/platform/agentHost/test/node/clientTools/claudeJsonSchemaToZod.test.ts +++ b/src/vs/platform/agentHost/test/node/clientTools/claudeJsonSchemaToZod.test.ts @@ -4,16 +4,10 @@ *--------------------------------------------------------------------------------------------*/ import assert from 'assert'; -import type { z as ZodNamespace } from 'zod'; +import { z } from 'zod'; import { ensureNoDisposablesAreLeakedInTestSuite } from '../../../../../base/test/common/utils.js'; import { jsonSchemaToZodRawShape } from '../../../node/claude/clientTools/claudeJsonSchemaToZod.js'; -// See `claudeJsonSchemaToZod.ts` for why we use `require` instead of -// `import` for zod (CJS reserved-word exports break the renderer's ESM -// import-map blob generator). -// eslint-disable-next-line no-restricted-globals -const z = require('zod') as typeof ZodNamespace; - function parse(shape: Record, value: unknown) { return z.object(shape).safeParse(value); } diff --git a/test/unit/electron/renderer.html b/test/unit/electron/renderer.html index 3af99562e153d..4654a620b1775 100644 --- a/test/unit/electron/renderer.html +++ b/test/unit/electron/renderer.html @@ -46,6 +46,13 @@ // supported by blink/chromium. To work around this, we generate an import map that maps all node modules // to a blob that exports all properties of the module as named exports and the module itself as default. + // Some CommonJS modules export keys that are reserved words in ES module syntax + // (e.g. zod exports `enum`, `default`, `function`). Generating `export const enum = ...` + // for those would fail to parse, so skip them. Hoisted to outer scope so we don't + // reallocate the Set / regex for every dependency the import map covers. + const RESERVED_EXPORT_NAMES = new Set(['break', 'case', 'catch', 'class', 'const', 'continue', 'debugger', 'default', 'delete', 'do', 'else', 'enum', 'export', 'extends', 'false', 'finally', 'for', 'function', 'if', 'import', 'in', 'instanceof', 'new', 'null', 'return', 'super', 'switch', 'this', 'throw', 'true', 'try', 'typeof', 'var', 'void', 'while', 'with', 'yield', 'let', 'static', 'implements', 'interface', 'package', 'private', 'protected', 'public', 'await']); + const VALID_IDENTIFIER = /^[A-Za-z_$][A-Za-z0-9_$]*$/; + function asRequireBlobUri(name) { let _mod; try { @@ -61,6 +68,9 @@ // export all properties and the module itself as default let jsSrc = `const _mod = require('${name}')\n`; for (const name of Object.keys(_mod)) { + if (RESERVED_EXPORT_NAMES.has(name) || !VALID_IDENTIFIER.test(name)) { + continue; + } jsSrc += `export const ${name} = _mod['${name}'];\n`; } jsSrc += `export default _mod;\n`; From bfa63c008650078cacced9c85473a6a77a7055a3 Mon Sep 17 00:00:00 2001 From: Tyler Leonhardt Date: Wed, 20 May 2026 22:08:46 -0700 Subject: [PATCH 6/6] claude: void _assertBindingsMatchSdk to satisfy CI's noUnusedLocals --- src/vs/platform/agentHost/node/claude/claudeAgentSdkService.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/vs/platform/agentHost/node/claude/claudeAgentSdkService.ts b/src/vs/platform/agentHost/node/claude/claudeAgentSdkService.ts index 8136f6618e69f..3385475d94d05 100644 --- a/src/vs/platform/agentHost/node/claude/claudeAgentSdkService.ts +++ b/src/vs/platform/agentHost/node/claude/claudeAgentSdkService.ts @@ -215,5 +215,6 @@ type AssertBindingsMatchSdk = { // `true`, this assignment fails to compile. Module-local so the runtime // surface stays clean — the only purpose is the type-level assertion. const _assertBindingsMatchSdk: { [K in keyof IClaudeSdkBindings]: true } = null as unknown as AssertBindingsMatchSdk; +void _assertBindingsMatchSdk; // #endregion