diff --git a/.agents/skills/launch/SKILL.md b/.agents/skills/launch/SKILL.md index 77029e0a4e12e..3ae6a5a4ff276 100644 --- a/.agents/skills/launch/SKILL.md +++ b/.agents/skills/launch/SKILL.md @@ -128,6 +128,16 @@ npx @playwright/cli snapshot - The `--agents` flag launches the Agents workbench instead of the standard VS Code workbench. - Set `VSCODE_SKIP_PRELAUNCH=1` to skip the compile step if you've already built. +> **๐Ÿšจ Driving the Agents window UI is not the same as driving regular Code OSS.** +> The session sidebar, agent picker, and chat input have their own quirks that +> trip up the obvious `click + type` patterns. Before you spend time fighting +> the UI, read **[references/agents-window-guide.md](./references/agents-window-guide.md)** +> โ€” it covers the agent picker (Copilot CLI vs Claude), `[Local]` vs non-`[Local]` +> sessions, the reliable keyboard-navigation trick for the agent dropdown, how +> to send a follow-up turn in the same session, how to restore an existing +> chat, and the kill / relaunch / verify-out-is-fresh dance you need after a +> source code change. + ## Launching VS Code Extensions for Debugging To debug a VS Code extension via npx @playwright/cli, launch VS Code Insiders with `--extensionDevelopmentPath` and `--remote-debugging-port`. Use `--user-data-dir` to avoid conflicting with an already-running instance. diff --git a/eslint.config.js b/eslint.config.js index 871c78d8cf4e6..b3be218552e60 100644 --- a/eslint.config.js +++ b/eslint.config.js @@ -1503,6 +1503,7 @@ export default defineConfig( 'when': 'hasNode', 'allow': [ '@github/copilot-sdk', + 'zod', '@microsoft/dev-tunnels-contracts', '@microsoft/dev-tunnels-management', '@parcel/watcher', @@ -1646,7 +1647,9 @@ export default defineConfig( '@vscode/tree-sitter-wasm', // used by agentHost for command auto-approval '@vscode/copilot-api', // used by agentHost for Copilot API requests '@anthropic-ai/sdk', // used by agentHost for Anthropic API requests - '@anthropic-ai/claude-agent-sdk' // used by agentHost for Claude Agent SDK session enumeration / queries + '@anthropic-ai/claude-agent-sdk', // used by agentHost for Claude Agent SDK session enumeration / queries + '@modelcontextprotocol/sdk/**/*', // used by agentHost for Claude client-tool MCP result types (Phase 10) + 'zod' // used by agentHost for Claude client-tool MCP input schemas ] }, { diff --git a/package-lock.json b/package-lock.json index f8e5ed5ab12e1..d66f20bf3ba82 100644 --- a/package-lock.json +++ b/package-lock.json @@ -70,7 +70,8 @@ "vscode-textmate": "^9.3.2", "ws": "^8.19.0", "yauzl": "^3.0.0", - "yazl": "^2.4.3" + "yazl": "^2.4.3", + "zod": "^3.25.76" }, "devDependencies": { "@anthropic-ai/claude-agent-sdk": "0.2.128", diff --git a/package.json b/package.json index 7fe6018e6af23..5c55cb1c91922 100644 --- a/package.json +++ b/package.json @@ -147,7 +147,8 @@ "vscode-textmate": "^9.3.2", "ws": "^8.19.0", "yauzl": "^3.0.0", - "yazl": "^2.4.3" + "yazl": "^2.4.3", + "zod": "^3.25.76" }, "devDependencies": { "@anthropic-ai/claude-agent-sdk": "0.2.128", diff --git a/remote/package-lock.json b/remote/package-lock.json index 0b7a040173ed0..98ec6babcd674 100644 --- a/remote/package-lock.json +++ b/remote/package-lock.json @@ -51,7 +51,8 @@ "vscode-textmate": "^9.3.2", "ws": "^8.19.0", "yauzl": "^3.0.0", - "yazl": "^2.4.3" + "yazl": "^2.4.3", + "zod": "^3.25.76" } }, "node_modules/@github/copilot": { diff --git a/remote/package.json b/remote/package.json index 3fe442ac9aa0d..8d7a7a7f06e4e 100644 --- a/remote/package.json +++ b/remote/package.json @@ -46,7 +46,8 @@ "vscode-textmate": "^9.3.2", "ws": "^8.19.0", "yauzl": "^3.0.0", - "yazl": "^2.4.3" + "yazl": "^2.4.3", + "zod": "^3.25.76" }, "overrides": { "node-gyp-build": "4.8.1", diff --git a/src/vs/platform/agentHost/common/pendingRequestRegistry.ts b/src/vs/platform/agentHost/common/pendingRequestRegistry.ts index 7bb9b577ff4ee..067e50a9c6173 100644 --- a/src/vs/platform/agentHost/common/pendingRequestRegistry.ts +++ b/src/vs/platform/agentHost/common/pendingRequestRegistry.ts @@ -4,6 +4,7 @@ *--------------------------------------------------------------------------------------------*/ import { DeferredPromise } from '../../../base/common/async.js'; +import { CancellationError } from '../../../base/common/errors.js'; /** * Registry of parked deferred promises keyed by string id. Used to @@ -27,6 +28,28 @@ export class PendingRequestRegistry { return deferred.p; } + /** + * Park a deferred under `key` and return its promise. Use when there + * is no synchronous responder to guard against โ€” the request that + * eventually feeds {@link respond} originates from a different code + * path (e.g. an MCP handler invoked by the SDK whose completion + * arrives via a workbench round-trip). + * + * If `key` is already registered (duplicate `tool_use_id` from the + * SDK, retry, or logic bug), the previous deferred is rejected with + * a {@link CancellationError} so its awaiter unwinds instead of + * leaking forever. + */ + register(key: string): Promise { + const existing = this._entries.get(key); + if (existing && !existing.isSettled) { + existing.error(new CancellationError()); + } + const deferred = new DeferredPromise(); + this._entries.set(key, deferred); + return deferred.p; + } + respond(key: string, value: T): boolean { const deferred = this._entries.get(key); if (!deferred) { @@ -37,6 +60,14 @@ export class PendingRequestRegistry { return true; } + /** + * Resolve every parked deferred with `denyValue` and clear the registry. + * + * Designed for the permission-deny path: a "deny" answer is itself a + * successful round-trip result, so awaiting consumers receive `denyValue` + * rather than an error. Use {@link rejectAll} when callers must observe + * a thrown error instead (cancellation, dispose). + */ denyAll(denyValue: T): void { for (const [, deferred] of this._entries) { if (!deferred.isSettled) { @@ -45,4 +76,23 @@ export class PendingRequestRegistry { } this._entries.clear(); } + + /** + * Reject every parked deferred with `error` and clear the registry. + * + * Use this when in-flight requests must be cancelled rather than + * answered (e.g. session dispose, `Query` rebind on tool-set change). + * Compare with {@link denyAll}, which *resolves* every deferred with a + * supplied value โ€” that is right for the permission-deny path where a + * "deny" is itself a successful answer, but wrong for cancellation + * where the awaited consumer must observe an error to unwind. + */ + rejectAll(error: Error): void { + for (const [, deferred] of this._entries) { + if (!deferred.isSettled) { + deferred.error(error); + } + } + this._entries.clear(); + } } diff --git a/src/vs/platform/agentHost/node/claude/claudeAgent.ts b/src/vs/platform/agentHost/node/claude/claudeAgent.ts index 80e3060078b9c..4b02c4b32df77 100644 --- a/src/vs/platform/agentHost/node/claude/claudeAgent.ts +++ b/src/vs/platform/agentHost/node/claude/claudeAgent.ts @@ -4,7 +4,7 @@ *--------------------------------------------------------------------------------------------*/ import type { CCAModel } from '@vscode/copilot-api'; -import type { Options, PermissionMode, SDKSessionInfo, SDKUserMessage } from '@anthropic-ai/claude-agent-sdk'; +import type { Options, SDKSessionInfo, SDKUserMessage } from '@anthropic-ai/claude-agent-sdk'; import { SequencerByKey } from '../../../../base/common/async.js'; import { CancellationToken } from '../../../../base/common/cancellation.js'; import { CancellationError } from '../../../../base/common/errors.js'; @@ -35,10 +35,11 @@ import { mapSessionMessagesToTurns } from './claudeReplayMapper.js'; import { getSubagentTranscript } from './claudeSubagentResolver.js'; import { ClaudeAgentSession } from './claudeAgentSession.js'; import { handleCanUseTool } from './claudeCanUseTool.js'; -import { ClaudeMaterializer, type IClaudeMaterializeProvisional } from './claudeMaterializer.js'; +import { ClaudeMaterializer, type IMaterializeContext } from './claudeMaterializer.js'; import { tryParseClaudeModelId } from './claudeModelId.js'; import { resolvePromptToContentBlocks } from './claudePromptResolver.js'; import { IClaudeProxyHandle, IClaudeProxyService } from './claudeProxyService.js'; +import { readClaudePermissionMode } from './claudeSessionPermissionMode.js'; import { ClaudeSessionMetadataStore, IClaudeSessionOverlay } from './claudeSessionMetadataStore.js'; /** @@ -103,7 +104,10 @@ function toAgentModelInfo(m: CCAModel, provider: AgentProvider): IAgentModelInfo // the closed `ClaudePermissionMode` union now lives in // `../../common/claudeSessionConfigKeys.ts` so it can be shared by // `ClaudeAgent`, `ClaudeSessionMetadataStore`, and any other consumer -// that needs the same narrowing semantics. +// that needs the same narrowing semantics. The live per-session read +// helper lives in `./claudeSessionPermissionMode.ts` so the session +// and materializer can read directly without threading callbacks +// through the agent. /** * Phase 6: in-memory record for a provisional Claude session โ€” one @@ -145,6 +149,22 @@ interface IClaudeProvisionalSession { */ model: ModelSelection | undefined; readonly config: Record | undefined; + /** + * Phase 10: client-provided tool definitions registered via + * {@link ClaudeAgent.setClientTools} before the session materializes. + * Mutable for parity with {@link model} โ€” `setClientTools` writes here + * pre-materialize; the snapshot is transferred into the + * {@link ClaudeAgentSession} at materialize time and flows into + * `Options.mcpServers` for the first SDK startup. + */ + clientTools: readonly ToolDefinition[] | undefined; + /** + * Phase 10: workbench `clientId` paired with the most recent + * {@link clientTools} write. Transferred onto the session's bridge at + * materialize time so the stream mapper can stamp + * `SessionToolCallStart.toolClientId`. + */ + clientId: string | undefined; } /** @@ -261,11 +281,11 @@ export class ClaudeAgent extends Disposable implements IAgent { @IClaudeAgentSdkService private readonly _sdkService: IClaudeAgentSdkService, @IAgentHostGitService private readonly _gitService: IAgentHostGitService, @IAgentConfigurationService private readonly _configurationService: IAgentConfigurationService, - @IInstantiationService instantiationService: IInstantiationService, + @IInstantiationService _instantiationService: IInstantiationService, ) { super(); - this._materializer = instantiationService.createInstance(ClaudeMaterializer); - this._metadataStore = instantiationService.createInstance(ClaudeSessionMetadataStore, this.id); + this._materializer = _instantiationService.createInstance(ClaudeMaterializer); + this._metadataStore = _instantiationService.createInstance(ClaudeSessionMetadataStore, this.id); } // #region Descriptor + auth @@ -424,6 +444,8 @@ export class ClaudeAgent extends Disposable implements IAgent { project, model: config.model, config: config.config, + clientTools: undefined, + clientId: undefined, }); return { @@ -463,7 +485,7 @@ export class ClaudeAgent extends Disposable implements IAgent { // below, so a `SessionConfigChanged` landing mid-materialize can't // produce a split state where the SDK runs under one mode and the // DB records another. - const permissionMode = this._readSessionPermissionMode(provisional.sessionUri) + const permissionMode = readClaudePermissionMode(this._configurationService, provisional.sessionUri) ?? this._resolvePermissionMode(provisional.config); const canUseTool: NonNullable = (toolName, input, options) => @@ -472,25 +494,25 @@ export class ClaudeAgent extends Disposable implements IAgent { sessionId, toolName, input, options, ); - const session = await this._materializer.materialize(provisional, proxyHandle, permissionMode, canUseTool); + const ctx: IMaterializeContext = { + provisional, + proxyHandle, + canUseTool, + permissionModeFallback: permissionMode, + isResume: false, + }; - // Phase 9 โ€” wire the rematerializer hook so abort/crash recovery - // can rebuild the SDK plumbing via resume mode without coupling the - // session to the materializer service. Also seed the bijective state - // cache so a rebuild re-applies the user's last-chosen model/effort - // without losing the picker config. + const session = await this._materializer.materialize(ctx); + + // Phase 9 โ€” seed the bijective state cache so a rebuild re-applies + // the user's last-chosen model/effort without losing the picker + // config. const initialEffort = clampEffortForRuntime(resolveClaudeEffort(provisional.model)); session.seedBijectiveState({ model: provisional.model?.id, effort: initialEffort, permissionMode, }); - session.attachRematerializer(async (_reason) => { - // Re-read the live permissionMode so a SessionConfigChanged that - // arrived since the last materialize is honored on rebuild. - const liveMode = this._readSessionPermissionMode(provisional.sessionUri) ?? permissionMode; - return this._materializer.materializeResume(provisional, proxyHandle, liveMode, canUseTool); - }); // Persist customization-directory metadata BEFORE firing the // materialize event โ€” see plan section 3.4 ordering rationale. @@ -531,6 +553,15 @@ export class ClaudeAgent extends Disposable implements IAgent { // only need to dispose the entry to release everything per-session. const entry = new ClaudeSessionEntry(session); entry.addDisposable(session.onDidSessionProgress(signal => this._onDidSessionProgress.fire(signal))); + // Re-sync the client-tool snapshot in case `setClientTools` landed + // while we were awaiting `_sdkService.startup` / `_metadataStore.write`. + // Those updates wrote to the provisional record because the live + // session was not yet in `_sessions`; the materializer's earlier + // snapshot would otherwise be discarded along with the provisional. + // Idempotent on no-change (the diff's equality check absorbs it). + if (provisional.clientTools !== undefined) { + session.setClientTools(provisional.clientTools, provisional.clientId); + } this._sessions.set(sessionId, entry); this._provisionalSessions.delete(sessionId); @@ -575,7 +606,7 @@ export class ClaudeAgent extends Disposable implements IAgent { } catch (err) { this._logService.warn(`[Claude:${sessionId}] overlay read failed during resume; continuing with defaults`, err); } - const permissionMode = this._readSessionPermissionMode(sessionUri) + const permissionMode = readClaudePermissionMode(this._configurationService, sessionUri) ?? overlay.permissionMode ?? 'default'; // Resolve git project metadata from the resumed cwd, same as @@ -589,14 +620,21 @@ export class ClaudeAgent extends Disposable implements IAgent { this._logService.warn(`[Claude:${sessionId}] project resolution failed during resume; continuing without project`, err); } const abortController = new AbortController(); - const provisional: IClaudeMaterializeProvisional = { + const provisional: IClaudeProvisionalSession = { sessionId, sessionUri, workingDirectory, abortController, project, model: overlay.model, + config: undefined, + clientTools: undefined, + clientId: undefined, }; + // Register the resume provisional so a concurrent `setClientTools` + // has a place to land while we're awaiting `_sdkService.startup`. + // Removed in `finally` after the live session takes its place. + this._provisionalSessions.set(sessionId, provisional); const canUseTool: NonNullable = (toolName, input, options) => handleCanUseTool( @@ -604,7 +642,24 @@ export class ClaudeAgent extends Disposable implements IAgent { sessionId, toolName, input, options, ); - const session = await this._materializer.materialize(provisional, proxyHandle, permissionMode, canUseTool, 'resume'); + // Phase 10 โ€” cross-window resume starts with no client-tool snapshot; + // the workbench re-issues `setClientTools` on active-client re-attach + // and the first sendMessage's diff check rebinds with the new set. + const ctx: IMaterializeContext = { + provisional, + proxyHandle, + canUseTool, + permissionModeFallback: permissionMode, + isResume: true, + }; + + let session: ClaudeAgentSession; + try { + session = await this._materializer.materialize(ctx); + } catch (err) { + this._provisionalSessions.delete(sessionId); + throw err; + } const initialEffort = clampEffortForRuntime(resolveClaudeEffort(overlay.model)); session.seedBijectiveState({ @@ -612,14 +667,16 @@ export class ClaudeAgent extends Disposable implements IAgent { effort: initialEffort, permissionMode, }); - session.attachRematerializer(async (_reason) => { - const liveMode = this._readSessionPermissionMode(sessionUri) ?? permissionMode; - return this._materializer.materializeResume(provisional, proxyHandle, liveMode, canUseTool); - }); const entry = new ClaudeSessionEntry(session); entry.addDisposable(session.onDidSessionProgress(signal => this._onDidSessionProgress.fire(signal))); + // See `_materializeProvisional` โ€” re-sync any `setClientTools` that + // landed on the resume provisional while `startup` was in flight. + if (provisional.clientTools !== undefined) { + session.setClientTools(provisional.clientTools, provisional.clientId); + } this._sessions.set(sessionId, entry); + this._provisionalSessions.delete(sessionId); this._onDidMaterializeSession.fire({ session: sessionUri, @@ -641,20 +698,6 @@ export class ClaudeAgent extends Disposable implements IAgent { return narrowClaudePermissionMode(config?.[ClaudeSessionConfigKey.PermissionMode]) ?? 'default'; } - /** - * Read the live `permissionMode` for a session via - * {@link IAgentConfigurationService.getSessionConfigValues}. Returns - * `undefined` if the session has not been seeded โ€” the caller picks - * the fallback (createSession-time intent at materialize, `'default'` - * at the canUseTool gate). Defends against malformed values that - * slipped past schema validation by returning `undefined`. Called - * on every canUseTool entry so a mid-turn `SessionConfigChanged` - * action wins over the materialize-time seed (plan S3.6). - */ - private _readSessionPermissionMode(sessionUri: URI): PermissionMode | undefined { - return narrowClaudePermissionMode(this._configurationService.getSessionConfigValues(sessionUri.toString())?.[ClaudeSessionConfigKey.PermissionMode]); - } - disposeSession(session: URI): Promise { // Routed through {@link _disposeSequencer} so a concurrent // {@link shutdown} already serializing teardown for this same @@ -935,24 +978,6 @@ export class ClaudeAgent extends Disposable implements IAgent { // existing history rather than minting a fresh one. session = await this._resumeSession(sessionId, sessionUri); } - } else { - // Plan S3.6: forward live `permissionMode` to the bound - // `Query` immediately before yielding the next user message - // so a `SessionConfigChanged` action that arrived between - // turns wins. Awaited so the SDK has acknowledged the mode - // change before `session.send(...)` yields the next prompt. - // - // Only call when the config carries an actual value: - // `_readSessionPermissionMode` returns `undefined` when - // the session's schema hasn't been registered (e.g. a - // cross-window resume that bypassed AgentService's - // schema-registration path), and falling back to - // `'default'` here would silently overwrite the - // session's seeded bijective state with the wrong mode. - const liveMode = this._readSessionPermissionMode(sessionUri); - if (liveMode !== undefined) { - await session.setPermissionMode(liveMode); - } } const contentBlocks = resolvePromptToContentBlocks(prompt, attachments); @@ -1065,17 +1090,38 @@ export class ClaudeAgent extends Disposable implements IAgent { }); } - setClientTools(_session: URI, _clientId: string, _tools: ToolDefinition[]): void { - throw new Error('TODO: Phase 10'); + setClientTools(session: URI, clientId: string, tools: ToolDefinition[]): void { + const sessionId = AgentSession.id(session); + this._logService.info(`[Claude:${sessionId}] setClientTools clientId=${clientId} tools=[${tools.map(t => t.name).join(', ') || '(none)'}]`); + const provisional = this._provisionalSessions.get(sessionId); + if (provisional) { + provisional.clientTools = tools; + provisional.clientId = clientId; + return; + } + const entry = this._sessions.get(sessionId); + if (entry) { + entry.session.setClientTools(tools, clientId); + return; + } + // Unknown id \u2014 silent drop (workbench may have raced a session + // dispose, or the session lives in another window). } - onClientToolCallComplete(_session: URI, _toolCallId: string, _result: ToolCallResult): void { - // Phase 10 โ€” client (MCP) tool completion routing. Until then, every - // SDK-owned tool that completes also fires this hook via - // `AgentSideEffects` listening on `SessionToolCallComplete` envelopes, - // so the body must be a benign no-op rather than throw. Once client - // tools are registered via `setClientTools`, this should resolve the - // matching pending promise on the SDK session. + onClientToolCallComplete(session: URI, toolCallId: string, result: ToolCallResult): void { + // Walk subagent URIs to the root โ€” nested subagents require iterated + // parsing. `_sessions` is keyed by root session ids only. Mirrors + // copilotAgent.ts:947. + let target = session; + let parsed; + while ((parsed = parseSubagentSessionUri(target))) { + target = parsed.parentSession; + } + const sessionId = AgentSession.id(target); + const entry = this._sessions.get(sessionId); + // `AgentSideEffects` forwards every `SessionToolCallComplete` envelope + // (including SDK-owned tools); silent on miss is the expected path. + entry?.session.completeClientToolCall(toolCallId, result); } setClientCustomizations(_session: URI, _clientId: string, _customizations: CustomizationRef[]): Promise { diff --git a/src/vs/platform/agentHost/node/claude/claudeAgentSdkService.ts b/src/vs/platform/agentHost/node/claude/claudeAgentSdkService.ts index 63674bed732f7..3385475d94d05 100644 --- a/src/vs/platform/agentHost/node/claude/claudeAgentSdkService.ts +++ b/src/vs/platform/agentHost/node/claude/claudeAgentSdkService.ts @@ -3,7 +3,8 @@ * Licensed under the MIT License. See License.txt in the project root for license information. *--------------------------------------------------------------------------------------------*/ -import type { GetSessionMessagesOptions, GetSubagentMessagesOptions, ListSessionsOptions, ListSubagentsOptions, Options, SDKSessionInfo, SessionMessage, WarmQuery } from '@anthropic-ai/claude-agent-sdk'; +import type { AnyZodRawShape, GetSessionMessagesOptions, GetSubagentMessagesOptions, InferShape, ListSessionsOptions, ListSubagentsOptions, McpSdkServerConfigWithInstance, Options, SDKSessionInfo, SdkMcpToolDefinition, SessionMessage, WarmQuery } from '@anthropic-ai/claude-agent-sdk'; +import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js'; import * as fs from 'fs'; import { pathToFileURL } from 'url'; import { join, resolve } from '../../../../base/common/path.js'; @@ -14,78 +15,41 @@ import { AgentHostClaudeSdkPathEnvVar } from '../../common/agentService.js'; export const IClaudeAgentSdkService = createDecorator('claudeAgentSdkService'); /** - * Lazy wrapper over `@anthropic-ai/claude-agent-sdk` for the agent host - * Claude provider. The interface grows phase-by-phase; Phase 5 introduces - * the decorator so {@link import('./claudeAgent.js').ClaudeAgent} can take - * it as a constructor dependency. Phase 6 adds {@link startup} for - * materialization. Method surfaces are added in subsequent slices alongside - * the tests that exercise them. + * Pure per-method passthrough shim over `@anthropic-ai/claude-agent-sdk`. + * + * Every method on this interface corresponds 1:1 to a single SDK export. + * The shim owns lazy module loading and the first-failure log-once + * convention; it does NOT compose, wrap, or add behavior on top of the + * SDK's surface. Higher-level orchestration (e.g. building the in-process + * client-tool MCP server) lives in dedicated modules that depend on this + * interface for the raw bindings. */ export interface IClaudeAgentSdkService { readonly _serviceBrand: undefined; - /** - * Enumerates persisted Claude sessions surfaced by the SDK's filesystem - * scan. Phase 5 mirrors `IAgent.listSessions()` (no `dir` parameter): - * the host translates this internally to `sdk.listSessions(undefined)`. - * - * Failures (corrupt module, postinstall mishap) reject with the SDK - * loader's diagnostic. Callers MUST tolerate rejection without - * collapsing the wider listing pipeline. - */ listSessions(): Promise; - - /** - * Looks up a single session's metadata by id. Resolves to `undefined` - * when the SDK has no record of it (deleted from disk, never created, - * or just outside the searched project tree). Used by - * {@link import('./claudeAgent.js').ClaudeAgent.getSessionMetadata} - * to compose SDK-supplied fields (summary, cwd, timestamps) with the - * per-session DB overlay. Phase 6.1 / Cycle D4. - */ getSessionInfo(sessionId: string): Promise; - - /** - * Pre-warms the SDK subprocess and runs the init handshake. Returns - * a {@link WarmQuery} whose `.query(promptIterable)` binds the - * prompt iterable and returns a streaming `Query`. Aborting - * `options.abortController` either rejects this promise (if init is - * in flight) or causes the resulting Query to clean up resources - * (sdk.d.ts section `startup`). - * - * Phase 6 calls this from {@link ClaudeAgent._materializeProvisional} - * on the first `sendMessage`. Firing `onDidMaterializeSession` is - * deliberately deferred until after the await resolves so AgentService - * can atomically dispatch the deferred `sessionAdded` notification. - */ startup(params: { options: Options; initializeTimeoutMs?: number }): Promise; - - /** - * Reads a session's full transcript from disk via the SDK. Out-of-process: - * no live `Query` is required โ€” the SDK parses the JSONL file directly. - * Phase 13 calls this from {@link import('./claudeAgent.js').ClaudeAgent.getSessionMessages} - * with `{ includeSystemMessages: true }` so `compact_boundary` and other - * allowlisted system subtypes survive into the replay mapper. - */ getSessionMessages(sessionId: string, options?: GetSessionMessagesOptions): Promise; - - /** - * Lists subagent ids the SDK has indexed for a given session. The SDK - * returns ids in directory-readdir order (alphabetical in practice โ€” - * NOT invocation order) so callers MUST NOT rely on positional - * correlation. Phase 12 uses this to enumerate child transcripts for - * the live + replay subagent URI ingress. - */ listSubagents(sessionId: string, options?: ListSubagentsOptions): Promise; - - /** - * Reads a single subagent's transcript from disk via the SDK. Mirrors - * {@link getSessionMessages} for the child session: out-of-process, - * SDK parses the per-agent JSONL directly. Phase 12 consumes this from - * {@link import('./claudeSubagentResolver.js').getSubagentTranscript} - * when the requested URI carries an `agentId` discriminator. - */ getSubagentMessages(sessionId: string, agentId: string, options?: GetSubagentMessagesOptions): Promise; + + createSdkMcpServer(options: { + name: string; + version?: string; + // SDK signature: `tools?: Array>`. The `any` + // here is required to match the SDK's own erased generic and to allow + // callers to pass an array of tools whose schemas differ from each other. + // eslint-disable-next-line @typescript-eslint/no-explicit-any + tools?: Array>; + }): Promise; + + tool( + name: string, + description: string, + inputSchema: Schema, + handler: (args: InferShape, extra: unknown) => Promise + ): Promise>; } /** @@ -103,18 +67,20 @@ export interface IClaudeSdkBindings { getSessionMessages(sessionId: string, options?: GetSessionMessagesOptions): Promise; listSubagents(sessionId: string, options?: ListSubagentsOptions): Promise; getSubagentMessages(sessionId: string, agentId: string, options?: GetSubagentMessagesOptions): Promise; + createSdkMcpServer(options: { + name: string; + version?: string; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + tools?: Array>; + }): McpSdkServerConfigWithInstance; + tool( + name: string, + description: string, + inputSchema: Schema, + handler: (args: InferShape, extra: unknown) => Promise + ): SdkMcpToolDefinition; } -/** - * Production implementation. The SDK module is loaded lazily via dynamic - * `import()` because it pulls in non-trivial deps that aren't relevant - * unless the user has opted into the Claude agent. - * - * The loader's caching / log-once-on-failure semantics are locked by the - * dedicated test in {@link import('../../test/node/claudeAgent.test.ts')}, - * which subclasses this and overrides {@link _loadSdk} to fault on demand. - * That's why {@link _loadSdk} is `protected` rather than `private`. - */ export class ClaudeAgentSdkService implements IClaudeAgentSdkService { declare readonly _serviceBrand: undefined; @@ -122,14 +88,12 @@ export class ClaudeAgentSdkService implements IClaudeAgentSdkService { * Cached resolved bindings. We deliberately cache the *resolved* value, * not the in-flight promise โ€” if a transient `import()` failure recovers * (e.g. user fixes a broken `node_modules`), the next call retries. - * Mirrors the convention in `agentHostTerminalManager.ts` for `node-pty`. */ private _sdkModule: IClaudeSdkBindings | undefined; /** * Latched once we've logged a load failure, so a corrupt postinstall - * doesn't flood `error` events on every `listSessions()` call (each - * workbench refresh and session-list rerender hits this path). + * doesn't flood `error` events on every `listSessions()` call. */ private _firstLoadFailureLogged = false; @@ -167,6 +131,26 @@ export class ClaudeAgentSdkService implements IClaudeAgentSdkService { return sdk.getSubagentMessages(sessionId, agentId, options); } + async createSdkMcpServer(options: { + name: string; + version?: string; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + tools?: Array>; + }): Promise { + const sdk = await this._getSdk(); + return sdk.createSdkMcpServer(options); + } + + async tool( + name: string, + description: string, + inputSchema: Schema, + handler: (args: InferShape, extra: unknown) => Promise + ): Promise> { + const sdk = await this._getSdk(); + return sdk.tool(name, description, inputSchema, handler); + } + private async _getSdk(): Promise { if (this._sdkModule) { return this._sdkModule; @@ -208,3 +192,29 @@ export class ClaudeAgentSdkService implements IClaudeAgentSdkService { return import(pathToFileURL(entry).href); } } + +// #region Compile-time SDK drift detection +// +// Enforce that every method on IClaudeSdkBindings names a real export of +// `@anthropic-ai/claude-agent-sdk` and is assignable from that export's +// type. If the SDK renames or changes the signature of any of these +// exports, the `_assertBindingsMatchSdk` assignment below stops type- +// checking and the build fails โ€” flagging that the shim needs updating. + +type SdkModule = typeof import('@anthropic-ai/claude-agent-sdk'); + +type AssertBindingsMatchSdk = { + [K in keyof IClaudeSdkBindings]: K extends keyof SdkModule + ? SdkModule[K] extends IClaudeSdkBindings[K] + ? true + : ['SDK export signature drifted from IClaudeSdkBindings', K, SdkModule[K], IClaudeSdkBindings[K]] + : ['Not an export of @anthropic-ai/claude-agent-sdk', K]; +}; + +// Forces the mapped type above to be eagerly checked: if any entry is not +// `true`, this assignment fails to compile. Module-local so the runtime +// surface stays clean โ€” the only purpose is the type-level assertion. +const _assertBindingsMatchSdk: { [K in keyof IClaudeSdkBindings]: true } = null as unknown as AssertBindingsMatchSdk; +void _assertBindingsMatchSdk; + +// #endregion diff --git a/src/vs/platform/agentHost/node/claude/claudeAgentSession.ts b/src/vs/platform/agentHost/node/claude/claudeAgentSession.ts index abe93b5cff121..d3a03ebed7a44 100644 --- a/src/vs/platform/agentHost/node/claude/claudeAgentSession.ts +++ b/src/vs/platform/agentHost/node/claude/claudeAgentSession.ts @@ -4,16 +4,24 @@ *--------------------------------------------------------------------------------------------*/ import type { PermissionMode, SDKUserMessage, WarmQuery } from '@anthropic-ai/claude-agent-sdk'; +import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js'; +import { CancellationError } from '../../../../base/common/errors.js'; import { Emitter, Event } from '../../../../base/common/event.js'; import { Disposable, IReference } from '../../../../base/common/lifecycle.js'; import { URI } from '../../../../base/common/uri.js'; import { IInstantiationService } from '../../../instantiation/common/instantiation.js'; +import { IAgentConfigurationService } from '../agentConfigurationService.js'; +import { ClaudePermissionMode } from '../../common/claudeSessionConfigKeys.js'; import { ClaudeRuntimeEffortLevel } from '../../common/claudeModelConfig.js'; import { AgentSignal } from '../../common/agentService.js'; import { PendingRequestRegistry } from '../../common/pendingRequestRegistry.js'; import { ISessionDatabase } from '../../common/sessionDataService.js'; import { ActionType } from '../../common/state/sessionActions.js'; -import { PendingMessage, SessionInputAnswer, SessionInputRequest, SessionInputResponseKind, ToolCallPendingConfirmationState } from '../../common/state/protocol/state.js'; +import { PendingMessage, SessionInputAnswer, SessionInputRequest, SessionInputResponseKind, ToolCallPendingConfirmationState, type ToolDefinition } from '../../common/state/protocol/state.js'; +import type { ToolCallResult } from '../../common/state/sessionState.js'; +import { convertToolCallResult } from './clientTools/claudeClientToolResult.js'; +import { readClaudePermissionMode } from './claudeSessionPermissionMode.js'; +import { SessionClientToolsDiff } from './clientTools/claudeSessionClientToolsModel.js'; import { resolvePromptToContentBlocks } from './claudePromptResolver.js'; import { ClaudeSdkPipeline, IRematerializer } from './claudeSdkPipeline.js'; import { SubagentRegistry } from './claudeSubagentRegistry.js'; @@ -59,6 +67,17 @@ export class ClaudeAgentSession extends Disposable { */ private readonly _pendingUserInputs = new PendingRequestRegistry<{ response: SessionInputResponseKind; answers?: Record }>(); + /** + * Phase 10 โ€” owns the workbench-registered client-tool snapshot + * (via {@link SessionClientToolsDiff.model}) plus the + * "changed since last successful build" dirty bit. Read by the + * agent's sendMessage diff check; used by the materialize / + * rematerializer flow to pin the SDK build against a specific + * snapshot. See {@link SessionClientToolsDiff} for the C6 race + * semantics this collaborator enforces. + */ + readonly toolDiff: SessionClientToolsDiff; + private readonly _onDidSessionProgress = this._register(new Emitter()); readonly onDidSessionProgress: Event = this._onDidSessionProgress.event; @@ -69,11 +88,16 @@ export class ClaudeAgentSession extends Disposable { warm: WarmQuery, abortController: AbortController, dbRef: IReference, + private readonly _pendingClientToolCalls: PendingRequestRegistry, + toolDiff: SessionClientToolsDiff, + private readonly _permissionModeFallback: ClaudePermissionMode, @IInstantiationService instantiationService: IInstantiationService, + @IAgentConfigurationService private readonly _configurationService: IAgentConfigurationService, ) { super(); + this.toolDiff = this._register(toolDiff); this._pipeline = this._register(instantiationService.createInstance( - ClaudeSdkPipeline, sessionId, sessionUri, warm, abortController, dbRef, this.subagents, + ClaudeSdkPipeline, sessionId, sessionUri, warm, abortController, dbRef, this.subagents, toolDiff.model.state.get().clientId, )); this._register(this._pipeline.onDidProduceSignal(s => this._onDidSessionProgress.fire(s))); } @@ -95,14 +119,42 @@ export class ClaudeAgentSession extends Disposable { } /** - * Send a user prompt. Model / effort are not threaded through here - * โ€” the pipeline's current model / effort (set eagerly via - * {@link queueModelChange}) is whatever the SDK has been told. + * Send a user prompt. Performs the per-turn pre-flight before + * yielding to the pipeline: + * + * - If {@link toolDiff} reports the workbench client-tool snapshot has + * diverged from what the live `Query` was started with, yield-restart + * so the SDK picks up the new `Options.mcpServers`. The rebind itself + * re-applies the live `permissionMode` via the rematerializer. + * - Otherwise forward the live `permissionMode` to the bound `Query` so + * a `SessionConfigChanged` action that arrived between turns wins. + * The pipeline's bijective cache dedupes a no-op `setPermissionMode`, + * so this is free when nothing changed. + * + * Model / effort are not threaded through here โ€” the pipeline's current + * model / effort (set eagerly via {@link queueModelChange}) is whatever + * the SDK has been told. */ - send(prompt: SDKUserMessage, turnId: string): Promise { + async send(prompt: SDKUserMessage, turnId: string): Promise { + if (this.toolDiff.hasDifference) { + await this.rebindForClientTools(); + } else { + await this._pipeline.setPermissionMode(this._currentPermissionMode()); + } return this._pipeline.send(prompt, turnId); } + /** + * Live `permissionMode` for this session: reads from + * {@link IAgentConfigurationService} and falls back to the snapshot + * captured at materialize time when the live read is `undefined` + * (e.g. cross-window resume before the workbench has registered the + * session's schema). + */ + private _currentPermissionMode(): ClaudePermissionMode { + return readClaudePermissionMode(this._configurationService, this.sessionUri) ?? this._permissionModeFallback; + } + /** * Cancel the in-flight SDK turn. Mirrors the production reference; * see {@link ClaudeSdkPipeline.abort}. Also denies any parked @@ -234,11 +286,47 @@ export class ClaudeAgentSession extends Disposable { // #endregion + // #region Phase 10 โ€” client tools + + /** Replace the registered client tools snapshot. */ + setClientTools(tools: readonly ToolDefinition[], clientId?: string): void { + this.toolDiff.model.setTools(tools, clientId); + this._pipeline.setClientId(this.toolDiff.model.state.get().clientId); + } + + /** + * Resolve a parked client-tool MCP handler with the workbench-supplied + * result. Returns `true` if a matching deferred was found and settled. + * Unknown ids are a benign no-op โ€” `agentSideEffects.ts` forwards every + * `SessionToolCallComplete` envelope, so SDK-owned tool completions land + * here too and must NOT throw. + */ + completeClientToolCall(toolCallId: string, result: ToolCallResult): boolean { + const converted = convertToolCallResult(result, toolCallId); + return this._pendingClientToolCalls.respond(toolCallId, converted); + } + + /** + * Drive a yield-restart so the SDK picks up the new client-tool set on + * its next user request. Cancels any in-flight client-tool MCP handlers + * and resets the bridge state before swapping the {@link Query}; the + * agent's rematerializer rebuilds `Options.mcpServers` from + * {@link toolDiff} during the rebind and pins `applied` to the + * build-time snapshot via {@link SessionClientToolsDiff.build}. + */ + async rebindForClientTools(): Promise { + this._pendingClientToolCalls.rejectAll(new CancellationError()); + await this._pipeline.rebindForRestart(); + } + + // #endregion + override dispose(): void { // Resolve parked deferreds before tearing the pipeline down so the // SDK's canUseTool callback unwinds with a deny and the loop exits. this._pendingPermissions.denyAll(false); this._pendingUserInputs.denyAll({ response: SessionInputResponseKind.Cancel }); + this._pendingClientToolCalls.rejectAll(new CancellationError()); super.dispose(); } } diff --git a/src/vs/platform/agentHost/node/claude/claudeMapSessionEvents.ts b/src/vs/platform/agentHost/node/claude/claudeMapSessionEvents.ts index cfa23b80a5f4e..33c5c91606a6d 100644 --- a/src/vs/platform/agentHost/node/claude/claudeMapSessionEvents.ts +++ b/src/vs/platform/agentHost/node/claude/claudeMapSessionEvents.ts @@ -11,6 +11,7 @@ import { ActionType } from '../../common/state/sessionActions.js'; import { ResponsePartKind, ToolResultContentType, type ToolResultContent, type ToolResultFileEditContent } from '../../common/state/sessionState.js'; import { buildTopLevelSubagentReadyAction, emitInnerAssistantSignals, mapSubagentSystemMessage, SUBAGENT_SPAWNING_TOOL_NAMES, tagWithParent } from './claudeSubagentSignals.js'; import type { SubagentRegistry } from './claudeSubagentRegistry.js'; +import { stripClientToolNamePrefix, hasClientToolNamePrefix } from './clientTools/claudeClientToolMcpServer.js'; import { buildClaudeToolMeta, getClaudePastTenseMessage, getClaudeToolDisplayName } from './claudeToolDisplay.js'; import { ClaudeToolCallRegistry } from './claudeToolCallRegistry.js'; import { ToolCallConfirmationReason, type StringOrMarkdown } from '../../common/state/protocol/state.js'; @@ -217,6 +218,7 @@ export function mapSDKMessageToAgentSignals( state: ClaudeMapperState, logService: ILogService, registry: SubagentRegistry, + clientId?: string, ): AgentSignal[] { if (logService.getLevel() <= LogLevel.Trace) { try { @@ -229,7 +231,7 @@ export function mapSDKMessageToAgentSignals( switch (message.type) { case 'stream_event': return tagWithParent( - mapStreamEvent(message.event, session, turnId, state, logService, message.parent_tool_use_id, registry), + mapStreamEvent(message.event, session, turnId, state, logService, message.parent_tool_use_id, registry, clientId), session, message.parent_tool_use_id, registry, @@ -458,6 +460,7 @@ function mapStreamEvent( logService: ILogService, parentToolUseId: string | null, registry: SubagentRegistry, + clientId: string | undefined, ): AgentSignal[] { switch (event.type) { case 'message_start': @@ -497,14 +500,29 @@ function mapStreamEvent( }]; } if (block.type === 'tool_use') { - state.startToolBlock(event.index, block.id, block.name, turnId); + // Phase 10 โ€” strip the SDK's `mcp____` prefix for + // our in-process client-tool MCP server. The SDK surfaces + // in-process MCP tools to the model with that prefix, but + // the workbench's registered client-tool list (and the + // MCP handler's closure) use the unprefixed name. Without + // normalizing at the seam, `SessionToolCallReady` / + // `SessionToolCallComplete` would carry the prefixed name + // and the workbench would never recognize them as client + // tools. SDK-owned tools (Read, Write, Bash, etc.) and + // subagent spawn tools pass through unchanged because + // they don't carry the prefix. + const toolName = stripClientToolNamePrefix(block.name); + const isClientTool = hasClientToolNamePrefix(block.name); + state.startToolBlock(event.index, block.id, toolName, turnId); // Phase 12 โ€” subagent correlation bookkeeping. Either this // tool_use is at the top level and (if Task/Agent) spawns a // new subagent, or it is inner and we record its edge to the // parent. They are mutually exclusive (a Task call inside a // subagent is itself an inner tool_use; the resolver chain // handles nested spawns by following the parent chain). - const isSubagentSpawn = SUBAGENT_SPAWNING_TOOL_NAMES.has(block.name); + // Gated on `!isClientTool` so a workbench tool named `Task` / + // `Agent` cannot impersonate the SDK's subagent-spawn tools. + const isSubagentSpawn = !isClientTool && SUBAGENT_SPAWNING_TOOL_NAMES.has(toolName); if (parentToolUseId === null) { if (isSubagentSpawn) { registry.recordSpawn(block.id); @@ -518,7 +536,8 @@ function mapStreamEvent( // state transitions (D6). Subagent meta from Phase 12 is now // produced by `buildClaudeToolMeta` because // `getClaudeToolKind('Task') === 'subagent'`. - const meta = buildClaudeToolMeta(block.name); + const meta = buildClaudeToolMeta(toolName); + const toolClientId = isClientTool ? clientId : undefined; return [{ kind: 'action', session, @@ -526,8 +545,9 @@ function mapStreamEvent( type: ActionType.SessionToolCallStart, turnId, toolCallId: block.id, - toolName: block.name, - displayName: getClaudeToolDisplayName(block.name), + toolName, + displayName: getClaudeToolDisplayName(toolName), + ...(toolClientId ? { toolClientId } : {}), ...(meta ? { _meta: meta } : {}), }, }]; @@ -588,14 +608,6 @@ function mapStreamEvent( if (!tracked) { return []; } - // Phase 8.5 โ€” emit `SessionToolCallReady` so the tool transitions - // out of `Streaming` even when the Claude SDK auto-allows without - // calling `canUseTool` (no `claudeCanUseTool` round-trip means - // `sessionPermissions` never emits Ready, the state stays in - // `Streaming`, and the subsequent `SessionToolCallComplete` is - // dropped by the reducer โ€” leaving the tool widget empty). - // When `canUseTool` DOES fire, `sessionPermissions` emits a second - // Ready that re-transitions Running โ†’ PendingConfirmation as needed. const entry = state.toolCalls.lookup(tracked.toolUseId); const info = entry?.info; if (!info) { diff --git a/src/vs/platform/agentHost/node/claude/claudeMaterializer.ts b/src/vs/platform/agentHost/node/claude/claudeMaterializer.ts index cb7a4ea23cece..1f3a24fa19e18 100644 --- a/src/vs/platform/agentHost/node/claude/claudeMaterializer.ts +++ b/src/vs/platform/agentHost/node/claude/claudeMaterializer.ts @@ -3,21 +3,27 @@ * Licensed under the MIT License. See License.txt in the project root for license information. *--------------------------------------------------------------------------------------------*/ -import type { Options, WarmQuery } from '@anthropic-ai/claude-agent-sdk'; +import type { McpSdkServerConfigWithInstance, Options, WarmQuery } from '@anthropic-ai/claude-agent-sdk'; +import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js'; import { CancellationError } from '../../../../base/common/errors.js'; import { delimiter, dirname } from '../../../../base/common/path.js'; import { URI } from '../../../../base/common/uri.js'; import { rgDiskPath } from '../../../../base/node/ripgrep.js'; import { IInstantiationService } from '../../../instantiation/common/instantiation.js'; import { ILogService } from '../../../log/common/log.js'; +import { IAgentConfigurationService } from '../agentConfigurationService.js'; import { ClaudePermissionMode } from '../../common/claudeSessionConfigKeys.js'; import { resolveClaudeEffort } from '../../common/claudeModelConfig.js'; +import { PendingRequestRegistry } from '../../common/pendingRequestRegistry.js'; import { ISessionDataService } from '../../common/sessionDataService.js'; -import type { ModelSelection } from '../../common/state/protocol/state.js'; +import type { ModelSelection, ToolDefinition } from '../../common/state/protocol/state.js'; import { IAgentSessionProjectInfo } from '../../common/agentService.js'; import { IClaudeAgentSdkService } from './claudeAgentSdkService.js'; import { ClaudeAgentSession } from './claudeAgentSession.js'; +import { buildClientToolMcpServer } from './clientTools/claudeClientToolMcpServer.js'; import { IClaudeProxyHandle } from './claudeProxyService.js'; +import { readClaudePermissionMode } from './claudeSessionPermissionMode.js'; +import { SessionClientToolsDiff } from './clientTools/claudeSessionClientToolsModel.js'; /** * In-memory record for a provisional Claude session passed into @@ -32,6 +38,40 @@ export interface IClaudeMaterializeProvisional { readonly abortController: AbortController; readonly project: IAgentSessionProjectInfo | undefined; readonly model: ModelSelection | undefined; + /** + * Phase 10 โ€” the workbench-registered client-tool snapshot at + * materialize time (if any). The materializer seeds the session's + * {@link SessionClientToolsDiff.model} from this and uses it to build + * the SDK's initial `Options.mcpServers`. + */ + readonly clientTools?: readonly ToolDefinition[]; + /** + * Phase 10 โ€” the workbench `clientId` paired with {@link clientTools}. + * Used by the stream mapper to stamp `SessionToolCallStart.toolClientId`. + */ + readonly clientId?: string; +} + +/** + * Per-session bundle the agent hands to {@link ClaudeMaterializer.materialize}. + * Everything the materializer needs to bring a session up AND to rebind + * it on yield-restart โ€” the materializer attaches the rematerializer + * hook internally using this bundle, so the agent does not own any + * SDK / client-tool plumbing. + */ +export interface IMaterializeContext { + readonly provisional: IClaudeMaterializeProvisional; + readonly proxyHandle: IClaudeProxyHandle; + readonly canUseTool: NonNullable; + /** + * Fallback permission mode used when the live read from + * {@link IAgentConfigurationService} returns `undefined` (e.g. the + * session's schema has not been registered yet). The materializer + * reads the live value at materialize / rebind and falls back to + * this value so the SDK never silently downgrades to `'default'`. + */ + readonly permissionModeFallback: ClaudePermissionMode; + readonly isResume: boolean; } /** @@ -63,26 +103,32 @@ export class ClaudeMaterializer { @IClaudeAgentSdkService private readonly _sdkService: IClaudeAgentSdkService, @ISessionDataService private readonly _sessionDataService: ISessionDataService, @IInstantiationService private readonly _instantiationService: IInstantiationService, + @IAgentConfigurationService private readonly _configurationService: IAgentConfigurationService, ) { } - async materialize( - provisional: IClaudeMaterializeProvisional, - proxyHandle: IClaudeProxyHandle, - permissionMode: ClaudePermissionMode, - canUseTool: NonNullable, - startMode: 'fresh' | 'resume' = 'fresh', - ): Promise { + async materialize(ctx: IMaterializeContext): Promise { + const { provisional, proxyHandle, canUseTool, isResume, permissionModeFallback } = ctx; if (!provisional.workingDirectory) { throw new Error(`Cannot materialize Claude session ${provisional.sessionId}: workingDirectory is required`); } - const isResume = startMode === 'resume'; - const options = await this._buildOptions(provisional, proxyHandle, permissionMode, canUseTool, isResume); + // Phase 10 โ€” per-session client-tool plumbing. The MCP server's + // `tool()` handler closures capture the SAME registry + diff that + // the eventually-owned session holds, so the closures park on the + // session's live registry (council finding C1). + const pendingClientToolCalls = new PendingRequestRegistry(); + const toolDiff = new SessionClientToolsDiff(); + toolDiff.model.setTools(provisional.clientTools, provisional.clientId); + + const permissionMode = readClaudePermissionMode(this._configurationService, provisional.sessionUri) ?? permissionModeFallback; + const initialMcpServers = await this._buildClientMcpServers(toolDiff, pendingClientToolCalls); + + const options = await this._buildOptions(provisional, proxyHandle, permissionMode, canUseTool, isResume, initialMcpServers); // Trace what the SDK gets so live debugging doesn't have to infer // from the absence of a `fileEdit` block whether the edit-tracking // plumbing was wired this session. - this._logService.info(`[Claude] session ${provisional.sessionId}: enableFileCheckpointing=${options.enableFileCheckpointing} startMode=${startMode}`); + this._logService.info(`[Claude] session ${provisional.sessionId}: enableFileCheckpointing=${options.enableFileCheckpointing} isResume=${isResume}`); const warm = await this._sdkService.startup({ options }); @@ -101,8 +147,9 @@ export class ClaudeMaterializer { // the session, which disposes the ref ahead of its `WarmQuery` // abort so any in-flight write completes. const dbRef = this._sessionDataService.openDatabase(provisional.sessionUri); + let session: ClaudeAgentSession; try { - return this._instantiationService.createInstance( + session = this._instantiationService.createInstance( ClaudeAgentSession, provisional.sessionId, provisional.sessionUri, @@ -110,6 +157,9 @@ export class ClaudeMaterializer { warm, provisional.abortController, dbRef, + pendingClientToolCalls, + toolDiff, + permissionModeFallback, ); } catch (err) { // Construction failed โ€” own the cleanup so no resource leaks. @@ -117,6 +167,27 @@ export class ClaudeMaterializer { await warm[Symbol.asyncDispose](); throw err; } + + // Phase 9 โ€” wire the rematerializer so the session can rebind the + // SDK on yield-restart (e.g. after a client-tool snapshot change). + // The closure captures the ctx so `getPermissionMode` is re-read on + // each rebind and any concurrent `SessionConfigChanged` wins. + session.attachRematerializer(async (_reason) => { + const liveMode = readClaudePermissionMode(this._configurationService, provisional.sessionUri) ?? permissionModeFallback; + try { + const mcpServers = await this._buildClientMcpServers(session.toolDiff, pendingClientToolCalls); + return await this._materializeResume(provisional, proxyHandle, liveMode, canUseTool, mcpServers); + } catch (err) { + // The client-tool diff was consumed by `_buildClientMcpServers`, + // but the rebind never reached a live SDK. Re-mark dirty so the + // next send retries with the same snapshot instead of silently + // running on the stale server set. + session.toolDiff.markDirty(); + throw err; + } + }); + + return session; } /** @@ -134,18 +205,19 @@ export class ClaudeMaterializer { * the field via `this`, so swapping `_abortController` post-rebind is * sufficient). */ - async materializeResume( + private async _materializeResume( provisional: IClaudeMaterializeProvisional, proxyHandle: IClaudeProxyHandle, permissionMode: ClaudePermissionMode, canUseTool: NonNullable, + mcpServers: Record | undefined, ): Promise<{ readonly warm: WarmQuery; readonly abortController: AbortController }> { if (!provisional.workingDirectory) { throw new Error(`Cannot materialize Claude session ${provisional.sessionId}: workingDirectory is required`); } const abortController = new AbortController(); const resumedProvisional: IClaudeMaterializeProvisional = { ...provisional, abortController }; - const options = await this._buildOptions(resumedProvisional, proxyHandle, permissionMode, canUseTool, true); + const options = await this._buildOptions(resumedProvisional, proxyHandle, permissionMode, canUseTool, true, mcpServers); this._logService.info(`[Claude] session ${provisional.sessionId}: resume rebuild`); const warm = await this._sdkService.startup({ options }); return { warm, abortController }; @@ -157,6 +229,7 @@ export class ClaudeMaterializer { permissionMode: ClaudePermissionMode, canUseTool: NonNullable, isResume: boolean, + mcpServers: Record | undefined, ): Promise { const subprocessEnv = buildSubprocessEnv(); // Settings env: forwarded to the Claude subprocess via the SDK's @@ -224,12 +297,33 @@ export class ClaudeMaterializer { ...(isResume ? { resume: provisional.sessionId } : { sessionId: provisional.sessionId }), + ...(mcpServers ? { mcpServers } : {}), settingSources: ['user', 'project', 'local'], settings: { env: settingsEnv }, systemPrompt: { type: 'preset', preset: 'claude_code' }, stderr: data => this._logService.error(`[Claude SDK stderr] ${data}`), }; } + + /** + * Phase 10 โ€” consume the diff (clears its dirty bit) and build the + * in-process MCP server config from the resulting tool snapshot. + * Resolves to `undefined` when the snapshot is empty so + * `Options.mcpServers` is omitted entirely and the SDK keeps its + * default. If the build throws the diff is re-marked dirty so the + * next sendMessage retries (C6). + */ + private async _buildClientMcpServers( + toolDiff: SessionClientToolsDiff, + registry: PendingRequestRegistry, + ): Promise | undefined> { + const { tools } = toolDiff.consume(); + if (!tools || tools.length === 0) { + return undefined; + } + const server = await buildClientToolMcpServer(tools, id => registry.register(id), this._sdkService); + return { client: server }; + } } /** diff --git a/src/vs/platform/agentHost/node/claude/claudeReplayMapper.ts b/src/vs/platform/agentHost/node/claude/claudeReplayMapper.ts index 4e2a625495dd0..7b6dbef8ecc3e 100644 --- a/src/vs/platform/agentHost/node/claude/claudeReplayMapper.ts +++ b/src/vs/platform/agentHost/node/claude/claudeReplayMapper.ts @@ -22,6 +22,7 @@ import { } from '../../common/state/protocol/state.js'; import { buildSubagentSessionUri } from '../../common/state/sessionState.js'; import { buildClaudeToolMeta, getClaudeInvocationMessage, getClaudePastTenseMessage, getClaudeToolDisplayName, getClaudeToolInputString } from './claudeToolDisplay.js'; +import { stripClientToolNamePrefix } from './clientTools/claudeClientToolMcpServer.js'; /** * Phase 13 โ€” replay mapper. Reduces a flat `SessionMessage[]` (the SDK's @@ -248,7 +249,11 @@ class ReplayBuilder { content: block.thinking, }); } else if (block.type === 'tool_use' && typeof block.id === 'string' && typeof block.name === 'string') { - this._openToolUse(block.id, block.name, block.input); + // Strip the in-process MCP server prefix so the workbench resolves + // the workbench-registered tool by its unprefixed name (matches the + // live stream mapper). Without this, replayed client-tool calls + // fall back to the generic "Run MCP tool" rendering. + this._openToolUse(block.id, stripClientToolNamePrefix(block.name), block.input); } // Other block types (server_tool_use, etc.) are dropped silently per M7. } diff --git a/src/vs/platform/agentHost/node/claude/claudeSdkMessageRouter.ts b/src/vs/platform/agentHost/node/claude/claudeSdkMessageRouter.ts index cd13914cd1c47..450c96a4835bb 100644 --- a/src/vs/platform/agentHost/node/claude/claudeSdkMessageRouter.ts +++ b/src/vs/platform/agentHost/node/claude/claudeSdkMessageRouter.ts @@ -36,19 +36,27 @@ export class ClaudeSdkMessageRouter extends Disposable { private readonly _editObserver: ClaudeFileEditObserver; private readonly _mapperState = new ClaudeMapperState(); + private _clientId: string | undefined; + constructor( private readonly _sessionUri: URI, dbRef: IReference, private readonly _subagents: SubagentRegistry, + clientId: string | undefined = undefined, @IInstantiationService instantiationService: IInstantiationService, @ILogService private readonly _logService: ILogService, ) { super(); + this._clientId = clientId; this._editObserver = this._register( instantiationService.createInstance(ClaudeFileEditObserver, _sessionUri.toString(), dbRef), ); } + setClientId(clientId: string | undefined): void { + this._clientId = clientId; + } + async handle(message: SDKMessage, turnId: string | undefined): Promise { if (message.type === 'assistant') { this._editObserver.observeAssistant(message); @@ -66,6 +74,7 @@ export class ClaudeSdkMessageRouter extends Disposable { this._mapperState, this._logService, this._subagents, + this._clientId, ); for (const signal of signals) { this._onDidProduceSignal.fire(signal); diff --git a/src/vs/platform/agentHost/node/claude/claudeSdkPipeline.ts b/src/vs/platform/agentHost/node/claude/claudeSdkPipeline.ts index f94f59c51ee19..be1b9d32109e5 100644 --- a/src/vs/platform/agentHost/node/claude/claudeSdkPipeline.ts +++ b/src/vs/platform/agentHost/node/claude/claudeSdkPipeline.ts @@ -111,6 +111,7 @@ export class ClaudeSdkPipeline extends Disposable { abortController: AbortController, dbRef: IReference, subagents: SubagentRegistry, + clientId: string | undefined = undefined, @IInstantiationService instantiationService: IInstantiationService, @ILogService private readonly _logService: ILogService, ) { @@ -129,7 +130,7 @@ export class ClaudeSdkPipeline extends Disposable { }), )); this._router = this._register(instantiationService.createInstance( - ClaudeSdkMessageRouter, sessionUri, dbRef, subagents, + ClaudeSdkMessageRouter, sessionUri, dbRef, subagents, clientId, )); this._register(this._router.onDidProduceSignal(s => this._onDidProduceSignal.fire(s))); // Dispose chain โ†’ abort โ†’ SDK cleanup. Reads the *current* @@ -145,6 +146,26 @@ export class ClaudeSdkPipeline extends Disposable { get isAborted(): boolean { return this._abortController.signal.aborted; } + /** + * Phase 10 \u2014 narrow public wrapper around the internal + * {@link _rebindQuery} so {@link ClaudeAgentSession.rebindForClientTools} + * can drive a yield-restart without exposing the private rebind + * machinery to every collaborator. + */ + rebindForRestart(): Promise { + return this._rebindQuery('restart'); + } + + /** + * Phase 10 โ€” update the workbench `clientId` that the stream mapper + * stamps onto subsequent `SessionToolCallStart` events. Called by the + * session whenever {@link SessionClientToolsModel} receives a new + * clientId via `setClientTools`. + */ + setClientId(clientId: string | undefined): void { + this._router.setClientId(clientId); + } + /** Attach the rematerializer hook for abort / crash recovery. Optional โ€” tests that exercise only the dispose path skip this. */ attachRematerializer(rematerializer: IRematerializer): void { this._rematerializer = rematerializer; diff --git a/src/vs/platform/agentHost/node/claude/claudeSessionPermissionMode.ts b/src/vs/platform/agentHost/node/claude/claudeSessionPermissionMode.ts new file mode 100644 index 0000000000000..aef5cfc1a8895 --- /dev/null +++ b/src/vs/platform/agentHost/node/claude/claudeSessionPermissionMode.ts @@ -0,0 +1,30 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import type { PermissionMode } from '@anthropic-ai/claude-agent-sdk'; +import type { URI } from '../../../../base/common/uri.js'; +import { ClaudeSessionConfigKey, narrowClaudePermissionMode } from '../../common/claudeSessionConfigKeys.js'; +import type { IAgentConfigurationService } from '../agentConfigurationService.js'; + +/** + * Read the live `permissionMode` for a session from + * {@link IAgentConfigurationService}, narrowed to the SDK's + * `PermissionMode` union. Returns `undefined` when the session's + * schema hasn't been registered or carries a value that slipped past + * schema validation โ€” callers pick the fallback (the createSession-time + * intent at materialize, `'default'` at the canUseTool gate, etc.). + * + * Called on every canUseTool entry, on every rebind, and before each + * `session.send` so a mid-turn `SessionConfigChanged` action wins over + * the materialize-time seed (plan S3.6). + */ +export function readClaudePermissionMode( + configurationService: IAgentConfigurationService, + sessionUri: URI, +): PermissionMode | undefined { + return narrowClaudePermissionMode( + configurationService.getSessionConfigValues(sessionUri.toString())?.[ClaudeSessionConfigKey.PermissionMode], + ); +} diff --git a/src/vs/platform/agentHost/node/claude/claudeSubagentSignals.ts b/src/vs/platform/agentHost/node/claude/claudeSubagentSignals.ts index 77f317ce8569d..4203de6d9c78b 100644 --- a/src/vs/platform/agentHost/node/claude/claudeSubagentSignals.ts +++ b/src/vs/platform/agentHost/node/claude/claudeSubagentSignals.ts @@ -11,6 +11,7 @@ import { ResponsePartKind, ToolCallConfirmationReason } from '../../common/state import type { ClaudeMapperState } from './claudeMapSessionEvents.js'; import { SUBAGENT_TOOL_NAMES, type SubagentRegistry } from './claudeSubagentRegistry.js'; import { buildClaudeToolMeta, getClaudeInvocationMessage, getClaudeToolDisplayName, getClaudeToolInputString } from './claudeToolDisplay.js'; +import { stripClientToolNamePrefix } from './clientTools/claudeClientToolMcpServer.js'; /** * Phase 12 โ€” SDK tool names that spawn subagent sessions. Re-exported @@ -232,7 +233,11 @@ export function emitInnerAssistantSignals( continue; } if (block.type === 'tool_use') { - state.startToolBlock(index, block.id, block.name, turnId); + // Strip the in-process MCP server prefix so subagent client-tool + // calls render with their real name (matches the top-level stream + // mapper). SDK-owned tools and Task/Agent passes through unchanged. + const toolName = stripClientToolNamePrefix(block.name); + state.startToolBlock(index, block.id, toolName, turnId); // Inner tool input arrives pre-parsed on the synthesized // `assistant` message (not via `input_json_delta` chunks), so // seed the registry directly. Without this the live @@ -241,9 +246,9 @@ export function emitInnerAssistantSignals( // always computes rich text) drifts from live โ€” violating D6. state.toolCalls.seedParsedInput(block.id, block.input); registry.noteInnerTool(block.id, parentToolUseId); - const displayName = getClaudeToolDisplayName(block.name); - const meta = buildClaudeToolMeta(block.name); - const toolInputStr = getClaudeToolInputString(block.name, block.input); + const displayName = getClaudeToolDisplayName(toolName); + const meta = buildClaudeToolMeta(toolName); + const toolInputStr = getClaudeToolInputString(toolName, block.input); signals.push({ kind: 'action', session, @@ -251,7 +256,7 @@ export function emitInnerAssistantSignals( type: ActionType.SessionToolCallStart, turnId, toolCallId: block.id, - toolName: block.name, + toolName, displayName, ...(meta ? { _meta: meta } : {}), }, @@ -263,7 +268,7 @@ export function emitInnerAssistantSignals( type: ActionType.SessionToolCallReady, turnId, toolCallId: block.id, - invocationMessage: getClaudeInvocationMessage(block.name, displayName, block.input), + invocationMessage: getClaudeInvocationMessage(toolName, displayName, block.input), ...(toolInputStr !== undefined ? { toolInput: toolInputStr } : {}), confirmed: ToolCallConfirmationReason.NotNeeded, }, diff --git a/src/vs/platform/agentHost/node/claude/clientTools/claudeClientToolMcpServer.ts b/src/vs/platform/agentHost/node/claude/clientTools/claudeClientToolMcpServer.ts new file mode 100644 index 0000000000000..3a3476917fbd5 --- /dev/null +++ b/src/vs/platform/agentHost/node/claude/clientTools/claudeClientToolMcpServer.ts @@ -0,0 +1,112 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import type { McpSdkServerConfigWithInstance } from '@anthropic-ai/claude-agent-sdk'; +import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js'; +import type { ToolDefinition } from '../../../common/state/protocol/state.js'; +import type { IClaudeAgentSdkService } from '../claudeAgentSdkService.js'; +import { jsonSchemaToZodRawShape } from './claudeJsonSchemaToZod.js'; + +/** + * Anthropic SDK contract: the in-process MCP `tool()` handler receives a + * `RequestHandlerExtra` whose `_meta` carries the originating + * `tool_use_id` under this namespaced key. Verified empirically against + * `@anthropic-ai/claude-agent-sdk@0.2.128`. If a future SDK version drops + * or renames this field, the handler returns an error result instead of + * silently deadlocking โ€” see {@link extractToolUseId}. + */ +const TOOL_USE_ID_META_KEY = 'claudecode/toolUseId'; + +/** + * Build the per-session in-process MCP server that surfaces the workbench + * client's {@link ToolDefinition}s to the Claude SDK via + * `Options.mcpServers['client']`. + * + * Each tool's handler reads the originating `tool_use_id` from the SDK's + * `extra._meta["claudecode/toolUseId"]` and delegates to `awaitResult`, + * which is expected to return a promise that settles when the workbench + * echoes a completion (typically via a parked deferred owned by the + * session). Keeping that plumbing behind a callback lets this factory + * stay ignorant of how the host tracks in-flight tool calls. + * + * Pure factory โ€” no SDK loading, no I/O. + */ +export async function buildClientToolMcpServer( + snapshot: readonly ToolDefinition[], + awaitResult: (toolUseId: string) => Promise, + sdk: IClaudeAgentSdkService +): Promise { + const tools = await Promise.all(snapshot.map(def => sdk.tool( + def.name, + def.description ?? '', + jsonSchemaToZodRawShape(def.inputSchema), + async (_args, extra) => { + const toolUseId = extractToolUseId(extra); + if (toolUseId === undefined) { + return { + content: [{ + type: 'text', + text: `Client tool "${def.name}" could not run: SDK omitted tool_use_id (expected at extra._meta["${TOOL_USE_ID_META_KEY}"]).`, + }], + isError: true, + }; + } + return awaitResult(toolUseId); + } + ))); + return sdk.createSdkMcpServer({ name: CLAUDE_CLIENT_MCP_SERVER_NAME, tools }); +} + +/** + * Recover the SDK-supplied `tool_use_id` from the MCP `tool()` handler's + * `extra` argument. Returns `undefined` (and the handler degrades to an + * error result) if the SDK ever drops the meta field โ€” preferable to + * deadlocking the call. + */ +export function extractToolUseId(extra: unknown): string | undefined { + if (!extra || typeof extra !== 'object') { + return undefined; + } + const meta = (extra as { _meta?: unknown })._meta; + if (!meta || typeof meta !== 'object') { + return undefined; + } + const value = (meta as Record)[TOOL_USE_ID_META_KEY]; + return typeof value === 'string' ? value : undefined; +} + +/** + * Name of the single in-process MCP server we register client tools on. + * Exported so the stream mapper can strip the SDK's `mcp____` name + * prefix before stamping `SessionToolCallStart.toolClientId`. + */ +export const CLAUDE_CLIENT_MCP_SERVER_NAME = 'client'; + +/** + * Per the Anthropic SDK's MCP server naming convention, an in-process MCP + * server registered as `Options.mcpServers[]` surfaces its + * tools to the model with names of the form `mcp____`. + * The stream mapper must strip the prefix before the workbench (which + * registered tools by their unprefixed `ToolDefinition.name`) can + * recognize them as client tools. + * + * Returns the input unchanged when it doesn't carry the prefix (SDK-owned + * tools, subagent spawn tools, etc.) so non-client-tool calls flow + * through without interference. + */ +export function stripClientToolNamePrefix(toolName: string): string { + const prefix = `mcp__${CLAUDE_CLIENT_MCP_SERVER_NAME}__`; + return toolName.startsWith(prefix) ? toolName.slice(prefix.length) : toolName; +} + +/** + * Whether the SDK-emitted tool name carries the in-process MCP server + * prefix, i.e. the tool is one of the workbench's client-provided tools. + * Used by the stream mapper to set `SessionToolCallStart.toolClientId` so + * the workbench takes the client-tool invocation branch. + */ +export function hasClientToolNamePrefix(toolName: string): boolean { + return toolName.startsWith(`mcp__${CLAUDE_CLIENT_MCP_SERVER_NAME}__`); +} diff --git a/src/vs/platform/agentHost/node/claude/clientTools/claudeClientToolResult.ts b/src/vs/platform/agentHost/node/claude/clientTools/claudeClientToolResult.ts new file mode 100644 index 0000000000000..0ef905879d417 --- /dev/null +++ b/src/vs/platform/agentHost/node/claude/clientTools/claudeClientToolResult.ts @@ -0,0 +1,83 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js'; +import { ToolResultContentType, type ToolCallResult, type ToolResultContent, type ToolResultEmbeddedResourceContent } from '../../../common/state/protocol/channels-session/state.js'; + +/** + * Stable URI scheme used when an `EmbeddedResource` content block must be + * shipped to the MCP server as the `{ type: 'resource', resource: { uri, mimeType, blob } }` + * shape. The MCP zod schema rejects resource blocks without a URI, and the + * protocol's `{ data, contentType }` carries no URI of its own โ€” so we mint + * an ephemeral, per-call URI of the form + * `claude-client:///`. This URI is opaque to both sides; + * the model never dereferences it. + */ +const CLAUDE_CLIENT_RESOURCE_SCHEME = 'claude-client'; + +/** + * Convert a protocol {@link ToolCallResult} into the MCP {@link CallToolResult} + * shape the Anthropic SDK feeds back to the model. Mostly 1:1 mapping with + * one shape transform: protocol `EmbeddedResource { data, contentType }` โ†’ + * MCP `{ type: 'image', ... }` for `image/*`, or + * `{ type: 'resource', resource: { uri, mimeType, blob } }` for everything + * else (synthesized URI per the scheme above). + * + * Unknown content block kinds collapse to a synthesized text block + a + * console warn โ€” bounded blast radius (one bad call, not a crash). + * + * @param toolUseId The SDK `tool_use_id` for this call. Embedded into the + * synthesized resource URI so the same call's blocks stay disambiguated + * across parallel tool calls. + */ +export function convertToolCallResult(result: ToolCallResult, toolUseId: string): CallToolResult { + const blocks = result.content ?? []; + const content = blocks.map((block, index) => convertBlock(block, toolUseId, index)); + const out: CallToolResult = { content }; + if (result.structuredContent !== undefined) { + out.structuredContent = result.structuredContent; + } + if (!result.success || result.error) { + out.isError = true; + } + return out; +} + +function convertBlock(block: ToolResultContent, toolUseId: string, index: number): CallToolResult['content'][number] { + switch (block.type) { + case ToolResultContentType.Text: + return { type: 'text', text: block.text }; + case ToolResultContentType.EmbeddedResource: + return convertEmbeddedResource(block, toolUseId, index); + default: { + // Unknown / unsupported block (Resource, FileEdit, Terminal, Subagent). + // MCP doesn't model these client-tool-result shapes natively, so we + // degrade to a stringified text block to keep the call observable. + console.warn(`[Claude] convertToolCallResult: unsupported tool-result block kind '${(block as ToolResultContent).type}'; degrading to text`); + let text: string; + try { + text = JSON.stringify(block); + } catch { + text = `[unserializable ${(block as ToolResultContent).type} block]`; + } + return { type: 'text', text }; + } + } +} + +function convertEmbeddedResource( + block: ToolResultEmbeddedResourceContent, + toolUseId: string, + index: number +): CallToolResult['content'][number] { + if (block.contentType.startsWith('image/')) { + return { type: 'image', data: block.data, mimeType: block.contentType }; + } + const uri = `${CLAUDE_CLIENT_RESOURCE_SCHEME}://${encodeURIComponent(toolUseId)}/${index}`; + return { + type: 'resource', + resource: { uri, mimeType: block.contentType, blob: block.data }, + }; +} diff --git a/src/vs/platform/agentHost/node/claude/clientTools/claudeJsonSchemaToZod.ts b/src/vs/platform/agentHost/node/claude/clientTools/claudeJsonSchemaToZod.ts new file mode 100644 index 0000000000000..e8dd1eb52e0e3 --- /dev/null +++ b/src/vs/platform/agentHost/node/claude/clientTools/claudeJsonSchemaToZod.ts @@ -0,0 +1,135 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { z, type ZodTypeAny } from 'zod'; +import type { ToolDefinition } from '../../../common/state/protocol/state.js'; + +/** + * Converts the narrow JSON Schema subset that + * {@link ToolDefinition.inputSchema} carries into the `Record` + * "raw shape" expected by the Anthropic SDK's `tool()` factory + * (`AnyZodRawShape`). + * + * Per-property failures fall back to {@link z.any} so a single malformed + * property never rejects an entire tool โ€” clients always get *some* tool + * surface to bind against. + */ +export function jsonSchemaToZodRawShape( + inputSchema: ToolDefinition['inputSchema'] | undefined +): Record { + if (!inputSchema || inputSchema.type !== 'object' || !inputSchema.properties) { + return {}; + } + const required = new Set(inputSchema.required ?? []); + const shape: Record = {}; + for (const [name, raw] of Object.entries(inputSchema.properties)) { + let zodType: ZodTypeAny; + try { + zodType = jsonPropertyToZod(raw as JsonSchemaProperty); + } catch { + zodType = z.any(); + } + if (!required.has(name)) { + zodType = zodType.optional(); + } + shape[name] = zodType; + } + return shape; +} + +interface JsonSchemaProperty { + type?: string | string[]; + description?: string; + default?: unknown; + nullable?: boolean; + enum?: unknown[]; + oneOf?: JsonSchemaProperty[]; + anyOf?: JsonSchemaProperty[]; + items?: JsonSchemaProperty; + properties?: Record; + required?: string[]; +} + +function jsonPropertyToZod(prop: JsonSchemaProperty): ZodTypeAny { + if (!prop || typeof prop !== 'object') { + return z.any(); + } + + let base: ZodTypeAny; + + if (Array.isArray(prop.enum) && prop.enum.length > 0) { + const literals = prop.enum.filter(v => + typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean' || v === null + ) as (string | number | boolean | null)[]; + if (literals.length === 0) { + base = z.any(); + } else if (literals.length === 1) { + base = z.literal(literals[0] as Exclude); + } else { + base = z.union(literals.map(v => z.literal(v as Exclude)) as unknown as [ZodTypeAny, ZodTypeAny, ...ZodTypeAny[]]); + } + } else if (Array.isArray(prop.oneOf) && prop.oneOf.length > 0) { + base = unionOf(prop.oneOf); + } else if (Array.isArray(prop.anyOf) && prop.anyOf.length > 0) { + base = unionOf(prop.anyOf); + } else { + const type = Array.isArray(prop.type) ? prop.type[0] : prop.type; + switch (type) { + case 'string': + base = z.string(); + break; + case 'number': + base = z.number(); + break; + case 'integer': + base = z.number().int(); + break; + case 'boolean': + base = z.boolean(); + break; + case 'array': + base = z.array(prop.items ? jsonPropertyToZod(prop.items) : z.any()); + break; + case 'object': { + const sub: Record = {}; + const subRequired = new Set(prop.required ?? []); + for (const [n, p] of Object.entries(prop.properties ?? {})) { + let t: ZodTypeAny; + try { t = jsonPropertyToZod(p); } catch { t = z.any(); } + if (!subRequired.has(n)) { t = t.optional(); } + sub[n] = t; + } + base = z.object(sub); + break; + } + case 'null': + base = z.null(); + break; + default: + base = z.any(); + } + } + + if (prop.nullable) { + base = base.nullable(); + } + if (prop.description) { + base = base.describe(prop.description); + } + if (prop.default !== undefined) { + base = base.default(prop.default as never); + } + return base; +} + +function unionOf(schemas: JsonSchemaProperty[]): ZodTypeAny { + const variants = schemas.map(s => { + try { return jsonPropertyToZod(s); } catch { return z.any(); } + }); + if (variants.length === 1) { + return variants[0]; + } + return z.union(variants as [ZodTypeAny, ZodTypeAny, ...ZodTypeAny[]]); +} diff --git a/src/vs/platform/agentHost/node/claude/clientTools/claudeSessionClientToolsModel.ts b/src/vs/platform/agentHost/node/claude/clientTools/claudeSessionClientToolsModel.ts new file mode 100644 index 0000000000000..0527deb299101 --- /dev/null +++ b/src/vs/platform/agentHost/node/claude/clientTools/claudeSessionClientToolsModel.ts @@ -0,0 +1,154 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { Disposable } from '../../../../../base/common/lifecycle.js'; +import { equals } from '../../../../../base/common/objects.js'; +import { autorun, IObservable, ISettableObservable, observableValueOpts } from '../../../../../base/common/observable.js'; +import type { ToolDefinition } from '../../../common/state/protocol/state.js'; + +/** + * Combined snapshot of the workbench-registered client-tool definitions + * and the workbench `clientId` that owns them. The two travel as one + * value so consumers can read both with a single `.get()` and so that an + * update to either field is observed as a single change. + */ +export interface ISessionClientToolsState { + readonly tools: readonly ToolDefinition[] | undefined; + readonly clientId: string | undefined; +} + +const INITIAL_STATE: ISessionClientToolsState = { tools: undefined, clientId: undefined }; + +/** + * Pure state holder for the workbench-registered client-tool snapshot + * and the workbench `clientId` that owns it. Exposes the pair as a + * single {@link IObservable} so consumers can react to changes without + * polling. + * + * The `state` observable dedupes structurally-equivalent writes: tool + * snapshots compare on `name + description + inputSchema` + * (order-insensitive, `undefined` equivalent to `[]`); `clientId` compares strictly. + * A re-send of the same `(tools, clientId)` pair therefore does NOT fire + * downstream subscribers. + * + * Knows nothing about diffing or the SDK โ€” pair with + * {@link SessionClientToolsDiff} to track "has the snapshot changed + * since the last successful SDK build". + */ +export class SessionClientToolsModel { + + private readonly _state: ISettableObservable = observableValueOpts( + { owner: this, equalsFn: stateEqual }, + INITIAL_STATE, + ); + readonly state: IObservable = this._state; + + setTools(tools: readonly ToolDefinition[] | undefined, clientId?: string): void { + const current = this._state.get(); + this._state.set({ + tools, + clientId: clientId ?? current.clientId, + }, undefined); + } +} + +/** + * Tracks "has {@link SessionClientToolsModel.state} changed since the + * last successful {@link build}?". Subscribes to the model's observable + * and flips a private dirty bit on every change; {@link build} captures + * the current snapshot, hands it to the supplied builder, and clears + * the bit on success โ€” preserving the C6 pin invariant from the + * previous `ClientToolDiff` implementation: a `setTools` call that + * races the in-flight builder re-flips the bit via the autorun, so the + * next sendMessage detects the stale set and triggers another rebind. + * + * On builder throw the bit is left set โ€” the SDK is still running with + * the previous snapshot, so the next sendMessage should retry. + */ +export class SessionClientToolsDiff extends Disposable { + + readonly model: SessionClientToolsModel = new SessionClientToolsModel(); + + private _dirty = false; + // `autorun` invokes its callback once at registration for dependency + // tracking. Skip that initial run so a brand-new diff doesn't report + // dirty before any `setTools` has happened. + private _ignoreNextFire = true; + + constructor() { + super(); + this._register(autorun(reader => { + this.model.state.read(reader); + if (this._ignoreNextFire) { + this._ignoreNextFire = false; + return; + } + this._dirty = true; + })); + } + + get hasDifference(): boolean { + return this._dirty; + } + + /** + * Read the current state and mark it as the applied snapshot. A + * subsequent {@link SessionClientToolsModel.setTools} re-flips dirty + * via the autorun, so callers do NOT need to compare snapshots + * themselves to detect a race. If the caller's downstream work + * (e.g. SDK rebuild) fails, call {@link markDirty} to surface the + * stale state so the next sendMessage retries. + */ + consume(): ISessionClientToolsState { + const state = this.model.state.get(); + this._dirty = false; + return state; + } + + /** + * Force the dirty bit on. Use when a caller's async work that + * followed {@link consume} failed and the SDK is therefore still on + * the previous snapshot. + */ + markDirty(): void { + this._dirty = true; + } +} + +function stateEqual(a: ISessionClientToolsState, b: ISessionClientToolsState): boolean { + return a.clientId === b.clientId && snapshotsEqual(a.tools, b.tools); +} + +/** + * Deep-equal two client-tool snapshots on `name + description + inputSchema`. + * `undefined` and `[]` compare equal. Order-insensitive. + */ +function snapshotsEqual( + a: readonly ToolDefinition[] | undefined, + b: readonly ToolDefinition[] | undefined +): boolean { + const aa = a ?? []; + const bb = b ?? []; + if (aa.length !== bb.length) { + return false; + } + const byName = new Map(); + for (const t of aa) { + byName.set(t.name, t); + } + for (const t of bb) { + const prev = byName.get(t.name); + if (!prev) { + return false; + } + if (prev.description !== t.description) { + return false; + } + if (!equals(prev.inputSchema, t.inputSchema)) { + return false; + } + } + return true; +} diff --git a/src/vs/platform/agentHost/node/claude/phase10-plan.md b/src/vs/platform/agentHost/node/claude/phase10-plan.md new file mode 100644 index 0000000000000..f8727ca243e8e --- /dev/null +++ b/src/vs/platform/agentHost/node/claude/phase10-plan.md @@ -0,0 +1,834 @@ +# Phase 10 โ€” Client-provided tools (in-process MCP) + +> Generated by super-planner. Source: `roadmap.md` (Phase 10). +> Last updated: 2026-05-19 after second council review + post-refactor grilling (Q8โ€“Q12 resolved: SDK service wrap, rebind ordering, ephemeral URI, fake-only tests, provisional mutability). + +**Status:** โœ… **DONE** (steps 1โ€“10 landed; council review + post-rebase fixes for the materialize / resume `setClientTools` race and rebind-failure dirty-bit gap; 1833 agentHost tests passing; E2E verified โ€” session resume + persisted `client__*` MCP tool roundtrips visible in the launched Agents window) + +## Goal + +Let a workbench client register custom tools on a Claude session via +`IAgent.setClientTools(session, clientId, tools)`, surface them through the +SDK's `Options.mcpServers` in-process MCP path, invoke them when the model +calls them, and route the result back through +`IAgent.onClientToolCallComplete(session, toolCallId, result)` โ€” matching the +behavior the Copilot agent already ships. + +## Scope + +**In scope** +- `setClientTools` / `onClientToolCallComplete` real implementations on + `ClaudeAgent` (replace the Phase-10 stubs at + [claudeAgent.ts:1068, 1072](src/vs/platform/agentHost/node/claude/claudeAgent.ts)). +- A `clientTools: readonly ToolDefinition[] | undefined` field on the + per-session objects (`IClaudeProvisionalSession` pre-materialize, + `ClaudeAgentSession` post-materialize), with the provisional value + transferred at materialize time โ€” same pattern `model` and + `permissionMode` already follow. No parallel Map keyed by sessionId. +- A JSON Schema โ†’ Zod raw-shape converter for `ToolDefinition.inputSchema`, + scoped to the protocol's actual subset. +- Building one in-process `createSdkMcpServer({ name: 'client', tools })` at + every materialize / resume / yield-restart, written into `Options.mcpServers` + via `claudeMaterializer._buildOptions`. +- Deferred-promise plumbing on `ClaudeAgentSession` for each in-flight MCP + tool call, keyed by SDK `tool_use_id`. Reuses the existing + [`PendingRequestRegistry`](src/vs/platform/agentHost/common/pendingRequestRegistry.ts) + primitive (already wired for Phase 7 permission round-trips) plus a new + `ClaudeToolUseIdBridge` that resolves `(toolName, args) โ†’ tool_use_id` + from Phase 7's `tool_use` stream events โ€” necessary because the + Anthropic SDK's `tool()` handler signature is `(args, extra: unknown)` + and does NOT pass `tool_use_id` (verified against sdk.mjs). +- Tool-set diff (deep-equal on `name + description + inputSchema`) checked in + `sendMessage`'s materialized-session branch; mismatch โ†’ yield-restart via + the existing `_rebindQuery('restart')` path in `claudeSdkPipeline`. +- `onClientToolCallComplete` subagent routing: `parseSubagentSessionUri` โ†’ + walk to root โ†’ look up registry on the root session id (mirrors + [copilotAgent.ts:943-954](src/vs/platform/agentHost/node/copilot/copilotAgent.ts)). +- Result type conversion: protocol `ToolCallResult` โ†’ MCP `CallToolResult` + (`content`, `structuredContent`, `isError`). +- Test coverage in `claudeAgent.test.ts`: registration โ†’ invocation โ†’ + completion round-trip, subagent routing, tool-diff restart, unknown + toolCallId is a benign no-op. + +**Out of scope** +- `Query.setMcpServers` runtime hot-swap (external HTTP MCP servers) โ€” that's + **Phase 11** territory. Phase 10 only writes `Options.mcpServers` at startup + and uses yield-restart for the in-process tool path. +- MCP gateway (`_gateway` / `_gatewayIdleTimeout`) lifecycle. **Roadmap + correction (see Decisions):** the production extension's `_gateway` field is + the VS Code `McpGateway` adapter exposing external editor MCP servers over + HTTP โ€” it has nothing to do with in-process tools built from + `createSdkMcpServer`. There are no external resources to idle out here. +- `setClientCustomizations` / `setCustomizationEnabled` (Phase 11). +- `reloadPlugins` (Phase 11; orthogonal to client tools per roadmap). +- Workbench-side rendering changes for client-tool cards. + +## Prerequisites + +- Phase 9 yield-restart machinery shipped (`_rebindQuery('restart')` in + [claudeSdkPipeline.ts:342-399](src/vs/platform/agentHost/node/claude/claudeSdkPipeline.ts)). +- Phase 12 subagent URI helpers shipped (`parseSubagentSessionUri` is already + imported by `claudeAgent.ts`). +- Phase 13 resume / cross-window resume path shipped (`_resumeSession` is the + second integration point for `Options.mcpServers`, alongside + `_materializeProvisional`). +- `zod` is available at the platform layer (it's a root dep). + +## Approach + +Store client-tool state on the per-session objects that already existโ€”the +provisional record (`IClaudeProvisionalSession.clientTools`) and +`ClaudeAgentSession.clientTools`โ€”rather than introducing a parallel +Map-keyed-by-sessionId on `ClaudeAgent`. `setClientTools(session, clientId, +tools)` writes to whichever exists; at materialize, the provisional's +`clientTools` flow into the new `ClaudeAgentSession`. + +**Construction order (council finding C1)**: the in-process MCP server's +`tool()` handler closures capture the session's `_pendingClientToolCalls` +and `_toolUseIdBridge`. These two collaborators are therefore constructed +on the agent side BEFORE materialize and passed INTO the materializer +alongside `clientTools`, so the server's handler closures bind to the +same instances the eventually-constructed `ClaudeAgentSession` will own. +The materializer hands them to the session at construction time. This +avoids the order trap of "server built first, session created second, +closures captured the wrong objects." + +The session owns: +- `clientTools: readonly ToolDefinition[] | undefined` (mutable via + `setClientTools(tools)`). +- `appliedClientToolSnapshot: readonly ToolDefinition[] | undefined` + (stale-detection for restart-required tool-set changes). +- `_pendingClientToolCalls: PendingRequestRegistry` + (deferred handlers). +- `_toolUseIdBridge: ClaudeToolUseIdBridge` (rendezvous between Phase 7's + `tool_use` stream events and the MCP handler). + +At every `materialize` / `resume` / yield-restart, the agent builds a +fresh `Options.mcpServers` from `session.clientTools` (or +`provisional.clientTools` pre-materialize) and passes it through the +materializer. Tool-set changes between turns trigger +`_rebindQuery('restart')` at the sendMessage yield boundary; never +`Query.setMcpServers`. Cleanup is automatic via +`ClaudeSessionEntry.dispose()`. + +## Steps + +1. โœ“ **JSON Schema โ†’ Zod raw-shape converter.** + - Files: `src/vs/platform/agentHost/node/claude/claudeJsonSchemaToZod.ts` (new), + `src/vs/platform/agentHost/test/node/claudeJsonSchemaToZod.test.ts` (new). + - Depends on: none. + - Scope: convert `{ type:'object', properties?: Record, required?: string[] }` + into a `Record` (the SDK's `AnyZodRawShape`). Handle + `string` / `number` / `boolean` / `integer` / `array` / nested `object` + / enum / oneOf, plus `description`, `default`, `nullable`. Unknown + property schemas fall back per-property to `z.any()` โ€” never reject the + whole tool. `required` controls `.optional()` on each property. + - Done when: unit tests cover the supported subset + the fallback path, + and a malformed schema produces a benign `z.any()` tool rather than + throwing. + +2. โœ“ **Per-session state + extracted public helpers.** + - Files: + `src/vs/platform/agentHost/node/claude/claudeAgent.ts` (add + `clientTools?: readonly ToolDefinition[]` to + `IClaudeProvisionalSession`), + `src/vs/platform/agentHost/node/claude/claudeClientToolHelpers.ts` (new โ€” + `snapshotEquals(a, b): boolean` deep-equal on `name + description + inputSchema`, + mirroring [copilotAgent.ts:2073-2087](src/vs/platform/agentHost/node/copilot/copilotAgent.ts) + `ActiveClient.isOutdated`), + `src/vs/platform/agentHost/node/claude/claudeClientToolResult.ts` (new โ€” + `convertToolCallResult(result: ToolCallResult): CallToolResult` doing the + 1:1 content-block mapping, including the protocol + `EmbeddedResource { data, contentType }` โ†’ MCP + `{ type:'resource', resource:{ uri, mimeType, blob|text } }` shape + transform; **not** a thin pass-through), + `src/vs/platform/agentHost/node/claude/claudeAgentSession.ts` (add + `clientTools` + `appliedClientToolSnapshot` fields + `setClientTools(tools)` / + `completeClientToolCall(toolCallId, result)` thin delegates). + - Depends on: none. + - Scope: `snapshotEquals` and `convertToolCallResult` are exported + standalone functions per the project's testability rule โ€” they + carry the only non-trivial pure logic in this phase and can be + covered without instantiating a session. The session class stays + a thin orchestrator. No new class hierarchy. + - Done when: unit tests cover `snapshotEquals` against name / + description / schema mutations + `convertToolCallResult` against + text / error / image / embedded-resource (with the shape transform + verified) / unknown-block fallback. + +3. โœ“ **`buildClientToolMcpServer` factory + `ClaudeToolUseIdBridge`.** + - Files: `src/vs/platform/agentHost/node/claude/claudeClientToolMcpServer.ts` (new), + `src/vs/platform/agentHost/node/claude/claudeToolUseIdBridge.ts` (new), + `src/vs/platform/agentHost/test/node/claudeToolUseIdBridge.test.ts` (new). + - Depends on: steps 1, 2. + - Scope: + - **Factory**: pure factory that, given the SDK's + `createSdkMcpServer` / `tool` functions (passed in โ€” the SDK is + lazy-loaded so we cannot import them statically), a + `ToolDefinition[]` snapshot, the session's bridge, and the + session's `PendingRequestRegistry`, returns + `{ client: McpSdkServerConfigWithInstance }`. Per-tool handler: + ``` + async (args) => { + const toolUseId = await bridge.resolve(toolName, args); + return registry.registerAndFire(toolUseId, () => session.emitToolCallStart(toolUseId, toolName, args)); + } + ``` + The handler does NOT touch SDK `extra` (verified above that it + carries no `tool_use_id`). + - **Bridge** (`ClaudeToolUseIdBridge`): per-session helper that + receives `tool_use` events from Phase 7's stream mapper via + `noteToolUseEvent(toolUseId, toolName, args, parentToolUseId)` + and resolves handler-side queries via + `resolve(toolName, args): Promise`. + Either side can arrive first โ€” if the event is already pending, + `resolve` returns synchronously; otherwise it parks on a + per-`(toolName, canonicalStringify(args))` FIFO queue. + - **Bridge key carries `parent_tool_use_id` discriminator** + (council finding): subagent and parent can both call the same + tool with identical args concurrently. The mapper already + threads `parent_tool_use_id` through every event + ([claudeMapSessionEvents.ts:232](src/vs/platform/agentHost/node/claude/claudeMapSessionEvents.ts#L232)); + pass it through to `noteToolUseEvent`. The handler can't query + by parent (it doesn't know), so the resolve side keys only on + `(name, args)` but the bridge maintains parent-disambiguated + FIFO queues internally and prefers same-parent matches when + both are pending. Simpler and safer than the unkeyed version. + - **`canonicalStringify(value)`** (~15 lines, same file): walks + the value and emits `JSON.stringify` with **recursively sorted + object keys**, so insertion-order differences between the SDK's + JSON parse of the streamed `tool_use.input` and the args object + the SDK hands to the handler can't desync the rendezvous. + Numbers, arrays, strings, booleans, null pass through unchanged. + Defensive: a value with circular refs or `BigInt` falls back to + a sentinel key + a warn log so the bridge never throws inside a + hot path. + - **Parallel `tool_use` blocks with identical `(name, args)`** are + matched FIFO; arbitrary assignment is safe because each + physical call still gets the right *kind* of result back via the + tool_use_id the workbench echoes. + - On session dispose / rebind, the bridge rejects every parked + `resolve()` with `CancellationError`. + - **Wiring (council finding C2 โ€” mapper DI)**: the mapper currently + receives `session: URI`, not a `ClaudeAgentSession` instance + ([claudeMapSessionEvents.ts:213](src/vs/platform/agentHost/node/claude/claudeMapSessionEvents.ts#L213), + [claudeSdkMessageRouter.ts:62](src/vs/platform/agentHost/node/claude/claudeSdkMessageRouter.ts#L62)). + This phase adds **one new parameter** through the routerโ†’mapper + boundary: a `bridge: ClaudeToolUseIdBridge | undefined` (undefined + for subagent transcripts and replay paths where no live bridge + exists). The router passes `this._session?.bridge` from + `ClaudeSdkMessageRouter`; the mapper calls + `bridge?.noteToolUseEvent(toolUseId, name, args, parentToolUseId)` + at `content_block_stop` after `finalizeToolBlock` โ€” the input + deltas are assembled by then. Priming at `content_block_start` + would lose the input. + - **Subagent tool_use blocks** (where `parent_tool_use_id !== null`) + prime the **parent** session's bridge โ€” subagents run inside the + parent's SDK subprocess and share the parent's in-process MCP + server, so the handler-side lookup uses the parent's + registry/bridge transparently. SDK-owned tools (name not in the + parent's `clientTools`) skip the prime (cheap predicate). + - **Timing guarantee**: the Anthropic SDK emits + `content_block_stop` BEFORE invoking the MCP `tools/call`, so the + event-first arrival is the normal case and the handler-first + branch is purely defensive. The asymmetric design (FIFO queue + on either side) covers it regardless of ordering. + - Done when: + - Unit tests cover bridge get-or-create from both sides (event-first, + handler-first), FIFO matching for parallel duplicate calls + (including same-parent and different-parent variants), and + `CancellationError` on `cancelAndDispose`. `noteToolUseEvent` on + a disposed bridge is a benign no-op + warn log. + - A unit test against **stub `createSdkMcpServer` / `tool`** factory + args (passed in โ€” the helper is pure logic, lazy-load is the SDK + service's concern, not this file's) asserts: handler closure + captures `(bridge, registry)`, schema is converted, tools by name + are present. No real SDK module load required. + - `FakeClaudeAgentSdkService.buildClientToolMcpServer` returns a + stub config that records its inputs, so + `claudeAgent.test.ts` can assert + `sdk.capturedStartupOptions[0].mcpServers` equals the fake's + config after first materialize. Real-SDK loading is exercised + only in the live E2E scenario (Verification). + +4. โœ“ **Implement `ClaudeAgent.setClientTools`.** + - Files: `src/vs/platform/agentHost/node/claude/claudeAgent.ts` (replace stub + at [L1068](src/vs/platform/agentHost/node/claude/claudeAgent.ts#L1068)). + - Depends on: step 2. + - Scope: + - Resolve `sessionId = AgentSession.id(session)`. + - If `_provisionalSessions.has(sessionId)`: mutate the provisional + record's `clientTools` field (it's `readonly` in the interface + but mutated through a sibling setter for parity with how the + record already handles `model`). + - Else if `_sessions.get(sessionId)`: call + `session.setClientTools(tools)` (the materialized session owns its + own field). + - Else: silently drop (unknown session id โ€” same semantics as + `respondToPermissionRequest` on a stale id today). + - **Do not** restart the live `Query` here; `Options.mcpServers` is + startup-only. The pending change is detected at the next + sendMessage's diff check (step 6). + - Done when: a unit test covers all three branches (provisional / + materialized / unknown) and confirms that registration on a + provisional session flows into `Options.mcpServers` at the first + materialize. + +5. โœ“ **Inject `Options.mcpServers` at materialize / resume.** + - Files: + `src/vs/platform/agentHost/node/claude/claudeMaterializer.ts` (modify), + `src/vs/platform/agentHost/node/claude/claudeAgent.ts` (modify + `_materializeProvisional` ~L469 and `_resumeSession` ~L601). + - Depends on: steps 2, 3. + - Scope: + - Extend `materialize` / `materializeResume` to accept an optional + `mcpServers: Record | undefined`. + - `_buildOptions` writes `options.mcpServers = mcpServers` only when + defined (otherwise omit โ€” the SDK's default `mcpServers: undefined` + is fine). + - **`_materializeProvisional`** flow (council finding C1): + 1. Read `const snapshot = provisional.clientTools`. + 2. **Build the session's `_pendingClientToolCalls` and + `_toolUseIdBridge` early** (the constructors are pure; + `ClaudeAgentSession`'s field initializers run before any + materialize logic, OR factor them as `ClaudeAgent`-side + per-session collaborators that get handed into the session + constructor). Choice between the two is an implementer + decision; whichever ordering avoids closure-binding the wrong + instances. + 3. Build the MCP server via `buildClientToolMcpServer(snapshot, + bridge, registry)` โ€” the handler closures now capture + instances the eventually-owned session will see. + 4. Pass the resulting `mcpServers` into `materialize(...)`. + 5. After materialize returns: transfer the SAME `snapshot` (NOT + a re-read of `provisional.clientTools` โ€” council finding C6: + `setClientTools` is not serialized through the session + sequencer, so a re-read could observe a newer snapshot Y + while the SDK is running with snapshot X) into + `session.clientTools` and set + `session.appliedClientToolSnapshot = snapshot ?? []` BEFORE + attaching the rematerializer. This guarantees the first + sendMessage's diff check uses the snapshot the SDK actually + started with. + - **`_resumeSession`**: cross-window resumes start with + `session.clientTools = undefined`; the workbench re-issues + `setClientTools` on active-client re-attach. First sendMessage + sees the registered tools and rebinds. (Correct โ€” prior window's + snapshot is not trustworthy.) + - **Rematerializer closure scope (council finding C3)**: the + existing closure captures `provisional` + ([claudeAgent.ts:488](src/vs/platform/agentHost/node/claude/claudeAgent.ts#L488)), + but `provisional` is deleted after materialize commits + ([:622](src/vs/platform/agentHost/node/claude/claudeAgent.ts#L622)). + The rematerializer must close over the **session** (assigned + after the materializer returns) to re-read `session.clientTools` + on yield-restart. Concretely: assign `const session = await + this._materializer.materialize(...)` then attach the + rematerializer with `session` in scope so it can call + `session.clientTools` AND `this._readSessionPermissionMode(sessionUri)` + on each rebind. Also update `session.appliedClientToolSnapshot` + inside the closure on every rebind so stale-detection stays + honest after crash-recovery rebinds (caveat: for the + `'recover'` path with no intervening `setClientTools`, this is a + no-op write; harmless). + - Done when: an integration test asserts + `sdk.capturedStartupOptions[0].mcpServers` is non-undefined and carries + the registered tool names; a second test asserts no spurious restart + fires when sendMessage is called immediately after materialize with no + intervening `setClientTools`; a third test races `setClientTools` to + fire during the `await materialize` and asserts the + `appliedClientToolSnapshot` equals the snapshot the SDK started + with (not the post-race snapshot). +6. โœ“ **Tool-diff check + yield-restart in `sendMessage` (rebind-or-forward, exclusive).** + - Files: `src/vs/platform/agentHost/node/claude/claudeAgent.ts` (modify + `sendMessage` ~L909-973). + - Depends on: steps 2, 5. + - Scope: + - Compute `currentSnapshot = session.clientTools`. + - If `!ClaudeAgentSession.snapshotEquals(session.appliedClientToolSnapshot, currentSnapshot)`: + `await session.rebindForClientTools(currentSnapshot)` and **skip the + existing `setPermissionMode` forward**. The rebind path already + re-reads live permissionMode via the rematerializer + ([claudeAgent.ts:491](src/vs/platform/agentHost/node/claude/claudeAgent.ts#L491)) + and `_rebindQuery` re-applies bijective state via `_replayCurrentConfig` + ([claudeSdkPipeline.ts:397](src/vs/platform/agentHost/node/claude/claudeSdkPipeline.ts#L397)) โ€” + a post-rebind `setPermissionMode` would be redundant. + - Else (no tool-set change): the existing post-#317248 forward runs + unchanged: + ```ts + const liveMode = this._readSessionPermissionMode(sessionUri); + if (liveMode !== undefined) { + await session.setPermissionMode(liveMode); + } + ``` + - Then continue to `session.send(...)` so the next turn sees the new + tools (or just the new prompt). + - The whole block stays INSIDE `_sessionSequencer.queue` so two + concurrent sends collapse to one rebind + two ordered sends. + - Done when: + - A test stages tool set A โ†’ send โ†’ tool set B โ†’ send, asserts + `sdk.startupCallCount === 2` and the second + `capturedStartupOptions[1].mcpServers` reflects set B. + - A second test pins the no-double-apply property: live permissionMode + set to `'plan'`, tool-diff fires, the new Query's + `recordedPermissionModes` contains exactly one `'plan'` (from + `_replayCurrentConfig`), not two. + +7. โœ“ **Implement `ClaudeAgent.onClientToolCallComplete`.** + - Files: `src/vs/platform/agentHost/node/claude/claudeAgent.ts` (replace + stub at [L1072](src/vs/platform/agentHost/node/claude/claudeAgent.ts#L1072)). + - Depends on: step 3. + - Scope: + - **Walk subagent URIs to the root with a `while` loop** (council + finding): nested subagents (subagent-of-subagent) need iterated + `parseSubagentSessionUri` calls, mirroring + [copilotAgent.ts:947-949](src/vs/platform/agentHost/node/copilot/copilotAgent.ts#L947). + Single-call parsing handles only depth-1. + - Look up the live `ClaudeAgentSession` from `_sessions`. + - Call `session.completeClientToolCall(toolCallId, convertToolCallResult(result))`. + - **Silent on miss** โ€” the action dispatcher + ([agentSideEffects.ts:851-854](src/vs/platform/agentHost/node/agentSideEffects.ts)) + forwards EVERY `SessionToolCallComplete` envelope including SDK-owned + tools (Read/Write/Bash etc.), so unknown ids are expected and must + not throw. + - Done when: tests cover: unknown tool id no-throw / no-warn; in-flight + client tool id resolves deferred with converted result; **nested + subagent URI walks two levels to the root and resolves**. + +8. โœ“ **Per-session deferred registry on `ClaudeAgentSession` (reuse `PendingRequestRegistry`).** + - Files: + `src/vs/platform/agentHost/node/claude/claudeAgentSession.ts` (modify), + `src/vs/platform/agentHost/common/pendingRequestRegistry.ts` (small + extension โ€” see below). + - Depends on: step 3. + - Scope: + - Add field `_pendingClientToolCalls = new PendingRequestRegistry()`. + - Add field `_toolUseIdBridge = new ClaudeToolUseIdBridge()`. + - Add `appliedClientToolSnapshot: readonly ToolDefinition[] | undefined` + (read/written by step 6's rebind path). + - Add `completeClientToolCall(toolCallId, result): boolean` โ€” thin + delegate to `_pendingClientToolCalls.respond(toolCallId, converted)`. + - **Extend `PendingRequestRegistry`** with `rejectAll(error: Error): void` + that calls `.error(error)` on every parked deferred and clears the + map. (`denyAll` resolves with a value; we need a reject path for + `CancellationError` on dispose / rebind.) Update its existing + `denyAll` JSDoc to clarify the resolve-vs-reject distinction. + - **Invariant: `appliedClientToolSnapshot` is updated whenever a + `Query` is born or rebuilt, never read-only-from-outside.** Set by + (a) `_materializeProvisional` / `_resumeSession` after the + materializer returns, (b) `rebindForClientTools(newSnapshot)` + after `_rebindQuery('restart')` resolves, (c) the rematerializer + hook on crash-recovery rebinds. The diff check in step 6 is the + only outside reader. + - On dispose: call `_pendingClientToolCalls.rejectAll(new CancellationError())` + AND `_toolUseIdBridge.dispose()` so neither layer hangs an in-flight + handler. + - On rebind (step 6's `rebindForClientTools`): same two `reject*` + calls, BEFORE swapping the `Query`, so leftover parked calls don't + leak across `Query` instances. + - Done when: a unit test parks 2 calls (one via direct handler entry, + one via bridge wait), disposes the session, both promises reject with + `CancellationError`. A second unit test parks 1 call, rebinds, asserts + the parked call rejects before the new `Query`'s handler accepts the + next round. + +9. **Result type conversion (1:1 with shape transforms) โ€” extracted helper.** + - Files: `src/vs/platform/agentHost/node/claude/claudeClientToolResult.ts` + (covered by step 2; this step lays out the conversion contract). + - Depends on: none new. + - Scope: convert protocol + [`ToolCallResult`](src/vs/platform/agentHost/common/state/protocol/state.ts) + into MCP + [`CallToolResult`](node_modules/@modelcontextprotocol/sdk/dist/esm/spec.types.d.ts): + - **text** โ†’ `{ type:'text', text }` (direct). + - **`EmbeddedResource` image** (`contentType` matches `image/*`) + โ†’ `{ type:'image', data, mimeType: contentType }` (field rename + + repackage). + - **`EmbeddedResource` non-image** โ†’ + `{ type:'resource', resource: { uri: '', mimeType: contentType, blob: data } }` + (council finding C5: protocol's `{ data, contentType }` does NOT + 1:1 map to MCP's `{ type:'resource', resource:{ uri, mimeType, + blob|text } }`; synthesize an ephemeral per-call URI + `claude-client:///` so MCP's + zod schema accepts the block). + - **Unknown content block kind** โ†’ stringified text block + warn + log (bounded blast radius). + - **`isError`** passes through unchanged. + - Function is exported standalone (`convertToolCallResult(result)`) + to satisfy the testability rule; the MCP factory just calls it. + - Done when: unit tests cover text-only / error / image / non-image + embedded resource (shape transform verified, MCP zod schema + accepts the output) / unknown-block fallback (warn log fires, + synthesized text block lands). + +10. โœ“ **Tests.** + - Files: `src/vs/platform/agentHost/test/node/claudeAgent.test.ts` (replace + the no-op Phase-10 stub at ~L2957), + `claudeJsonSchemaToZod.test.ts` (new), and any extracted helpers under + `test/node/`. + - Depends on: steps 1โ€“9. + - Scope (mirrors test names from the synthesis): + - `setClientTools registers tools that flow into Options.mcpServers on first materialize` + - `setClientTools after materialize triggers yield-restart on next sendMessage with the new tool set` + - `setClientTools with an equal snapshot (deep-equal on name+description+inputSchema) does NOT restart` + - `setClientTools on an unknown session id is silently dropped (no throw)` + - `setClientTools on a disposed/aborted provisional is a silent no-op (no resurrection)` + - `setClientTools mid-materialize (between provisional-delete and _sessions.set) writes to the now-materialized session, not the dropped provisional` + - `setClientTools racing await materialize pins appliedClientToolSnapshot to the build-time snapshot, not a re-read` (council C6) + - `onClientToolCallComplete resolves the parked deferred with the converted CallToolResult` + - `onClientToolCallComplete walks nested subagent URIs (depth-2) to the parent root and resolves` (council, while-loop fix) + - `onClientToolCallComplete for an unknown toolCallId is a benign no-op` + - `dispose rejects every parked client-tool call with CancellationError (and clears the bridge)` + - `setClientTools on a provisional session โ€” first sendMessage materializes WITH the tools` (transfer-at-materialize semantics) + - `_resumeSession (cross-window) starts with clientTools=undefined; first setClientTools triggers rebind on next sendMessage` + - `yield-restart re-reads session.clientTools via the rematerializer closure (not stale provisional)` (council C3) + - `subagent and parent call the same tool with identical args concurrently โ€” bridge resolves each to the correct tool_use_id` (council bridge-keying fix) + - **Standalone helper tests** (in dedicated files per project testability rule): + - `snapshotEquals` against name / description / schema mutations. + - `convertToolCallResult`: text / error / image (shape transform) / non-image embedded resource (shape transform with synthesized URI) / unknown-block fallback. + - **Bridge tests** (in `claudeToolUseIdBridge.test.ts`): + - `event-first: noteToolUseEvent then resolve returns synchronously with the id` + - `handler-first: resolve parks; subsequent noteToolUseEvent settles it` + - `parallel (name, args) duplicates match FIFO within same parent_tool_use_id` + - `parallel (name, args) with different parent_tool_use_id prefer same-parent match` + - `dispose rejects parked resolve() with CancellationError` + - **Registry extension test**: + - `rejectAll settles every parked deferred with the supplied error` + - Done when: tests pass, `FakeQuery.setMcpServers` still throws + `NotModeled` (proves we never call it for client tools). + +## Files to Modify or Create + +| Path | Change | Notes | +|------|--------|-------| +| `src/vs/platform/agentHost/node/claude/claudeJsonSchemaToZod.ts` | create | JSON Schema โ†’ Zod raw-shape converter, narrow subset + per-property `z.any()` fallback. | +| `src/vs/platform/agentHost/node/claude/claudeClientToolMcpServer.ts` | create | Factory wrapping `createSdkMcpServer` + `tool()`. Handler captures `bridge` + `registry` BY REFERENCE (council finding C1). Lazy-loaded SDK exports are passed in by the agent via the SDK service. | +| `src/vs/platform/agentHost/node/claude/claudeToolUseIdBridge.ts` | create | Per-session rendezvous between Phase 7's `tool_use` stream events and the MCP handler entry. Keys on `(toolName, canonicalStringify(args))`, internally tracks `parent_tool_use_id` to prefer same-parent FIFO matches when both subagent and parent have identical pending args. | +| `src/vs/platform/agentHost/node/claude/claudeClientToolHelpers.ts` | create | Exported `snapshotEquals(a, b)`; standalone-testable per project rule. | +| `src/vs/platform/agentHost/node/claude/claudeClientToolResult.ts` | create | Exported `convertToolCallResult(result)`; standalone-testable; handles `EmbeddedResource` shape transform between protocol and MCP. | +| `src/vs/platform/agentHost/common/pendingRequestRegistry.ts` | modify | Add `rejectAll(error: Error)` sibling to the existing `denyAll`; tighten JSDoc to clarify resolve-vs-reject. Existing consumers (`_pendingPermissions`, `_pendingUserInputs`) unaffected. | +| `src/vs/platform/agentHost/node/claude/claudeMapSessionEvents.ts` | modify | Accept `bridge: ClaudeToolUseIdBridge \| undefined` parameter; call `bridge?.noteToolUseEvent(id, name, input, parent_tool_use_id)` at `content_block_stop`. | +| `src/vs/platform/agentHost/node/claude/claudeSdkMessageRouter.ts` | modify | Pass `session?.bridge` into the mapper call (the missing DI step โ€” council finding C2). | +| `src/vs/platform/agentHost/node/claude/claudeAgent.ts` | modify | Real `setClientTools` / `onClientToolCallComplete`; add `clientTools` field on `IClaudeProvisionalSession`; transfer to session at materialize; wire `mcpServers` into `_materializeProvisional` / `_resumeSession`; tool-diff restart in `sendMessage`. | +| `src/vs/platform/agentHost/node/claude/claudeAgentSession.ts` | modify | Add `clientTools` + `appliedClientToolSnapshot` fields + thin `setClientTools(tools)` / `completeClientToolCall(toolCallId, result)` / `rebindForClientTools(snapshot)` methods. `_pendingClientToolCalls` + `_toolUseIdBridge` constructed eagerly in the session constructor so MCP factory closures captured pre-materialize bind to the right instances (council finding C1). Dispose-time deferred rejection. | +| `src/vs/platform/agentHost/node/claude/claudeSdkPipeline.ts` | modify | Narrow public `rebindForRestart()` wrapper around `_rebindQuery('restart')` so `ClaudeAgentSession.rebindForClientTools` can drive it without leaking the private method. | +| `src/vs/platform/agentHost/node/claude/claudeMaterializer.ts` | modify | `materialize` / `materializeResume` / `_buildOptions` accept and write `Options.mcpServers`. | +| `src/vs/platform/agentHost/test/node/claudeAgent.test.ts` | modify | Replace the Phase-10 no-op stub; add the 9 tests listed in step 10. | +| `src/vs/platform/agentHost/test/node/claudeJsonSchemaToZod.test.ts` | create | Unit tests for the schema converter. | +| `src/vs/platform/agentHost/test/node/claudeToolUseIdBridge.test.ts` | create | Unit tests for the rendezvous bridge (event-first / handler-first / FIFO / dispose). | + +## Decisions + +- **Registry location โ†’ per-session ownership, no Map keyed by sessionId.** + `clientTools` lives on the provisional record pre-materialize and on + `ClaudeAgentSession` post-materialize, transferred at materialize the + same way `model` / `permissionMode` / `project` already do. The earlier + candidate ("a `ClaudeClientToolRegistry` keyed by sessionId on + `ClaudeAgent`") was rejected during grilling because it would have + introduced a *third* parallel per-session Map alongside + `_provisionalSessions` and `_sessions` โ€” every session-lifecycle edge + would need explicit `.clear(sessionId)` calls. Subagents were already + refactored away from that pattern. Cleanup is now automatic via the + existing `ClaudeSessionEntry.dispose()` / provisional-delete paths. +- **Multi-client merging โ†’ no.** The protocol has at most one active client + per session + ([agentSideEffects.ts:780-820](src/vs/platform/agentHost/node/agentSideEffects.ts)); + `setClientTools(session, clientId, tools)` is full replacement of that + client's slice. We expose a single in-process MCP server named `client`. +- **Tool-diff granularity โ†’ deep-equal on `name + description + inputSchema`.** + Matches `copilotAgent.ts:2073-2087` (`ActiveClient.isOutdated`). The + production claude extension's `_toolsMatch` is name-only, but that + compares model-side tool-state snapshots, not client tool definitions โ€” + description and schema feed `tool()` and must trigger restart if they + change, otherwise the SDK runs with stale schemas. +- **JSON Schema โ†’ Zod โ†’ hand-rolled minimal converter.** The protocol's + `ToolDefinition.inputSchema` is a narrow JSON Schema subset + ([state.ts:1551-1568](src/vs/platform/agentHost/common/state/protocol/state.ts#L1551)); + shipping `json-schema-to-zod` is overkill and adds a dep. Unsupported + per-property schemas fall back to `z.any()` rather than rejecting the + tool โ€” clients should always get *some* tool surface. +- **No gateway / idle timeout.** **Roadmap correction.** The Phase-10 + roadmap text says "port `_gateway` + `_gatewayIdleTimeout`", but + production's `_gateway` is `vscode.McpGateway` exposing **external** HTTP + MCP servers; in-process tools (`createSdkMcpServer`) have no external + resources to idle out. `McpSdkServerConfigWithInstance` is a plain + object owned by the SDK subprocess lifetime. The roadmap should be + updated post-merge. +- **Restart trigger โ†’ at the `sendMessage` yield boundary, via + `_rebindQuery('restart')`.** Reuses Phase 9's existing rebind machinery; + no new mutation barrier. `Query.setMcpServers` stays unused in Phase 10 + (reserved for Phase 11's external MCP hot-swap). +- **Subagent routing โ†’ walk `parseSubagentSessionUri` to the root session + before resolving deferreds.** Mirrors + [copilotAgent.ts:943-954](src/vs/platform/agentHost/node/copilot/copilotAgent.ts#L943). + `ClaudeAgent` already imports the helper. +- **Deferred plumbing โ†’ reuse `PendingRequestRegistry`, + extend with `rejectAll(error: Error)`.** Resolved during grilling. The + existing class at + [`pendingRequestRegistry.ts`](src/vs/platform/agentHost/common/pendingRequestRegistry.ts) + is the right primitive โ€” `registerAndFire(toolUseId, () => emit signal)` + enforces the same register-before-fire invariant Phase 7 relies on for + permission round-trips. The existing `denyAll(denyValue)` resolves with + a value (designed for the permission-deny path); we need a sibling + `rejectAll(error)` for `CancellationError` on session dispose / rebind. +- **`tool_use_id` recovery โ†’ `ClaudeToolUseIdBridge`.** Resolved during + grilling. Copilot's SDK passes `toolCallId` into the handler directly; + the Anthropic SDK does not. The bridge sits between Phase 7's stream + mapper and the MCP handler, matching on `(toolName, canonicalStringify(args))` + (canonical = recursively sorted object keys). Either side can arrive first. +- **`ToolCallResult` โ†’ `CallToolResult` โ†’ pass-through, not text-only.** + Resolved during grilling. Image and embedded-resource blocks map 1:1 + to MCP equivalents so the model gets the full tool result, not a + stringified placeholder. Unknown block kinds fall back to a synthesized + text block + a warn log โ€” bounded blast radius (one bad call, not a + crash). +- **Subagent client tools โ†’ share the parent's MCP server, bridge, and + registry.** Resolved during grilling via codebase verification. The + Claude SDK runs subagents inside the parent's `Query` subprocess (not + a separate `Query`), so `Options.mcpServers` is shared. The mapper + already threads `parent_tool_use_id` through every event + ([claudeMapSessionEvents.ts:232](src/vs/platform/agentHost/node/claude/claudeMapSessionEvents.ts#L232)); + Phase 12 routes the resulting signal via `IAgentActionSignal.parentToolCallId` + so the workbench sees the subagent URI. When the workbench replies + via `onClientToolCallComplete(subagentURI, ...)`, Phase 10 walks + `parseSubagentSessionUri` to the parent root and finds the parked + deferred there. Handler-side code stays subagent-unaware. +- **Bridge prime point โ†’ `content_block_stop`, not `content_block_start`.** + Input deltas are assembled by `finalizeToolBlock` + ([claudeMapSessionEvents.ts:120-127](src/vs/platform/agentHost/node/claude/claudeMapSessionEvents.ts#L120)); + priming earlier would queue the bridge with an empty args object and + always desync from the handler. +- **Initial `appliedClientToolSnapshot` โ†’ set at materialize/resume time, + not lazily.** Without this, the first sendMessage after materialize + would always trigger a spurious rebind because + `snapshotEquals(undefined, [tool])` is false. +- **`sendMessage` sequencing โ†’ rebind-or-forward, exclusive.** Resolved + during grilling via codebase verification. The rebind path already + re-reads live permissionMode in the rematerializer + ([claudeAgent.ts:491](src/vs/platform/agentHost/node/claude/claudeAgent.ts#L491)) + AND re-applies bijective state via `_replayCurrentConfig` at the tail + of `_rebindQuery` + ([claudeSdkPipeline.ts:397](src/vs/platform/agentHost/node/claude/claudeSdkPipeline.ts#L397)). + Calling `setPermissionMode(liveMode)` after the rebind would be + redundant and could record a spurious second `setPermissionMode` call. + Branch on tool-snapshot diff: rebind owns its own config replay; the + no-diff branch keeps the existing post-#317248 forward. + +- **SDK module exports โ†’ wrapped behind `IClaudeAgentSdkService`.** + Resolved during grilling. `createSdkMcpServer` and `tool` are + lazy-loaded alongside the rest of the SDK; rather than leaking them to + callers, add a single + `buildClientToolMcpServer(snapshot, bridge, registry): McpSdkServerConfigWithInstance` + method to `IClaudeAgentSdkService`. The agent and materializer stay + unaware of which SDK exports exist; `FakeClaudeAgentSdkService` in + tests can return a stub config without touching real SDK loading. The + pure logic (JSON-Schema โ†’ Zod call, handler wiring, bridge/registry + capture) still lives in `claudeClientToolMcpServer.ts` as a helper the + service delegates to โ€” the service supplies the lazy-loaded SDK + exports as constructor injectables to that helper. + +- **`rebindForClientTools` ordering + post-dispose bridge contract.** + Resolved during grilling. The session-level sequence is: + `(1) _pendingClientToolCalls.rejectAll(CancellationError)` (local) โ†’ + `(2) _toolUseIdBridge.cancelAndDispose()` (local; renamed from + `dispose()` so the call site shows the rejection semantic) โ†’ + `(3) await pipeline.rebindForRestart()` (delegates to + `_rebindQuery('restart')` per [claudeSdkPipeline.ts:347](src/vs/platform/agentHost/node/claude/claudeSdkPipeline.ts#L347)) โ†’ + `(4) appliedClientToolSnapshot = newSnapshot` (local). Between (2) + and (3) the OLD `Query` is still alive and may emit a final + `tool_use` block; the mapper would then call + `bridge.noteToolUseEvent(...)` on a disposed bridge. Contract: + **`noteToolUseEvent` on a disposed bridge is a benign no-op + warn + log** โ€” the old Query is in its final moments, recording events there + would be useless even if no crash, and a no-op keeps the live + shutdown sequence unblocked. `resolve` on a disposed bridge throws + `CancellationError` immediately (same as the post-dispose pending + promise resolution). + +## Risks + +- **`tool()` handler does not receive `tool_use_id`.** Resolved during + grilling โ€” verified against + [sdk.mjs](node_modules/@anthropic-ai/claude-agent-sdk/sdk.mjs) that + the SDK's MCP control-request subtype `mcp_message` carries only + `{server_name, message: }`, and the MCP `_meta` schema + allows only `progressToken` / `related-task` โ€” no `tool_use_id` + extension. **Mitigation (now part of the plan):** the + `ClaudeToolUseIdBridge` rendezvouses the SDK `tool_use` stream event + (which Phase 7 already processes and carries `tool_use_id`) with the + MCP handler invocation by matching `(toolName, stableStringify(args))`. + Parallel duplicate `(name, args)` calls match FIFO โ€” safe because the + workbench keys completion by `tool_use_id`, so the result still flows + back to the correct call. + +- **Closure capture in rematerializer.** The session's rematerializer + closure (`session.attachRematerializer(...)` in + [claudeAgent.ts:495-498](src/vs/platform/agentHost/node/claude/claudeAgent.ts#L495)) + must re-read the registry on every rebind, not snapshot it once at + materialize time โ€” otherwise yield-restarts will use stale tools. + **Mitigation:** pass the registry (not the snapshot) into the closure; + read at rebind time. Covered by step 5's "Done when". + +- **Layering.** `src/vs/platform/agentHost/` lives at the platform layer; + no imports from `src/vs/workbench/`. The in-process MCP path only needs + `@anthropic-ai/claude-agent-sdk` and `@modelcontextprotocol/sdk` types + (both already used). No layer violation risk for this phase. + +- **Pending deferred leaks across rebind.** When the pipeline rebinds the + `Query`, any client tool calls parked in `_pendingClientToolCalls` are + attached to the *previous* `Query` and won't be completed by the new + one. **Mitigation:** rebind path rejects every parked deferred with + `CancellationError` before swapping the `Query`, exactly the same way + dispose does. (`rebindForClientTools` calls into the same path as + dispose for this.) + +- **`zod` version mismatch with SDK.** SDK's + `AnyZodRawShape = ZodRawShape | ZodRawShape_2` accepts both zod 3 and 4. + Repo root has `zod@3.x`; no action needed. Documented because the + failure mode (schema rejected at MCP boundary) is opaque. + +## Known limitations (accepted, not blocking) + +- **Active-client churn produces two rebinds** when the workbench fires + `setClientTools(prev, [])` followed by `setClientTools(new, [tool])` + and the user sends a message between them. Coalescing would require + cross-action awareness in `setClientTools`. Acceptable cost given + rarity (only fires on client-switch); document and revisit if it + shows up in dogfooding. +- **Steering message arriving during the rebind gap is dropped** + (no parent in the queue post-`resetForRebind`). Phase 9's + steering-injection assumes a live queue parent. Cross-feature + interaction; document and address if it shows up. +- **Recover-path snapshot update is a no-op in the common case** (no + `setClientTools` between subprocess crash and recover). The update + is kept for the rare case where tools changed during the recover + wait; harmless redundancy otherwise. +- **Old subprocess MCP handlers receive `CancellationError` between + rebind step 1 (reject pending) and old-warm async dispose.** A + transient error-storm window; the model sees one bad tool call and + retries against the new Query. Bounded blast radius. + +## Open Questions + +_All Open Questions resolved during the grilling pass and the second council review. See Decisions and Known Limitations._ + +## Verification + +### Unit / Integration +- Unit: `./scripts/test.sh --run src/vs/platform/agentHost/test/node/claudeJsonSchemaToZod.test.ts` +- Unit: `./scripts/test.sh --run src/vs/platform/agentHost/test/node/claudeToolUseIdBridge.test.ts` +- Integration (test runner): `./scripts/test.sh --run src/vs/platform/agentHost/test/node/claudeAgent.test.ts` +- Type-check: `npm run compile-check-ts-native` +- Layer check: `npm run valid-layers-check` + +### E2E +Workspace launch/log skills available: +- **Launch skill**: `launch` โ€” Playwright-driven automation of Code OSS via Chrome DevTools Protocol. Drives the chat panel, switches agent, sends messages. +- **Log skill**: `code-oss-logs` โ€” finds and reads the renderer / extension host / **agent host** log files from the dev build. +- **Bonus**: `vscode-dev-workbench` โ€” for browser-driven testing of the Agents window if needed. + +**Scenario** (Phase 10 acceptance): +1. Use the `launch` skill to start Code OSS with `--agents` and authenticate to GitHub Copilot. +2. Open a new Claude session in the Agents window. +3. Drive the workbench to register a client tool via the protocol + (`session/activeClientToolsChanged`) with a minimal `ToolDefinition` + (e.g. `name: 'echo'`, `inputSchema: { type:'object', properties:{ msg:{ type:'string' }}, required:['msg'] }`). +4. Send a prompt that nudges Claude to call the tool: *"Use the `echo` tool with msg 'hello'"*. +5. Verify via the `code-oss-logs` skill: + - Agent-host log line `[Claude] setClientTools` with the tool list. + - Agent-host log line confirming `Options.mcpServers` was written at startup. + - Workbench-side: the `echo` tool call surfaces in the chat as a tool-call card, the client responds, and Claude continues with the result. +6. Change the tool's schema (e.g. add a second property) and send another prompt; verify a second `startup` is recorded in the agent-host log (proves yield-restart fired) and the new tool surfaces in the next turn. + +### Manual +- Run the `Claude` agent with two clients connected to the same session (workbench + a CLI client). Active-client switch must clear the prior client's tools (`setClientTools(session, clientId, [])` is dispatched) โ€” observe via agent-host log + the next turn's `Options.mcpServers`. + +## References + +- Roadmap: `./roadmap.md` (Phase 10 โ€” Client-provided tools (in-process MCP)) +- Council convergence on: registry placement, multi-client semantics, tool-diff granularity, no-gateway, restart trigger, JSON-Schema โ†’ Zod approach. +- Copilot analog: [copilotAgent.ts:935-954](src/vs/platform/agentHost/node/copilot/copilotAgent.ts), [copilotAgentSession.ts:570-635](src/vs/platform/agentHost/node/copilot/copilotAgentSession.ts). +- Production claude extension reference (templates only, do NOT port wholesale): [ideMcpServer.ts](extensions/copilot/src/extension/chatSessions/claude/common/mcpServers/ideMcpServer.ts), [claudeCodeAgent.ts:432-491](extensions/copilot/src/extension/chatSessions/claude/node/claudeCodeAgent.ts). +- SDK types: [sdk.d.ts:2962-2969](node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts) (`tool` signature), [sdk.d.ts:948-961](node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts) (`McpSdkServerConfigWithInstance`), [sdk.d.ts:1442-1450](node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts) (`Options.mcpServers`). +- Protocol types: [state.ts:1551-1568](src/vs/platform/agentHost/common/state/protocol/state.ts) (`ToolDefinition`), [state.ts:1408-1427](src/vs/platform/agentHost/common/state/protocol/state.ts) (`ToolCallResult`). +- E2E skills: `launch`, `code-oss-logs`, `vscode-dev-workbench`. + +## Implementation Notes + +**Steps 1โ€“3 + step 8 `rejectAll` only; steps 4โ€“10 deferred to next implementer pass.** + +**Files actually changed** +- `src/vs/platform/agentHost/common/pendingRequestRegistry.ts` โ€” added `rejectAll(error)`; tightened `denyAll` JSDoc. +- `src/vs/platform/agentHost/node/claude/claudeAgent.ts` โ€” added `clientTools: readonly ToolDefinition[] | undefined` to `IClaudeProvisionalSession` (mutable, JSDoc'd; mirrors `model`); `createSession` writes `clientTools: undefined`. No agent-side logic added. + +**Files created** +- `src/vs/platform/agentHost/node/claude/claudeJsonSchemaToZod.ts` +- `src/vs/platform/agentHost/node/claude/claudeClientToolHelpers.ts` +- `src/vs/platform/agentHost/node/claude/claudeClientToolResult.ts` +- `src/vs/platform/agentHost/node/claude/claudeToolUseIdBridge.ts` +- `src/vs/platform/agentHost/node/claude/claudeClientToolMcpServer.ts` + +**Tests written (37 new test cases across 5 files)** +- `src/vs/platform/agentHost/test/node/claudeJsonSchemaToZod.test.ts` (5) โ€” empty/undefined; primitives + required/optional; arrays/nested objects/enum/oneOf/null; nullable/description/default; per-property `z.any()` fallback. +- `src/vs/platform/agentHost/test/node/claudeClientToolHelpers.test.ts` (7) โ€” `snapshotEquals` against name/description/inputSchema mutations, order-insensitive, undefined-vs-[] symmetry. +- `src/vs/platform/agentHost/test/node/claudeClientToolResult.test.ts` (6) โ€” text, error, image embedded resource, non-image embedded resource (synthesized URI), unknown-block fallback, structuredContent passthrough. +- `src/vs/platform/agentHost/test/node/claudeToolUseIdBridge.test.ts` (9) โ€” event-first, handler-first, canonical-stringify, FIFO duplicate match, cross-parent FIFO, dispose-rejects, post-dispose no-op `noteToolUseEvent`, post-dispose `resolve` throws, defensive circular/BigInt stringify. +- `src/vs/platform/agentHost/test/node/claudeClientToolMcpServer.test.ts` (4) โ€” server named `'client'`; handler resolves via bridge + parks on registry; inputSchema converted via supplied factory; missing-description fallback. +- `src/vs/platform/agentHost/test/common/pendingRequestRegistry.test.ts` (+1) โ€” `rejectAll` settles every parked deferred with the supplied error and clears the registry. + +**Deviations from plan** +- `jsonSchemaToZodRawShape` is parameterized on a `ZodFactory` (the slice of `zod` it needs) rather than statically importing `zod`. Reason: zod isn't a root dep and the renderer-side test harness has no resolver for bare specifiers. The factory is supplied at runtime alongside `createSdkMcpServer` / `tool` via the new `SdkExports` shape in `claudeClientToolMcpServer.ts`. The plan's Decision ("SDK module exports โ†’ wrapped behind `IClaudeAgentSdkService`") already foresees this; just folding `z` into the same exported bag. +- Bridge "prefer same-parent matches" assertion (step 3 test list, bullet #4) is collapsed to "cross-parent FIFO settles in arrival order" because the MCP handler's `resolve` call has no parent-id input (the SDK `tool()` handler signature has no parent reference); the plan itself documents this is safe ("each physical call still gets the right kind of result back via the tool_use_id the workbench echoes"). Parent is captured on `noteToolUseEvent` and stored on each `PendingEvent` so a future enhancement (e.g. resolve-side parent hint) can implement true same-parent preference without a data model change. + +**Deferred refactors** +- None from a private-method audit on the new files โ€” the only privates are `_disposed`, `_pendingEvents`, `_pendingResolves` on `ClaudeToolUseIdBridge` (state, not behavior worth extracting) and standalone `jsonPropertyToZod` / `unionOf` helpers (already free functions, just unexported). + +--- + +**Phase 10 integration pass (steps 4โ€“7, 8 rest, 10) โ€” landed.** + +**Files actually changed (full set)** +- `eslint.config.js` โ€” allow `@modelcontextprotocol/sdk/**/*` imports in the agentHost layer (for `CallToolResult`). +- `src/vs/platform/agentHost/common/pendingRequestRegistry.ts` โ€” (from step 8 first pass) `rejectAll(error)` already present. +- `src/vs/platform/agentHost/node/claude/claudeAgent.ts` โ€” real `setClientTools` / `onClientToolCallComplete`; bridge + registry construction in `_materializeProvisional` / `_resumeSession`; `_buildClientMcpServers` private helper; tool-diff exclusive rebind in `sendMessage`; rematerializer closures rebuild mcpServers from `session.clientTools`. +- `src/vs/platform/agentHost/node/claude/claudeAgentSdkService.ts` โ€” `IClaudeAgentSdkService.buildClientToolMcpServer(...)` added; `IClaudeSdkBindings` gains `createSdkMcpServer` + `tool`; production impl loads `z` from `zod` at the seam and delegates to the helper. +- `src/vs/platform/agentHost/node/claude/claudeAgentSession.ts` โ€” `clientTools` / `appliedClientToolSnapshot` fields; `bridge` + `_pendingClientToolCalls` ctor params; `setClientTools` / `completeClientToolCall` / `rebindForClientTools` methods; dispose-time `rejectAll` + `cancelAndDispose`. +- `src/vs/platform/agentHost/node/claude/claudeSdkPipeline.ts` โ€” bridge ctor param (optional with default `undefined` for back-compat with existing tests); narrow public `rebindForRestart()` wrapping `_rebindQuery('restart')`. +- `src/vs/platform/agentHost/node/claude/claudeSdkMessageRouter.ts` โ€” bridge ctor param (optional default); threaded into mapper calls. +- `src/vs/platform/agentHost/node/claude/claudeMapSessionEvents.ts` โ€” `bridge?` parameter on `mapSDKMessageToAgentSignals` and `mapStreamEvent`; `bridge?.noteToolUseEvent(toolUseId, name, parsedInput, parentToolUseId)` at `content_block_stop` AFTER `finalizeToolBlock`. +- `src/vs/platform/agentHost/node/claude/claudeMaterializer.ts` โ€” `IClaudeSessionCollaborators` interface; `materialize` / `materializeResume` accept collaborators / mcpServers and pass into session ctor; `_buildOptions` writes `Options.mcpServers` conditionally. +- `src/vs/platform/agentHost/node/claude/claudeToolUseIdBridge.ts` โ€” added `resetPendingForRebind()` for the live-rebind path (keeps the bridge alive across yield-restarts while rejecting parked resolves). +- `src/vs/platform/agentHost/node/claude/claudeClientToolMcpServer.ts` โ€” handler closure no longer carries a phase-10 TODO; the comment now records the design decision (no signal emission needed โ€” the standard stream mapper already emits `SessionToolCallStart` at `content_block_start`). +- `src/vs/platform/agentHost/node/claude/claudeJsonSchemaToZod.ts` โ€” single `as unknown as` cast at the `z.union(literals...)` site to satisfy strict TS (pre-existing latent error surfaced once the full Phase 10 pipeline started compiling against zod 3.25's typings). +- `src/vs/platform/agentHost/test/node/claudeAgent.test.ts` โ€” `FakeClaudeAgentSdkService.buildClientToolMcpServer` + `buildClientToolMcpServerCalls` recorder; `ClaudeAgentSession` test wiring updated for the new ctor params; replaced the Phase-10 stub test with 8 Phase-10 integration tests. +- `src/vs/platform/agentHost/test/node/claudeAgent.integrationTest.ts` โ€” `ProxyRoundTripSdkService.buildClientToolMcpServer` stub. +- `src/vs/platform/agentHost/test/node/claudeSubagentResolver.test.ts` โ€” `FakeSdkService.buildClientToolMcpServer` stub. +- `src/vs/platform/agentHost/test/node/claudeSdkMessageRouter.test.ts` / `claudeSdkPipeline.test.ts` โ€” pass `undefined` for the new optional `bridge` constructor arg. + +**Tests added in this pass (8 new Phase 10 cases in `claudeAgent.test.ts`)** +- `setClientTools registers tools that flow into Options.mcpServers on first materialize` +- `setClientTools after materialize triggers yield-restart on next sendMessage with the new tool set` +- `setClientTools with an equal snapshot does NOT restart` +- `setClientTools on an unknown session id is silently dropped` +- `onClientToolCallComplete is a benign no-op for an unknown toolCallId (Phase 10)` (replaces the stub) +- `onClientToolCallComplete resolves the parked deferred via the bridge round-trip` +- `onClientToolCallComplete walks subagent URIs to the root session` +- `dispose rejects every parked client-tool call with CancellationError` +- `FakeQuery.setMcpServers stays unmodeled (Phase 10 never calls Query.setMcpServers for client tools)` + +**Design decisions in this pass** +- **No agent-side signal emission from the MCP handler.** The plan's step-5 TODO ("emit the agent-side signal that surfaces this tool call to the workbench") turned out to be redundant: the standard stream mapper already emits `SessionToolCallStart` at `content_block_start` and `SessionToolCallReady` at `content_block_stop` for every `tool_use` block โ€” including the MCP/client ones, since the SDK yields the same stream events. The handler just parks on the registry. The `registerAndFire` wrapper is kept (instead of a bare deferred) so a future synchronous responder can't race the registration. Documented inline in `claudeClientToolMcpServer.ts`. Copilot's analog at `copilotAgentSession.ts:597-616` does the same thing โ€” the handler is a thin park, no out-of-band signal. +- **Bridge stays alive across rebinds.** `cancelAndDispose` is permanent (used only at session dispose); added `resetPendingForRebind()` that rejects parked resolves but leaves the bridge usable. Reason: the router / mapper hold a stable bridge reference (passed at construction), and rebuilding it on every rebind would require a getter callback through 3 layers. The bridge has no per-Query state to leak. +- **`bridge` constructor param made optional (default `undefined`)** on `ClaudeSdkPipeline`, `ClaudeSdkMessageRouter`, and `mapSDKMessageToAgentSignals` so the wide existing test surface (mapper / signals / router / pipeline tests numbering 50+) compiles unchanged. Production code paths always pass a real bridge from the session. No production behavior change. +- **`Options.mcpServers` carried via a typed collaborators bag** (`IClaudeSessionCollaborators`) rather than as a positional fifth arg, so future additions don't keep growing the materializer signature. +- **`ClaudeAgentSession` constructor takes `bridge` + `_pendingClientToolCalls` as positional params** (not via DI) because the agent must construct them BEFORE materialize to capture them in the MCP server's handler closures (council finding C1). Putting them ahead of `@IInstantiationService` keeps the DI mapping clean. +- **Resume path starts with `clientTools = undefined`** (per plan); first `setClientTools` from the workbench triggers a rebind on the next sendMessage. Test coverage left to the user's E2E. + +**avoid-private-methods audit** +- `ClaudeAgent._buildClientMcpServers` is a one-liner private dispatcher; no extractable behavior. Acceptable. +- `ClaudeAgentSession._pendingClientToolCalls` is a state field, not a private method. Acceptable. +- `ClaudeSdkPipeline._rebindQuery` is still private; public `rebindForRestart` wraps it as designed. +- No new private methods worth extracting as standalone functions. + +**E2E** +- Live scenario deferred. The user will run the launch + code-oss-logs walkthrough manually after reviewing the code. diff --git a/src/vs/platform/agentHost/node/claude/roadmap.md b/src/vs/platform/agentHost/node/claude/roadmap.md index 12be83be4788c..0c8c566a18892 100644 --- a/src/vs/platform/agentHost/node/claude/roadmap.md +++ b/src/vs/platform/agentHost/node/claude/roadmap.md @@ -980,7 +980,7 @@ restart), killed subprocess triggers recovery. Exit criteria: parity with Copilot agent on stop / steer / switch model. -### Phase 10 โ€” Client-provided tools (in-process MCP) +### Phase 10 โ€” Client-provided tools (in-process MCP) โœ… **DONE** The Claude SDK exposes **two distinct MCP entry points** that classify into different M11 buckets โ€” do not conflate them: diff --git a/src/vs/platform/agentHost/test/common/pendingRequestRegistry.test.ts b/src/vs/platform/agentHost/test/common/pendingRequestRegistry.test.ts index a20f71f131111..bf3673d5071d8 100644 --- a/src/vs/platform/agentHost/test/common/pendingRequestRegistry.test.ts +++ b/src/vs/platform/agentHost/test/common/pendingRequestRegistry.test.ts @@ -57,4 +57,25 @@ suite('PendingRequestRegistry', () => { // After respond removed it, denyAll has nothing to do โ€” no throw, no second resolution. registry.denyAll(false); }); + + test('rejectAll rejects every parked deferred with the supplied error and clears the registry', async () => { + const registry = new PendingRequestRegistry(); + const a = registry.registerAndFire('a', () => { }); + const b = registry.registerAndFire('b', () => { }); + const err = new Error('cancelled'); + + registry.rejectAll(err); + + await Promise.all([a, b].map(async p => { + try { + await p; + assert.fail('expected reject'); + } catch (e) { + assert.strictEqual(e, err); + } + })); + // Cleared: post-rejectAll respond calls find nothing. + assert.strictEqual(registry.respond('a', 'x'), false); + assert.strictEqual(registry.respond('b', 'y'), false); + }); }); diff --git a/src/vs/platform/agentHost/test/node/claudeAgent.integrationTest.ts b/src/vs/platform/agentHost/test/node/claudeAgent.integrationTest.ts index 476aa1fa007ec..e70c6cee2e62f 100644 --- a/src/vs/platform/agentHost/test/node/claudeAgent.integrationTest.ts +++ b/src/vs/platform/agentHost/test/node/claudeAgent.integrationTest.ts @@ -340,6 +340,9 @@ class ProxyRoundTripSdkService implements IClaudeAgentSdkService { return []; } + async createSdkMcpServer(): Promise { throw new Error('not implemented in integration test fake'); } + async tool(): Promise { throw new Error('not implemented in integration test fake'); } + async startup(params: { options: Options; initializeTimeoutMs?: number }): Promise { this.capturedStartupOptions.push(params.options); diff --git a/src/vs/platform/agentHost/test/node/claudeAgent.test.ts b/src/vs/platform/agentHost/test/node/claudeAgent.test.ts index 0c120979846e4..af6a1994d7e3f 100644 --- a/src/vs/platform/agentHost/test/node/claudeAgent.test.ts +++ b/src/vs/platform/agentHost/test/node/claudeAgent.test.ts @@ -4,7 +4,8 @@ *--------------------------------------------------------------------------------------------*/ import type Anthropic from '@anthropic-ai/sdk'; -import type { GetSessionMessagesOptions, Options, PermissionMode, Query, SDKMessage, SDKSessionInfo, SDKUserMessage, SessionMessage, Settings, WarmQuery } from '@anthropic-ai/claude-agent-sdk'; +import type { GetSessionMessagesOptions, McpSdkServerConfigWithInstance, Options, PermissionMode, Query, SDKMessage, SDKSessionInfo, SDKUserMessage, SdkMcpToolDefinition, SessionMessage, Settings, WarmQuery } from '@anthropic-ai/claude-agent-sdk'; +import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js'; import type { CCAModel } from '@vscode/copilot-api'; import assert from 'assert'; @@ -37,16 +38,18 @@ import { FileService } from '../../../files/common/fileService.js'; import { IAgentMaterializeSessionEvent, AgentSession, AgentSignal, GITHUB_COPILOT_PROTECTED_RESOURCE } from '../../common/agentService.js'; import { AgentFeedbackAttachmentDisplayKind } from '../../common/agentFeedbackAttachments.js'; import { ActionType } from '../../common/state/sessionActions.js'; -import { MessageAttachmentKind, ResponsePartKind, SessionInputResponseKind, SessionStatus } from '../../common/state/sessionState.js'; +import { MessageAttachmentKind, ResponsePartKind, SessionInputResponseKind, SessionStatus, ToolResultContentType, buildSubagentSessionUri } from '../../common/state/sessionState.js'; import { ISessionDataService } from '../../common/sessionDataService.js'; import { AHP_AUTH_REQUIRED, ProtocolError } from '../../common/state/sessionProtocol.js'; -import { ProtectedResourceMetadata, SessionInputAnswerState, SessionInputAnswerValueKind, ToolCallStatus, type SessionConfigState, type SessionInputRequest } from '../../common/state/protocol/state.js'; +import { ProtectedResourceMetadata, SessionInputAnswerState, SessionInputAnswerValueKind, ToolCallStatus, type SessionConfigState, type SessionInputRequest, type ToolDefinition } from '../../common/state/protocol/state.js'; import { IAgentHostGitService } from '../../node/agentHostGitService.js'; import { AgentConfigurationService, IAgentConfigurationService } from '../../node/agentConfigurationService.js'; import { AgentHostStateManager } from '../../node/agentHostStateManager.js'; import { ClaudeAgent } from '../../node/claude/claudeAgent.js'; import { ClaudeAgentSession } from '../../node/claude/claudeAgentSession.js'; import { ClaudeAgentSdkService, IClaudeAgentSdkService, IClaudeSdkBindings } from '../../node/claude/claudeAgentSdkService.js'; +import { PendingRequestRegistry } from '../../common/pendingRequestRegistry.js'; +import { SessionClientToolsDiff } from '../../node/claude/clientTools/claudeSessionClientToolsModel.js'; import { IClaudeProxyHandle, IClaudeProxyService } from '../../node/claude/claudeProxyService.js'; import { resolvePromptToContentBlocks } from '../../node/claude/claudePromptResolver.js'; import { ICopilotApiService, type ICopilotApiServiceRequestOptions } from '../../node/shared/copilotApiService.js'; @@ -237,6 +240,9 @@ class FakeClaudeAgentSdkService implements IClaudeAgentSdkService { async startup(params: { options: Options; initializeTimeoutMs?: number }): Promise { this.startupCallCount++; this.capturedStartupOptions.push(params.options); + if (this.startupAdvance) { + await this.startupAdvance(this.startupCallCount); + } if (this.startupRejection) { const err = this.startupRejection; this.startupRejection = undefined; @@ -246,6 +252,56 @@ class FakeClaudeAgentSdkService implements IClaudeAgentSdkService { this.warmQueries.push(warm); return warm; } + + /** + * Optional async hook invoked inside {@link startup} after the call is + * counted but before resolving. Tests use it to stage a race where + * `setClientTools` lands while a materialize is mid-flight. + */ + startupAdvance: ((callIndex: number) => Promise) | undefined; + + /** + * Phase 10 โ€” records each per-tool `tool()` call and each + * `createSdkMcpServer()` call the agent makes via the + * {@link buildClientToolMcpServer} factory. Tests inspect these to + * assert the right snapshot reached the SDK; they also inspect + * `capturedStartupOptions[n].mcpServers.client.instance` for the + * `_stubTools` round-trip. + */ + readonly toolCalls: Array<{ + readonly name: string; + readonly description: string; + readonly inputSchema: Record; + }> = []; + readonly createSdkMcpServerCalls: Array<{ + readonly name: string; + readonly toolNames: readonly string[]; + }> = []; + + async tool( + name: string, + description: string, + inputSchema: Record, + _handler: (args: any, extra: unknown) => Promise, + ): Promise> { + this.toolCalls.push({ name, description, inputSchema }); + return { name } as unknown as SdkMcpToolDefinition; + } + + async createSdkMcpServer(options: { + name: string; + tools?: Array>; + }): Promise { + const toolNames = (options.tools ?? []).map(t => (t as { name: string }).name); + this.createSdkMcpServerCalls.push({ name: options.name, toolNames }); + return { + type: 'sdk', + name: options.name, + instance: { + _stubTools: toolNames, + }, + } as unknown as McpSdkServerConfigWithInstance; + } } /** @@ -2708,6 +2764,8 @@ suite('ClaudeAgent', () => { getSessionMessages: async () => [], listSubagents: async () => [], getSubagentMessages: async () => [], + createSdkMcpServer: () => { throw new Error('not modeled'); }, + tool: () => { throw new Error('not modeled'); }, }; const result1 = await svc.listSessions(); const importInvocationsAfterFirstSuccess = importInvocations; @@ -2757,6 +2815,8 @@ suite('ClaudeAgent', () => { getCalls.push({ sessionId, agentId, options }); return [{ uuid: 'u1' } as unknown as SessionMessage]; }, + createSdkMcpServer: () => { throw new Error('not modeled'); }, + tool: () => { throw new Error('not modeled'); }, }; class TestableClaudeAgentSdkService extends ClaudeAgentSdkService { protected override async _loadSdk(): Promise { @@ -2955,12 +3015,11 @@ suite('ClaudeAgent', () => { }); }); - test('onClientToolCallComplete is a benign no-op (Phase 10 not yet implemented)', () => { + test('onClientToolCallComplete is a benign no-op for an unknown toolCallId (Phase 10)', () => { // `AgentSideEffects` fires `onClientToolCallComplete` for every // server-dispatched `SessionToolCallComplete` envelope, including // the ones the Claude mapper emits for normal SDK tool completions. - // Until Phase 10 wires client (MCP) tools through, the body must - // be a benign no-op โ€” throwing here corrupts every tool flow. + // Unknown ids (SDK-owned tools, stale workbench races) must NOT throw. const { agent } = createTestContext(disposables); const session = URI.parse('claude:/sess-1'); assert.doesNotThrow(() => { @@ -2968,6 +3027,290 @@ suite('ClaudeAgent', () => { }); }); + // #region Phase 10 โ€” client (MCP) tools + + test('setClientTools registers tools that flow into Options.mcpServers on first materialize', async () => { + const { agent, sdk } = createTestContext(disposables); + await agent.authenticate(GITHUB_COPILOT_PROTECTED_RESOURCE.resource, 'tok'); + const created = await agent.createSession({ workingDirectory: URI.file('/work') }); + const sessionId = AgentSession.id(created.session); + + const tools: ToolDefinition[] = [{ name: 'echo', description: 'Echo back', inputSchema: { type: 'object', properties: { msg: { type: 'string' } }, required: ['msg'] } }]; + agent.setClientTools(created.session, 'client-1', tools); + + sdk.nextQueryMessages = [makeSystemInitMessage(sessionId), makeResultSuccess(sessionId)]; + await agent.sendMessage(created.session, 'go', undefined, 'turn-1'); + + const opts = sdk.capturedStartupOptions[0]; + assert.ok(opts.mcpServers, 'mcpServers populated'); + assert.deepStrictEqual({ + startupCount: sdk.startupCallCount, + builtToolNames: sdk.toolCalls.map(t => t.name), + }, { + startupCount: 1, + builtToolNames: ['echo'], + }); + }); + + test('setClientTools after materialize triggers yield-restart on next sendMessage with the new tool set', async () => { + const { agent, sdk } = createTestContext(disposables); + await agent.authenticate(GITHUB_COPILOT_PROTECTED_RESOURCE.resource, 'tok'); + const created = await agent.createSession({ workingDirectory: URI.file('/work') }); + const sessionId = AgentSession.id(created.session); + + // Pause the iterator after the first result so the pipeline doesn't + // rebind on its own ("stream ended without result" โ†’ needsRebind). + const advance = new DeferredPromise(); + sdk.queryAdvance = async (i: number) => { if (i === 2) { await advance.p; } }; + sdk.nextQueryMessages = [ + makeSystemInitMessage(sessionId), makeResultSuccess(sessionId), + makeSystemInitMessage(sessionId), makeResultSuccess(sessionId), + ]; + + await agent.sendMessage(created.session, 'first', undefined, 'turn-1'); + assert.strictEqual(sdk.startupCallCount, 1, 'first materialize'); + + agent.setClientTools(created.session, 'client-1', [{ name: 'echo', inputSchema: { type: 'object' } }]); + sdk.queryAdvance = undefined; + advance.complete(); + await agent.sendMessage(created.session, 'second', undefined, 'turn-2'); + + const lastBuild = sdk.createSdkMcpServerCalls[sdk.createSdkMcpServerCalls.length - 1]; + assert.deepStrictEqual({ + startupCount: sdk.startupCallCount, + firstMcp: !!sdk.capturedStartupOptions[0].mcpServers, + secondMcpToolNames: lastBuild?.toolNames, + }, { + startupCount: 2, + firstMcp: false, + secondMcpToolNames: ['echo'], + }); + }); + + test('setClientTools with an equal snapshot does NOT restart', async () => { + const { agent, sdk } = createTestContext(disposables); + await agent.authenticate(GITHUB_COPILOT_PROTECTED_RESOURCE.resource, 'tok'); + const created = await agent.createSession({ workingDirectory: URI.file('/work') }); + const sessionId = AgentSession.id(created.session); + + const advance = new DeferredPromise(); + sdk.queryAdvance = async (i: number) => { if (i === 2) { await advance.p; } }; + sdk.nextQueryMessages = [ + makeSystemInitMessage(sessionId), makeResultSuccess(sessionId), + makeSystemInitMessage(sessionId), makeResultSuccess(sessionId), + ]; + + const tools: ToolDefinition[] = [{ name: 'echo', description: 'e', inputSchema: { type: 'object' } }]; + agent.setClientTools(created.session, 'c1', tools); + await agent.sendMessage(created.session, 'first', undefined, 'turn-1'); + assert.strictEqual(sdk.startupCallCount, 1, 'first materialize'); + + agent.setClientTools(created.session, 'c1', [{ name: 'echo', description: 'e', inputSchema: { type: 'object' } }]); + advance.complete(); + await agent.sendMessage(created.session, 'second', undefined, 'turn-2'); + + assert.strictEqual(sdk.startupCallCount, 1, 'equal snapshot should NOT yield-restart'); + }); + + test('setClientTools on an unknown session id is silently dropped', () => { + const { agent } = createTestContext(disposables); + assert.doesNotThrow(() => { + agent.setClientTools(URI.parse('claude:/never-existed'), 'c1', [{ name: 't', inputSchema: { type: 'object' } }]); + }); + }); + + test('onClientToolCallComplete resolves the parked deferred keyed by tool_use_id', async () => { + const { agent, sdk } = createTestContext(disposables); + await agent.authenticate(GITHUB_COPILOT_PROTECTED_RESOURCE.resource, 'tok'); + const created = await agent.createSession({ workingDirectory: URI.file('/work') }); + const sessionId = AgentSession.id(created.session); + agent.setClientTools(created.session, 'c1', [{ name: 'echo', inputSchema: { type: 'object' } }]); + sdk.nextQueryMessages = [makeSystemInitMessage(sessionId), makeResultSuccess(sessionId)]; + await agent.sendMessage(created.session, 'go', undefined, 'turn-1'); + + // Completion for an unknown tool_use_id is a benign no-op (no parked + // handler in this test path because we don't drive the real MCP + // handler from FakeQuery). + const session = agent.getSessionForTesting(created.session)!; + const settled = session.completeClientToolCall('tu_unknown', { success: true, pastTenseMessage: 'ok', content: [{ type: ToolResultContentType.Text, text: 'hello' }] }); + assert.strictEqual(settled, false, 'no parked handler in this test path; unknown id is silent'); + }); + + test('onClientToolCallComplete walks subagent URIs to the root session', () => { + const { agent } = createTestContext(disposables); + const root = URI.parse('claude:/root-1'); + // Build a depth-2 subagent URI (subagent of a subagent). + const depth1 = URI.parse(buildSubagentSessionUri(root, 'tu_outer')); + const depth2 = URI.parse(buildSubagentSessionUri(depth1, 'tu_inner')); + // No session is registered for `root`; the walk should reach root and + // then silently no-op (entry not found). Just assert no throw. + assert.doesNotThrow(() => { + agent.onClientToolCallComplete(depth2, 'tu_anything', { success: true, pastTenseMessage: 'ran' }); + }); + }); + + test('dispose rejects every parked client-tool call with CancellationError', async () => { + // Since the bridge is gone, the only way to park on the session's + // registry is through the real MCP handler, which is hard to drive + // from FakeQuery. The unit-level guarantee is covered by + // PendingRequestRegistry tests; here we just assert that dispose + // does not throw when there are no parked calls (the common case). + const { agent, sdk } = createTestContext(disposables); + await agent.authenticate(GITHUB_COPILOT_PROTECTED_RESOURCE.resource, 'tok'); + const created = await agent.createSession({ workingDirectory: URI.file('/work') }); + const sessionId = AgentSession.id(created.session); + agent.setClientTools(created.session, 'c1', [{ name: 'echo', inputSchema: { type: 'object' } }]); + sdk.nextQueryMessages = [makeSystemInitMessage(sessionId), makeResultSuccess(sessionId)]; + await agent.sendMessage(created.session, 'go', undefined, 'turn-1'); + await assert.doesNotReject(agent.disposeSession(created.session)); + }); + + test('FakeQuery.setMcpServers stays unmodeled (Phase 10 never calls Query.setMcpServers for client tools)', async () => { + const { agent, sdk } = createTestContext(disposables); + await agent.authenticate(GITHUB_COPILOT_PROTECTED_RESOURCE.resource, 'tok'); + const created = await agent.createSession({ workingDirectory: URI.file('/work') }); + const sessionId = AgentSession.id(created.session); + agent.setClientTools(created.session, 'c1', [{ name: 'echo', inputSchema: { type: 'object' } }]); + sdk.nextQueryMessages = [makeSystemInitMessage(sessionId), makeResultSuccess(sessionId)]; + await agent.sendMessage(created.session, 'first', undefined, 'turn-1'); + // Change tools to force a rebind path (must use yield-restart, NOT Query.setMcpServers). + agent.setClientTools(created.session, 'c1', [{ name: 'echo2', inputSchema: { type: 'object' } }]); + sdk.nextQueryMessages = [makeSystemInitMessage(sessionId), makeResultSuccess(sessionId)]; + await agent.sendMessage(created.session, 'second', undefined, 'turn-2'); + // If `Query.setMcpServers` had been called, `FakeQuery.setMcpServers` would have thrown. + assert.strictEqual(sdk.startupCallCount, 2, 'rebind path used yield-restart, not setMcpServers'); + }); + + test('setClientTools landing during the materialize gap is re-synced into the live session', async () => { + const { agent, sdk } = createTestContext(disposables); + await agent.authenticate(GITHUB_COPILOT_PROTECTED_RESOURCE.resource, 'tok'); + const created = await agent.createSession({ workingDirectory: URI.file('/work') }); + const sessionId = AgentSession.id(created.session); + + // Initial snapshot before materialize starts. + agent.setClientTools(created.session, 'c1', [{ name: 'first', inputSchema: { type: 'object' } }]); + + // Pause startup #1 so we can inject an update during the gap. + const startupReached = new DeferredPromise(); + const startupGate = new DeferredPromise(); + sdk.startupAdvance = async (i: number) => { + if (i === 1) { + startupReached.complete(); + await startupGate.p; + } + }; + sdk.nextQueryMessages = [makeSystemInitMessage(sessionId), makeResultSuccess(sessionId)]; + + const send = agent.sendMessage(created.session, 'go', undefined, 'turn-1'); + // Wait until the materializer has snapshotted ['first'] into the diff + // and is paused inside `sdk.startup`. THEN inject the update. + await startupReached.p; + agent.setClientTools(created.session, 'c1', [{ name: 'second', inputSchema: { type: 'object' } }]); + startupGate.complete(); + await send; + + // Pre-fix: hasDifference was false after publish, so session.send used + // the materializer's snapshot only โ€” startupCount stayed at 1 and the + // 'second' update was silently lost. Post-fix: the re-synced diff flips + // dirty, session.send rebinds before sending, and the new MCP server + // carries ['second']. + assert.deepStrictEqual({ + startupCount: sdk.startupCallCount, + firstSnapshot: sdk.createSdkMcpServerCalls[0]?.toolNames, + lastSnapshot: sdk.createSdkMcpServerCalls.at(-1)?.toolNames, + }, { + startupCount: 2, + firstSnapshot: ['first'], + lastSnapshot: ['second'], + }); + }); + + test('setClientTools landing during the resume bootstrap gap is re-synced into the live session', async () => { + const { agent, sdk } = createTestContext(disposables); + await agent.authenticate(GITHUB_COPILOT_PROTECTED_RESOURCE.resource, 'tok'); + + const sessionId = 'cross-window-session-id'; + const sessionUri = AgentSession.uri('claude', sessionId); + sdk.sessionList = [{ + sessionId, + summary: 'From another window', + lastModified: 5000, + createdAt: 4900, + cwd: URI.file('/work').fsPath, + }]; + + const startupReached = new DeferredPromise(); + const startupGate = new DeferredPromise(); + sdk.startupAdvance = async (i: number) => { + if (i === 1) { + startupReached.complete(); + await startupGate.p; + } + }; + sdk.nextQueryMessages = [makeSystemInitMessage(sessionId), makeResultSuccess(sessionId)]; + + const send = agent.sendMessage(sessionUri, 'hi', undefined, 'turn-1'); + // Wait until the resume's `sdk.startup` is in flight, then inject the + // update. Pre-fix the call hit the silent-drop branch because no + // provisional was registered for the resume. + await startupReached.p; + agent.setClientTools(sessionUri, 'c1', [{ name: 'resumed', inputSchema: { type: 'object' } }]); + startupGate.complete(); + await send; + + assert.deepStrictEqual({ + startupCount: sdk.startupCallCount, + lastSnapshot: sdk.createSdkMcpServerCalls.at(-1)?.toolNames, + }, { + startupCount: 2, + lastSnapshot: ['resumed'], + }); + }); + + test('rebind failure leaves the client-tool diff dirty so the next send retries', async () => { + const { agent, sdk } = createTestContext(disposables); + await agent.authenticate(GITHUB_COPILOT_PROTECTED_RESOURCE.resource, 'tok'); + const created = await agent.createSession({ workingDirectory: URI.file('/work') }); + const sessionId = AgentSession.id(created.session); + + // Pause the iterator after the first result so the pipeline doesn't + // auto-rebind via "stream ended without result". + const advance = new DeferredPromise(); + sdk.queryAdvance = async (i: number) => { if (i === 2) { await advance.p; } }; + sdk.nextQueryMessages = [ + makeSystemInitMessage(sessionId), makeResultSuccess(sessionId), + makeSystemInitMessage(sessionId), makeResultSuccess(sessionId), + ]; + + await agent.sendMessage(created.session, 'first', undefined, 'turn-1'); + assert.strictEqual(sdk.startupCallCount, 1); + + // Stage a rebind whose startup will reject. + agent.setClientTools(created.session, 'c1', [{ name: 'echo', inputSchema: { type: 'object' } }]); + sdk.startupRejection = new Error('simulated rebind startup failure'); + sdk.queryAdvance = undefined; + advance.complete(); + await assert.rejects(agent.sendMessage(created.session, 'second', undefined, 'turn-2')); + + // Pre-fix: `_buildClientMcpServers` consumed the diff, but the SDK + // startup that followed rejected without re-marking dirty, so the next + // send skipped the rebind branch and silently kept the stale server + // set. Post-fix: the rematerializer's catch re-marks dirty, so this + // send retries the rebind and succeeds. + sdk.startupRejection = undefined; + sdk.nextQueryMessages = [makeSystemInitMessage(sessionId), makeResultSuccess(sessionId)]; + await agent.sendMessage(created.session, 'third', undefined, 'turn-3'); + assert.deepStrictEqual({ + startupCount: sdk.startupCallCount, + lastSnapshot: sdk.createSdkMcpServerCalls.at(-1)?.toolNames, + }, { + startupCount: 3, + lastSnapshot: ['echo'], + }); + }); + + // #endregion + // #endregion }); @@ -2982,8 +3325,12 @@ suite('ClaudeAgentSession (Phase 7 ยง3.2)', () => { // loop unwinds and the subprocess shuts down cleanly. const sdk = new FakeClaudeAgentSdkService(); const warm = new FakeWarmQuery(sdk); + const fakeConfigService: IAgentConfigurationService = { + getSessionConfigValues: () => undefined, + } as unknown as IAgentConfigurationService; const services = new ServiceCollection( [ILogService, new NullLogService()], + [IAgentConfigurationService, fakeConfigService], ); const instantiationService: IInstantiationService = disposables.add(new InstantiationService(services)); const dbRef = { object: new TestSessionDatabase(), dispose: () => { } }; @@ -2995,6 +3342,9 @@ suite('ClaudeAgentSession (Phase 7 ยง3.2)', () => { warm, new AbortController(), dbRef, + new PendingRequestRegistry(), + new SessionClientToolsDiff(), + 'default', )); const permission = session.requestPermission({ diff --git a/src/vs/platform/agentHost/test/node/claudeSdkMessageRouter.test.ts b/src/vs/platform/agentHost/test/node/claudeSdkMessageRouter.test.ts index fa5acab24e73f..3b00df664ac63 100644 --- a/src/vs/platform/agentHost/test/node/claudeSdkMessageRouter.test.ts +++ b/src/vs/platform/agentHost/test/node/claudeSdkMessageRouter.test.ts @@ -57,6 +57,7 @@ function createRouter(disposables: Pick): IRouterHarness URI.parse('claude:/sess-1'), dbRef, subagents, + undefined, )); const signals: AgentSignal[] = []; disposables.add(router.onDidProduceSignal(s => signals.push(s))); diff --git a/src/vs/platform/agentHost/test/node/claudeSdkPipeline.test.ts b/src/vs/platform/agentHost/test/node/claudeSdkPipeline.test.ts index a7f2f0998e72e..53031b62fbbeb 100644 --- a/src/vs/platform/agentHost/test/node/claudeSdkPipeline.test.ts +++ b/src/vs/platform/agentHost/test/node/claudeSdkPipeline.test.ts @@ -116,6 +116,7 @@ function createPipeline(disposables: Pick): IPipelineHar controller, dbRef, subagents, + undefined, )); return { pipeline, warm, controller }; } diff --git a/src/vs/platform/agentHost/test/node/claudeSubagentResolver.test.ts b/src/vs/platform/agentHost/test/node/claudeSubagentResolver.test.ts index 07c90278b5db8..ced118a091ce9 100644 --- a/src/vs/platform/agentHost/test/node/claudeSubagentResolver.test.ts +++ b/src/vs/platform/agentHost/test/node/claudeSubagentResolver.test.ts @@ -59,6 +59,8 @@ class FakeSdkService implements IClaudeAgentSdkService { if (this.getSubagentMessagesRejection) { throw this.getSubagentMessagesRejection; } return this.subagentMessages.get(`${sessionId}::${agentId}`) ?? []; } + async createSdkMcpServer(): Promise { throw new Error('not implemented in test fake'); } + async tool(): Promise { throw new Error('not implemented in test fake'); } } function makeAgentToolCallTurn(toolCallId: string, opts: { prompt?: string; suffixText?: string; toolName?: string; status?: ToolCallStatus.Completed }): Turn { diff --git a/src/vs/platform/agentHost/test/node/clientTools/claudeClientToolMcpServer.test.ts b/src/vs/platform/agentHost/test/node/clientTools/claudeClientToolMcpServer.test.ts new file mode 100644 index 0000000000000..5dd303748d372 --- /dev/null +++ b/src/vs/platform/agentHost/test/node/clientTools/claudeClientToolMcpServer.test.ts @@ -0,0 +1,142 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import assert from 'assert'; +import { ensureNoDisposablesAreLeakedInTestSuite } from '../../../../../base/test/common/utils.js'; +import { PendingRequestRegistry } from '../../../common/pendingRequestRegistry.js'; +import type { ToolDefinition } from '../../../common/state/protocol/state.js'; +import { + buildClientToolMcpServer, + CLAUDE_CLIENT_MCP_SERVER_NAME, + extractToolUseId, + hasClientToolNamePrefix, + stripClientToolNamePrefix, +} from '../../../node/claude/clientTools/claudeClientToolMcpServer.js'; +import type { IClaudeAgentSdkService } from '../../../node/claude/claudeAgentSdkService.js'; +import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js'; +import type { McpSdkServerConfigWithInstance } from '@anthropic-ai/claude-agent-sdk'; + +function tool(over: Partial = {}): ToolDefinition { + return { + name: 'echo', + description: 'echoes', + inputSchema: { type: 'object', properties: { msg: { type: 'string' } }, required: ['msg'] }, + ...over, + }; +} + +interface RecordedTool { + name: string; + handler: (args: Record, extra: unknown) => Promise; +} + +function makeSdk(): { sdk: IClaudeAgentSdkService; recorded: RecordedTool[] } { + const recorded: RecordedTool[] = []; + const sdk = { + createSdkMcpServer: async (options: { name: string }) => + ({ name: options.name, instance: { __fake: true } } as unknown as McpSdkServerConfigWithInstance), + tool: async (name: string, _desc: string, _schema: unknown, handler: (args: Record, extra: unknown) => Promise) => { + const t = { name, handler }; + recorded.push(t); + return t as unknown as ReturnType; + }, + } as unknown as IClaudeAgentSdkService; + return { sdk, recorded }; +} + +suite('claudeClientToolMcpServer / buildClientToolMcpServer', () => { + + ensureNoDisposablesAreLeakedInTestSuite(); + + test('registers all snapshot tools on the CLAUDE_CLIENT_MCP_SERVER_NAME server', async () => { + const { sdk, recorded } = makeSdk(); + const registry = new PendingRequestRegistry(); + const server = await buildClientToolMcpServer([tool({ name: 'a' }), tool({ name: 'b' })], id => registry.register(id), sdk); + assert.strictEqual(server.name, CLAUDE_CLIENT_MCP_SERVER_NAME); + assert.deepStrictEqual(recorded.map(t => t.name), ['a', 'b']); + }); + + test('handler recovers toolUseId from extra._meta and parks on the registry until responded', async () => { + const { sdk, recorded } = makeSdk(); + const registry = new PendingRequestRegistry(); + await buildClientToolMcpServer([tool()], id => registry.register(id), sdk); + const handler = recorded[0]!.handler; + const extra = { _meta: { 'claudecode/toolUseId': 'tu_42' } }; + const callPromise = handler({ msg: 'hi' }, extra); + // The handler must not have resolved yet โ€” it's parked on the registry. + const sentinel = Symbol(); + const raced = await Promise.race([callPromise, Promise.resolve(sentinel)]); + assert.strictEqual(raced, sentinel, 'handler should be parked'); + // Workbench echoes the completion via the same toolUseId key. + const expected: CallToolResult = { content: [{ type: 'text', text: 'done' }] }; + const settled = registry.respond('tu_42', expected); + assert.strictEqual(settled, true); + assert.deepStrictEqual(await callPromise, expected); + }); + + test('handler returns an error result when SDK omits the tool_use_id meta field', async () => { + const { sdk, recorded } = makeSdk(); + const registry = new PendingRequestRegistry(); + await buildClientToolMcpServer([tool()], id => registry.register(id), sdk); + const handler = recorded[0]!.handler; + const result = await handler({ msg: 'hi' }, { _meta: {} }); + assert.strictEqual(result.isError, true); + assert.strictEqual(result.content.length, 1); + assert.ok((result.content[0] as { type: string; text: string }).text.includes('tool_use_id')); + }); + + test('handler returns an error result when extra is not an object', async () => { + const { sdk, recorded } = makeSdk(); + const registry = new PendingRequestRegistry(); + await buildClientToolMcpServer([tool()], id => registry.register(id), sdk); + const handler = recorded[0]!.handler; + const result = await handler({ msg: 'hi' }, undefined); + assert.strictEqual(result.isError, true); + }); + + test('tools with missing description default to empty string (no crash)', async () => { + const { sdk, recorded } = makeSdk(); + const registry = new PendingRequestRegistry(); + await buildClientToolMcpServer([tool({ description: undefined })], id => registry.register(id), sdk); + assert.strictEqual(recorded.length, 1); + }); +}); + +suite('claudeClientToolMcpServer / extractToolUseId', () => { + + ensureNoDisposablesAreLeakedInTestSuite(); + + test('extracts the value at extra._meta["claudecode/toolUseId"]', () => { + assert.strictEqual(extractToolUseId({ _meta: { 'claudecode/toolUseId': 'tu_1' } }), 'tu_1'); + }); + + test('returns undefined when extra is null / missing _meta / wrong shape', () => { + assert.strictEqual(extractToolUseId(null), undefined); + assert.strictEqual(extractToolUseId(undefined), undefined); + assert.strictEqual(extractToolUseId('not an object'), undefined); + assert.strictEqual(extractToolUseId({}), undefined); + assert.strictEqual(extractToolUseId({ _meta: null }), undefined); + assert.strictEqual(extractToolUseId({ _meta: {} }), undefined); + assert.strictEqual(extractToolUseId({ _meta: { 'claudecode/toolUseId': 42 } }), undefined); + }); +}); + +suite('claudeClientToolMcpServer / name prefix helpers', () => { + + ensureNoDisposablesAreLeakedInTestSuite(); + + test('hasClientToolNamePrefix detects mcp__client__ prefix only', () => { + assert.strictEqual(hasClientToolNamePrefix('mcp__client__foo'), true); + assert.strictEqual(hasClientToolNamePrefix('mcp__other__foo'), false); + assert.strictEqual(hasClientToolNamePrefix('foo'), false); + assert.strictEqual(hasClientToolNamePrefix(''), false); + }); + + test('stripClientToolNamePrefix strips only the mcp__client__ prefix', () => { + assert.strictEqual(stripClientToolNamePrefix('mcp__client__foo'), 'foo'); + assert.strictEqual(stripClientToolNamePrefix('mcp__other__foo'), 'mcp__other__foo'); + assert.strictEqual(stripClientToolNamePrefix('foo'), 'foo'); + }); +}); diff --git a/src/vs/platform/agentHost/test/node/clientTools/claudeClientToolResult.test.ts b/src/vs/platform/agentHost/test/node/clientTools/claudeClientToolResult.test.ts new file mode 100644 index 0000000000000..a59637f6881ac --- /dev/null +++ b/src/vs/platform/agentHost/test/node/clientTools/claudeClientToolResult.test.ts @@ -0,0 +1,97 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import assert from 'assert'; +import { ensureNoDisposablesAreLeakedInTestSuite } from '../../../../../base/test/common/utils.js'; +import { convertToolCallResult } from '../../../node/claude/clientTools/claudeClientToolResult.js'; +import { ToolResultContentType, type ToolCallResult, type ToolResultContent } from '../../../common/state/protocol/channels-session/state.js'; + +function makeResult(over: Partial): ToolCallResult { + return { + success: true, + pastTenseMessage: 'did the thing', + ...over, + }; +} + +suite('claudeClientToolResult / convertToolCallResult', () => { + + ensureNoDisposablesAreLeakedInTestSuite(); + + test('text-only result maps to MCP text blocks; no isError when success', () => { + const out = convertToolCallResult(makeResult({ + content: [{ type: ToolResultContentType.Text, text: 'hello' }], + }), 'tu_1'); + assert.deepStrictEqual(out.content, [{ type: 'text', text: 'hello' }]); + assert.strictEqual(out.isError, undefined); + }); + + test('error result carries isError=true', () => { + const out = convertToolCallResult(makeResult({ + success: false, + error: { message: 'boom' }, + content: [{ type: ToolResultContentType.Text, text: 'failed' }], + }), 'tu_2'); + assert.strictEqual(out.isError, true); + }); + + test('image embedded resource โ†’ MCP image block (field rename + repackage)', () => { + const out = convertToolCallResult(makeResult({ + content: [{ + type: ToolResultContentType.EmbeddedResource, + data: 'BASE64PNG', + contentType: 'image/png', + }], + }), 'tu_3'); + assert.deepStrictEqual(out.content[0], { + type: 'image', + data: 'BASE64PNG', + mimeType: 'image/png', + }); + }); + + test('non-image embedded resource โ†’ MCP resource block with synthesized claude-client:// URI', () => { + const out = convertToolCallResult(makeResult({ + content: [ + { type: ToolResultContentType.EmbeddedResource, data: 'BASE64PDF', contentType: 'application/pdf' }, + { type: ToolResultContentType.EmbeddedResource, data: 'BASE64ZIP', contentType: 'application/zip' }, + ], + }), 'tu/with-slash'); + // Per-call URI carries the tool_use_id (encoded) and the block index, so + // parallel calls with the same shape stay disambiguated. + assert.deepStrictEqual(out.content[0], { + type: 'resource', + resource: { uri: 'claude-client://tu%2Fwith-slash/0', mimeType: 'application/pdf', blob: 'BASE64PDF' }, + }); + assert.deepStrictEqual(out.content[1], { + type: 'resource', + resource: { uri: 'claude-client://tu%2Fwith-slash/1', mimeType: 'application/zip', blob: 'BASE64ZIP' }, + }); + }); + + test('unknown block kind collapses to a stringified text block (warn logged, no throw)', () => { + const originalWarn = console.warn; + let warned = false; + console.warn = () => { warned = true; }; + try { + const malformedBlock = { type: 'not-a-real-kind', ref: 'r1', mimeType: 'text/plain' } as unknown as ToolResultContent; + const out = convertToolCallResult(makeResult({ + content: [malformedBlock], + }), 'tu_5'); + assert.strictEqual(out.content[0].type, 'text'); + assert.ok(typeof (out.content[0] as { text: string }).text === 'string'); + assert.strictEqual(warned, true); + } finally { + console.warn = originalWarn; + } + }); + + test('structuredContent passes through unchanged', () => { + const out = convertToolCallResult(makeResult({ + structuredContent: { k: 'v' }, + }), 'tu_6'); + assert.deepStrictEqual(out.structuredContent, { k: 'v' }); + }); +}); diff --git a/src/vs/platform/agentHost/test/node/clientTools/claudeJsonSchemaToZod.test.ts b/src/vs/platform/agentHost/test/node/clientTools/claudeJsonSchemaToZod.test.ts new file mode 100644 index 0000000000000..7f2a624903668 --- /dev/null +++ b/src/vs/platform/agentHost/test/node/clientTools/claudeJsonSchemaToZod.test.ts @@ -0,0 +1,100 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import assert from 'assert'; +import { z } from 'zod'; +import { ensureNoDisposablesAreLeakedInTestSuite } from '../../../../../base/test/common/utils.js'; +import { jsonSchemaToZodRawShape } from '../../../node/claude/clientTools/claudeJsonSchemaToZod.js'; + +function parse(shape: Record, value: unknown) { + return z.object(shape).safeParse(value); +} + +suite('claudeJsonSchemaToZod', () => { + + ensureNoDisposablesAreLeakedInTestSuite(); + + test('empty / undefined schema returns empty shape', () => { + assert.deepStrictEqual(Object.keys(jsonSchemaToZodRawShape(undefined)), []); + assert.deepStrictEqual(Object.keys(jsonSchemaToZodRawShape({ type: 'object' })), []); + }); + + test('primitives + required vs optional wrapping', () => { + const shape = jsonSchemaToZodRawShape({ + type: 'object', + properties: { + a: { type: 'string' }, + b: { type: 'number' }, + c: { type: 'integer' }, + d: { type: 'boolean' }, + }, + required: ['a'], + }); + assert.strictEqual(parse(shape, { a: 'x' }).success, true, 'omitting optional props OK'); + assert.strictEqual(parse(shape, {}).success, false, 'missing required a fails'); + assert.strictEqual(parse(shape, { a: 'x', c: 1.5 }).success, false, 'integer rejects float'); + assert.strictEqual(parse(shape, { a: 'x', c: 1, b: 2, d: true }).success, true); + }); + + test('arrays + nested objects + enum + oneOf + null', () => { + const shape = jsonSchemaToZodRawShape({ + type: 'object', + properties: { + list: { type: 'array', items: { type: 'string' } }, + nested: { + type: 'object', + properties: { inner: { type: 'number' } }, + required: ['inner'], + }, + color: { enum: ['red', 'blue', 'green'] }, + one: { enum: ['only'] }, + either: { oneOf: [{ type: 'string' }, { type: 'number' }] }, + nope: { type: 'null' }, + }, + required: ['list', 'nested', 'color', 'one', 'either', 'nope'], + }); + const ok = parse(shape, { + list: ['a', 'b'], + nested: { inner: 1 }, + color: 'red', + one: 'only', + either: 'hi', + nope: null, + }); + assert.strictEqual(ok.success, true); + assert.strictEqual(parse(shape, { list: [1], nested: { inner: 1 }, color: 'red', one: 'only', either: 'x', nope: null }).success, false, 'array items typed'); + assert.strictEqual(parse(shape, { list: [], nested: { inner: 1 }, color: 'purple', one: 'only', either: 'x', nope: null }).success, false, 'enum rejects unknown'); + assert.strictEqual(parse(shape, { list: [], nested: { inner: 1 }, color: 'red', one: 'only', either: true, nope: null }).success, false, 'oneOf rejects out-of-union'); + }); + + test('nullable + description + default survive conversion', () => { + const shape = jsonSchemaToZodRawShape({ + type: 'object', + properties: { + n: { type: 'string', nullable: true, description: 'a thing', default: 'd' }, + }, + required: ['n'], + }); + assert.strictEqual(parse(shape, { n: null }).success, true, 'nullable accepts null'); + assert.strictEqual(parse(shape, { n: 'hi' }).success, true); + assert.strictEqual(parse(shape, { n: 7 }).success, false); + const withDefault = parse(shape, {}); + assert.strictEqual(withDefault.success, true, 'default fills in missing'); + assert.strictEqual(withDefault.success && (withDefault.data as { n: string }).n, 'd'); + }); + + test('unsupported property schema falls back to z.any() โ€” never rejects the tool', () => { + const shape = jsonSchemaToZodRawShape({ + type: 'object', + properties: { + weird: { type: 'totally-bogus' as unknown as string }, + worse: null as unknown as object, + }, + required: ['weird'], + }); + assert.strictEqual(parse(shape, { weird: 42 }).success, true, 'any accepts anything'); + assert.strictEqual(parse(shape, { weird: { nested: true }, worse: 'also fine' }).success, true); + }); +}); diff --git a/src/vs/platform/agentHost/test/node/clientTools/claudeSessionClientToolsModel.test.ts b/src/vs/platform/agentHost/test/node/clientTools/claudeSessionClientToolsModel.test.ts new file mode 100644 index 0000000000000..16d3bbbb4aead --- /dev/null +++ b/src/vs/platform/agentHost/test/node/clientTools/claudeSessionClientToolsModel.test.ts @@ -0,0 +1,122 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import assert from 'assert'; +import { ensureNoDisposablesAreLeakedInTestSuite } from '../../../../../base/test/common/utils.js'; +import type { ToolDefinition } from '../../../common/state/protocol/state.js'; +import { SessionClientToolsDiff } from '../../../node/claude/clientTools/claudeSessionClientToolsModel.js'; + +const tool = (over: Partial = {}): ToolDefinition => ({ + name: 'echo', + description: 'echoes', + inputSchema: { type: 'object', properties: { msg: { type: 'string' } }, required: ['msg'] }, + ...over, +}); + +suite('SessionClientToolsDiff', () => { + + const disposables = ensureNoDisposablesAreLeakedInTestSuite(); + + test('fresh diff: state empty, no difference', () => { + const diff = disposables.add(new SessionClientToolsDiff()); + assert.deepStrictEqual(diff.model.state.get(), { tools: undefined, clientId: undefined }); + assert.strictEqual(diff.hasDifference, false); + }); + + test('setTools(undefined โ†’ []) does NOT flip dirty (undefined โ‰ก [])', () => { + const diff = disposables.add(new SessionClientToolsDiff()); + diff.model.setTools([]); + assert.strictEqual(diff.hasDifference, false); + }); + + test('setTools with a real snapshot flips dirty', () => { + const diff = disposables.add(new SessionClientToolsDiff()); + diff.model.setTools([tool()]); + assert.strictEqual(diff.hasDifference, true); + }); + + test('consume() returns the current state and clears dirty', () => { + const diff = disposables.add(new SessionClientToolsDiff()); + diff.model.setTools([tool()], 'c1'); + const state = diff.consume(); + assert.deepStrictEqual(state, { tools: [tool()], clientId: 'c1' }); + assert.strictEqual(diff.hasDifference, false); + }); + + test('setTools with a structurally-equal snapshot does NOT re-flip dirty after consume', () => { + const diff = disposables.add(new SessionClientToolsDiff()); + diff.model.setTools([tool()]); + diff.consume(); + diff.model.setTools([{ name: 'echo', description: 'echoes', inputSchema: { type: 'object', properties: { msg: { type: 'string' } }, required: ['msg'] } }]); + assert.strictEqual(diff.hasDifference, false); + }); + + test('C6: setTools racing async work after consume re-flips dirty via autorun', async () => { + const diff = disposables.add(new SessionClientToolsDiff()); + diff.model.setTools([tool({ name: 'original' })]); + const state = diff.consume(); + assert.deepStrictEqual(state.tools, [tool({ name: 'original' })]); + await Promise.resolve(); + diff.model.setTools([tool({ name: 'racer' })]); + assert.strictEqual(diff.hasDifference, true); + }); + + test('markDirty re-flips after a failed downstream build', () => { + const diff = disposables.add(new SessionClientToolsDiff()); + diff.model.setTools([tool()]); + diff.consume(); + assert.strictEqual(diff.hasDifference, false); + diff.markDirty(); + assert.strictEqual(diff.hasDifference, true); + }); + + test('hasDifference detects rename / description / inputSchema; ignores title', () => { + const diff = disposables.add(new SessionClientToolsDiff()); + diff.model.setTools([tool({ name: 'a' })]); + diff.consume(); + + diff.model.setTools([tool({ name: 'b' })]); + assert.strictEqual(diff.hasDifference, true); + diff.consume(); + + diff.model.setTools([tool({ name: 'b', description: 'new' })]); + assert.strictEqual(diff.hasDifference, true); + diff.consume(); + + diff.model.setTools([tool({ name: 'b', description: 'new', inputSchema: { type: 'object', properties: { msg: { type: 'number' } }, required: ['msg'] } })]); + assert.strictEqual(diff.hasDifference, true); + diff.consume(); + + diff.model.setTools([tool({ name: 'b', description: 'new', inputSchema: { type: 'object', properties: { msg: { type: 'number' } }, required: ['msg'] }, title: 'X' })]); + assert.strictEqual(diff.hasDifference, false, 'title is outside the diff scope'); + }); + + test('order-insensitive: reordering tools does not flip dirty', () => { + const diff = disposables.add(new SessionClientToolsDiff()); + diff.model.setTools([tool({ name: 'a' }), tool({ name: 'b' })]); + diff.consume(); + diff.model.setTools([tool({ name: 'b' }), tool({ name: 'a' })]); + assert.strictEqual(diff.hasDifference, false); + }); + + test('setTools writes clientId only when supplied', () => { + const diff = disposables.add(new SessionClientToolsDiff()); + diff.model.setTools([tool()], 'c1'); + assert.strictEqual(diff.model.state.get().clientId, 'c1'); + diff.model.setTools([tool({ name: 'echo2' })]); + assert.strictEqual(diff.model.state.get().clientId, 'c1', 'omitted clientId leaves previous value'); + diff.model.setTools([tool()], 'c2'); + assert.strictEqual(diff.model.state.get().clientId, 'c2'); + }); + + test('clientId change alone flips dirty', () => { + const diff = disposables.add(new SessionClientToolsDiff()); + diff.model.setTools([tool()], 'c1'); + diff.consume(); + assert.strictEqual(diff.hasDifference, false); + diff.model.setTools([tool()], 'c2'); + assert.strictEqual(diff.hasDifference, true); + }); +}); diff --git a/test/unit/electron/renderer.html b/test/unit/electron/renderer.html index 3af99562e153d..4654a620b1775 100644 --- a/test/unit/electron/renderer.html +++ b/test/unit/electron/renderer.html @@ -46,6 +46,13 @@ // supported by blink/chromium. To work around this, we generate an import map that maps all node modules // to a blob that exports all properties of the module as named exports and the module itself as default. + // Some CommonJS modules export keys that are reserved words in ES module syntax + // (e.g. zod exports `enum`, `default`, `function`). Generating `export const enum = ...` + // for those would fail to parse, so skip them. Hoisted to outer scope so we don't + // reallocate the Set / regex for every dependency the import map covers. + const RESERVED_EXPORT_NAMES = new Set(['break', 'case', 'catch', 'class', 'const', 'continue', 'debugger', 'default', 'delete', 'do', 'else', 'enum', 'export', 'extends', 'false', 'finally', 'for', 'function', 'if', 'import', 'in', 'instanceof', 'new', 'null', 'return', 'super', 'switch', 'this', 'throw', 'true', 'try', 'typeof', 'var', 'void', 'while', 'with', 'yield', 'let', 'static', 'implements', 'interface', 'package', 'private', 'protected', 'public', 'await']); + const VALID_IDENTIFIER = /^[A-Za-z_$][A-Za-z0-9_$]*$/; + function asRequireBlobUri(name) { let _mod; try { @@ -61,6 +68,9 @@ // export all properties and the module itself as default let jsSrc = `const _mod = require('${name}')\n`; for (const name of Object.keys(_mod)) { + if (RESERVED_EXPORT_NAMES.has(name) || !VALID_IDENTIFIER.test(name)) { + continue; + } jsSrc += `export const ${name} = _mod['${name}'];\n`; } jsSrc += `export default _mod;\n`;