From 083cedfd2c48d310f3ff99c2795ac10a7ae75abc Mon Sep 17 00:00:00 2001 From: Jack Herrington Date: Thu, 16 Apr 2026 10:40:14 -0700 Subject: [PATCH 01/12] chore: update project agent configuration --- .agentsroom/.gitignore | 4 ++++ .agentsroom/agents.json | 1 + .agentsroom/prompts.json | 4 ++++ 3 files changed, 9 insertions(+) create mode 100644 .agentsroom/.gitignore create mode 100644 .agentsroom/agents.json create mode 100644 .agentsroom/prompts.json diff --git a/.agentsroom/.gitignore b/.agentsroom/.gitignore new file mode 100644 index 000000000..1acd1a387 --- /dev/null +++ b/.agentsroom/.gitignore @@ -0,0 +1,4 @@ +# AgentsRoom: personal files (not committed to git) +*-personal.json +agents-local.json +sessions/ diff --git a/.agentsroom/agents.json b/.agentsroom/agents.json new file mode 100644 index 000000000..0637a088a --- /dev/null +++ b/.agentsroom/agents.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/.agentsroom/prompts.json b/.agentsroom/prompts.json new file mode 100644 index 000000000..f4455d843 --- /dev/null +++ b/.agentsroom/prompts.json @@ -0,0 +1,4 @@ +{ + "folders": [], + "prompts": [] +} \ No newline at end of file From 57a8abdd5e9efa2c5e7a7c4044e5a56ec77fedf5 Mon Sep 17 00:00:00 2001 From: Jack Herrington Date: Thu, 16 Apr 2026 10:40:44 -0700 Subject: [PATCH 02/12] chore: update project agent configuration --- .agentsroom/agents.json | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/.agentsroom/agents.json b/.agentsroom/agents.json index 0637a088a..61f6b9421 100644 --- a/.agentsroom/agents.json +++ b/.agentsroom/agents.json @@ -1 +1,9 @@ -[] \ No newline at end of file +[ + { + "role": "fullstack", + "model": "opus", + "customName": "Full-Stack Developer", + "isPersonal": false, + "id": "agent-1776361243376-3sekdc" + } +] \ No newline at end of file From 922d64d80750a32aa9652bc74d47a7ea74adc757 Mon Sep 17 00:00:00 2001 From: Jack Herrington Date: Thu, 16 Apr 2026 10:40:59 -0700 Subject: [PATCH 03/12] 
chore: update project agent configuration --- .agentsroom/agents.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.agentsroom/agents.json b/.agentsroom/agents.json index 61f6b9421..e9a83e418 100644 --- a/.agentsroom/agents.json +++ b/.agentsroom/agents.json @@ -4,6 +4,7 @@ "model": "opus", "customName": "Full-Stack Developer", "isPersonal": false, - "id": "agent-1776361243376-3sekdc" + "id": "agent-1776361243376-3sekdc", + "claudeSessionId": "96773a93-be2a-45a9-a732-ceb224d3d0e5" } ] \ No newline at end of file From 701e12894b2cdabe0e81c354c9b10572571a4635 Mon Sep 17 00:00:00 2001 From: Jack Herrington Date: Fri, 12 Jun 2026 18:07:02 -0700 Subject: [PATCH 04/12] feat: add @tanstack/ai-claude-code harness adapter and coding-agent example MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New @tanstack/ai-claude-code package that runs Claude Code (via @anthropic-ai/claude-agent-sdk) as a TanStack AI chat backend. Unlike HTTP provider adapters, this is a harness adapter: Claude Code owns the agent loop and executes its built-in tools (bash, file edits, search) server-side. - Stream translator maps Agent SDK messages to AG-UI events; harness tool activity arrives as already-resolved TOOL_CALL_*/TOOL_CALL_RESULT pairs and runs always finish with stop/length (never tool_calls), so the engine never re-executes harness tools. Every started tool call is guaranteed a result (synthesized on abort) to keep the engine's pending-call scan safe. - TanStack toolDefinition() server tools are bridged into the harness as an in-process MCP server (raw JSON Schema passthrough, no zod round-trip). Client-side/approval tools fail fast — documented v1 limitation. - Stateful sessions: session id surfaced via a claude-code.session-id CUSTOM event; resume via modelOptions.sessionId (+ forkSession). - Structured output uses the SDK's native outputFormat json_schema. 
- settingSources defaults to ['project'] so servers don't inherit user-level ~/.claude config from the host machine. - E2E: excluded from the aimock matrix (subprocess can't carry X-Test-Id isolation); covered by 44 unit tests plus a gated live smoke spec (CLAUDE_CODE_E2E=1). Also adds examples/ts-react-coding-agent: a TanStack Start app demoing session resume, the harness tool timeline, read-only/edit permission modes, tool bridging, and a sandboxed scratch workspace — with the agent registry structured so future Codex/Gemini CLI harness adapters can slot in. Co-Authored-By: Claude Fable 5 --- .changeset/ai-claude-code-initial.md | 5 + docs/adapters/claude-code.md | 181 +++++++ docs/config.json | 5 + examples/ts-react-coding-agent/README.md | 50 ++ examples/ts-react-coding-agent/package.json | 36 ++ .../ts-react-coding-agent/src/lib/agents.ts | 35 ++ .../src/lib/style-guide-tool.ts | 26 + .../src/routeTree.gen.ts | 86 ++++ examples/ts-react-coding-agent/src/router.tsx | 13 + .../src/routes/__root.tsx | 41 ++ .../src/routes/api.chat.ts | 96 ++++ .../src/routes/index.tsx | 209 ++++++++ examples/ts-react-coding-agent/src/styles.css | 1 + examples/ts-react-coding-agent/tsconfig.json | 28 + examples/ts-react-coding-agent/vite.config.ts | 30 ++ .../ts-react-coding-agent/workspace/README.md | 13 + .../workspace/temperature.js | 12 + .../ts-react-coding-agent/workspace/todo.md | 5 + packages/ai-claude-code/README.md | 18 + packages/ai-claude-code/package.json | 60 +++ packages/ai-claude-code/src/adapters/text.ts | 372 ++++++++++++++ packages/ai-claude-code/src/index.ts | 19 + .../ai-claude-code/src/messages/prompt.ts | 68 +++ packages/ai-claude-code/src/model-meta.ts | 21 + .../ai-claude-code/src/provider-options.ts | 32 ++ .../ai-claude-code/src/stream/sdk-types.ts | 135 +++++ .../ai-claude-code/src/stream/translate.ts | 483 +++++++++++++++++ packages/ai-claude-code/src/tools/bridge.ts | 64 +++ packages/ai-claude-code/tests/bridge.test.ts | 103 ++++ 
packages/ai-claude-code/tests/prompt.test.ts | 97 ++++ .../ai-claude-code/tests/text-adapter.test.ts | 370 +++++++++++++ .../ai-claude-code/tests/translate.test.ts | 485 ++++++++++++++++++ packages/ai-claude-code/tsconfig.json | 8 + packages/ai-claude-code/vite.config.ts | 37 ++ pnpm-lock.yaml | 181 +++++++ testing/e2e/README.md | 2 + testing/e2e/package.json | 1 + testing/e2e/tests/claude-code.spec.ts | 72 +++ 38 files changed, 3500 insertions(+) create mode 100644 .changeset/ai-claude-code-initial.md create mode 100644 docs/adapters/claude-code.md create mode 100644 examples/ts-react-coding-agent/README.md create mode 100644 examples/ts-react-coding-agent/package.json create mode 100644 examples/ts-react-coding-agent/src/lib/agents.ts create mode 100644 examples/ts-react-coding-agent/src/lib/style-guide-tool.ts create mode 100644 examples/ts-react-coding-agent/src/routeTree.gen.ts create mode 100644 examples/ts-react-coding-agent/src/router.tsx create mode 100644 examples/ts-react-coding-agent/src/routes/__root.tsx create mode 100644 examples/ts-react-coding-agent/src/routes/api.chat.ts create mode 100644 examples/ts-react-coding-agent/src/routes/index.tsx create mode 100644 examples/ts-react-coding-agent/src/styles.css create mode 100644 examples/ts-react-coding-agent/tsconfig.json create mode 100644 examples/ts-react-coding-agent/vite.config.ts create mode 100644 examples/ts-react-coding-agent/workspace/README.md create mode 100644 examples/ts-react-coding-agent/workspace/temperature.js create mode 100644 examples/ts-react-coding-agent/workspace/todo.md create mode 100644 packages/ai-claude-code/README.md create mode 100644 packages/ai-claude-code/package.json create mode 100644 packages/ai-claude-code/src/adapters/text.ts create mode 100644 packages/ai-claude-code/src/index.ts create mode 100644 packages/ai-claude-code/src/messages/prompt.ts create mode 100644 packages/ai-claude-code/src/model-meta.ts create mode 100644 
packages/ai-claude-code/src/provider-options.ts create mode 100644 packages/ai-claude-code/src/stream/sdk-types.ts create mode 100644 packages/ai-claude-code/src/stream/translate.ts create mode 100644 packages/ai-claude-code/src/tools/bridge.ts create mode 100644 packages/ai-claude-code/tests/bridge.test.ts create mode 100644 packages/ai-claude-code/tests/prompt.test.ts create mode 100644 packages/ai-claude-code/tests/text-adapter.test.ts create mode 100644 packages/ai-claude-code/tests/translate.test.ts create mode 100644 packages/ai-claude-code/tsconfig.json create mode 100644 packages/ai-claude-code/vite.config.ts create mode 100644 testing/e2e/tests/claude-code.spec.ts diff --git a/.changeset/ai-claude-code-initial.md b/.changeset/ai-claude-code-initial.md new file mode 100644 index 000000000..4cca73f0f --- /dev/null +++ b/.changeset/ai-claude-code-initial.md @@ -0,0 +1,5 @@ +--- +'@tanstack/ai-claude-code': minor +--- + +New `@tanstack/ai-claude-code` package: a Claude Code harness adapter that runs `@anthropic-ai/claude-agent-sdk` as a TanStack AI chat backend. Claude Code owns the agent loop and executes its built-in tools (bash, file edits, search) server-side; their activity streams back as resolved tool-call events. TanStack `toolDefinition()` server tools are bridged into the harness via an in-process MCP server, sessions are resumable via `modelOptions.sessionId` (surfaced through a `claude-code.session-id` custom event), and structured output uses the harness's native JSON-schema output format. diff --git a/docs/adapters/claude-code.md b/docs/adapters/claude-code.md new file mode 100644 index 000000000..3f0f6dfbc --- /dev/null +++ b/docs/adapters/claude-code.md @@ -0,0 +1,181 @@ +--- +title: Claude Code +id: claude-code-adapter +order: 11 +description: "Use Claude Code as a chat backend in TanStack AI — agent harness with local tool execution, stateful coding sessions, and tool bridging via @tanstack/ai-claude-code." 
+keywords: + - tanstack ai + - claude code + - claude agent sdk + - anthropic + - harness + - agent + - coding agent + - adapter +--- + +The Claude Code adapter runs [Claude Code](https://docs.anthropic.com/en/docs/claude-code) (via the `@anthropic-ai/claude-agent-sdk`) as a chat backend. Unlike HTTP provider adapters, this is a **harness adapter**: Claude Code runs its own agent loop and executes its own tools — bash, file reads and edits, glob/grep search, web search — locally on your server. Each `chat()` call runs one full harness turn; the harness's tool activity streams back as already-resolved tool-call events your UI can render. + +> **Server-only.** The harness spawns the Claude Code runtime as a subprocess, so this adapter only works in a Node.js server environment — never in the browser. Treat it like giving Claude a shell on the machine it runs on, and configure permissions accordingly. + +## Installation + +```bash +npm install @tanstack/ai-claude-code +``` + +A runnable demo lives at [`examples/ts-react-coding-agent`](https://github.com/TanStack/ai/tree/main/examples/ts-react-coding-agent) — session resume, the harness tool timeline, permission modes, and tool bridging, wired into a React app. + +## Authentication + +The harness resolves credentials the same way Claude Code does: + +- `ANTHROPIC_API_KEY` in the server's environment (or the `apiKey` config option), or +- an existing Claude subscription login on the machine (`claude login`). 
+ +## Basic Usage + +```typescript +import { chat } from "@tanstack/ai"; +import { claudeCodeText } from "@tanstack/ai-claude-code"; + +const stream = chat({ + adapter: claudeCodeText("claude-opus-4-8", { + cwd: "/path/to/project", + permissionMode: "acceptEdits", + }), + messages: [{ role: "user", content: "Fix the failing test in utils.test.ts" }], +}); +``` + +## Configuration + +| Option | Description | +| ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- | +| `cwd` | Working directory for the harness session. Defaults to `process.cwd()`. | +| `permissionMode` | Claude Code permission mode (`'default'`, `'acceptEdits'`, `'bypassPermissions'`, `'plan'`, `'dontAsk'`, `'auto'`). See the permissions note below. | +| `allowedTools` | Built-in tools the harness may use without prompting (e.g. `['Read', 'Grep', 'Bash(npm test:*)']`). | +| `disallowedTools` | Built-in tools removed from the harness entirely. | +| `maxTurns` | Maximum harness-internal turns per run. | +| `systemPromptMode` | `'append'` (default) keeps Claude Code's preset system prompt and appends your `systemPrompts`; `'replace'` sends yours as the entire prompt. | +| `mcpServers` | Extra MCP servers passed through to the harness untouched. | +| `apiKey` | Anthropic API key for the harness subprocess. | +| `env` | Extra environment variables for the harness subprocess. | +| `pathToClaudeCodeExecutable` | Use a specific Claude Code executable instead of the SDK's bundled one. | +| `streamPartials` | Emit true token-level text deltas (default `true`). | +| `canUseTool` | Custom permission handler; replaces the adapter's default handler. | +| `settingSources` | Claude Code settings tiers to load. Default `['project']`: the `cwd`'s CLAUDE.md and project settings apply, but user-level config on the host (`~/.claude` plugins, hooks, skills) is ignored. 
Pass `['user', 'project', 'local']` for CLI-equivalent behavior, or `[]` for full isolation. | + +**Permissions on headless servers.** Without an explicit `permissionMode` or `canUseTool`, the adapter installs a safe default handler: bridged TanStack tools always run, and any built-in tool call that would normally prompt a human is denied with guidance instead of hanging the request. To let the harness edit files or run commands, set `permissionMode: 'acceptEdits'` / `'bypassPermissions'`, or enumerate `allowedTools`. + +## Stateful Sessions + +Claude Code sessions are stateful — the harness keeps the full working context (files read, commands run, conclusions reached) between turns. The adapter surfaces the session id of every run as a custom stream event named `claude-code.session-id`; thread it back via `modelOptions.sessionId` to resume the session. When resuming, only the latest user message is sent — the harness already holds the prior context. + +Server endpoint: + +```typescript +import { + chat, + chatParamsFromRequest, + toServerSentEventsResponse, +} from "@tanstack/ai"; +import { claudeCodeText } from "@tanstack/ai-claude-code"; + +export async function POST(request: Request) { + const params = await chatParamsFromRequest(request); + + // Extra fields the client puts in the connection `body` arrive here. + const sessionId = + typeof params.forwardedProps.sessionId === "string" + ? 
params.forwardedProps.sessionId + : undefined; + + const stream = chat({ + adapter: claudeCodeText("claude-opus-4-8", { + cwd: "/path/to/project", + permissionMode: "acceptEdits", + }), + messages: params.messages, + modelOptions: { sessionId }, + }); + + return toServerSentEventsResponse(stream); +} +``` + +Client (React) — capture the session id from the custom event and send it back on subsequent requests: + +```typescript +import { useState } from "react"; +import { useChat } from "@tanstack/ai-react"; +import { fetchServerSentEvents } from "@tanstack/ai-client"; + +function CodingAssistant() { + const [sessionId, setSessionId] = useState<string | undefined>(undefined); + + const { messages, sendMessage } = useChat({ + connection: fetchServerSentEvents("/api/chat", () => ({ + body: { sessionId }, + })), + onCustomEvent: (name, value) => { + if ( + name === "claude-code.session-id" && + typeof value === "object" && + value !== null && + "sessionId" in value && + typeof value.sessionId === "string" + ) { + setSessionId(value.sessionId); + } + }, + }); + + // ... render messages; harness tool activity (Bash, Edit, Read, ...) + // arrives as regular tool-call parts with their results attached. +} +``` + +Sessions are stored on the machine that ran them (`~/.claude/projects/`), so resuming only works on the same server instance. Pass `modelOptions: { forkSession: true }` alongside `sessionId` to branch a session instead of continuing it. + +## Tools + +Two kinds of tools flow through this adapter: + +1. **Built-in harness tools** (`Bash`, `Read`, `Write`, `Edit`, `Glob`, `Grep`, `WebSearch`, ...) are executed by Claude Code itself. Their activity streams back as tool-call events with results already attached, so `useChat` UIs render them with no extra wiring — but your code never executes them. + +2. **Your TanStack tools** are bridged *into* the harness as an in-process MCP server. 
Define them as usual with `toolDefinition().server()`; the model sees them as `mcp__tanstack__<name>` and the adapter strips the prefix on the way back out, so events match the names you registered. + +```typescript +import { z } from "zod"; +import { chat, toolDefinition } from "@tanstack/ai"; +import { claudeCodeText } from "@tanstack/ai-claude-code"; + +const lookupTicket = toolDefinition({ + name: "lookup_ticket", + description: "Look up an issue ticket by id", + inputSchema: z.object({ ticketId: z.string() }), +}).server(async ({ ticketId }) => { + return { ticketId, status: "open", title: "Crash on startup" }; +}); + +const stream = chat({ + adapter: claudeCodeText("claude-opus-4-8"), + messages: [{ role: "user", content: "What's the status of ticket T-123?" }], + tools: [lookupTicket], +}); +``` + +**Client-side and approval-gated tools are not supported.** The harness executes tools inside a live subprocess, which cannot pause across HTTP requests to wait for a browser round-trip or a human approval. Passing a tool without a server `execute()` implementation — or one marked `needsApproval` — fails fast with a descriptive error. Run those tools outside the harness with a regular provider adapter. + +## Structured Output + +`structuredOutput()` uses the harness's native JSON-schema output format in a one-shot run (single turn, no tools). It works for finalization after a chat, but a plain provider adapter (e.g. `@tanstack/ai-anthropic`) is the better choice when structured extraction is the primary job — it's faster and doesn't spawn a subprocess. + +## Limitations + +- **Server-only (Node).** The harness spawns a subprocess; Windows support is untested. +- **The harness owns the agent loop.** TanStack's agent-loop strategies and per-iteration middleware don't apply inside a harness turn; `maxTurns` is the equivalent control. +- **No sampling controls.** `temperature`-style options don't exist here. 
+- **Sessions are machine-local.** Resume requires hitting the same server instance. +- **Cold starts.** Each call spawns a harness turn; expect higher first-token latency than HTTP adapters. diff --git a/docs/config.json b/docs/config.json index e3fc3b712..a53629291 100644 --- a/docs/config.json +++ b/docs/config.json @@ -446,6 +446,11 @@ "label": "OpenAI-Compatible", "to": "adapters/openai-compatible", "addedAt": "2026-06-01" + }, + { + "label": "Claude Code", + "to": "adapters/claude-code", + "addedAt": "2026-06-12" } ] }, diff --git a/examples/ts-react-coding-agent/README.md b/examples/ts-react-coding-agent/README.md new file mode 100644 index 000000000..4576afdea --- /dev/null +++ b/examples/ts-react-coding-agent/README.md @@ -0,0 +1,50 @@ +# TanStack AI — Coding Agent Example + +A React (TanStack Start) app that drives a **coding-agent harness** through +TanStack AI — currently [Claude Code](https://docs.anthropic.com/en/docs/claude-code) +via `@tanstack/ai-claude-code`, with the agent registry structured so future +harness adapters (Codex, Gemini CLI, ...) can slot in. + +Unlike a normal chat example, the agent here runs its own loop server-side +and executes its own tools — reading, searching, and (in Edit mode) editing +the files in `workspace/`. Its tool activity streams into the UI as a +timeline of resolved tool calls. + +## What it demonstrates + +- **Session resume** — the server emits the harness session id via a + `claude-code.session-id` custom event; the client pins it and sends it + back through `forwardedProps` → `modelOptions.sessionId`, so follow-ups + continue the same stateful session. +- **Harness tool timeline** — built-in tools (Read, Grep, Edit, ...) arrive + as already-resolved tool-call parts and render with their inputs/outputs. +- **Permission modes** — a Read-only/Edit toggle maps to `disallowedTools` + vs `permissionMode: 'acceptEdits'`. 
Shell commands are denied by the + adapter's default permission policy either way — ask it to run something + and watch the denial show up in the timeline. +- **Tool bridging** — `lookup_style_guide` is an ordinary TanStack server + tool the harness calls from inside its own loop. +- **Sandboxed cwd** — the agent only works inside `workspace/`. + +## Running + +This is a server-spawning example: each chat turn launches the Claude Code +runtime as a subprocess on your machine. + +1. Auth: set `ANTHROPIC_API_KEY`, or have a local Claude Code login + (`claude login`). +2. From this directory: + + ```bash + pnpm install + pnpm dev + ``` + +3. Open http://localhost:3000 and try: + - "What files are in this project, and what do they do?" (Read-only) + - Switch to **Edit mode**: "Fix the bug in temperature.js" — note it + calls `lookup_style_guide` first. + - "Now update todo.md to check off what you did" — same session, no + re-explaining. + +Reset the demo workspace afterwards with `git checkout -- workspace/`. 
diff --git a/examples/ts-react-coding-agent/package.json b/examples/ts-react-coding-agent/package.json new file mode 100644 index 000000000..cccab8e65 --- /dev/null +++ b/examples/ts-react-coding-agent/package.json @@ -0,0 +1,36 @@ +{ + "name": "ts-react-coding-agent", + "private": true, + "type": "module", + "scripts": { + "dev": "vite dev --port 3000", + "build": "vite build", + "serve": "vite preview", + "test": "exit 0", + "test:types": "tsc --noEmit" + }, + "dependencies": { + "@tailwindcss/vite": "^4.1.18", + "@tanstack/ai": "workspace:*", + "@tanstack/ai-claude-code": "workspace:*", + "@tanstack/ai-client": "workspace:*", + "@tanstack/ai-react": "workspace:*", + "@tanstack/nitro-v2-vite-plugin": "^1.154.7", + "@tanstack/react-router": "^1.158.4", + "@tanstack/react-start": "^1.159.0", + "@tanstack/router-plugin": "^1.158.4", + "react": "^19.2.3", + "react-dom": "^19.2.3", + "tailwindcss": "^4.1.18", + "vite-tsconfig-paths": "^5.1.4", + "zod": "^4.2.0" + }, + "devDependencies": { + "@types/node": "^24.10.1", + "@types/react": "^19.2.7", + "@types/react-dom": "^19.2.3", + "@vitejs/plugin-react": "^5.1.2", + "typescript": "5.9.3", + "vite": "^7.3.3" + } +} diff --git a/examples/ts-react-coding-agent/src/lib/agents.ts b/examples/ts-react-coding-agent/src/lib/agents.ts new file mode 100644 index 000000000..49e236ec0 --- /dev/null +++ b/examples/ts-react-coding-agent/src/lib/agents.ts @@ -0,0 +1,35 @@ +/** + * Registry of coding-agent harnesses this example can drive. + * + * Each entry maps to a harness adapter on the server (see + * `src/routes/api.chat.ts`). Today only Claude Code ships; Codex and + * Gemini CLI slots are reserved for future harness adapters. + */ +export const AGENTS = [ + { id: 'claude-code', label: 'Claude Code', available: true }, + { id: 'codex', label: 'Codex (coming soon)', available: false }, + { id: 'gemini-cli', label: 'Gemini CLI (coming soon)', available: false }, +] as const + +/** Agent ids with a working adapter behind them. 
*/ +export type AgentId = 'claude-code' + +export const DEFAULT_AGENT: AgentId = 'claude-code' + +export function isAgentId(value: unknown): value is AgentId { + return value === 'claude-code' +} + +/** + * What the agent is allowed to do in the workspace: + * - `read-only`: it can read and search, but file edits and shell commands + * are blocked. + * - `edit`: file edits are auto-approved; shell commands still get denied by + * the adapter's default permission policy (a deliberate demo of the + * permission system). + */ +export type AgentMode = 'read-only' | 'edit' + +export function isAgentMode(value: unknown): value is AgentMode { + return value === 'read-only' || value === 'edit' +} diff --git a/examples/ts-react-coding-agent/src/lib/style-guide-tool.ts b/examples/ts-react-coding-agent/src/lib/style-guide-tool.ts new file mode 100644 index 000000000..679356a48 --- /dev/null +++ b/examples/ts-react-coding-agent/src/lib/style-guide-tool.ts @@ -0,0 +1,26 @@ +import { z } from 'zod' +import { toolDefinition } from '@tanstack/ai' + +/** + * A TanStack server tool bridged *into* the harness. The agent sees it as + * `mcp__tanstack__lookup_style_guide`, calls it like any built-in tool, and + * the adapter strips the prefix so the UI shows `lookup_style_guide`. + */ +export const lookupStyleGuide = toolDefinition({ + name: 'lookup_style_guide', + description: + "Look up this project's coding style guide. Call this before writing or editing any code so your changes match the house style.", + inputSchema: z.object({ + topic: z + .string() + .describe('What you are about to write, e.g. 
"functions", "naming"'), + }), +}).server(({ topic }) => ({ + topic, + rules: [ + 'Use arrow functions assigned to const, never function declarations.', + 'Prefer single quotes and no semicolons.', + 'Every exported function gets a one-line JSDoc comment.', + 'Keep files under 100 lines; split modules instead of growing them.', + ], +})) diff --git a/examples/ts-react-coding-agent/src/routeTree.gen.ts b/examples/ts-react-coding-agent/src/routeTree.gen.ts new file mode 100644 index 000000000..861dc17e2 --- /dev/null +++ b/examples/ts-react-coding-agent/src/routeTree.gen.ts @@ -0,0 +1,86 @@ +/* eslint-disable */ + +// @ts-nocheck + +// noinspection JSUnusedGlobalSymbols + +// This file was automatically generated by TanStack Router. +// You should NOT make any changes in this file as it will be overwritten. +// Additionally, you should also exclude this file from your linter and/or formatter to prevent it from being checked or modified. + +import { Route as rootRouteImport } from './routes/__root' +import { Route as IndexRouteImport } from './routes/index' +import { Route as ApiChatRouteImport } from './routes/api.chat' + +const IndexRoute = IndexRouteImport.update({ + id: '/', + path: '/', + getParentRoute: () => rootRouteImport, +} as any) +const ApiChatRoute = ApiChatRouteImport.update({ + id: '/api/chat', + path: '/api/chat', + getParentRoute: () => rootRouteImport, +} as any) + +export interface FileRoutesByFullPath { + '/': typeof IndexRoute + '/api/chat': typeof ApiChatRoute +} +export interface FileRoutesByTo { + '/': typeof IndexRoute + '/api/chat': typeof ApiChatRoute +} +export interface FileRoutesById { + __root__: typeof rootRouteImport + '/': typeof IndexRoute + '/api/chat': typeof ApiChatRoute +} +export interface FileRouteTypes { + fileRoutesByFullPath: FileRoutesByFullPath + fullPaths: '/' | '/api/chat' + fileRoutesByTo: FileRoutesByTo + to: '/' | '/api/chat' + id: '__root__' | '/' | '/api/chat' + fileRoutesById: FileRoutesById +} +export interface 
RootRouteChildren { + IndexRoute: typeof IndexRoute + ApiChatRoute: typeof ApiChatRoute +} + +declare module '@tanstack/react-router' { + interface FileRoutesByPath { + '/': { + id: '/' + path: '/' + fullPath: '/' + preLoaderRoute: typeof IndexRouteImport + parentRoute: typeof rootRouteImport + } + '/api/chat': { + id: '/api/chat' + path: '/api/chat' + fullPath: '/api/chat' + preLoaderRoute: typeof ApiChatRouteImport + parentRoute: typeof rootRouteImport + } + } +} + +const rootRouteChildren: RootRouteChildren = { + IndexRoute: IndexRoute, + ApiChatRoute: ApiChatRoute, +} +export const routeTree = rootRouteImport + ._addFileChildren(rootRouteChildren) + ._addFileTypes() + +import type { getRouter } from './router.tsx' +import type { createStart } from '@tanstack/react-start' +declare module '@tanstack/react-start' { + interface Register { + ssr: true + router: Awaited> + } +} diff --git a/examples/ts-react-coding-agent/src/router.tsx b/examples/ts-react-coding-agent/src/router.tsx new file mode 100644 index 000000000..ee1edab88 --- /dev/null +++ b/examples/ts-react-coding-agent/src/router.tsx @@ -0,0 +1,13 @@ +import { createRouter } from '@tanstack/react-router' + +// Import the generated route tree +import { routeTree } from './routeTree.gen' + +// Create a new router instance +export const getRouter = () => { + return createRouter({ + routeTree, + scrollRestoration: true, + defaultPreloadStaleTime: 0, + }) +} diff --git a/examples/ts-react-coding-agent/src/routes/__root.tsx b/examples/ts-react-coding-agent/src/routes/__root.tsx new file mode 100644 index 000000000..950ce1bcc --- /dev/null +++ b/examples/ts-react-coding-agent/src/routes/__root.tsx @@ -0,0 +1,41 @@ +import { HeadContent, Scripts, createRootRoute } from '@tanstack/react-router' +import appCss from '../styles.css?url' + +export const Route = createRootRoute({ + head: () => ({ + meta: [ + { + charSet: 'utf-8', + }, + { + name: 'viewport', + content: 'width=device-width, initial-scale=1', + }, + { + 
title: 'TanStack AI — Coding Agent', + }, + ], + links: [ + { + rel: 'stylesheet', + href: appCss, + }, + ], + }), + + shellComponent: RootDocument, +}) + +function RootDocument({ children }: { children: React.ReactNode }) { + return ( + + + + + + {children} + + + + ) +} diff --git a/examples/ts-react-coding-agent/src/routes/api.chat.ts b/examples/ts-react-coding-agent/src/routes/api.chat.ts new file mode 100644 index 000000000..445d6be05 --- /dev/null +++ b/examples/ts-react-coding-agent/src/routes/api.chat.ts @@ -0,0 +1,96 @@ +import path from 'node:path' +import { createFileRoute } from '@tanstack/react-router' +import { + chat, + chatParamsFromRequestBody, + toServerSentEventsResponse, +} from '@tanstack/ai' +import { claudeCodeText } from '@tanstack/ai-claude-code' +import { isAgentId, isAgentMode } from '@/lib/agents' +import { lookupStyleGuide } from '@/lib/style-guide-tool' +import type { AgentId, AgentMode } from '@/lib/agents' +import type { AnyTextAdapter } from '@tanstack/ai' + +const SYSTEM_PROMPT = `You are a coding assistant working on the small demo +project mounted in your working directory. Before writing or editing any +code, call the lookup_style_guide tool and follow what it says. Keep your +answers short — the user is watching your tool activity stream by.` + +/** + * One harness adapter per agent id. This is the seam where future harness + * adapters (Codex, Gemini CLI, ...) slot in as additional cases. + */ +function createAdapter( + agentId: AgentId, + mode: AgentMode, + cwd: string, +): AnyTextAdapter { + switch (agentId) { + case 'claude-code': + return claudeCodeText('claude-opus-4-8', { + cwd, + maxTurns: 25, + ...(mode === 'edit' + ? // Auto-approve file edits. Shell commands still go through the + // adapter's default permission policy, which denies them with an + // explanatory message — watch for it in the tool timeline. 
+ { permissionMode: 'acceptEdits' } + : // Read-only: searching and reading work, mutating tools are + // removed from the harness entirely. + { disallowedTools: ['Write', 'Edit', 'NotebookEdit', 'Bash'] }), + }) + } +} + +export const Route = createFileRoute('/api/chat')({ + server: { + handlers: { + POST: async ({ request }) => { + if (request.signal.aborted) { + return new Response(null, { status: 499 }) + } + const abortController = new AbortController() + + let params + try { + params = await chatParamsFromRequestBody(await request.json()) + } catch (error) { + return new Response( + error instanceof Error ? error.message : 'Bad request', + { status: 400 }, + ) + } + + // Client-sent settings arrive via forwardedProps. Validate against + // the allowlist — never feed client strings straight into config. + const agentId = isAgentId(params.forwardedProps.agentId) + ? params.forwardedProps.agentId + : 'claude-code' + const mode = isAgentMode(params.forwardedProps.mode) + ? params.forwardedProps.mode + : 'read-only' + const sessionId = + typeof params.forwardedProps.sessionId === 'string' && + params.forwardedProps.sessionId !== '' + ? params.forwardedProps.sessionId + : undefined + + // The agent only ever works inside the example's scratch workspace. 
+ const cwd = path.join(process.cwd(), 'workspace') + + const stream = chat({ + adapter: createAdapter(agentId, mode, cwd), + messages: params.messages, + systemPrompts: [SYSTEM_PROMPT], + tools: [lookupStyleGuide], + modelOptions: { sessionId }, + threadId: params.threadId, + runId: params.runId, + abortController, + }) + + return toServerSentEventsResponse(stream, { abortController }) + }, + }, + }, +}) diff --git a/examples/ts-react-coding-agent/src/routes/index.tsx b/examples/ts-react-coding-agent/src/routes/index.tsx new file mode 100644 index 000000000..0938968cd --- /dev/null +++ b/examples/ts-react-coding-agent/src/routes/index.tsx @@ -0,0 +1,209 @@ +import { useMemo, useState } from 'react' +import { createFileRoute } from '@tanstack/react-router' +import { fetchServerSentEvents, useChat } from '@tanstack/ai-react' +import { AGENTS, DEFAULT_AGENT } from '@/lib/agents' +import type { UIMessage } from '@tanstack/ai-react' +import type { AgentMode } from '@/lib/agents' + +export const Route = createFileRoute('/')({ + component: CodingAgentPage, +}) + +function ToolCallCard({ + part, +}: { + part: Extract +}) { + const args = useMemo(() => { + try { + return JSON.stringify(JSON.parse(part.arguments), null, 2) + } catch { + return part.arguments + } + }, [part.arguments]) + + const output = useMemo(() => { + if (part.output === undefined) return undefined + return typeof part.output === 'string' + ? part.output + : JSON.stringify(part.output, null, 2) + }, [part.output]) + + return ( +
+ + 🔧 {part.name} + + {output !== undefined ? 'done' : part.state} + + +
+
+          {args}
+        
+ {output !== undefined && ( +
+            {output}
+          
+ )} +
+
+ ) +} + +function Message({ message }: { message: UIMessage }) { + const isUser = message.role === 'user' + return ( +
+
+ {message.parts.map((part, index) => { + if (part.type === 'text' && part.content.trim()) { + return ( +

+ {part.content} +

+ ) + } + if (part.type === 'thinking' && part.content.trim()) { + return ( +
+ + 💭 thinking… + +

+ {part.content} +

+
+ ) + } + if (part.type === 'tool-call') { + return + } + return null + })} +
+
+ ) +} + +function CodingAgentPage() { + const [agentId, setAgentId] = useState(DEFAULT_AGENT) + const [mode, setMode] = useState('read-only') + const [sessionId, setSessionId] = useState(undefined) + const [input, setInput] = useState('') + + const body = useMemo( + () => ({ agentId, mode, sessionId }), + [agentId, mode, sessionId], + ) + + const { messages, sendMessage, isLoading, clear, error } = useChat({ + connection: fetchServerSentEvents('/api/chat'), + body, + onCustomEvent: (eventType, data) => { + if ( + eventType === 'claude-code.session-id' && + typeof data === 'object' && + data !== null && + 'sessionId' in data && + typeof data.sessionId === 'string' + ) { + setSessionId(data.sessionId) + } + }, + }) + + const newSession = () => { + setSessionId(undefined) + clear() + } + + const send = () => { + const text = input.trim() + if (!text || isLoading) return + setInput('') + void sendMessage(text) + } + + return ( +
+
+

Coding Agent

+ + + +
+ +
+ {sessionId + ? `Resuming Claude Code session ${sessionId.slice(0, 8)}… — follow-ups send only your latest message.` + : 'No session yet — the first reply starts one and pins it via the claude-code.session-id event.'} +
+ +
+ {messages.length === 0 && ( +

+ Try: “What files are in this project, and what do they do?” — then + switch to Edit mode and ask it to fix the bug in{' '} + workspace/temperature.js. +

+ )} + {messages.map((message) => ( + + ))} + {error && ( +

+ {String(error)} +

+ )} +
+ +
+ setInput(event.target.value)} + onKeyDown={(event) => { + if (event.key === 'Enter') send() + }} + placeholder="Ask the agent to explore or change the workspace…" + className="flex-1 rounded border border-gray-700 bg-gray-900 px-3 py-2 outline-none focus:border-gray-500" + /> + +
+
+ ) +} diff --git a/examples/ts-react-coding-agent/src/styles.css b/examples/ts-react-coding-agent/src/styles.css new file mode 100644 index 000000000..d4b507858 --- /dev/null +++ b/examples/ts-react-coding-agent/src/styles.css @@ -0,0 +1 @@ +@import 'tailwindcss'; diff --git a/examples/ts-react-coding-agent/tsconfig.json b/examples/ts-react-coding-agent/tsconfig.json new file mode 100644 index 000000000..477479fb7 --- /dev/null +++ b/examples/ts-react-coding-agent/tsconfig.json @@ -0,0 +1,28 @@ +{ + "include": ["**/*.ts", "**/*.tsx"], + "compilerOptions": { + "target": "ES2022", + "jsx": "react-jsx", + "module": "ESNext", + "lib": ["ES2022", "DOM", "DOM.Iterable"], + "types": ["vite/client"], + + /* Bundler mode */ + "moduleResolution": "bundler", + "allowImportingTsExtensions": true, + "verbatimModuleSyntax": false, + "noEmit": true, + + /* Linting */ + "skipLibCheck": true, + "strict": true, + "noUnusedLocals": true, + "noUnusedParameters": true, + "noFallthroughCasesInSwitch": true, + "noUncheckedSideEffectImports": true, + "baseUrl": ".", + "paths": { + "@/*": ["./src/*"] + } + } +} diff --git a/examples/ts-react-coding-agent/vite.config.ts b/examples/ts-react-coding-agent/vite.config.ts new file mode 100644 index 000000000..563d73a12 --- /dev/null +++ b/examples/ts-react-coding-agent/vite.config.ts @@ -0,0 +1,30 @@ +import { defineConfig } from 'vite' +import { tanstackStart } from '@tanstack/react-start/plugin/vite' +import viteReact from '@vitejs/plugin-react' +import viteTsConfigPaths from 'vite-tsconfig-paths' +import tailwindcss from '@tailwindcss/vite' +import { nitroV2Plugin } from '@tanstack/nitro-v2-vite-plugin' + +const config = defineConfig({ + // The Claude Agent SDK is server-only and ships its own bundled Claude + // Code runtime — keep it external so the SSR build resolves it at runtime + // via require() instead of inlining it into the rollup chunk. 
+ ssr: { + external: ['@anthropic-ai/claude-agent-sdk'], + }, + plugins: [ + nitroV2Plugin({ + externals: { + external: ['@anthropic-ai/claude-agent-sdk'], + }, + }), + viteTsConfigPaths({ + projects: ['./tsconfig.json'], + }), + tailwindcss(), + tanstackStart(), + viteReact(), + ], +}) + +export default config diff --git a/examples/ts-react-coding-agent/workspace/README.md b/examples/ts-react-coding-agent/workspace/README.md new file mode 100644 index 000000000..47dc36b1a --- /dev/null +++ b/examples/ts-react-coding-agent/workspace/README.md @@ -0,0 +1,13 @@ +# Demo Workspace + +This directory is the coding agent's working directory (`cwd`). Everything +the agent reads, searches, and edits happens in here — nothing outside this +folder is touched. + +Files: + +- `temperature.js` — a tiny conversion module with a deliberate bug for the + agent to find and fix (in Edit mode). +- `todo.md` — a short task list the agent can read or update. + +Feel free to reset this directory with `git checkout -- .` after demos. diff --git a/examples/ts-react-coding-agent/workspace/temperature.js b/examples/ts-react-coding-agent/workspace/temperature.js new file mode 100644 index 000000000..4aaeb517b --- /dev/null +++ b/examples/ts-react-coding-agent/workspace/temperature.js @@ -0,0 +1,12 @@ +/** Convert Celsius to Fahrenheit. */ +const celsiusToFahrenheit = (celsius) => { + return celsius * (9 / 5) + 32 +} + +/** Convert Fahrenheit to Celsius. */ +const fahrenheitToCelsius = (fahrenheit) => { + // BUG: should subtract 32 before scaling, not after. 
+ return fahrenheit * (5 / 9) - 32 +} + +export { celsiusToFahrenheit, fahrenheitToCelsius } diff --git a/examples/ts-react-coding-agent/workspace/todo.md b/examples/ts-react-coding-agent/workspace/todo.md new file mode 100644 index 000000000..945973c61 --- /dev/null +++ b/examples/ts-react-coding-agent/workspace/todo.md @@ -0,0 +1,5 @@ +# Tasks + +- [ ] Fix the Fahrenheit → Celsius conversion bug +- [ ] Add a Kelvin conversion helper +- [ ] Write a usage example in the README diff --git a/packages/ai-claude-code/README.md b/packages/ai-claude-code/README.md new file mode 100644 index 000000000..8532fcaae --- /dev/null +++ b/packages/ai-claude-code/README.md @@ -0,0 +1,18 @@ +# @tanstack/ai-claude-code + +Claude Code harness adapter for [TanStack AI](https://tanstack.com/ai) — run [Claude Code](https://docs.anthropic.com/en/docs/claude-code) (via `@anthropic-ai/claude-agent-sdk`) as a chat backend with local tool execution, stateful coding sessions, and TanStack tool bridging. + +```typescript +import { chat } from '@tanstack/ai' +import { claudeCodeText } from '@tanstack/ai-claude-code' + +const stream = chat({ + adapter: claudeCodeText('claude-opus-4-8', { + cwd: '/path/to/project', + permissionMode: 'acceptEdits', + }), + messages: [{ role: 'user', content: 'Fix the failing test.' }], +}) +``` + +Server-only (Node). See the [Claude Code adapter docs](https://tanstack.com/ai/latest/docs/adapters/claude-code) for sessions, tool bridging, permissions, and limitations. 
diff --git a/packages/ai-claude-code/package.json b/packages/ai-claude-code/package.json new file mode 100644 index 000000000..0dbfcf254 --- /dev/null +++ b/packages/ai-claude-code/package.json @@ -0,0 +1,60 @@ +{ + "name": "@tanstack/ai-claude-code", + "version": "0.1.0", + "description": "Claude Code harness adapter for TanStack AI — run Claude Code as a chat backend with local tool execution and stateful sessions.", + "author": "", + "license": "MIT", + "repository": { + "type": "git", + "url": "git+https://github.com/TanStack/ai.git", + "directory": "packages/ai-claude-code" + }, + "keywords": [ + "ai", + "ai-sdk", + "typescript", + "tanstack", + "anthropic", + "claude", + "claude-code", + "harness", + "agent", + "adapter", + "chat", + "tool-calling" + ], + "type": "module", + "module": "./dist/esm/index.js", + "types": "./dist/esm/index.d.ts", + "exports": { + ".": { + "types": "./dist/esm/index.d.ts", + "import": "./dist/esm/index.js" + } + }, + "files": [ + "dist", + "src" + ], + "scripts": { + "build": "vite build", + "clean": "premove ./build ./dist", + "lint:fix": "eslint ./src --fix", + "test:build": "publint --strict", + "test:eslint": "eslint ./src", + "test:lib": "vitest", + "test:lib:dev": "pnpm test:lib --watch", + "test:types": "tsc" + }, + "dependencies": { + "@anthropic-ai/claude-agent-sdk": "^0.3.176", + "@modelcontextprotocol/sdk": "^1.29.0" + }, + "peerDependencies": { + "@tanstack/ai": "workspace:^" + }, + "devDependencies": { + "@tanstack/ai": "workspace:*", + "@vitest/coverage-v8": "4.0.14" + } +} diff --git a/packages/ai-claude-code/src/adapters/text.ts b/packages/ai-claude-code/src/adapters/text.ts new file mode 100644 index 000000000..f36e34d80 --- /dev/null +++ b/packages/ai-claude-code/src/adapters/text.ts @@ -0,0 +1,372 @@ +import { query } from '@anthropic-ai/claude-agent-sdk' +import { EventType, normalizeSystemPrompts } from '@tanstack/ai' +import { toRunErrorRawEvent } from '@tanstack/ai/adapter-internals' +import { 
BaseTextAdapter } from '@tanstack/ai/adapters' +import { buildPrompt } from '../messages/prompt' +import { createToolBridge } from '../tools/bridge' +import { + BRIDGED_MCP_SERVER_NAME, + translateSdkStream, +} from '../stream/translate' +import type { + StructuredOutputOptions, + StructuredOutputResult, +} from '@tanstack/ai/adapters' +import type { + AnyTool, + DefaultMessageMetadataByModality, + Modality, + StreamChunk, + TextOptions, +} from '@tanstack/ai' +import type { Options } from '@anthropic-ai/claude-agent-sdk' +import type { ClaudeCodeModel } from '../model-meta' +import type { ClaudeCodeTextProviderOptions } from '../provider-options' +import type { AgentSdkMessage, SdkResultMessage } from '../stream/sdk-types' + +type PermissionMode = NonNullable + +export interface ClaudeCodeTextConfig { + /** Working directory for the harness session. Defaults to `process.cwd()`. */ + cwd?: string + /** + * Claude Code permission mode. Without an explicit mode or a custom + * `canUseTool`, the adapter's default permission handler auto-allows + * bridged TanStack tools and denies anything else that would normally + * prompt — set `'acceptEdits'` / `'bypassPermissions'` (or `allowedTools`) + * to let the harness edit files and run commands on a headless server. + */ + permissionMode?: PermissionMode + /** Built-in tools the harness may use without prompting. */ + allowedTools?: Array + /** Built-in tools removed from the harness entirely. */ + disallowedTools?: Array + /** Maximum harness-internal turns per run. */ + maxTurns?: number + /** + * How `systemPrompts` from `chat()` are applied: + * - `'append'` (default): appended to the Claude Code preset prompt + * - `'replace'`: sent as the entire system prompt + */ + systemPromptMode?: 'append' | 'replace' + /** Extra MCP servers passed through to the harness untouched. */ + mcpServers?: Options['mcpServers'] + /** + * Anthropic API key for the harness subprocess.
Falls back to the + * process environment / the local Claude Code login when omitted. + */ + apiKey?: string + /** Extra environment variables for the harness subprocess. */ + env?: Record + /** Path to a Claude Code executable (defaults to the SDK's bundled one). */ + pathToClaudeCodeExecutable?: string + /** JavaScript runtime used to execute Claude Code. */ + executable?: Options['executable'] + /** Emit true token-level deltas via partial messages (default true). */ + streamPartials?: boolean + /** Custom permission handler; replaces the adapter's default handler. */ + canUseTool?: Options['canUseTool'] + /** + * Which Claude Code settings tiers the harness loads. Defaults to + * `['project']`: the working directory's CLAUDE.md and project settings + * apply, but user-level config on the host machine (personal plugins, + * hooks, skills under `~/.claude`) is ignored — a server adapter + * shouldn't inherit whoever happens to be logged in on the box. Pass + * `['user', 'project', 'local']` to match CLI behavior, or `[]` for full + * isolation. + */ + settingSources?: Options['settingSources'] +} + +function validateTools(tools: Array | undefined): void { + if (!tools || tools.length === 0) return + const unsupported = tools.filter( + (tool) => typeof tool.execute !== 'function' || tool.needsApproval === true, + ) + if (unsupported.length > 0) { + throw new Error( + `Claude Code harness cannot execute client-side or approval-gated tools: ${unsupported + .map((tool) => tool.name) + .join( + ', ', + )}. Provide server execute() implementations without needsApproval, or run these tools outside the harness.`, + ) + } +} + +function getResultError(result: SdkResultMessage): string { + return result.errors && result.errors.length > 0 + ? 
result.errors.join('; ') + : `Claude Code run failed: ${result.subtype}` +} + +export class ClaudeCodeTextAdapter< + TModel extends ClaudeCodeModel, +> extends BaseTextAdapter< + TModel, + ClaudeCodeTextProviderOptions, + ReadonlyArray & readonly ['text'], + DefaultMessageMetadataByModality, + ReadonlyArray, + unknown, + never +> { + readonly name = 'claude-code' as const + + private readonly adapterConfig: ClaudeCodeTextConfig + + constructor(config: ClaudeCodeTextConfig, model: TModel) { + super({}, model) + this.adapterConfig = config + } + + async *chatStream( + options: TextOptions, + ): AsyncIterable { + const { logger } = options + try { + validateTools(options.tools) + + const modelOptions = options.modelOptions + const { prompt, resume } = buildPrompt( + options.messages, + modelOptions?.sessionId, + ) + const sdkOptions = this.buildSdkOptions(options, resume) + + logger.request( + `activity=chat provider=claude-code model=${this.model} messages=${options.messages.length} tools=${options.tools?.length ?? 0} resume=${resume ?? 'none'}`, + { provider: 'claude-code', model: this.model }, + ) + + const sdkStream = query({ prompt, options: sdkOptions }) + + yield* translateSdkStream(sdkStream as AsyncIterable, { + model: this.model, + runId: options.runId ?? this.generateId(), + threadId: options.threadId ?? 
this.generateId(), + ...(options.parentRunId !== undefined && { + parentRunId: options.parentRunId, + }), + genId: () => this.generateId(), + onSdkMessage: (message) => + logger.provider(`provider=claude-code type=${message.type}`, { + chunk: message, + }), + }) + } catch (error: unknown) { + const err = error as Error & { code?: string } + const rawEvent = toRunErrorRawEvent(error) + logger.errors('claude-code.chatStream fatal', { + error, + source: 'claude-code.chatStream', + }) + yield { + type: EventType.RUN_ERROR, + model: options.model, + timestamp: Date.now(), + message: err.message || 'Unknown error occurred', + ...(err.code !== undefined && { code: err.code }), + ...(rawEvent !== undefined && { rawEvent }), + error: { + message: err.message || 'Unknown error occurred', + ...(err.code !== undefined && { code: err.code }), + }, + } + } + } + + /** + * Structured output via the harness's native `outputFormat` support: a + * one-shot run (no tools, single turn) whose final result carries + * `structured_output` matching the schema. + */ + async structuredOutput( + options: StructuredOutputOptions, + ): Promise> { + const { chatOptions, outputSchema } = options + const { logger } = chatOptions + + // Fresh one-shot run: deliberately no `resume`, so finalization never + // mutates the caller's interactive session. 
+ const { prompt } = buildPrompt(chatOptions.messages, undefined) + + const sdkOptions: Options = { + ...this.buildBaseSdkOptions(), + model: this.model, + maxTurns: 1, + tools: [], + includePartialMessages: false, + outputFormat: { + type: 'json_schema', + schema: outputSchema, + }, + } + + logger.request( + `activity=structured-output provider=claude-code model=${this.model}`, + { provider: 'claude-code', model: this.model }, + ) + + for await (const message of query({ prompt, options: sdkOptions })) { + logger.provider(`provider=claude-code type=${message.type}`, { + chunk: message, + }) + if (message.type !== 'result') continue + const result = message as SdkResultMessage + if (result.subtype !== 'success') { + throw new Error(getResultError(result)) + } + const rawText = result.result ?? '' + const data = + result.structured_output !== undefined + ? result.structured_output + : JSON.parse(rawText) + const usage = result.usage + const promptTokens = usage?.input_tokens ?? 0 + const completionTokens = usage?.output_tokens ?? 0 + return { + data, + rawText, + usage: { + promptTokens, + completionTokens, + totalTokens: promptTokens + completionTokens, + }, + } + } + + throw new Error( + 'Claude Code run ended without a result message during structured output generation.', + ) + } + + /** Options derived from adapter config alone (shared by both entry points). */ + private buildBaseSdkOptions(): Options { + const config = this.adapterConfig + const env = + config.apiKey !== undefined || config.env !== undefined + ? { + ...process.env, + ...config.env, + ...(config.apiKey !== undefined && { + ANTHROPIC_API_KEY: config.apiKey, + }), + } + : undefined + + return { + settingSources: config.settingSources ?? 
['project'], + ...(config.cwd !== undefined && { cwd: config.cwd }), + ...(env !== undefined && { env }), + ...(config.pathToClaudeCodeExecutable !== undefined && { + pathToClaudeCodeExecutable: config.pathToClaudeCodeExecutable, + }), + ...(config.executable !== undefined && { executable: config.executable }), + } + } + + private buildSdkOptions( + options: TextOptions, + resume: string | undefined, + ): Options { + const config = this.adapterConfig + const modelOptions = options.modelOptions + + const permissionMode = modelOptions?.permissionMode ?? config.permissionMode + const maxTurns = modelOptions?.maxTurns ?? config.maxTurns + const allowedTools = modelOptions?.allowedTools ?? config.allowedTools + const disallowedTools = + modelOptions?.disallowedTools ?? config.disallowedTools + const cwd = modelOptions?.cwd ?? config.cwd + + const bridged = + options.tools && options.tools.length > 0 + ? createToolBridge(options.tools) + : undefined + const mcpServers = { + ...config.mcpServers, + ...(bridged && { [BRIDGED_MCP_SERVER_NAME]: bridged }), + } + + const systemPrompts = normalizeSystemPrompts(options.systemPrompts) + .map((prompt) => prompt.content) + .filter((content) => content.trim() !== '') + const joinedPrompts = systemPrompts.join('\n\n') + const systemPrompt: Options['systemPrompt'] = + systemPrompts.length === 0 + ? undefined + : config.systemPromptMode === 'replace' + ? joinedPrompts + : { type: 'preset', preset: 'claude_code', append: joinedPrompts } + + const abortController = new AbortController() + const externalSignal = + options.abortController?.signal ?? options.request?.signal + if (externalSignal) { + if (externalSignal.aborted) abortController.abort() + else { + externalSignal.addEventListener( + 'abort', + () => abortController.abort(), + { once: true }, + ) + } + } + + // Default permission handler: bridged TanStack tools always run; any + // other call that would prompt is denied with guidance instead of + // hanging a headless server. 
+ const canUseTool: Options['canUseTool'] = + config.canUseTool ?? + ((toolName) => { + if (toolName.startsWith(`mcp__${BRIDGED_MCP_SERVER_NAME}__`)) { + return Promise.resolve({ behavior: 'allow' as const }) + } + return Promise.resolve({ + behavior: 'deny' as const, + message: `Tool "${toolName}" denied by the @tanstack/ai-claude-code default permission policy. Configure permissionMode, allowedTools, or canUseTool on claudeCodeText() to allow it.`, + }) + }) + + return { + ...this.buildBaseSdkOptions(), + model: this.model, + includePartialMessages: config.streamPartials !== false, + abortController, + canUseTool, + ...(cwd !== undefined && { cwd }), + ...(resume !== undefined && { resume }), + ...(modelOptions?.forkSession !== undefined && { + forkSession: modelOptions.forkSession, + }), + ...(maxTurns !== undefined && { maxTurns }), + ...(permissionMode !== undefined && { permissionMode }), + ...(permissionMode === 'bypassPermissions' && { + allowDangerouslySkipPermissions: true, + }), + ...(allowedTools !== undefined && { allowedTools }), + ...(disallowedTools !== undefined && { disallowedTools }), + ...(Object.keys(mcpServers).length > 0 && { mcpServers }), + ...(systemPrompt !== undefined && { systemPrompt }), + } + } +} + +/** + * Creates a Claude Code text adapter. + * + * Unlike HTTP provider adapters, this is a *harness* adapter: Claude Code + * runs its own agent loop and executes its own tools (bash, file edits, + * search, ...) locally, server-side. Each `chat()` call drives one full + * harness run; harness tool activity streams back as already-resolved + * tool-call events, and the session id is surfaced via a CUSTOM + * `claude-code.session-id` event so follow-up calls can resume the session + * through `modelOptions.sessionId`.
+ */ +export function claudeCodeText( + model: TModel, + config: ClaudeCodeTextConfig = {}, +): ClaudeCodeTextAdapter { + return new ClaudeCodeTextAdapter(config, model) +} diff --git a/packages/ai-claude-code/src/index.ts b/packages/ai-claude-code/src/index.ts new file mode 100644 index 000000000..4241528a8 --- /dev/null +++ b/packages/ai-claude-code/src/index.ts @@ -0,0 +1,19 @@ +export { ClaudeCodeTextAdapter, claudeCodeText } from './adapters/text' +export type { ClaudeCodeTextConfig } from './adapters/text' +export type { ClaudeCodeTextProviderOptions } from './provider-options' +export { CLAUDE_CODE_MODELS } from './model-meta' +export type { ClaudeCodeModel, KnownClaudeCodeModel } from './model-meta' +export { + SESSION_ID_EVENT, + BRIDGED_MCP_SERVER_NAME, + translateSdkStream, + stripMcpPrefix, +} from './stream/translate' +export type { + ClaudeCodeProviderUsageDetails, + TranslateContext, +} from './stream/translate' +export type { AgentSdkMessage } from './stream/sdk-types' +export { buildPrompt } from './messages/prompt' +export type { BuiltPrompt } from './messages/prompt' +export { createToolBridge } from './tools/bridge' diff --git a/packages/ai-claude-code/src/messages/prompt.ts b/packages/ai-claude-code/src/messages/prompt.ts new file mode 100644 index 000000000..ca88b7f14 --- /dev/null +++ b/packages/ai-claude-code/src/messages/prompt.ts @@ -0,0 +1,68 @@ +import type { ModelMessage } from '@tanstack/ai' + +export interface BuiltPrompt { + prompt: string + /** Claude Code session id to resume, when the caller threaded one through. */ + resume?: string +} + +function extractText(content: ModelMessage['content']): string { + if (content === null) return '' + if (typeof content === 'string') return content + return content + .map((part) => + part.type === 'text' && typeof part.content === 'string' + ? part.content + : '', + ) + .join('') +} + +/** + * Convert TanStack chat history into the Agent SDK's `{ prompt, resume }` + * inputs. 
+ * + * With a `sessionId`, the harness already holds the conversation context, so + * only the trailing user message is sent and the session is resumed. Without + * one, prior turns are flattened into a plain-text transcript preamble (tool + * messages and tool-call-only assistant turns are harness-internal noise and + * are skipped; prompts are text-only in v1). + */ +export function buildPrompt( + messages: Array, + sessionId: string | undefined, +): BuiltPrompt { + const lastMessage = messages.at(-1) + const lastUserText = + lastMessage?.role === 'user' ? extractText(lastMessage.content).trim() : '' + + if (!lastUserText) { + throw new Error( + 'Claude Code adapter requires a trailing user message with text content.', + ) + } + + if (sessionId !== undefined) { + return { prompt: lastUserText, resume: sessionId } + } + + const priorTurns = messages + .slice(0, -1) + .filter( + (message) => + (message.role === 'user' || message.role === 'assistant') && + extractText(message.content).trim() !== '', + ) + .map( + (message) => + `${message.role === 'user' ? 'User' : 'Assistant'}: ${extractText(message.content).trim()}`, + ) + + if (priorTurns.length === 0) { + return { prompt: lastUserText } + } + + return { + prompt: `Previous conversation:\n${priorTurns.join('\n')}\n\n${lastUserText}`, + } +} diff --git a/packages/ai-claude-code/src/model-meta.ts b/packages/ai-claude-code/src/model-meta.ts new file mode 100644 index 000000000..22edef39f --- /dev/null +++ b/packages/ai-claude-code/src/model-meta.ts @@ -0,0 +1,21 @@ +/** + * Models known to work with Claude Code. The harness accepts any Anthropic + * model id (and the `opus` / `sonnet` / `haiku` aliases resolved by the CLI), + * so this list exists for autocomplete — any string is accepted via the + * `(string & {})` escape hatch in {@link ClaudeCodeModel}. 
+ */ +export const CLAUDE_CODE_MODELS = [ + 'claude-opus-4-8', + 'claude-opus-4-7', + 'claude-opus-4-6', + 'claude-sonnet-4-6', + 'claude-haiku-4-5', + 'opus', + 'sonnet', + 'haiku', +] as const + +export type KnownClaudeCodeModel = (typeof CLAUDE_CODE_MODELS)[number] + +/** Any Claude model id accepted by Claude Code; known ids get autocomplete. */ +export type ClaudeCodeModel = KnownClaudeCodeModel | (string & {}) diff --git a/packages/ai-claude-code/src/provider-options.ts b/packages/ai-claude-code/src/provider-options.ts new file mode 100644 index 000000000..e938439c2 --- /dev/null +++ b/packages/ai-claude-code/src/provider-options.ts @@ -0,0 +1,32 @@ +import type { Options } from '@anthropic-ai/claude-agent-sdk' + +type PermissionMode = NonNullable + +/** + * Per-call provider options for the Claude Code adapter, passed via + * `modelOptions` on `chat()`. + */ +export interface ClaudeCodeTextProviderOptions { + /** + * Resume an existing Claude Code session. The adapter emits the session id + * of every run via a CUSTOM `claude-code.session-id` stream event; thread + * it back here to continue that session (only the latest user message is + * sent — the harness already holds the prior context). + */ + sessionId?: string + /** + * When resuming, fork to a new session id instead of continuing the + * original session. + */ + forkSession?: boolean + /** Per-call override of the configured max harness turns. */ + maxTurns?: number + /** Per-call override of the configured permission mode. */ + permissionMode?: PermissionMode + /** Per-call override of the allowed built-in tool list. */ + allowedTools?: Array + /** Per-call override of the disallowed built-in tool list. */ + disallowedTools?: Array + /** Per-call override of the harness working directory. 
*/ + cwd?: string +} diff --git a/packages/ai-claude-code/src/stream/sdk-types.ts b/packages/ai-claude-code/src/stream/sdk-types.ts new file mode 100644 index 000000000..a4b40be74 --- /dev/null +++ b/packages/ai-claude-code/src/stream/sdk-types.ts @@ -0,0 +1,135 @@ +/** + * Structural subset of the `@anthropic-ai/claude-agent-sdk` message types that + * the stream translator consumes. + * + * These are intentionally defined structurally (rather than imported from the + * agent SDK) so the translator stays a pure, fixture-testable state machine + * and the package's public types don't depend on the agent SDK's bundled + * `@anthropic-ai/sdk` type imports. + */ + +export interface SdkInitMessage { + type: 'system' + subtype: 'init' + session_id: string + model: string + tools: Array + cwd?: string +} + +export type SdkAssistantContentBlock = + | { type: 'text'; text: string } + | { type: 'thinking'; thinking: string } + | { type: 'tool_use'; id: string; name: string; input: unknown } + | { type: string; [key: string]: unknown } + +export interface SdkAssistantMessage { + type: 'assistant' + message: { + id?: string + content: Array + } + parent_tool_use_id: string | null +} + +export type SdkToolResultContent = + | string + | Array<{ type: string; text?: string; [key: string]: unknown }> + +export type SdkUserContentBlock = + | { + type: 'tool_result' + tool_use_id: string + content?: SdkToolResultContent + is_error?: boolean + } + | { type: string; [key: string]: unknown } + +export interface SdkUserMessage { + type: 'user' + message: { + role: 'user' + content: string | Array + } + parent_tool_use_id: string | null +} + +/** Raw Anthropic streaming events forwarded when `includePartialMessages` is set. 
*/ +export type SdkRawStreamEvent = + | { type: 'message_start'; message: { id?: string } } + | { + type: 'content_block_start' + index: number + content_block: { type: string } + } + | { + type: 'content_block_delta' + index: number + delta: { type: string; text?: string; thinking?: string } + } + | { type: 'content_block_stop'; index: number } + | { type: 'message_delta' } + | { type: 'message_stop' } + +export interface SdkPartialAssistantMessage { + type: 'stream_event' + event: SdkRawStreamEvent + parent_tool_use_id: string | null +} + +export interface SdkUsage { + input_tokens?: number + output_tokens?: number + cache_read_input_tokens?: number + cache_creation_input_tokens?: number +} + +export interface SdkResultMessage { + type: 'result' + subtype: + | 'success' + | 'error_max_turns' + | 'error_during_execution' + | 'error_max_budget_usd' + | 'error_max_structured_output_retries' + result?: string + errors?: Array + usage?: SdkUsage + total_cost_usd?: number + structured_output?: unknown +} + +/** + * Harness-internal system messages the translator deliberately ignores. + * (The real SDK union has many more members; unknown runtime types simply + * fall through every branch.) + */ +export interface SdkNoiseSystemMessage { + type: 'system' + subtype: + | 'status' + | 'permission_denied' + | 'plugin_install' + | 'session_state_changed' + | 'task_notification' + | 'task_progress' +} + +/** Other harness-internal top-level message types the translator ignores. 
*/ +export interface SdkNoiseMessage { + type: + | 'tool_progress' + | 'auth_status' + | 'rate_limit_event' + | 'prompt_suggestion' + | 'compact_boundary' +} + +export type AgentSdkMessage = + | SdkInitMessage + | SdkAssistantMessage + | SdkUserMessage + | SdkPartialAssistantMessage + | SdkResultMessage + | SdkNoiseSystemMessage + | SdkNoiseMessage diff --git a/packages/ai-claude-code/src/stream/translate.ts b/packages/ai-claude-code/src/stream/translate.ts new file mode 100644 index 000000000..67271d63c --- /dev/null +++ b/packages/ai-claude-code/src/stream/translate.ts @@ -0,0 +1,483 @@ +import { EventType, buildBaseUsage } from '@tanstack/ai' +import type { StreamChunk, TokenUsage } from '@tanstack/ai' +import type { + AgentSdkMessage, + SdkAssistantMessage, + SdkPartialAssistantMessage, + SdkResultMessage, + SdkToolResultContent, + SdkUsage, + SdkUserMessage, +} from './sdk-types' + +/** Name of the CUSTOM event carrying the Claude Code session id. */ +export const SESSION_ID_EVENT = 'claude-code.session-id' + +/** Server name used for bridged TanStack tools (model sees `mcp__tanstack__` + tool name). */ +export const BRIDGED_MCP_SERVER_NAME = 'tanstack' + +const BRIDGED_MCP_PREFIX = `mcp__${BRIDGED_MCP_SERVER_NAME}__` + +/** Claude Code-specific usage details attached to RUN_FINISHED usage. */ +export type ClaudeCodeProviderUsageDetails = { + /** Total cost of the harness run in USD, as reported by Claude Code. */ + totalCostUsd?: number +} + +export interface TranslateContext { + model: string + runId: string + threadId: string + parentRunId?: string + genId: () => string + /** Called as soon as the harness reports its session id. */ + onSessionId?: (sessionId: string) => void + /** Called for each raw SDK message, for logging. */ + onSdkMessage?: (message: AgentSdkMessage) => void +} + +/** + * Strip the bridged MCP server prefix so tool-call events match the TanStack + * tool names the application registered. Built-in harness tools (Bash, Read, + * Edit, ...)
and foreign MCP tools pass through verbatim. + */ +export function stripMcpPrefix(name: string): string { + return name.startsWith(BRIDGED_MCP_PREFIX) + ? name.slice(BRIDGED_MCP_PREFIX.length) + : name +} + +function stringifyToolResultContent( + content: SdkToolResultContent | undefined, +): string { + if (content === undefined) return '' + if (typeof content === 'string') return content + return content + .map((block) => (typeof block.text === 'string' ? block.text : '')) + .join('') +} + +function buildUsage( + usage: SdkUsage | undefined, + totalCostUsd: number | undefined, +): TokenUsage | undefined { + if (!usage) return undefined + const promptTokens = usage.input_tokens ?? 0 + const completionTokens = usage.output_tokens ?? 0 + const result = buildBaseUsage({ + promptTokens, + completionTokens, + totalTokens: promptTokens + completionTokens, + }) + const cacheWrite = usage.cache_creation_input_tokens + const cacheRead = usage.cache_read_input_tokens + const promptTokensDetails = { + ...(cacheWrite ? { cacheWriteTokens: cacheWrite } : {}), + ...(cacheRead ? { cachedTokens: cacheRead } : {}), + } + if (Object.keys(promptTokensDetails).length > 0) { + result.promptTokensDetails = promptTokensDetails + } + if (totalCostUsd !== undefined) { + result.providerUsageDetails = { totalCostUsd } + } + return result +} + +/** + * Translate a Claude Code Agent SDK message stream into AG-UI StreamChunk + * events. + * + * The harness runs its own agent loop and executes its own tools, so the + * translation always ends with `finishReason: 'stop'` (or `'length'` / + * RUN_ERROR) — never `'tool_calls'`. Harness tool activity is emitted as + * already-resolved TOOL_CALL_START/ARGS/END + TOOL_CALL_RESULT sequences so + * UIs can render it, while the TanStack engine never tries to execute them. 
+ * + * Invariant: every TOOL_CALL_START is eventually paired with a + * TOOL_CALL_RESULT (synthesized as `{"status":"interrupted"}` when the run + * ends or aborts before the harness reported one) so the engine's + * pending-tool-call scan on the next request never force-executes them. + */ +export async function* translateSdkStream( + sdkMessages: AsyncIterable, + ctx: TranslateContext, +): AsyncIterable { + const { model, runId, threadId, genId } = ctx + const now = () => Date.now() + + let runStarted = false + /** Tool calls started but with no result yet. */ + const unresolvedToolCalls = new Set() + /** Anthropic message ids whose text/thinking already streamed via partials. */ + const streamedMessageIds = new Set() + + // Partial-stream state + let partialMessageId: string | null = null + let partialBlockType: string | null = null + let partialTextMessageId: string | null = null + let partialTextContent = '' + let partialTextStarted = false + let partialReasoningId: string | null = null + + function* startRun(): Generator { + if (runStarted) return + runStarted = true + yield { + type: EventType.RUN_STARTED, + runId, + threadId, + model, + timestamp: now(), + ...(ctx.parentRunId !== undefined && { parentRunId: ctx.parentRunId }), + } + } + + function* synthesizeUnresolvedResults(): Generator { + for (const toolCallId of unresolvedToolCalls) { + yield { + type: EventType.TOOL_CALL_RESULT, + toolCallId, + messageId: genId(), + model, + timestamp: now(), + content: JSON.stringify({ status: 'interrupted' }), + } + } + unresolvedToolCalls.clear() + } + + function* closePartialText(): Generator { + if (partialTextStarted && partialTextMessageId) { + yield { + type: EventType.TEXT_MESSAGE_END, + messageId: partialTextMessageId, + model, + timestamp: now(), + } + } + partialTextStarted = false + partialTextMessageId = null + partialTextContent = '' + } + + function* closePartialReasoning(): Generator { + if (partialReasoningId) { + yield { + type: 
EventType.REASONING_MESSAGE_END, + messageId: partialReasoningId, + model, + timestamp: now(), + } + yield { + type: EventType.REASONING_END, + messageId: partialReasoningId, + model, + timestamp: now(), + } + } + partialReasoningId = null + } + + function* emitToolUse(block: { + id: string + name: string + input: unknown + }): Generator { + const toolCallName = stripMcpPrefix(block.name) + const args = JSON.stringify(block.input ?? {}) + yield { + type: EventType.TOOL_CALL_START, + toolCallId: block.id, + toolCallName, + toolName: toolCallName, + model, + timestamp: now(), + } + yield { + type: EventType.TOOL_CALL_ARGS, + toolCallId: block.id, + model, + timestamp: now(), + delta: args, + args, + } + yield { + type: EventType.TOOL_CALL_END, + toolCallId: block.id, + toolCallName, + toolName: toolCallName, + model, + timestamp: now(), + input: block.input ?? {}, + } + unresolvedToolCalls.add(block.id) + } + + function* handleAssistant( + message: SdkAssistantMessage, + ): Generator { + const alreadyStreamed = + message.message.id !== undefined && + streamedMessageIds.has(message.message.id) + + for (const block of message.message.content) { + if (block.type === 'text') { + if (alreadyStreamed) continue + const messageId = message.message.id ?? 
genId() + const text = (block as { text: string }).text + yield { + type: EventType.TEXT_MESSAGE_START, + messageId, + model, + timestamp: now(), + role: 'assistant', + } + yield { + type: EventType.TEXT_MESSAGE_CONTENT, + messageId, + model, + timestamp: now(), + delta: text, + content: text, + } + yield { + type: EventType.TEXT_MESSAGE_END, + messageId, + model, + timestamp: now(), + } + } else if (block.type === 'thinking') { + if (alreadyStreamed) continue + const reasoningId = genId() + const thinking = (block as { thinking: string }).thinking + yield { + type: EventType.REASONING_START, + messageId: reasoningId, + model, + timestamp: now(), + } + yield { + type: EventType.REASONING_MESSAGE_START, + messageId: reasoningId, + role: 'reasoning' as const, + model, + timestamp: now(), + } + yield { + type: EventType.REASONING_MESSAGE_CONTENT, + messageId: reasoningId, + delta: thinking, + model, + timestamp: now(), + } + yield { + type: EventType.REASONING_MESSAGE_END, + messageId: reasoningId, + model, + timestamp: now(), + } + yield { + type: EventType.REASONING_END, + messageId: reasoningId, + model, + timestamp: now(), + } + } else if (block.type === 'tool_use') { + yield* emitToolUse( + block as { id: string; name: string; input: unknown }, + ) + } + } + } + + function* handleUser(message: SdkUserMessage): Generator { + const content = message.message.content + if (typeof content === 'string') return + for (const block of content) { + if (block.type !== 'tool_result') continue + const toolResult = block as { + tool_use_id: string + content?: SdkToolResultContent + is_error?: boolean + } + unresolvedToolCalls.delete(toolResult.tool_use_id) + yield { + type: EventType.TOOL_CALL_RESULT, + toolCallId: toolResult.tool_use_id, + messageId: genId(), + model, + timestamp: now(), + content: stringifyToolResultContent(toolResult.content), + ...(toolResult.is_error === true && { state: 'output-error' as const }), + } + } + } + + function* handleResult(message: 
SdkResultMessage): Generator { + yield* closePartialText() + yield* closePartialReasoning() + yield* synthesizeUnresolvedResults() + + const usage = buildUsage(message.usage, message.total_cost_usd) + if (message.subtype === 'success') { + yield { + type: EventType.RUN_FINISHED, + runId, + threadId, + model, + timestamp: now(), + finishReason: 'stop', + ...(usage !== undefined && { usage }), + } + } else if (message.subtype === 'error_max_turns') { + yield { + type: EventType.RUN_FINISHED, + runId, + threadId, + model, + timestamp: now(), + finishReason: 'length', + ...(usage !== undefined && { usage }), + } + } else { + const errorMessage = + message.errors && message.errors.length > 0 + ? message.errors.join('; ') + : `Claude Code run failed: ${message.subtype}` + yield { + type: EventType.RUN_ERROR, + model, + timestamp: now(), + message: errorMessage, + code: message.subtype, + error: { message: errorMessage, code: message.subtype }, + } + } + } + + function* handleStreamEvent( + message: SdkPartialAssistantMessage, + ): Generator { + const event = message.event + if (event.type === 'message_start') { + partialMessageId = event.message.id ?? genId() + streamedMessageIds.add(partialMessageId) + } else if (event.type === 'content_block_start') { + partialBlockType = event.content_block.type + if (partialBlockType === 'text') { + partialTextMessageId = partialMessageId ?? 
genId() + partialTextContent = '' + if (!partialTextStarted) { + partialTextStarted = true + yield { + type: EventType.TEXT_MESSAGE_START, + messageId: partialTextMessageId, + model, + timestamp: now(), + role: 'assistant', + } + } + } else if (partialBlockType === 'thinking') { + partialReasoningId = genId() + yield { + type: EventType.REASONING_START, + messageId: partialReasoningId, + model, + timestamp: now(), + } + yield { + type: EventType.REASONING_MESSAGE_START, + messageId: partialReasoningId, + role: 'reasoning' as const, + model, + timestamp: now(), + } + } + } else if (event.type === 'content_block_delta') { + if ( + event.delta.type === 'text_delta' && + partialTextStarted && + partialTextMessageId && + typeof event.delta.text === 'string' + ) { + partialTextContent += event.delta.text + yield { + type: EventType.TEXT_MESSAGE_CONTENT, + messageId: partialTextMessageId, + model, + timestamp: now(), + delta: event.delta.text, + content: partialTextContent, + } + } else if ( + event.delta.type === 'thinking_delta' && + partialReasoningId && + typeof event.delta.thinking === 'string' + ) { + yield { + type: EventType.REASONING_MESSAGE_CONTENT, + messageId: partialReasoningId, + delta: event.delta.thinking, + model, + timestamp: now(), + } + } + } else if (event.type === 'content_block_stop') { + if (partialBlockType === 'text') { + yield* closePartialText() + } else if (partialBlockType === 'thinking') { + yield* closePartialReasoning() + } + partialBlockType = null + } + } + + try { + for await (const sdkMessage of sdkMessages) { + ctx.onSdkMessage?.(sdkMessage) + + if (sdkMessage.type === 'system' && sdkMessage.subtype === 'init') { + yield* startRun() + ctx.onSessionId?.(sdkMessage.session_id) + yield { + type: EventType.CUSTOM, + model, + timestamp: now(), + name: SESSION_ID_EVENT, + value: { + sessionId: sdkMessage.session_id, + model: sdkMessage.model, + tools: sdkMessage.tools, + }, + } + continue + } + + // Anything before init still needs 
RUN_STARTED first. + yield* startRun() + + if (sdkMessage.type === 'stream_event') { + if (sdkMessage.parent_tool_use_id !== null) continue + yield* handleStreamEvent(sdkMessage) + } else if (sdkMessage.type === 'assistant') { + if (sdkMessage.parent_tool_use_id !== null) continue + yield* handleAssistant(sdkMessage) + } else if (sdkMessage.type === 'user') { + if (sdkMessage.parent_tool_use_id !== null) continue + yield* handleUser(sdkMessage) + } else if (sdkMessage.type === 'result') { + yield* handleResult(sdkMessage) + } + // All other SDK message types (status, hooks, notifications, ...) are + // harness-internal and intentionally ignored. + } + } catch (error) { + // The run is dying (abort or SDK failure). Pair any started tool calls + // with a synthetic result first so the next request's pending-tool-call + // scan doesn't try to execute them, then let the adapter surface the + // error as RUN_ERROR. + yield* synthesizeUnresolvedResults() + throw error + } +} diff --git a/packages/ai-claude-code/src/tools/bridge.ts b/packages/ai-claude-code/src/tools/bridge.ts new file mode 100644 index 000000000..bae7504e0 --- /dev/null +++ b/packages/ai-claude-code/src/tools/bridge.ts @@ -0,0 +1,64 @@ +import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js' +import { + CallToolRequestSchema, + ListToolsRequestSchema, +} from '@modelcontextprotocol/sdk/types.js' +import { BRIDGED_MCP_SERVER_NAME } from '../stream/translate' +import type { McpSdkServerConfigWithInstance } from '@anthropic-ai/claude-agent-sdk' +import type { AnyTool } from '@tanstack/ai' + +/** + * Expose TanStack tools to the Claude Code harness as an in-process MCP + * server. 
+ * + * The engine has already converted each tool's schema to JSON Schema before + * the adapter sees it, and JSON Schema is exactly what MCP's `tools/list` + * wants — so the low-level request handlers are registered directly on the + * `McpServer`'s underlying server, passing schemas through verbatim instead + * of round-tripping them through zod. + * + * The model sees these tools as `mcp__tanstack__`; the stream + * translator strips that prefix so tool-call events match the names the + * application registered. + */ +export function createToolBridge( + tools: Array, +): McpSdkServerConfigWithInstance { + const instance = new McpServer( + { name: BRIDGED_MCP_SERVER_NAME, version: '1.0.0' }, + { capabilities: { tools: {} } }, + ) + + const toolsByName = new Map(tools.map((tool) => [tool.name, tool])) + + instance.server.setRequestHandler(ListToolsRequestSchema, () => ({ + tools: tools.map((tool) => ({ + name: tool.name, + description: tool.description, + inputSchema: (tool.inputSchema ?? { + type: 'object', + properties: {}, + }) as { type: 'object'; [key: string]: unknown }, + })), + })) + + instance.server.setRequestHandler(CallToolRequestSchema, async (request) => { + const tool = toolsByName.get(request.params.name) + if (!tool?.execute) { + throw new Error(`Unknown tool: ${request.params.name}`) + } + try { + const result: unknown = await tool.execute(request.params.arguments ?? {}) + const text = typeof result === 'string' ? result : JSON.stringify(result) + return { content: [{ type: 'text', text }] } + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error) + return { + isError: true, + content: [{ type: 'text', text: `Tool execution failed: ${message}` }], + } + } + }) + + return { type: 'sdk', name: BRIDGED_MCP_SERVER_NAME, instance } +} diff --git a/packages/ai-claude-code/tests/bridge.test.ts b/packages/ai-claude-code/tests/bridge.test.ts new file mode 100644 index 000000000..982185f98 --- /dev/null +++ b/packages/ai-claude-code/tests/bridge.test.ts @@ -0,0 +1,103 @@ +import { describe, expect, it } from 'vitest' +import { Client } from '@modelcontextprotocol/sdk/client/index.js' +import { InMemoryTransport } from '@modelcontextprotocol/sdk/inMemory.js' +import { createToolBridge } from '../src/tools/bridge' +import type { AnyTool } from '@tanstack/ai' + +const lookupUser: AnyTool = { + name: 'lookup_user', + description: 'Look up a user by id', + inputSchema: { + type: 'object', + properties: { userId: { type: 'string' } }, + required: ['userId'], + }, + execute: async (args: { userId: string }) => ({ + id: args.userId, + name: 'Ada', + }), +} as AnyTool + +async function connect(tools: Array) { + const bridge = createToolBridge(tools) + const [clientTransport, serverTransport] = + InMemoryTransport.createLinkedPair() + await bridge.instance.connect(serverTransport) + const client = new Client({ name: 'test-client', version: '1.0.0' }) + await client.connect(clientTransport) + return client +} + +describe('createToolBridge', () => { + it('returns an sdk-type MCP server config named tanstack', () => { + const bridge = createToolBridge([lookupUser]) + expect(bridge.type).toBe('sdk') + expect(bridge.name).toBe('tanstack') + expect(bridge.instance).toBeDefined() + }) + + it('lists tools with their raw JSON schema', async () => { + const client = await connect([lookupUser]) + const { tools } = await client.listTools() + expect(tools).toHaveLength(1) + expect(tools[0]).toMatchObject({ + name: 'lookup_user', + description: 'Look up a user by id', + inputSchema: { + type: 'object', + 
properties: { userId: { type: 'string' } }, + required: ['userId'], + }, + }) + }) + + it('executes the tool and serializes object results to JSON text', async () => { + const client = await connect([lookupUser]) + const result = await client.callTool({ + name: 'lookup_user', + arguments: { userId: 'u-1' }, + }) + expect(result.content).toEqual([ + { type: 'text', text: JSON.stringify({ id: 'u-1', name: 'Ada' }) }, + ]) + expect(result.isError ?? false).toBe(false) + }) + + it('passes string results through without double-encoding', async () => { + const echo: AnyTool = { + name: 'echo', + description: 'Echo input', + inputSchema: { type: 'object', properties: {} }, + execute: async () => 'plain text result', + } as AnyTool + const client = await connect([echo]) + const result = await client.callTool({ name: 'echo', arguments: {} }) + expect(result.content).toEqual([ + { type: 'text', text: 'plain text result' }, + ]) + }) + + it('marks thrown tool errors with isError', async () => { + const failing: AnyTool = { + name: 'failing', + description: 'Always fails', + inputSchema: { type: 'object', properties: {} }, + execute: async () => { + throw new Error('tool blew up') + }, + } as AnyTool + const client = await connect([failing]) + const result = await client.callTool({ name: 'failing', arguments: {} }) + expect(result.isError).toBe(true) + expect(result.content).toEqual([ + { type: 'text', text: expect.stringContaining('tool blew up') }, + ]) + }) + + it('rejects calls to unknown tools', async () => { + const client = await connect([lookupUser]) + await expect( + client.callTool({ name: 'nope', arguments: {} }), + ).rejects.toThrow() + }) +}) diff --git a/packages/ai-claude-code/tests/prompt.test.ts b/packages/ai-claude-code/tests/prompt.test.ts new file mode 100644 index 000000000..6e8dfcdf3 --- /dev/null +++ b/packages/ai-claude-code/tests/prompt.test.ts @@ -0,0 +1,97 @@ +import { describe, expect, it } from 'vitest' +import { buildPrompt } from 
'../src/messages/prompt' +import type { ModelMessage } from '@tanstack/ai' + +const user = (content: ModelMessage['content']): ModelMessage => ({ + role: 'user', + content, +}) +const assistant = (content: ModelMessage['content']): ModelMessage => ({ + role: 'assistant', + content, +}) + +describe('buildPrompt', () => { + it('resumes with only the last user message when sessionId is provided', () => { + const result = buildPrompt( + [ + user('first question'), + assistant('first answer'), + user('follow-up question'), + ], + 'sess-1', + ) + expect(result).toEqual({ + prompt: 'follow-up question', + resume: 'sess-1', + }) + }) + + it('throws when sessionId is provided but there is no trailing user message', () => { + expect(() => buildPrompt([user('q'), assistant('a')], 'sess-1')).toThrow( + /user message/i, + ) + }) + + it('sends a single user message as-is for a fresh session', () => { + expect(buildPrompt([user('hello')], undefined)).toEqual({ + prompt: 'hello', + }) + }) + + it('flattens prior turns into a transcript preamble for fresh multi-turn history', () => { + const { prompt, resume } = buildPrompt( + [user('What is 2+2?'), assistant('4'), user('And times 3?')], + undefined, + ) + expect(resume).toBeUndefined() + expect(prompt).toBe( + 'Previous conversation:\nUser: What is 2+2?\nAssistant: 4\n\nAnd times 3?', + ) + }) + + it('skips tool messages and assistant tool-call-only turns when flattening', () => { + const messages: Array = [ + user('list files'), + { + role: 'assistant', + content: null, + toolCalls: [ + { + id: 't1', + type: 'function', + function: { name: 'ls', arguments: '{}' }, + }, + ], + } as unknown as ModelMessage, + { role: 'tool', content: 'file-a', toolCallId: 't1' }, + assistant('There is one file.'), + user('thanks, which one?'), + ] + const { prompt } = buildPrompt(messages, undefined) + expect(prompt).toBe( + 'Previous conversation:\nUser: list files\nAssistant: There is one file.\n\nthanks, which one?', + ) + }) + + it('extracts 
text from content-part arrays and ignores non-text parts', () => { + const { prompt } = buildPrompt( + [ + user([ + { type: 'text', content: 'describe ' }, + { + type: 'image', + source: { type: 'url', url: 'https://x/y.png' }, + } as never, + { type: 'text', content: 'this' }, + ] as ModelMessage['content']), + ], + undefined, + ) + expect(prompt).toBe('describe this') + }) + + it('throws when there is no usable user content at all', () => { + expect(() => buildPrompt([], undefined)).toThrow(/user message/i) + }) +}) diff --git a/packages/ai-claude-code/tests/text-adapter.test.ts b/packages/ai-claude-code/tests/text-adapter.test.ts new file mode 100644 index 000000000..4223d37a2 --- /dev/null +++ b/packages/ai-claude-code/tests/text-adapter.test.ts @@ -0,0 +1,370 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest' +import { query } from '@anthropic-ai/claude-agent-sdk' +import { claudeCodeText } from '../src/adapters/text' +import type { AgentSdkMessage } from '../src/stream/sdk-types' +import type { InternalLogger } from '@tanstack/ai/adapter-internals' +import type { StreamChunk, TextOptions } from '@tanstack/ai' + +vi.mock('@anthropic-ai/claude-agent-sdk', () => ({ + query: vi.fn(), +})) + +const queryMock = vi.mocked(query) + +const init: AgentSdkMessage = { + type: 'system', + subtype: 'init', + session_id: 'sess-1', + model: 'claude-opus-4-6', + tools: ['Bash'], +} + +const textTurn: Array = [ + init, + { + type: 'assistant', + message: { id: 'msg-1', content: [{ type: 'text', text: 'hi there' }] }, + parent_tool_use_id: null, + }, + { + type: 'result', + subtype: 'success', + result: 'hi there', + usage: { input_tokens: 10, output_tokens: 5 }, + total_cost_usd: 0.01, + }, +] + +function mockQueryReturning(messages: Array) { + queryMock.mockImplementation(() => { + async function* generate() { + for (const message of messages) yield message + } + return generate() as ReturnType + }) +} + +const noopLogger = { + request: vi.fn(), + provider: 
vi.fn(), + output: vi.fn(), + errors: vi.fn(), + middleware: vi.fn(), + tools: vi.fn(), + agentLoop: vi.fn(), + config: vi.fn(), + isEnabled: () => false, +} as unknown as InternalLogger + +function makeOptions( + overrides: Partial>> = {}, +): TextOptions> { + return { + model: 'claude-opus-4-6', + messages: [{ role: 'user', content: 'hello' }], + logger: noopLogger, + ...overrides, + } as TextOptions> +} + +async function collect( + stream: AsyncIterable, +): Promise> { + const chunks: Array = [] + for await (const chunk of stream) chunks.push(chunk) + return chunks +} + +beforeEach(() => { + queryMock.mockReset() +}) + +describe('claudeCodeText', () => { + it('creates an adapter with the claude-code provider name', () => { + const adapter = claudeCodeText('claude-opus-4-6') + expect(adapter.kind).toBe('text') + expect(adapter.name).toBe('claude-code') + expect(adapter.model).toBe('claude-opus-4-6') + }) +}) + +describe('chatStream', () => { + it('streams translated AG-UI events for a simple turn', async () => { + mockQueryReturning(textTurn) + const adapter = claudeCodeText('claude-opus-4-6') + const chunks = await collect(adapter.chatStream(makeOptions())) + expect(chunks.map((c) => c.type)).toEqual([ + 'RUN_STARTED', + 'CUSTOM', + 'TEXT_MESSAGE_START', + 'TEXT_MESSAGE_CONTENT', + 'TEXT_MESSAGE_END', + 'RUN_FINISHED', + ]) + expect(chunks.at(-1)).toMatchObject({ finishReason: 'stop' }) + }) + + it('passes prompt, model, and resume to query()', async () => { + mockQueryReturning(textTurn) + const adapter = claudeCodeText('claude-opus-4-6') + await collect( + adapter.chatStream( + makeOptions({ + messages: [ + { role: 'user', content: 'first' }, + { role: 'assistant', content: 'answer' }, + { role: 'user', content: 'follow-up' }, + ], + modelOptions: { sessionId: 'sess-prior' }, + }), + ), + ) + + expect(queryMock).toHaveBeenCalledTimes(1) + const call = queryMock.mock.calls[0]![0] + expect(call.prompt).toBe('follow-up') + expect(call.options).toMatchObject({ + 
model: 'claude-opus-4-6', + resume: 'sess-prior', + includePartialMessages: true, + }) + }) + + it('isolates the harness from user-level settings by default (project only)', async () => { + mockQueryReturning(textTurn) + const adapter = claudeCodeText('claude-opus-4-6') + await collect(adapter.chatStream(makeOptions())) + const options = queryMock.mock.calls[0]![0].options! + expect(options.settingSources).toEqual(['project']) + }) + + it('honors a settingSources override', async () => { + mockQueryReturning(textTurn) + const adapter = claudeCodeText('claude-opus-4-6', { + settingSources: ['user', 'project', 'local'], + }) + await collect(adapter.chatStream(makeOptions())) + const options = queryMock.mock.calls[0]![0].options! + expect(options.settingSources).toEqual(['user', 'project', 'local']) + }) + + it('bridges executable tools into an mcpServers entry named tanstack', async () => { + mockQueryReturning(textTurn) + const adapter = claudeCodeText('claude-opus-4-6') + await collect( + adapter.chatStream( + makeOptions({ + tools: [ + { + name: 'lookup_user', + description: 'Look up a user', + inputSchema: { type: 'object', properties: {} }, + execute: async () => ({ ok: true }), + } as never, + ], + }), + ), + ) + + const options = queryMock.mock.calls[0]![0].options! + expect(options.mcpServers).toMatchObject({ + tanstack: { type: 'sdk', name: 'tanstack' }, + }) + }) + + it('does not create the bridge server when no tools are passed', async () => { + mockQueryReturning(textTurn) + const adapter = claudeCodeText('claude-opus-4-6') + await collect(adapter.chatStream(makeOptions())) + const options = queryMock.mock.calls[0]![0].options! + expect(options.mcpServers ?? 
{}).toEqual({}) + }) + + it('emits RUN_ERROR for client-side tools (no execute)', async () => { + mockQueryReturning(textTurn) + const adapter = claudeCodeText('claude-opus-4-6') + const chunks = await collect( + adapter.chatStream( + makeOptions({ + tools: [ + { + name: 'client_only', + description: 'runs in browser', + inputSchema: { type: 'object', properties: {} }, + } as never, + ], + }), + ), + ) + expect(queryMock).not.toHaveBeenCalled() + expect(chunks.at(-1)).toMatchObject({ type: 'RUN_ERROR' }) + expect((chunks.at(-1) as { message: string }).message).toMatch( + /client-side/i, + ) + }) + + it('emits RUN_ERROR for approval-gated tools', async () => { + mockQueryReturning(textTurn) + const adapter = claudeCodeText('claude-opus-4-6') + const chunks = await collect( + adapter.chatStream( + makeOptions({ + tools: [ + { + name: 'needs_ok', + description: 'requires approval', + inputSchema: { type: 'object', properties: {} }, + execute: async () => 'x', + needsApproval: true, + } as never, + ], + }), + ), + ) + expect(chunks.at(-1)).toMatchObject({ type: 'RUN_ERROR' }) + }) + + it('appends system prompts to the claude_code preset by default', async () => { + mockQueryReturning(textTurn) + const adapter = claudeCodeText('claude-opus-4-6') + await collect( + adapter.chatStream( + makeOptions({ systemPrompts: ['Be terse.', 'Use tabs.'] }), + ), + ) + const options = queryMock.mock.calls[0]![0].options! + expect(options.systemPrompt).toEqual({ + type: 'preset', + preset: 'claude_code', + append: 'Be terse.\n\nUse tabs.', + }) + }) + + it('replaces the system prompt entirely in replace mode', async () => { + mockQueryReturning(textTurn) + const adapter = claudeCodeText('claude-opus-4-6', { + systemPromptMode: 'replace', + }) + await collect( + adapter.chatStream(makeOptions({ systemPrompts: ['Only this.'] })), + ) + const options = queryMock.mock.calls[0]![0].options! 
+ expect(options.systemPrompt).toBe('Only this.') + }) + + it('wires permissionMode bypassPermissions with the required safety flag', async () => { + mockQueryReturning(textTurn) + const adapter = claudeCodeText('claude-opus-4-6', { + permissionMode: 'bypassPermissions', + }) + await collect(adapter.chatStream(makeOptions())) + const options = queryMock.mock.calls[0]![0].options! + expect(options.permissionMode).toBe('bypassPermissions') + expect(options.allowDangerouslySkipPermissions).toBe(true) + }) + + it('passes an abort controller that follows the request signal', async () => { + mockQueryReturning(textTurn) + const adapter = claudeCodeText('claude-opus-4-6') + const controller = new AbortController() + await collect( + adapter.chatStream( + makeOptions({ request: { signal: controller.signal } }), + ), + ) + const options = queryMock.mock.calls[0]![0].options! + expect(options.abortController).toBeInstanceOf(AbortController) + expect(options.abortController!.signal.aborted).toBe(false) + controller.abort() + expect(options.abortController!.signal.aborted).toBe(true) + }) + + it('emits RUN_ERROR when query() throws', async () => { + queryMock.mockImplementation(() => { + throw new Error('spawn failed') + }) + const adapter = claudeCodeText('claude-opus-4-6') + const chunks = await collect(adapter.chatStream(makeOptions())) + expect(chunks.at(-1)).toMatchObject({ + type: 'RUN_ERROR', + message: 'spawn failed', + }) + }) +}) + +describe('structuredOutput', () => { + it('uses the native outputFormat and returns structured_output', async () => { + mockQueryReturning([ + init, + { + type: 'result', + subtype: 'success', + result: '{"answer":42}', + structured_output: { answer: 42 }, + usage: { input_tokens: 7, output_tokens: 3 }, + total_cost_usd: 0, + }, + ]) + const adapter = claudeCodeText('claude-opus-4-6') + const result = await adapter.structuredOutput({ + chatOptions: makeOptions(), + outputSchema: { + type: 'object', + properties: { answer: { type: 'number' 
} }, + }, + }) + + expect(result.data).toEqual({ answer: 42 }) + expect(result.rawText).toBe('{"answer":42}') + expect(result.usage).toMatchObject({ promptTokens: 7, completionTokens: 3 }) + + const options = queryMock.mock.calls[0]![0].options! + expect(options.outputFormat).toEqual({ + type: 'json_schema', + schema: { + type: 'object', + properties: { answer: { type: 'number' } }, + }, + }) + expect(options.maxTurns).toBe(1) + }) + + it('falls back to parsing result text when structured_output is missing', async () => { + mockQueryReturning([ + init, + { + type: 'result', + subtype: 'success', + result: '{"answer":7}', + usage: { input_tokens: 1, output_tokens: 1 }, + total_cost_usd: 0, + }, + ]) + const adapter = claudeCodeText('claude-opus-4-6') + const result = await adapter.structuredOutput({ + chatOptions: makeOptions(), + outputSchema: { type: 'object' }, + }) + expect(result.data).toEqual({ answer: 7 }) + }) + + it('throws a descriptive error when the run fails', async () => { + mockQueryReturning([ + init, + { + type: 'result', + subtype: 'error_during_execution', + errors: ['harness exploded'], + usage: {}, + total_cost_usd: 0, + }, + ]) + const adapter = claudeCodeText('claude-opus-4-6') + await expect( + adapter.structuredOutput({ + chatOptions: makeOptions(), + outputSchema: { type: 'object' }, + }), + ).rejects.toThrow(/harness exploded/) + }) +}) diff --git a/packages/ai-claude-code/tests/translate.test.ts b/packages/ai-claude-code/tests/translate.test.ts new file mode 100644 index 000000000..607d2c457 --- /dev/null +++ b/packages/ai-claude-code/tests/translate.test.ts @@ -0,0 +1,485 @@ +import { describe, expect, it } from 'vitest' +import { translateSdkStream } from '../src/stream/translate' +import type { AgentSdkMessage } from '../src/stream/sdk-types' +import type { StreamChunk } from '@tanstack/ai' + +function makeContext() { + let id = 0 + return { + model: 'claude-opus-4-6', + runId: 'run-1', + threadId: 'thread-1', + genId: () => 
`gen-${++id}`, + } +} + +async function* fromArray( + messages: Array, +): AsyncIterable { + for (const message of messages) { + yield message + } +} + +async function collect( + messages: Array, +): Promise> { + const chunks: Array = [] + for await (const chunk of translateSdkStream( + fromArray(messages), + makeContext(), + )) { + chunks.push(chunk) + } + return chunks +} + +const init: AgentSdkMessage = { + type: 'system', + subtype: 'init', + session_id: 'sess-abc', + model: 'claude-opus-4-6', + tools: ['Bash', 'Read'], + cwd: '/tmp', +} + +const usage = { + input_tokens: 100, + output_tokens: 50, + cache_read_input_tokens: 10, + cache_creation_input_tokens: 5, +} + +function assistantText(text: string, messageId = 'msg-1'): AgentSdkMessage { + return { + type: 'assistant', + message: { id: messageId, content: [{ type: 'text', text }] }, + parent_tool_use_id: null, + } +} + +const resultSuccess: AgentSdkMessage = { + type: 'result', + subtype: 'success', + result: 'done', + usage, + total_cost_usd: 0.12, +} + +describe('translateSdkStream', () => { + it('translates a simple text turn into RUN_STARTED → CUSTOM → TEXT_* → RUN_FINISHED(stop)', async () => { + const chunks = await collect([init, assistantText('Hello!'), resultSuccess]) + + expect(chunks.map((c) => c.type)).toEqual([ + 'RUN_STARTED', + 'CUSTOM', + 'TEXT_MESSAGE_START', + 'TEXT_MESSAGE_CONTENT', + 'TEXT_MESSAGE_END', + 'RUN_FINISHED', + ]) + + expect(chunks[0]).toMatchObject({ + type: 'RUN_STARTED', + runId: 'run-1', + threadId: 'thread-1', + model: 'claude-opus-4-6', + }) + expect(chunks[3]).toMatchObject({ + type: 'TEXT_MESSAGE_CONTENT', + delta: 'Hello!', + content: 'Hello!', + }) + expect(chunks[5]).toMatchObject({ + type: 'RUN_FINISHED', + finishReason: 'stop', + }) + }) + + it('surfaces the session id via a CUSTOM claude-code.session-id event', async () => { + const chunks = await collect([init, assistantText('hi'), resultSuccess]) + const custom = chunks.find((c) => c.type === 'CUSTOM') + 
expect(custom).toMatchObject({ + type: 'CUSTOM', + name: 'claude-code.session-id', + value: { + sessionId: 'sess-abc', + model: 'claude-opus-4-6', + tools: ['Bash', 'Read'], + }, + }) + }) + + it('maps usage onto RUN_FINISHED including cache token details', async () => { + const chunks = await collect([init, assistantText('hi'), resultSuccess]) + const finished = chunks.find((c) => c.type === 'RUN_FINISHED') + expect(finished).toMatchObject({ + usage: { + promptTokens: 100, + completionTokens: 50, + totalTokens: 150, + promptTokensDetails: { cachedTokens: 10, cacheWriteTokens: 5 }, + }, + }) + }) + + it('emits resolved TOOL_CALL_* quadruples for harness tool activity and never finishes with tool_calls', async () => { + const messages: Array<AgentSdkMessage> = [ + init, + { + type: 'assistant', + message: { + id: 'msg-1', + content: [ + { + type: 'tool_use', + id: 'toolu_1', + name: 'Bash', + input: { command: 'ls' }, + }, + ], + }, + parent_tool_use_id: null, + }, + { + type: 'user', + message: { + role: 'user', + content: [ + { + type: 'tool_result', + tool_use_id: 'toolu_1', + content: 'file-a\nfile-b', + }, + ], + }, + parent_tool_use_id: null, + }, + assistantText('Found two files.', 'msg-2'), + resultSuccess, + ] + + const chunks = await collect(messages) + const types = chunks.map((c) => c.type) + expect(types).toEqual([ + 'RUN_STARTED', + 'CUSTOM', + 'TOOL_CALL_START', + 'TOOL_CALL_ARGS', + 'TOOL_CALL_END', + 'TOOL_CALL_RESULT', + 'TEXT_MESSAGE_START', + 'TEXT_MESSAGE_CONTENT', + 'TEXT_MESSAGE_END', + 'RUN_FINISHED', + ]) + + expect(chunks[2]).toMatchObject({ + toolCallId: 'toolu_1', + toolCallName: 'Bash', + }) + expect(chunks[3]).toMatchObject({ + toolCallId: 'toolu_1', + delta: JSON.stringify({ command: 'ls' }), + }) + expect(chunks[4]).toMatchObject({ + toolCallId: 'toolu_1', + input: { command: 'ls' }, + }) + expect(chunks[5]).toMatchObject({ + type: 'TOOL_CALL_RESULT', + toolCallId: 'toolu_1', + content: 'file-a\nfile-b', + }) + + const finished = chunks.filter((c) =>
c.type === 'RUN_FINISHED') + expect(finished).toHaveLength(1) + expect(finished[0]).toMatchObject({ finishReason: 'stop' }) + }) + + it('strips the mcp__tanstack__ prefix from bridged tool names', async () => { + const chunks = await collect([ + init, + { + type: 'assistant', + message: { + id: 'msg-1', + content: [ + { + type: 'tool_use', + id: 'toolu_2', + name: 'mcp__tanstack__lookup_user', + input: { userId: 'u1' }, + }, + ], + }, + parent_tool_use_id: null, + }, + resultSuccess, + ]) + + const start = chunks.find((c) => c.type === 'TOOL_CALL_START') + expect(start).toMatchObject({ toolCallName: 'lookup_user' }) + }) + + it('marks errored tool results with state output-error', async () => { + const chunks = await collect([ + init, + { + type: 'assistant', + message: { + id: 'msg-1', + content: [ + { type: 'tool_use', id: 'toolu_3', name: 'Bash', input: {} }, + ], + }, + parent_tool_use_id: null, + }, + { + type: 'user', + message: { + role: 'user', + content: [ + { + type: 'tool_result', + tool_use_id: 'toolu_3', + content: [{ type: 'text', text: 'command failed' }], + is_error: true, + }, + ], + }, + parent_tool_use_id: null, + }, + resultSuccess, + ]) + + const result = chunks.find((c) => c.type === 'TOOL_CALL_RESULT') + expect(result).toMatchObject({ + toolCallId: 'toolu_3', + content: 'command failed', + state: 'output-error', + }) + }) + + it('synthesizes interrupted tool results for unresolved tool calls before RUN_FINISHED', async () => { + const chunks = await collect([ + init, + { + type: 'assistant', + message: { + id: 'msg-1', + content: [ + { type: 'tool_use', id: 'toolu_4', name: 'Bash', input: {} }, + ], + }, + parent_tool_use_id: null, + }, + resultSuccess, + ]) + + const types = chunks.map((c) => c.type as string) + expect(types.indexOf('TOOL_CALL_RESULT')).toBeGreaterThan(-1) + expect(types.indexOf('TOOL_CALL_RESULT')).toBeLessThan( + types.indexOf('RUN_FINISHED'), + ) + expect(chunks.find((c) => c.type === 'TOOL_CALL_RESULT')).toMatchObject({ 
+ toolCallId: 'toolu_4', + content: JSON.stringify({ status: 'interrupted' }), + }) + }) + + it('translates thinking blocks into REASONING_* events', async () => { + const chunks = await collect([ + init, + { + type: 'assistant', + message: { + id: 'msg-1', + content: [ + { type: 'thinking', thinking: 'pondering...' }, + { type: 'text', text: 'answer' }, + ], + }, + parent_tool_use_id: null, + }, + resultSuccess, + ]) + + expect(chunks.map((c) => c.type)).toEqual([ + 'RUN_STARTED', + 'CUSTOM', + 'REASONING_START', + 'REASONING_MESSAGE_START', + 'REASONING_MESSAGE_CONTENT', + 'REASONING_MESSAGE_END', + 'REASONING_END', + 'TEXT_MESSAGE_START', + 'TEXT_MESSAGE_CONTENT', + 'TEXT_MESSAGE_END', + 'RUN_FINISHED', + ]) + expect( + chunks.find((c) => c.type === 'REASONING_MESSAGE_CONTENT'), + ).toMatchObject({ delta: 'pondering...' }) + }) + + it('maps error_max_turns to RUN_FINISHED(length)', async () => { + const chunks = await collect([ + init, + assistantText('partial'), + { + type: 'result', + subtype: 'error_max_turns', + usage, + total_cost_usd: 0.5, + errors: [], + }, + ]) + expect(chunks.at(-1)).toMatchObject({ + type: 'RUN_FINISHED', + finishReason: 'length', + }) + }) + + it('maps error_during_execution to RUN_ERROR', async () => { + const chunks = await collect([ + init, + { + type: 'result', + subtype: 'error_during_execution', + usage, + total_cost_usd: 0, + errors: ['boom'], + }, + ]) + expect(chunks.at(-1)).toMatchObject({ + type: 'RUN_ERROR', + message: 'boom', + code: 'error_during_execution', + }) + }) + + it('skips subagent messages (parent_tool_use_id set)', async () => { + const chunks = await collect([ + init, + { + type: 'assistant', + message: { id: 'msg-sub', content: [{ type: 'text', text: 'inner' }] }, + parent_tool_use_id: 'toolu_task', + }, + assistantText('outer'), + resultSuccess, + ]) + + const contents = chunks.filter((c) => c.type === 'TEXT_MESSAGE_CONTENT') + expect(contents).toHaveLength(1) + expect(contents[0]).toMatchObject({ delta: 
'outer' }) + }) + + it('streams partial text deltas and dedupes the whole assistant message', async () => { + const chunks = await collect([ + init, + { + type: 'stream_event', + event: { type: 'message_start', message: { id: 'msg-1' } }, + parent_tool_use_id: null, + }, + { + type: 'stream_event', + event: { + type: 'content_block_start', + index: 0, + content_block: { type: 'text' }, + }, + parent_tool_use_id: null, + }, + { + type: 'stream_event', + event: { + type: 'content_block_delta', + index: 0, + delta: { type: 'text_delta', text: 'Hel' }, + }, + parent_tool_use_id: null, + }, + { + type: 'stream_event', + event: { + type: 'content_block_delta', + index: 0, + delta: { type: 'text_delta', text: 'lo' }, + }, + parent_tool_use_id: null, + }, + { + type: 'stream_event', + event: { type: 'content_block_stop', index: 0 }, + parent_tool_use_id: null, + }, + assistantText('Hello', 'msg-1'), + resultSuccess, + ]) + + expect(chunks.map((c) => c.type)).toEqual([ + 'RUN_STARTED', + 'CUSTOM', + 'TEXT_MESSAGE_START', + 'TEXT_MESSAGE_CONTENT', + 'TEXT_MESSAGE_CONTENT', + 'TEXT_MESSAGE_END', + 'RUN_FINISHED', + ]) + expect(chunks[3]).toMatchObject({ delta: 'Hel', content: 'Hel' }) + expect(chunks[4]).toMatchObject({ delta: 'lo', content: 'Hello' }) + }) + + it('emits synthetic tool results then rethrows when the SDK stream throws mid-run', async () => { + async function* throwing(): AsyncIterable<AgentSdkMessage> { + yield init + yield { + type: 'assistant', + message: { + id: 'msg-1', + content: [ + { type: 'tool_use', id: 'toolu_5', name: 'Bash', input: {} }, + ], + }, + parent_tool_use_id: null, + } + throw new Error('aborted') + } + + const chunks: Array<StreamChunk> = [] + await expect(async () => { + for await (const chunk of translateSdkStream(throwing(), makeContext())) { + chunks.push(chunk) + } + }).rejects.toThrow('aborted') + + expect(chunks.find((c) => c.type === 'TOOL_CALL_RESULT')).toMatchObject({ + toolCallId: 'toolu_5', + content: JSON.stringify({ status: 'interrupted' }), + }) + }) +
+ it('ignores unknown SDK message types', async () => { + const chunks = await collect([ + init, + { + type: 'system', + subtype: 'status', + status: 'compacting', + } as unknown as AgentSdkMessage, + assistantText('hi'), + resultSuccess, + ]) + expect(chunks.map((c) => c.type)).toEqual([ + 'RUN_STARTED', + 'CUSTOM', + 'TEXT_MESSAGE_START', + 'TEXT_MESSAGE_CONTENT', + 'TEXT_MESSAGE_END', + 'RUN_FINISHED', + ]) + }) +}) diff --git a/packages/ai-claude-code/tsconfig.json b/packages/ai-claude-code/tsconfig.json new file mode 100644 index 000000000..c38689f4e --- /dev/null +++ b/packages/ai-claude-code/tsconfig.json @@ -0,0 +1,8 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "dist" + }, + "include": ["src", "tests"], + "exclude": ["node_modules", "dist"] +} diff --git a/packages/ai-claude-code/vite.config.ts b/packages/ai-claude-code/vite.config.ts new file mode 100644 index 000000000..11f5b20b7 --- /dev/null +++ b/packages/ai-claude-code/vite.config.ts @@ -0,0 +1,37 @@ +import { defineConfig, mergeConfig } from 'vitest/config' +import { tanstackViteConfig } from '@tanstack/vite-config' +import packageJson from './package.json' + +const config = defineConfig({ + test: { + name: packageJson.name, + dir: './', + watch: false, + + globals: true, + environment: 'node', + include: ['tests/**/*.test.ts'], + coverage: { + provider: 'v8', + reporter: ['text', 'json', 'html', 'lcov'], + exclude: [ + 'node_modules/', + 'dist/', + 'tests/', + '**/*.test.ts', + '**/*.config.ts', + '**/types.ts', + ], + include: ['src/**/*.ts'], + }, + }, +}) + +export default mergeConfig( + config, + tanstackViteConfig({ + entry: ['./src/index.ts'], + srcDir: './src', + cjs: false, + }), +) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 3fe7276d9..07b67f744 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -480,6 +480,70 @@ importers: specifier: ^5.1.0 version: 5.1.0 + examples/ts-react-coding-agent: + dependencies: + '@tailwindcss/vite': + specifier: 
^4.1.18 + version: 4.1.18(vite@7.3.3(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) + '@tanstack/ai': + specifier: workspace:* + version: link:../../packages/ai + '@tanstack/ai-claude-code': + specifier: workspace:* + version: link:../../packages/ai-claude-code + '@tanstack/ai-client': + specifier: workspace:* + version: link:../../packages/ai-client + '@tanstack/ai-react': + specifier: workspace:* + version: link:../../packages/ai-react + '@tanstack/nitro-v2-vite-plugin': + specifier: ^1.154.7 + version: 1.154.7(rolldown@1.0.0-rc.17)(vite@7.3.3(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) + '@tanstack/react-router': + specifier: ^1.158.4 + version: 1.159.5(react-dom@19.2.3(react@19.2.3))(react@19.2.3) + '@tanstack/react-start': + specifier: ^1.159.0 + version: 1.159.5(crossws@0.4.5(srvx@0.11.15))(react-dom@19.2.3(react@19.2.3))(react@19.2.3)(vite-plugin-solid@2.11.10(solid-js@1.9.10)(vite@7.3.3(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)))(vite@7.3.3(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) + '@tanstack/router-plugin': + specifier: ^1.158.4 + version: 1.159.5(@tanstack/react-router@1.159.5(react-dom@19.2.3(react@19.2.3))(react@19.2.3))(vite-plugin-solid@2.11.10(solid-js@1.9.10)(vite@7.3.3(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)))(vite@7.3.3(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) + react: + specifier: ^19.2.3 + version: 19.2.3 + react-dom: + specifier: ^19.2.3 + version: 19.2.3(react@19.2.3) + tailwindcss: + specifier: ^4.1.18 + version: 4.1.18 + vite-tsconfig-paths: + specifier: ^5.1.4 + version: 5.1.4(typescript@5.9.3)(vite@7.3.3(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) + zod: + specifier: ^4.2.0 + version: 
4.3.6 + devDependencies: + '@types/node': + specifier: ^24.10.1 + version: 24.10.3 + '@types/react': + specifier: ^19.2.7 + version: 19.2.7 + '@types/react-dom': + specifier: ^19.2.3 + version: 19.2.3(@types/react@19.2.7) + '@vitejs/plugin-react': + specifier: ^5.1.2 + version: 5.1.2(vite@7.3.3(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) + typescript: + specifier: 5.9.3 + version: 5.9.3 + vite: + specifier: ^7.3.3 + version: 7.3.3(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2) + examples/ts-react-media: dependencies: '@tailwindcss/vite': @@ -1035,6 +1099,22 @@ importers: specifier: ^4.2.0 version: 4.2.1 + packages/ai-claude-code: + dependencies: + '@anthropic-ai/claude-agent-sdk': + specifier: ^0.3.176 + version: 0.3.176(@anthropic-ai/sdk@0.97.1(zod@4.3.6))(@modelcontextprotocol/sdk@1.29.0(zod@4.3.6))(zod@4.3.6) + '@modelcontextprotocol/sdk': + specifier: ^1.29.0 + version: 1.29.0(zod@4.3.6) + devDependencies: + '@tanstack/ai': + specifier: workspace:* + version: link:../ai + '@vitest/coverage-v8': + specifier: 4.0.14 + version: 4.0.14(vitest@4.0.14(@opentelemetry/api@1.9.1)(@types/node@24.10.3)(happy-dom@20.0.11)(jiti@2.6.1)(jsdom@27.3.0(postcss@8.5.15))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) + packages/ai-client: dependencies: '@tanstack/ai': @@ -1856,6 +1936,9 @@ importers: '@tanstack/ai-anthropic': specifier: workspace:* version: link:../../packages/ai-anthropic + '@tanstack/ai-claude-code': + specifier: workspace:* + version: link:../../packages/ai-claude-code '@tanstack/ai-client': specifier: workspace:* version: link:../../packages/ai-client @@ -2120,6 +2203,58 @@ packages: '@ag-ui/core@0.0.52': resolution: {integrity: sha512-Xo0bUaNV56EqylzcrAuhUkQX7et7+SZIrqZZtEByGwEq/I1EHny6ZMkWHLkKR7UNi0FJZwJyhKYmKJS3B2SEgA==} + '@anthropic-ai/claude-agent-sdk-darwin-arm64@0.3.176': + resolution: {integrity: 
sha512-QZLVv9Hlo5W7YEV23eTsAKYQTkA1V7TG4Z5oFESgvkVfx02TxguZKtUbqlpmzZ9JqXRu+qMY9iIpIgbI3PwRJw==} + cpu: [arm64] + os: [darwin] + + '@anthropic-ai/claude-agent-sdk-darwin-x64@0.3.176': + resolution: {integrity: sha512-rN1Jj0r0AGIU3x3KXgXWrIQz/NO2jiPmlsJS0lhr08KwteBXqK2+oWPf7oyD33ExQIOj/CZDCqQbSnYv6ttfFQ==} + cpu: [x64] + os: [darwin] + + '@anthropic-ai/claude-agent-sdk-linux-arm64-musl@0.3.176': + resolution: {integrity: sha512-8YeGD2ePf+SW1i9IjSYyJlpcSKymGRMBGZv7AibF6I7PuQEAHtfuBlYBv9a59T/s3kHyQdu3bZb0DUZUXXfhcA==} + cpu: [arm64] + os: [linux] + libc: [musl] + + '@anthropic-ai/claude-agent-sdk-linux-arm64@0.3.176': + resolution: {integrity: sha512-3peVLOJCtUP883To+LCFaeT00uP+wcb1nrMrrAa92HBOnRVtnQAPEjpsItmIEGErc91YhV9cpliwoTZam1fBlQ==} + cpu: [arm64] + os: [linux] + libc: [glibc] + + '@anthropic-ai/claude-agent-sdk-linux-x64-musl@0.3.176': + resolution: {integrity: sha512-jcPb+L+D7TmihtfBO9quzVjR52hLqM+XKRXg9eMJpYH/T4DtVsFAQhdZ/Yf2dV2wNCcQeTtn7K//ghEXnO6jtA==} + cpu: [x64] + os: [linux] + libc: [musl] + + '@anthropic-ai/claude-agent-sdk-linux-x64@0.3.176': + resolution: {integrity: sha512-nu378rqSXa9sAb1+P8jXjCUre2qPG1I6eBWiMMXSTCQE7sphUa/dKoLUh2L+E/S6/s/hA1kRm+PO389fPKw/Uw==} + cpu: [x64] + os: [linux] + libc: [glibc] + + '@anthropic-ai/claude-agent-sdk-win32-arm64@0.3.176': + resolution: {integrity: sha512-XiJHXo9+rCJGFriZQmGBKC0pZG+ZQgYv1PEtewbxcgbquJYnWPGIS2Itvg5sEVK0CxLQ3WO1Rgg69iqVFxo/OQ==} + cpu: [arm64] + os: [win32] + + '@anthropic-ai/claude-agent-sdk-win32-x64@0.3.176': + resolution: {integrity: sha512-8UlbhNCVEsfTmFNUOBdS6/7GTOtbkl2gI1nsrX2dgHLHxNIqjgJOkbysFBLtwssm2X3l6d4QRMFrHjJ8mDjGfA==} + cpu: [x64] + os: [win32] + + '@anthropic-ai/claude-agent-sdk@0.3.176': + resolution: {integrity: sha512-uN7XatzLYFackK4WH43iCfW+QPi21zgavG2ZdY1gMbYKFrhbchVX1U0BBbFq8sFy1zqNc3WZ4GCHAdjOAHQe0A==} + engines: {node: '>=18.0.0'} + peerDependencies: + '@anthropic-ai/sdk': '>=0.93.0' + '@modelcontextprotocol/sdk': ^1.29.0 + zod: ^4.0.0 + '@anthropic-ai/sdk@0.97.1': 
resolution: {integrity: sha512-wOf7AUeJPitcVpvKO4UMu63mWH5SaVipkGd7OOQJt/G6VYGlV8D2Gp9dLxOrttDJh/9gqPqdaBwDGcBevumeAg==} hasBin: true @@ -13947,6 +14082,45 @@ snapshots: dependencies: zod: 3.25.76 + '@anthropic-ai/claude-agent-sdk-darwin-arm64@0.3.176': + optional: true + + '@anthropic-ai/claude-agent-sdk-darwin-x64@0.3.176': + optional: true + + '@anthropic-ai/claude-agent-sdk-linux-arm64-musl@0.3.176': + optional: true + + '@anthropic-ai/claude-agent-sdk-linux-arm64@0.3.176': + optional: true + + '@anthropic-ai/claude-agent-sdk-linux-x64-musl@0.3.176': + optional: true + + '@anthropic-ai/claude-agent-sdk-linux-x64@0.3.176': + optional: true + + '@anthropic-ai/claude-agent-sdk-win32-arm64@0.3.176': + optional: true + + '@anthropic-ai/claude-agent-sdk-win32-x64@0.3.176': + optional: true + + '@anthropic-ai/claude-agent-sdk@0.3.176(@anthropic-ai/sdk@0.97.1(zod@4.3.6))(@modelcontextprotocol/sdk@1.29.0(zod@4.3.6))(zod@4.3.6)': + dependencies: + '@anthropic-ai/sdk': 0.97.1(zod@4.3.6) + '@modelcontextprotocol/sdk': 1.29.0(zod@4.3.6) + zod: 4.3.6 + optionalDependencies: + '@anthropic-ai/claude-agent-sdk-darwin-arm64': 0.3.176 + '@anthropic-ai/claude-agent-sdk-darwin-x64': 0.3.176 + '@anthropic-ai/claude-agent-sdk-linux-arm64': 0.3.176 + '@anthropic-ai/claude-agent-sdk-linux-arm64-musl': 0.3.176 + '@anthropic-ai/claude-agent-sdk-linux-x64': 0.3.176 + '@anthropic-ai/claude-agent-sdk-linux-x64-musl': 0.3.176 + '@anthropic-ai/claude-agent-sdk-win32-arm64': 0.3.176 + '@anthropic-ai/claude-agent-sdk-win32-x64': 0.3.176 + '@anthropic-ai/sdk@0.97.1(zod@4.2.1)': dependencies: json-schema-to-ts: 3.1.1 @@ -13954,6 +14128,13 @@ snapshots: optionalDependencies: zod: 4.2.1 + '@anthropic-ai/sdk@0.97.1(zod@4.3.6)': + dependencies: + json-schema-to-ts: 3.1.1 + standardwebhooks: 1.0.0 + optionalDependencies: + zod: 4.3.6 + '@apidevtools/json-schema-ref-parser@11.9.3': dependencies: '@jsdevtools/ono': 7.1.3 diff --git a/testing/e2e/README.md b/testing/e2e/README.md index 
ab0f13479..288fd1924 100644 --- a/testing/e2e/README.md +++ b/testing/e2e/README.md @@ -6,6 +6,8 @@ End-to-end tests for TanStack AI using Playwright and [aimock](https://github.co **Providers tested:** openai, anthropic, gemini, ollama, groq, grok, openrouter +> **Claude Code (`@tanstack/ai-claude-code`) is excluded from the standard matrix.** It's a harness adapter that spawns the Claude Code runtime as a subprocess, so aimock's per-test `X-Test-Id` header isolation can't be injected into its requests. It's covered by unit tests in the package plus a gated live smoke test in `tests/claude-code.spec.ts` — run it with `CLAUDE_CODE_E2E=1` and an `ANTHROPIC_API_KEY` (or a local `claude login`). + ## What's tested ### Provider-coverage tests diff --git a/testing/e2e/package.json b/testing/e2e/package.json index 68381f527..bcbe281b0 100644 --- a/testing/e2e/package.json +++ b/testing/e2e/package.json @@ -18,6 +18,7 @@ "@tailwindcss/vite": "^4.1.18", "@tanstack/ai": "workspace:*", "@tanstack/ai-anthropic": "workspace:*", + "@tanstack/ai-claude-code": "workspace:*", "@tanstack/ai-client": "workspace:*", "@tanstack/ai-elevenlabs": "workspace:*", "@tanstack/ai-gemini": "workspace:*", diff --git a/testing/e2e/tests/claude-code.spec.ts b/testing/e2e/tests/claude-code.spec.ts new file mode 100644 index 000000000..0a842f1b3 --- /dev/null +++ b/testing/e2e/tests/claude-code.spec.ts @@ -0,0 +1,72 @@ +/** + * Gated live smoke test for the Claude Code harness adapter. + * + * The standard e2e matrix mocks providers with aimock via per-test + * `X-Test-Id` header isolation. Claude Code spawns its bundled runtime as a + * subprocess, so that isolation can't be injected — this adapter is excluded + * from the matrix and covered here instead, gated behind CLAUDE_CODE_E2E. + * + * Run with: + * CLAUDE_CODE_E2E=1 ANTHROPIC_API_KEY=sk-... \ + * pnpm --filter @tanstack/ai-e2e test:e2e -- --grep "claude-code" + * + * (A local `claude login` works in place of ANTHROPIC_API_KEY.) 
+ */ +import { expect, test } from '@playwright/test' +import { chat } from '@tanstack/ai' +import { claudeCodeText } from '@tanstack/ai-claude-code' +import type { StreamChunk } from '@tanstack/ai' + +test.describe('claude-code harness (gated live smoke)', () => { + test.skip( + !process.env.CLAUDE_CODE_E2E, + 'Set CLAUDE_CODE_E2E=1 (plus ANTHROPIC_API_KEY or a local Claude login) to run the Claude Code live smoke test', + ) + + test('streams a full harness turn with session id and stop finish', async () => { + test.setTimeout(180_000) + + const chunks: Array<StreamChunk> = [] + const stream = chat({ + adapter: claudeCodeText('haiku', { + maxTurns: 2, + // Read-only smoke: the default permission policy denies anything + // that would prompt, and no tools are bridged. + disallowedTools: ['Bash', 'Write', 'Edit'], + }), + messages: [ + { + role: 'user', + content: 'Reply with exactly the word: pong', + }, + ], + }) + + for await (const chunk of stream) { + chunks.push(chunk) + } + + const types = chunks.map((chunk) => chunk.type as string) + expect(types[0]).toBe('RUN_STARTED') + + const sessionEvent = chunks.find( + (chunk) => + chunk.type === 'CUSTOM' && + (chunk as { name?: string }).name === 'claude-code.session-id', + ) + expect(sessionEvent).toBeDefined() + expect( + (sessionEvent as { value: { sessionId: string } }).value.sessionId, + ).toMatch(/.+/) + + const finished = chunks.find((chunk) => chunk.type === 'RUN_FINISHED') + expect(finished).toBeDefined() + expect((finished as { finishReason?: string }).finishReason).toBe('stop') + + const text = chunks + .filter((chunk) => chunk.type === 'TEXT_MESSAGE_CONTENT') + .map((chunk) => (chunk as { delta?: string }).delta ??
'') + .join('') + expect(text.toLowerCase()).toContain('pong') + }) +}) From b1627bc369a177d4df6f1cfe50de87f859797d50 Mon Sep 17 00:00:00 2001 From: Jack Herrington Date: Fri, 12 Jun 2026 20:00:08 -0700 Subject: [PATCH 05/12] feat: add @tanstack/ai-codex and @tanstack/ai-gemini-cli harness adapters Add two new coding-agent harness adapters alongside Claude Code: - @tanstack/ai-codex drives OpenAI Codex via @openai/codex-sdk with local tool execution, resumable sessions (modelOptions.sessionId), structured output, and a localhost MCP bridge for TanStack server tools. - @tanstack/ai-gemini-cli drives `gemini --acp` over the Agent Client Protocol with token-level streaming, resumable sessions, a configurable permission policy, and headless ACP auth method selection (authMethodId) so runs never stall on an interactive auth picker. Wire both into the ts-react-coding-agent example: the agent dropdown keeps every harness selectable, and a server function (createServerFn) reports which agents are actually configured at runtime so the UI can surface a setup dialog for unconfigured ones. Includes adapter docs and changesets. 
Co-authored-by: Cursor --- .changeset/ai-codex-initial.md | 5 + .changeset/ai-gemini-cli-initial.md | 5 + docs/adapters/codex.md | 182 ++++++ docs/adapters/gemini-cli.md | 205 +++++++ docs/config.json | 10 + examples/ts-react-coding-agent/README.md | 138 ++++- examples/ts-react-coding-agent/package.json | 2 + .../src/lib/agent-status.ts | 52 ++ .../ts-react-coding-agent/src/lib/agents.ts | 105 +++- .../src/routes/api.chat.ts | 30 +- .../src/routes/index.tsx | 131 +++- packages/ai-codex/README.md | 18 + packages/ai-codex/package.json | 59 ++ packages/ai-codex/src/adapters/text.ts | 366 ++++++++++++ packages/ai-codex/src/index.ts | 21 + packages/ai-codex/src/messages/prompt.ts | 67 +++ packages/ai-codex/src/model-meta.ts | 17 + packages/ai-codex/src/provider-options.ts | 29 + packages/ai-codex/src/stream/sdk-types.ts | 66 ++ packages/ai-codex/src/stream/translate.ts | 381 ++++++++++++ packages/ai-codex/src/tools/bridge.ts | 130 ++++ packages/ai-codex/tests/bridge.test.ts | 108 ++++ packages/ai-codex/tests/prompt.test.ts | 97 +++ packages/ai-codex/tests/text-adapter.test.ts | 430 ++++++++++++++ packages/ai-codex/tests/translate.test.ts | 454 ++++++++++++++ packages/ai-codex/tsconfig.json | 8 + packages/ai-codex/vite.config.ts | 37 ++ packages/ai-gemini-cli/README.md | 18 + packages/ai-gemini-cli/package.json | 60 ++ packages/ai-gemini-cli/src/adapters/text.ts | 386 ++++++++++++ packages/ai-gemini-cli/src/index.ts | 36 ++ packages/ai-gemini-cli/src/messages/prompt.ts | 67 +++ packages/ai-gemini-cli/src/model-meta.ts | 20 + .../ai-gemini-cli/src/process/acp-client.ts | 257 ++++++++ .../ai-gemini-cli/src/process/permissions.ts | 66 ++ .../ai-gemini-cli/src/provider-options.ts | 23 + .../ai-gemini-cli/src/stream/acp-types.ts | 82 +++ packages/ai-gemini-cli/src/stream/queue.ts | 64 ++ .../ai-gemini-cli/src/stream/translate.ts | 395 ++++++++++++ packages/ai-gemini-cli/src/tools/bridge.ts | 129 ++++ packages/ai-gemini-cli/tests/bridge.test.ts | 108 ++++ 
.../ai-gemini-cli/tests/permissions.test.ts | 103 ++++ packages/ai-gemini-cli/tests/prompt.test.ts | 97 +++ .../ai-gemini-cli/tests/text-adapter.test.ts | 562 ++++++++++++++++++ .../ai-gemini-cli/tests/translate.test.ts | 435 ++++++++++++++ packages/ai-gemini-cli/tsconfig.json | 8 + packages/ai-gemini-cli/vite.config.ts | 37 ++ pnpm-lock.yaml | 131 +++- 48 files changed, 6179 insertions(+), 58 deletions(-) create mode 100644 .changeset/ai-codex-initial.md create mode 100644 .changeset/ai-gemini-cli-initial.md create mode 100644 docs/adapters/codex.md create mode 100644 docs/adapters/gemini-cli.md create mode 100644 examples/ts-react-coding-agent/src/lib/agent-status.ts create mode 100644 packages/ai-codex/README.md create mode 100644 packages/ai-codex/package.json create mode 100644 packages/ai-codex/src/adapters/text.ts create mode 100644 packages/ai-codex/src/index.ts create mode 100644 packages/ai-codex/src/messages/prompt.ts create mode 100644 packages/ai-codex/src/model-meta.ts create mode 100644 packages/ai-codex/src/provider-options.ts create mode 100644 packages/ai-codex/src/stream/sdk-types.ts create mode 100644 packages/ai-codex/src/stream/translate.ts create mode 100644 packages/ai-codex/src/tools/bridge.ts create mode 100644 packages/ai-codex/tests/bridge.test.ts create mode 100644 packages/ai-codex/tests/prompt.test.ts create mode 100644 packages/ai-codex/tests/text-adapter.test.ts create mode 100644 packages/ai-codex/tests/translate.test.ts create mode 100644 packages/ai-codex/tsconfig.json create mode 100644 packages/ai-codex/vite.config.ts create mode 100644 packages/ai-gemini-cli/README.md create mode 100644 packages/ai-gemini-cli/package.json create mode 100644 packages/ai-gemini-cli/src/adapters/text.ts create mode 100644 packages/ai-gemini-cli/src/index.ts create mode 100644 packages/ai-gemini-cli/src/messages/prompt.ts create mode 100644 packages/ai-gemini-cli/src/model-meta.ts create mode 100644 packages/ai-gemini-cli/src/process/acp-client.ts 
create mode 100644 packages/ai-gemini-cli/src/process/permissions.ts create mode 100644 packages/ai-gemini-cli/src/provider-options.ts create mode 100644 packages/ai-gemini-cli/src/stream/acp-types.ts create mode 100644 packages/ai-gemini-cli/src/stream/queue.ts create mode 100644 packages/ai-gemini-cli/src/stream/translate.ts create mode 100644 packages/ai-gemini-cli/src/tools/bridge.ts create mode 100644 packages/ai-gemini-cli/tests/bridge.test.ts create mode 100644 packages/ai-gemini-cli/tests/permissions.test.ts create mode 100644 packages/ai-gemini-cli/tests/prompt.test.ts create mode 100644 packages/ai-gemini-cli/tests/text-adapter.test.ts create mode 100644 packages/ai-gemini-cli/tests/translate.test.ts create mode 100644 packages/ai-gemini-cli/tsconfig.json create mode 100644 packages/ai-gemini-cli/vite.config.ts diff --git a/.changeset/ai-codex-initial.md b/.changeset/ai-codex-initial.md new file mode 100644 index 000000000..4034a88b4 --- /dev/null +++ b/.changeset/ai-codex-initial.md @@ -0,0 +1,5 @@ +--- +'@tanstack/ai-codex': minor +--- + +New `@tanstack/ai-codex` package: a Codex harness adapter that runs `@openai/codex-sdk` as a TanStack AI chat backend. Codex owns the agent loop and executes its built-in tools (shell commands, file changes, web search, todo lists) server-side inside its sandbox; their activity streams back as resolved tool-call events. TanStack `toolDefinition()` server tools are bridged into the harness via a localhost Streamable-HTTP MCP server, threads are resumable via `modelOptions.sessionId` (surfaced through a `codex.session-id` custom event), and structured output uses the harness's native `outputSchema` support. Note: the Codex SDK reports assistant text only as completed messages — tool activity streams live, text arrives message-at-a-time. 
diff --git a/.changeset/ai-gemini-cli-initial.md b/.changeset/ai-gemini-cli-initial.md new file mode 100644 index 000000000..e20180e86 --- /dev/null +++ b/.changeset/ai-gemini-cli-initial.md @@ -0,0 +1,5 @@ +--- +'@tanstack/ai-gemini-cli': minor +--- + +New `@tanstack/ai-gemini-cli` package: a Gemini CLI harness adapter that drives `gemini --acp` (Agent Client Protocol) as a TanStack AI chat backend. Gemini CLI owns the agent loop and executes its built-in tools (shell, file edits, search) server-side; assistant text and thinking stream as true token-level deltas, and tool activity streams back as resolved tool-call events. TanStack `toolDefinition()` server tools are bridged into the harness via a localhost Streamable-HTTP MCP server, sessions are resumable via `modelOptions.sessionId` (surfaced through a `gemini-cli.session-id` custom event, with graceful fallback to transcript replay when the CLI can't load the session), and ACP permission requests are answered by a configurable never-hanging policy (`default` / `acceptEdits` / `bypassPermissions` or a custom handler). For headless hosts, the auth method is selectable up front via `authMethodId` (e.g. `'oauth-personal'`, `'gemini-api-key'`) — the adapter performs the ACP `authenticate` handshake before opening the session so a run never stalls on an interactive auth picker. Requires the `gemini` CLI to be installed and authenticated on the host. diff --git a/docs/adapters/codex.md b/docs/adapters/codex.md new file mode 100644 index 000000000..199cffe7f --- /dev/null +++ b/docs/adapters/codex.md @@ -0,0 +1,182 @@ +--- +title: Codex +id: codex-adapter +order: 12 +description: "Use OpenAI Codex as a chat backend in TanStack AI — agent harness with local tool execution, stateful coding sessions, and tool bridging via @tanstack/ai-codex." 
+keywords: + - tanstack ai + - codex + - codex sdk + - openai + - harness + - agent + - coding agent + - adapter +--- + +The Codex adapter runs [OpenAI Codex](https://developers.openai.com/codex) (via the `@openai/codex-sdk`) as a chat backend. Unlike HTTP provider adapters, this is a **harness adapter**: Codex runs its own agent loop and executes its own tools — shell commands, file changes, web search — locally on your server, inside its sandbox. Each `chat()` call runs one full harness turn; the harness's tool activity streams back as already-resolved tool-call events your UI can render. + +> **Server-only.** The harness spawns the Codex runtime (bundled with the SDK) as a subprocess, so this adapter only works in a Node.js server environment — never in the browser. The sandbox mode is the safety boundary; configure it deliberately. + +## Installation + +```bash +npm install @tanstack/ai-codex +``` + +A runnable demo lives at [`examples/ts-react-coding-agent`](https://github.com/TanStack/ai/tree/main/examples/ts-react-coding-agent) — session resume, the harness tool timeline, sandbox modes, and tool bridging, wired into a React app. + +## Authentication + +The harness resolves credentials the same way the Codex CLI does: + +- the `apiKey` config option (exported to the subprocess as `CODEX_API_KEY`; usage-based billing), or +- an existing ChatGPT login on the machine (`codex login`). 
+ +## Basic Usage + +```typescript +import { chat } from "@tanstack/ai"; +import { codexText } from "@tanstack/ai-codex"; + +const stream = chat({ + adapter: codexText("gpt-5.1-codex", { + cwd: "/path/to/project", + sandboxMode: "workspace-write", + }), + messages: [{ role: "user", content: "Fix the failing test in utils.test.ts" }], +}); +``` + +## Configuration + +| Option | Description | +| ---------------------- | -------------------------------------------------------------------------------------------------------------------------------------------- | +| `cwd` | Working directory for the harness session. Defaults to `process.cwd()`. | +| `sandboxMode` | Codex sandbox: `'read-only'` (harness default), `'workspace-write'`, or `'danger-full-access'`. This is the safety boundary on a server. | +| `approvalPolicy` | Codex approval policy. Defaults to `'never'` — headless runs have no approval UI, so anything else can stall a turn. | +| `modelReasoningEffort` | `'minimal'` \| `'low'` \| `'medium'` \| `'high'` \| `'xhigh'`. | +| `skipGitRepoCheck` | Skip the harness's git-repo safety check. Defaults to `true` (server adapters routinely point at scratch directories). | +| `networkAccessEnabled` | Allow network access inside the `workspace-write` sandbox. | +| `webSearchMode` | `'disabled'` \| `'cached'` \| `'live'`. | +| `additionalDirectories`| Extra writable directories beyond `cwd`. | +| `apiKey` | OpenAI API key for the harness subprocess. | +| `baseUrl` | Override the Codex backend base URL. | +| `codexPathOverride` | Use a specific codex executable instead of the SDK's bundled binary. | +| `env` | Environment variables for the subprocess. When set, `process.env` is **not** inherited (Codex SDK semantics). | +| `config` | Extra `--config key=value` overrides passed to the Codex CLI (e.g. additional `mcp_servers` entries). 
| + +Per-call overrides — `sessionId`, `sandboxMode`, `approvalPolicy`, `modelReasoningEffort`, `workingDirectory`, `skipGitRepoCheck` — go through `modelOptions`. + +## Stateful Sessions + +Codex threads are stateful — the harness keeps the full working context (files read, commands run, conclusions reached) between turns. The adapter surfaces the thread id of every fresh run as a custom stream event named `codex.session-id`; thread it back via `modelOptions.sessionId` to resume. When resuming, only the latest user message is sent — the harness already holds the prior context. + +Server endpoint: + +```typescript +import { + chat, + chatParamsFromRequest, + toServerSentEventsResponse, +} from "@tanstack/ai"; +import { codexText } from "@tanstack/ai-codex"; + +export async function POST(request: Request) { + const params = await chatParamsFromRequest(request); + + // Extra fields the client puts in the connection `body` arrive here. + const sessionId = + typeof params.forwardedProps.sessionId === "string" + ? 
params.forwardedProps.sessionId + : undefined; + + const stream = chat({ + adapter: codexText("gpt-5.1-codex", { + cwd: "/path/to/project", + sandboxMode: "workspace-write", + }), + messages: params.messages, + modelOptions: { sessionId }, + }); + + return toServerSentEventsResponse(stream); +} +``` + +Client (React) — capture the session id from the custom event and send it back on subsequent requests: + +```typescript +import { useState } from "react"; +import { useChat } from "@tanstack/ai-react"; +import { fetchServerSentEvents } from "@tanstack/ai-client"; + +function CodingAssistant() { + const [sessionId, setSessionId] = useState<string | undefined>(undefined); + + const { messages, sendMessage } = useChat({ + connection: fetchServerSentEvents("/api/chat", () => ({ + body: { sessionId }, + })), + onCustomEvent: (name, value) => { + if ( + name === "codex.session-id" && + typeof value === "object" && + value !== null && + "sessionId" in value && + typeof value.sessionId === "string" + ) { + setSessionId(value.sessionId); + } + }, + }); + + // ... render messages; harness tool activity (command_execution, + // file_change, ...) arrives as regular tool-call parts with results. +} +``` + +Sessions are stored on the machine that ran them (`~/.codex/sessions/`), so resuming only works on the same server instance. + +## Tools + +Two kinds of tools flow through this adapter: + +1. **Built-in harness tools** are executed by Codex itself and stream back as tool-call events with results already attached: `command_execution` (shell), `file_change` (patches), `web_search`, and `todo_list` (the agent's running plan). Your code never executes them. + +2. **Your TanStack tools** are bridged *into* the harness: the adapter starts a short-lived Streamable-HTTP MCP server on `127.0.0.1` for the duration of the turn and points Codex at it. Define tools as usual with `toolDefinition().server()`; tool-call events come back under the names you registered.
+ +```typescript +import { z } from "zod"; +import { chat, toolDefinition } from "@tanstack/ai"; +import { codexText } from "@tanstack/ai-codex"; + +const lookupTicket = toolDefinition({ + name: "lookup_ticket", + description: "Look up an issue ticket by id", + inputSchema: z.object({ ticketId: z.string() }), +}).server(async ({ ticketId }) => { + return { ticketId, status: "open", title: "Crash on startup" }; +}); + +const stream = chat({ + adapter: codexText("gpt-5.1-codex"), + messages: [{ role: "user", content: "What's the status of ticket T-123?" }], + tools: [lookupTicket], +}); +``` + +**Client-side and approval-gated tools are not supported.** The harness executes tools inside a live subprocess, which cannot pause across HTTP requests to wait for a browser round-trip or a human approval. Passing a tool without a server `execute()` implementation — or one marked `needsApproval` — fails fast with a descriptive error. Run those tools outside the harness with a regular provider adapter. + +## Structured Output + +`structuredOutput()` uses Codex's native `outputSchema` support in a fresh, read-only, one-shot thread whose final message is a JSON string conforming to your schema. It works for finalization after a chat, but a plain provider adapter (e.g. `@tanstack/ai-openai`) is the better choice when structured extraction is the primary job — it's faster and doesn't spawn a subprocess. + +## Limitations + +- **No token-level text streaming.** The Codex SDK reports assistant text and reasoning only as completed items, so text arrives message-at-a-time. Tool activity (commands starting/finishing) still streams live, which keeps the UI feeling alive during long turns. +- **Server-only (Node).** The harness spawns a subprocess. +- **The harness owns the agent loop.** TanStack's agent-loop strategies and per-iteration middleware don't apply inside a harness turn. +- **No sampling controls.** `temperature`-style options don't exist here. 
+- **Sessions are machine-local.** Resume requires hitting the same server instance. +- **Cold starts.** Each call spawns a harness turn; expect higher first-token latency than HTTP adapters. diff --git a/docs/adapters/gemini-cli.md b/docs/adapters/gemini-cli.md new file mode 100644 index 000000000..9822c1298 --- /dev/null +++ b/docs/adapters/gemini-cli.md @@ -0,0 +1,205 @@ +--- +title: Gemini CLI +id: gemini-cli-adapter +order: 13 +description: "Use Gemini CLI as a chat backend in TanStack AI — agent harness with local tool execution, stateful coding sessions, and tool bridging via @tanstack/ai-gemini-cli." +keywords: + - tanstack ai + - gemini cli + - agent client protocol + - acp + - google + - harness + - agent + - coding agent + - adapter +--- + +The Gemini CLI adapter runs [Gemini CLI](https://github.com/google-gemini/gemini-cli) as a chat backend, driving it over the [Agent Client Protocol](https://agentclientprotocol.com) (`gemini --acp`) — the same interface editors like Zed use to embed it. Unlike HTTP provider adapters, this is a **harness adapter**: Gemini CLI runs its own agent loop and executes its own tools — shell commands, file reads and edits, search — locally on your server. Each `chat()` call runs one full harness turn; assistant text and thinking stream as true token-level deltas, and the harness's tool activity streams back as already-resolved tool-call events your UI can render. + +> **Server-only.** The adapter spawns the `gemini` CLI as a subprocess, so it only works in a Node.js server environment — never in the browser. Treat it like giving Gemini a shell on the machine it runs on, and configure permissions accordingly. 
+ +## Installation + +```bash +npm install @tanstack/ai-gemini-cli +``` + +The `gemini` CLI itself is a prerequisite — it is **not** bundled: + +```bash +npm install -g @google/gemini-cli +``` + +A runnable demo lives at [`examples/ts-react-coding-agent`](https://github.com/TanStack/ai/tree/main/examples/ts-react-coding-agent) — session resume, the harness tool timeline, permission modes, and tool bridging, wired into a React app. + +## Authentication + +The harness resolves credentials the same way Gemini CLI does: + +- an existing Google login on the machine (run `gemini` once interactively), or +- `GEMINI_API_KEY` in the server's environment (pass it via the `env` config option if needed). + +**Headless ACP auth.** When driven over ACP, Gemini CLI can't pop an +interactive auth picker, so it needs to be told which method to use. Set +`authMethodId` to one of the methods the CLI advertises — commonly +`'oauth-personal'` (Log in with Google), `'gemini-api-key'`, or `'vertex-ai'`. +The adapter selects it (via the ACP `authenticate` call) before opening the +session, and fails fast with the list of available methods if the one you +asked for isn't offered. Some setups also require trusting the working +directory in headless mode — set `GEMINI_CLI_TRUST_WORKSPACE=true` (or pass +`--skip-trust` via `extraArgs`) when the CLI refuses an untrusted folder. 
+ +```typescript +import { geminiCliText } from "@tanstack/ai-gemini-cli"; + +const adapter = geminiCliText("gemini-3-pro-preview", { + cwd: "/path/to/project", + authMethodId: "oauth-personal", // reuse the machine's Google login +}); +``` + +## Basic Usage + +```typescript +import { chat } from "@tanstack/ai"; +import { geminiCliText } from "@tanstack/ai-gemini-cli"; + +const stream = chat({ + adapter: geminiCliText("gemini-3-pro-preview", { + cwd: "/path/to/project", + permissionMode: "acceptEdits", + }), + messages: [{ role: "user", content: "Fix the failing test in utils.test.ts" }], +}); +``` + +## Configuration + +| Option | Description | +| --------------------- | --------------------------------------------------------------------------------------------------------------------- | +| `cwd` | Working directory for the harness session. Defaults to `process.cwd()`. | +| `executablePath` | Path to the Gemini CLI executable. Defaults to `gemini` on `PATH`. | +| `extraArgs` | Extra CLI arguments appended after `--acp` (e.g. `['--sandbox']`). | +| `env` | Extra environment variables merged over `process.env` for the subprocess. | +| `permissionMode` | `'default'`, `'acceptEdits'`, or `'bypassPermissions'`. See the permissions note below. | +| `onPermissionRequest` | Custom permission handler; replaces the adapter's default policy. | +| `authMethodId` | ACP auth method to select before the session starts, e.g. `'oauth-personal'`, `'gemini-api-key'`, `'vertex-ai'`. See Authentication. | + +Per-call overrides — `sessionId`, `permissionMode`, `cwd`, `authMethodId` — go through `modelOptions`. + +**Permissions on headless servers.** ACP routes the harness's tool-approval questions back to the embedding application. 
Without a custom `onPermissionRequest`, the adapter installs a safe default policy that always answers immediately: bridged TanStack tools are approved, `'acceptEdits'` additionally approves file-mutation tools (edit / move / delete kinds), `'bypassPermissions'` approves everything, and anything else is rejected — a headless server must never hang on a question only an interactive user could answer. + +## Stateful Sessions + +Gemini CLI sessions are stateful — the harness keeps the full working context between turns. The adapter surfaces the session id of every run as a custom stream event named `gemini-cli.session-id`; thread it back via `modelOptions.sessionId` to resume the session. When resuming, only the latest user message is sent — the harness already holds the prior context. If the installed CLI can't load the session (older CLI, different machine), the adapter transparently falls back to a fresh session seeded with the flattened transcript, and the new session id is emitted so the client can re-pin it. + +Server endpoint: + +```typescript +import { + chat, + chatParamsFromRequest, + toServerSentEventsResponse, +} from "@tanstack/ai"; +import { geminiCliText } from "@tanstack/ai-gemini-cli"; + +export async function POST(request: Request) { + const params = await chatParamsFromRequest(request); + + // Extra fields the client puts in the connection `body` arrive here. + const sessionId = + typeof params.forwardedProps.sessionId === "string" + ? 
params.forwardedProps.sessionId + : undefined; + + const stream = chat({ + adapter: geminiCliText("gemini-3-pro-preview", { + cwd: "/path/to/project", + permissionMode: "acceptEdits", + }), + messages: params.messages, + modelOptions: { sessionId }, + }); + + return toServerSentEventsResponse(stream); +} +``` + +Client (React) — capture the session id from the custom event and send it back on subsequent requests: + +```typescript +import { useState } from "react"; +import { useChat } from "@tanstack/ai-react"; +import { fetchServerSentEvents } from "@tanstack/ai-client"; + +function CodingAssistant() { + const [sessionId, setSessionId] = useState<string | undefined>(undefined); + + const { messages, sendMessage } = useChat({ + connection: fetchServerSentEvents("/api/chat", () => ({ + body: { sessionId }, + })), + onCustomEvent: (name, value) => { + if ( + name === "gemini-cli.session-id" && + typeof value === "object" && + value !== null && + "sessionId" in value && + typeof value.sessionId === "string" + ) { + setSessionId(value.sessionId); + } + }, + }); + + // ... render messages; harness tool activity (execute, edit, read, ...) + // arrives as regular tool-call parts with their results attached. +} +``` + +Sessions are stored on the machine that ran them (under `~/.gemini/tmp/`), so resuming only works on the same server instance. + +## Tools + +Two kinds of tools flow through this adapter: + +1. **Built-in harness tools** (shell, file edits, reads, search, web fetch, ...) are executed by Gemini CLI itself. Their activity streams back as tool-call events — named by their ACP tool kind (`execute`, `edit`, `read`, `search`, ...), with the human-readable title in the arguments — and results attached, so `useChat` UIs render them with no extra wiring. Your code never executes them. The harness's running plan is surfaced as a CUSTOM `gemini-cli.plan` event. + +2.
**Your TanStack tools** are bridged *into* the harness: the adapter starts a short-lived Streamable-HTTP MCP server on `127.0.0.1` for the duration of the turn and registers it with the ACP session. Define tools as usual with `toolDefinition().server()`; tool-call events come back under the names you registered, and the default permission policy auto-approves them. + +```typescript +import { z } from "zod"; +import { chat, toolDefinition } from "@tanstack/ai"; +import { geminiCliText } from "@tanstack/ai-gemini-cli"; + +const lookupTicket = toolDefinition({ + name: "lookup_ticket", + description: "Look up an issue ticket by id", + inputSchema: z.object({ ticketId: z.string() }), +}).server(async ({ ticketId }) => { + return { ticketId, status: "open", title: "Crash on startup" }; +}); + +const stream = chat({ + adapter: geminiCliText("gemini-3-pro-preview"), + messages: [{ role: "user", content: "What's the status of ticket T-123?" }], + tools: [lookupTicket], +}); +``` + +**Client-side and approval-gated tools are not supported.** The harness executes tools inside a live subprocess, which cannot pause across HTTP requests to wait for a browser round-trip or a human approval. Passing a tool without a server `execute()` implementation — or one marked `needsApproval` — fails fast with a descriptive error. Run those tools outside the harness with a regular provider adapter. + +## Structured Output + +ACP has no native JSON-schema output channel, so `structuredOutput()` is best-effort: the schema is embedded as a prompt instruction in a fresh one-shot session and the final text is parsed (markdown fences are stripped when present). For production structured extraction, use a plain provider adapter (e.g. `@tanstack/ai-gemini`) — it's faster, schema-enforced, and doesn't spawn a subprocess. + +## Limitations + +- **Server-only (Node)**, and the `gemini` CLI must be installed and authenticated on the host. 
+- **Token usage is usually unavailable.** ACP only recently added usage reporting; when the CLI doesn't report it, `RUN_FINISHED` carries no usage. +- **The harness owns the agent loop.** TanStack's agent-loop strategies and per-iteration middleware don't apply inside a harness turn. +- **No sampling controls.** `temperature`-style options don't exist here. +- **Sessions are machine-local.** Resume requires hitting the same server instance (with graceful fallback to a fresh transcript-seeded session). +- **Cold starts.** Each call spawns the CLI; expect higher first-token latency than HTTP adapters. +- **ACP is young.** Gemini CLI's ACP mode is still stabilizing; pin a known-good CLI version in production. diff --git a/docs/config.json b/docs/config.json index a53629291..05ab94858 100644 --- a/docs/config.json +++ b/docs/config.json @@ -451,6 +451,16 @@ "label": "Claude Code", "to": "adapters/claude-code", "addedAt": "2026-06-12" + }, + { + "label": "Codex", + "to": "adapters/codex", + "addedAt": "2026-06-12" + }, + { + "label": "Gemini CLI", + "to": "adapters/gemini-cli", + "addedAt": "2026-06-12" } ] }, diff --git a/examples/ts-react-coding-agent/README.md b/examples/ts-react-coding-agent/README.md index 4576afdea..98bc24bdc 100644 --- a/examples/ts-react-coding-agent/README.md +++ b/examples/ts-react-coding-agent/README.md @@ -1,9 +1,11 @@ # TanStack AI — Coding Agent Example -A React (TanStack Start) app that drives a **coding-agent harness** through -TanStack AI — currently [Claude Code](https://docs.anthropic.com/en/docs/claude-code) -via `@tanstack/ai-claude-code`, with the agent registry structured so future -harness adapters (Codex, Gemini CLI, ...) can slot in. 
+A React (TanStack Start) app that drives **coding-agent harnesses** through +TanStack AI — [Claude Code](https://docs.anthropic.com/en/docs/claude-code) +via `@tanstack/ai-claude-code`, [Codex](https://developers.openai.com/codex) +via `@tanstack/ai-codex`, and +[Gemini CLI](https://github.com/google-gemini/gemini-cli) via +`@tanstack/ai-gemini-cli`, switchable from a dropdown. Unlike a normal chat example, the agent here runs its own loop server-side and executes its own tools — reading, searching, and (in Edit mode) editing @@ -13,38 +15,116 @@ timeline of resolved tool calls. ## What it demonstrates - **Session resume** — the server emits the harness session id via a - `claude-code.session-id` custom event; the client pins it and sends it - back through `forwardedProps` → `modelOptions.sessionId`, so follow-ups - continue the same stateful session. -- **Harness tool timeline** — built-in tools (Read, Grep, Edit, ...) arrive - as already-resolved tool-call parts and render with their inputs/outputs. -- **Permission modes** — a Read-only/Edit toggle maps to `disallowedTools` - vs `permissionMode: 'acceptEdits'`. Shell commands are denied by the - adapter's default permission policy either way — ask it to run something - and watch the denial show up in the timeline. + `<agent>.session-id` custom event (`claude-code.session-id`, + `codex.session-id`, `gemini-cli.session-id`); the client pins it and sends + it back through `forwardedProps` → `modelOptions.sessionId`, so follow-ups + continue the same stateful session. Switching agents resets the session. +- **Harness tool timeline** — built-in tools (Read, Grep, Edit, + command_execution, ...) arrive as already-resolved tool-call parts and + render with their inputs/outputs. Note that Codex streams text + message-at-a-time (its SDK has no token deltas), while Claude Code and + Gemini CLI stream token-by-token.
+- **Permission modes** — a Read-only/Edit toggle maps to each harness's + knobs: `disallowedTools` vs `permissionMode: 'acceptEdits'` for Claude + Code, `sandboxMode: 'read-only'` vs `'workspace-write'` for Codex, and + the default-deny vs `acceptEdits` permission policy for Gemini CLI. With + Claude Code and Gemini CLI, ask it to run a shell command and watch the + denial show up in the timeline. - **Tool bridging** — `lookup_style_guide` is an ordinary TanStack server - tool the harness calls from inside its own loop. + tool the harness calls from inside its own loop (in-process MCP for + Claude Code; a localhost Streamable-HTTP MCP bridge for Codex and + Gemini CLI). - **Sandboxed cwd** — the agent only works inside `workspace/`. ## Running -This is a server-spawning example: each chat turn launches the Claude Code -runtime as a subprocess on your machine. +This is a server-spawning example: each chat turn launches the selected +harness as a subprocess on your machine. You only need to set up the agent(s) +you actually want to try — the others stay selectable in the UI and pop a +setup dialog explaining what's missing (see [Runtime config detection](#runtime-config-detection)). -1. Auth: set `ANTHROPIC_API_KEY`, or have a local Claude Code login - (`claude login`). -2. From this directory: +### 1. Set up the agent(s) you want - ```bash - pnpm install - pnpm dev - ``` +**Claude Code** ([docs](https://docs.anthropic.com/en/docs/claude-code)) -3. Open http://localhost:3000 and try: - - "What files are in this project, and what do they do?" (Read-only) - - Switch to **Edit mode**: "Fix the bug in temperature.js" — note it - calls `lookup_style_guide` first. - - "Now update todo.md to check off what you did" — same session, no - re-explaining. 
+```bash +npm i -g @anthropic-ai/claude-code # install the CLI +claude login # log in with your Claude subscription +# …or, instead of `claude login`, set an API key in the server env: +export ANTHROPIC_API_KEY=sk-ant-… +``` + +The `claude` binary is spawned per turn, so the CLI must be on `PATH`. + +**Codex** ([docs](https://developers.openai.com/codex)) + +```bash +codex login # log in interactively +# …or set an API key in the server env (forwarded as CODEX_API_KEY): +export OPENAI_API_KEY=sk-… +``` + +The `codex` binary ships with `@openai/codex-sdk`, so there's nothing extra to +install. Note: a **ChatGPT-account** login can't run codex models in headless +mode — use an API key or an entitled account, otherwise the run fails with an +entitlement error from OpenAI. + +**Gemini CLI** ([docs](https://github.com/google-gemini/gemini-cli)) + +```bash +npm i -g @google/gemini-cli # ACP mode needs a current build +gemini # log in with Google once (interactive) +``` + +Headless ACP runs can't show an interactive auth picker, so you must tell the +adapter which method to use via `GEMINI_ACP_AUTH_METHOD` (e.g. `oauth-personal` +for a Google login, or `gemini-api-key`). If the CLI refuses the scratch +workspace as untrusted, also export `GEMINI_CLI_TRUST_WORKSPACE=true`. So, for +a Google-login setup, start the dev server like this: + +```bash +GEMINI_ACP_AUTH_METHOD=oauth-personal GEMINI_CLI_TRUST_WORKSPACE=true pnpm dev +``` + +To use an API key instead, set `GEMINI_API_KEY` and +`GEMINI_ACP_AUTH_METHOD=gemini-api-key`. + +### 2. Install and run + +```bash +pnpm install +pnpm dev +``` + +### 3. Try it out + +Open http://localhost:3000 and try: + +- "What files are in this project, and what do they do?" (Read-only) +- Switch to **Edit mode**: "Fix the bug in temperature.js" — note it + calls `lookup_style_guide` first. +- "Now update todo.md to check off what you did" — same session, no + re-explaining.
Reset the demo workspace afterwards with `git checkout -- workspace/`. + +## Runtime config detection + +Environment variables and CLI logins live on the server, not in the browser, so +the route loader calls a `createServerFn` (`src/lib/agent-status.ts`) that +reports which agents are actually runnable. Every agent stays selectable in the +dropdown; picking one that isn't configured — or trying to send to it — opens a +dialog with the exact setup steps (sourced from `AGENT_SETUP` in +`src/lib/agents.ts`, which mirrors the instructions above). An agent counts as +configured when: + +- **Claude Code** — `ANTHROPIC_API_KEY` / `CLAUDE_CODE_OAUTH_TOKEN` is set, or + a `~/.claude.json` login exists. +- **Codex** — `OPENAI_API_KEY` / `CODEX_API_KEY` is set, or a + `~/.codex/auth.json` login exists. +- **Gemini CLI** — `GEMINI_API_KEY` or `GEMINI_ACP_AUTH_METHOD` is set (a + cached Google login alone isn't enough for headless ACP, so it isn't + counted). + +Detection runs on the server on every loader request, but environment variables are +fixed when the server process starts, so set your env vars / log in **before** `pnpm dev` (or restart it after).
diff --git a/examples/ts-react-coding-agent/package.json b/examples/ts-react-coding-agent/package.json index cccab8e65..a6b6555a4 100644 --- a/examples/ts-react-coding-agent/package.json +++ b/examples/ts-react-coding-agent/package.json @@ -14,6 +14,8 @@ "@tanstack/ai": "workspace:*", "@tanstack/ai-claude-code": "workspace:*", "@tanstack/ai-client": "workspace:*", + "@tanstack/ai-codex": "workspace:*", + "@tanstack/ai-gemini-cli": "workspace:*", "@tanstack/ai-react": "workspace:*", "@tanstack/nitro-v2-vite-plugin": "^1.154.7", "@tanstack/react-router": "^1.158.4", diff --git a/examples/ts-react-coding-agent/src/lib/agent-status.ts b/examples/ts-react-coding-agent/src/lib/agent-status.ts new file mode 100644 index 000000000..3a28cae1b --- /dev/null +++ b/examples/ts-react-coding-agent/src/lib/agent-status.ts @@ -0,0 +1,52 @@ +import { createServerFn } from '@tanstack/react-start' +import type { AgentId } from './agents' + +/** Whether a path exists, swallowing any access error. */ +async function fileExists(filePath: string): Promise { + try { + const { access } = await import('node:fs/promises') + await access(filePath) + return true + } catch { + return false + } +} + +/** + * Reports, per agent, whether the server has credentials/config to actually + * run it. Environment variables aren't visible to the browser, so the client + * gets this through a server function (called from the route loader). Each + * agent counts as configured when an API key is present in the environment, or + * when a local CLI login exists — except Gemini CLI, whose headless ACP mode + * additionally needs an auth method selected up front (so we gate on the env + * vars the example's adapter actually reads). 
+ */ +export const getAgentConfigFn = createServerFn({ method: 'GET' }).handler( + async (): Promise> => { + const os = await import('node:os') + const path = await import('node:path') + const home = os.homedir() + const env = process.env + + const claudeCode = + Boolean(env.ANTHROPIC_API_KEY) || + Boolean(env.CLAUDE_CODE_OAUTH_TOKEN) || + (await fileExists(path.join(home, '.claude.json'))) + + const codex = + Boolean(env.OPENAI_API_KEY) || + Boolean(env.CODEX_API_KEY) || + (await fileExists(path.join(home, '.codex', 'auth.json'))) + + // Gemini's headless ACP path needs an auth method (or an API key) chosen + // explicitly — a cached Google login alone isn't enough, so don't count it. + const geminiCli = + Boolean(env.GEMINI_API_KEY) || Boolean(env.GEMINI_ACP_AUTH_METHOD) + + return { + 'claude-code': claudeCode, + codex, + 'gemini-cli': geminiCli, + } + }, +) diff --git a/examples/ts-react-coding-agent/src/lib/agents.ts b/examples/ts-react-coding-agent/src/lib/agents.ts index 49e236ec0..319a7e6cd 100644 --- a/examples/ts-react-coding-agent/src/lib/agents.ts +++ b/examples/ts-react-coding-agent/src/lib/agents.ts @@ -2,31 +2,116 @@ * Registry of coding-agent harnesses this example can drive. * * Each entry maps to a harness adapter on the server (see - * `src/routes/api.chat.ts`). Today only Claude Code ships; Codex and - * Gemini CLI slots are reserved for future harness adapters. + * `src/routes/api.chat.ts`): Claude Code (`@tanstack/ai-claude-code`), + * Codex (`@tanstack/ai-codex`), and Gemini CLI (`@tanstack/ai-gemini-cli`). */ export const AGENTS = [ - { id: 'claude-code', label: 'Claude Code', available: true }, - { id: 'codex', label: 'Codex (coming soon)', available: false }, - { id: 'gemini-cli', label: 'Gemini CLI (coming soon)', available: false }, + { id: 'claude-code', label: 'Claude Code' }, + { id: 'codex', label: 'Codex' }, + { id: 'gemini-cli', label: 'Gemini CLI' }, ] as const /** Agent ids with a working adapter behind them. 
*/ -export type AgentId = 'claude-code' +export type AgentId = 'claude-code' | 'codex' | 'gemini-cli' export const DEFAULT_AGENT: AgentId = 'claude-code' export function isAgentId(value: unknown): value is AgentId { - return value === 'claude-code' + return value === 'claude-code' || value === 'codex' || value === 'gemini-cli' +} + +/** A single, optionally command-bearing step in an agent's setup guide. */ +export interface SetupStep { + text: string + /** A shell command to show in a copyable code block. */ + code?: string +} + +export interface AgentSetup { + /** Human label (mirrors the AGENTS entry). */ + label: string + /** One-line description of what drives this agent. */ + summary: string + /** Ordered setup steps shown in the "not configured" dialog. */ + steps: Array + /** Docs link for the underlying CLI/tool. */ + docsUrl: string +} + +/** + * Setup instructions surfaced in the UI when an agent isn't configured on the + * server at runtime. Mirrors the README "Running" section — keep them in sync. + */ +export const AGENT_SETUP: Record = { + 'claude-code': { + label: 'Claude Code', + summary: + 'Drives the Claude Code CLI through @tanstack/ai-claude-code. Needs the CLI installed and authenticated on the server.', + steps: [ + { + text: 'Install the Claude Code CLI:', + code: 'npm i -g @anthropic-ai/claude-code', + }, + { + text: 'Log in interactively (uses your Claude subscription):', + code: 'claude login', + }, + { + text: '…or set an API key in the server environment instead:', + code: 'export ANTHROPIC_API_KEY=sk-ant-…', + }, + { text: 'Restart the dev server so it picks up new credentials.' }, + ], + docsUrl: 'https://docs.anthropic.com/en/docs/claude-code', + }, + codex: { + label: 'Codex', + summary: + 'Drives OpenAI Codex through @tanstack/ai-codex. 
The codex binary ships with the SDK; you only need credentials.', + steps: [ + { text: 'Log in interactively:', code: 'codex login' }, + { + text: '…or set an API key in the server environment instead:', + code: 'export OPENAI_API_KEY=sk-…', + }, + { + text: 'Heads up: ChatGPT-account logins cannot run codex models in headless mode — an API key or an entitled account is required.', + }, + { text: 'Restart the dev server so it picks up new credentials.' }, + ], + docsUrl: 'https://developers.openai.com/codex', + }, + 'gemini-cli': { + label: 'Gemini CLI', + summary: + 'Drives the Gemini CLI over ACP through @tanstack/ai-gemini-cli. Needs a recent CLI and an ACP auth method chosen up front.', + steps: [ + { + text: 'Install a current Gemini CLI (ACP mode needs a recent build):', + code: 'npm i -g @google/gemini-cli', + }, + { text: 'Log in with Google once (interactive):', code: 'gemini' }, + { + text: 'Headless ACP runs can’t show an auth picker, so tell the adapter which method to use and start the server:', + code: 'GEMINI_ACP_AUTH_METHOD=oauth-personal GEMINI_CLI_TRUST_WORKSPACE=true pnpm dev', + }, + { + text: '…or use an API key instead (set GEMINI_ACP_AUTH_METHOD=gemini-api-key):', + code: 'export GEMINI_API_KEY=…', + }, + ], + docsUrl: 'https://github.com/google-gemini/gemini-cli', + }, } /** * What the agent is allowed to do in the workspace: * - `read-only`: it can read and search, but file edits and shell commands * are blocked. - * - `edit`: file edits are auto-approved; shell commands still get denied by - * the adapter's default permission policy (a deliberate demo of the - * permission system). + * - `edit`: file edits are auto-approved; with Claude Code and Gemini CLI, + * shell commands still get denied by each adapter's default permission + * policy (a deliberate demo of the permission system), while Codex + * sandboxes them inside the workspace instead. 
*/ export type AgentMode = 'read-only' | 'edit' diff --git a/examples/ts-react-coding-agent/src/routes/api.chat.ts b/examples/ts-react-coding-agent/src/routes/api.chat.ts index 445d6be05..63b08b416 100644 --- a/examples/ts-react-coding-agent/src/routes/api.chat.ts +++ b/examples/ts-react-coding-agent/src/routes/api.chat.ts @@ -6,6 +6,8 @@ import { toServerSentEventsResponse, } from '@tanstack/ai' import { claudeCodeText } from '@tanstack/ai-claude-code' +import { codexText } from '@tanstack/ai-codex' +import { geminiCliText } from '@tanstack/ai-gemini-cli' import { isAgentId, isAgentMode } from '@/lib/agents' import { lookupStyleGuide } from '@/lib/style-guide-tool' import type { AgentId, AgentMode } from '@/lib/agents' @@ -16,10 +18,7 @@ project mounted in your working directory. Before writing or editing any code, call the lookup_style_guide tool and follow what it says. Keep your answers short — the user is watching your tool activity stream by.` -/** - * One harness adapter per agent id. This is the seam where future harness - * adapters (Codex, Gemini CLI, ...) slot in as additional cases. - */ +/** One harness adapter per agent id. */ function createAdapter( agentId: AgentId, mode: AgentMode, @@ -39,6 +38,29 @@ function createAdapter( // removed from the harness entirely. { disallowedTools: ['Write', 'Edit', 'NotebookEdit', 'Bash'] }), }) + case 'codex': + // Codex has no per-tool permission prompts in headless mode; the + // sandbox is the safety boundary. Edit mode lets it write inside the + // workspace, read-only keeps every command non-mutating. + return codexText('gpt-5.1-codex', { + cwd, + sandboxMode: mode === 'edit' ? 'workspace-write' : 'read-only', + }) + case 'gemini-cli': + return geminiCliText('gemini-3-pro-preview', { + cwd, + // Edit mode auto-approves file edits; shell commands still get + // rejected by the adapter's default permission policy, same demo + // as Claude Code above. + permissionMode: mode === 'edit' ? 
'acceptEdits' : 'default', + // Headless ACP runs must select an auth method up front (the CLI + // can't pop an interactive picker). Set GEMINI_ACP_AUTH_METHOD to + // the method your CLI is set up for, e.g. `oauth-personal` (Log in + // with Google) or `gemini-api-key`. See this example's README. + ...(process.env.GEMINI_ACP_AUTH_METHOD && { + authMethodId: process.env.GEMINI_ACP_AUTH_METHOD, + }), + }) } } diff --git a/examples/ts-react-coding-agent/src/routes/index.tsx b/examples/ts-react-coding-agent/src/routes/index.tsx index 0938968cd..27c487849 100644 --- a/examples/ts-react-coding-agent/src/routes/index.tsx +++ b/examples/ts-react-coding-agent/src/routes/index.tsx @@ -1,12 +1,16 @@ import { useMemo, useState } from 'react' import { createFileRoute } from '@tanstack/react-router' import { fetchServerSentEvents, useChat } from '@tanstack/ai-react' -import { AGENTS, DEFAULT_AGENT } from '@/lib/agents' +import { AGENTS, AGENT_SETUP, DEFAULT_AGENT, isAgentId } from '@/lib/agents' +import { getAgentConfigFn } from '@/lib/agent-status' import type { UIMessage } from '@tanstack/ai-react' -import type { AgentMode } from '@/lib/agents' +import type { AgentId, AgentMode } from '@/lib/agents' export const Route = createFileRoute('/')({ component: CodingAgentPage, + // Env vars aren't available client-side, so the loader asks the server which + // agents are actually configured (see src/lib/agent-status.ts). + loader: () => getAgentConfigFn(), }) function ToolCallCard({ @@ -92,11 +96,87 @@ function Message({ message }: { message: UIMessage }) { ) } +function SetupDialog({ + agentId, + onClose, +}: { + agentId: AgentId + onClose: () => void +}) { + const setup = AGENT_SETUP[agentId] + return ( +
+
event.stopPropagation()} + role="dialog" + aria-modal="true" + aria-label={`${setup.label} setup`} + > +
+

Set up {setup.label}

+ +
+

{setup.summary}

+
    + {setup.steps.map((step, index) => ( +
  1. +
    + + {index + 1}. + +
    +

    {step.text}

    + {step.code && ( +
    +                      {step.code}
    +                    
    + )} +
    +
    +
  2. + ))} +
+
+ + Documentation ↗ + + +
+
+
+ ) +} + function CodingAgentPage() { - const [agentId, setAgentId] = useState(DEFAULT_AGENT) + const configured = Route.useLoaderData() + const [agentId, setAgentId] = useState(DEFAULT_AGENT) const [mode, setMode] = useState('read-only') const [sessionId, setSessionId] = useState(undefined) const [input, setInput] = useState('') + const [setupOpen, setSetupOpen] = useState(false) + + const isConfigured = configured[agentId] const body = useMemo( () => ({ agentId, mode, sessionId }), @@ -107,8 +187,10 @@ function CodingAgentPage() { connection: fetchServerSentEvents('/api/chat'), body, onCustomEvent: (eventType, data) => { + // Every harness adapter pins its session with a `.session-id` + // CUSTOM event (claude-code.session-id, codex.session-id, ...). if ( - eventType === 'claude-code.session-id' && + eventType.endsWith('.session-id') && typeof data === 'object' && data !== null && 'sessionId' in data && @@ -127,22 +209,37 @@ function CodingAgentPage() { const send = () => { const text = input.trim() if (!text || isLoading) return + // Don't fire a request the server can't fulfil — explain the setup instead. + if (!isConfigured) { + setSetupOpen(true) + return + } setInput('') void sendMessage(text) } + const selectAgent = (value: string) => { + if (!isAgentId(value)) return + // Sessions aren't portable across harnesses — switching agents starts fresh. + setAgentId(value) + setSessionId(undefined) + // Selecting is always allowed; if it isn't set up, show how to fix it. + if (!configured[value]) setSetupOpen(true) + } + return (

Coding Agent

@@ -162,10 +259,24 @@ function CodingAgentPage() {
+ {!isConfigured && ( +
+ + ⚠️ {AGENT_SETUP[agentId].label} isn’t configured on the server. + + +
+ )} +
{sessionId - ? `Resuming Claude Code session ${sessionId.slice(0, 8)}… — follow-ups send only your latest message.` - : 'No session yet — the first reply starts one and pins it via the claude-code.session-id event.'} + ? `Resuming session ${sessionId.slice(0, 8)}… — follow-ups send only your latest message.` + : `No session yet — the first reply starts one and pins it via the ${agentId}.session-id event.`}
@@ -204,6 +315,10 @@ function CodingAgentPage() { {isLoading ? 'Working…' : 'Send'} + + {setupOpen && ( + setSetupOpen(false)} /> + )}
) } diff --git a/packages/ai-codex/README.md b/packages/ai-codex/README.md new file mode 100644 index 000000000..21284d4a1 --- /dev/null +++ b/packages/ai-codex/README.md @@ -0,0 +1,18 @@ +# @tanstack/ai-codex + +Codex harness adapter for [TanStack AI](https://tanstack.com/ai) — run [OpenAI Codex](https://developers.openai.com/codex) (via `@openai/codex-sdk`) as a chat backend with local tool execution, stateful coding sessions, and TanStack tool bridging. + +```typescript +import { chat } from '@tanstack/ai' +import { codexText } from '@tanstack/ai-codex' + +const stream = chat({ + adapter: codexText('gpt-5.1-codex', { + cwd: '/path/to/project', + sandboxMode: 'workspace-write', + }), + messages: [{ role: 'user', content: 'Fix the failing test.' }], +}) +``` + +Server-only (Node). See the [Codex adapter docs](https://tanstack.com/ai/latest/docs/adapters/codex) for sessions, tool bridging, sandboxing, and limitations. diff --git a/packages/ai-codex/package.json b/packages/ai-codex/package.json new file mode 100644 index 000000000..39ca349b4 --- /dev/null +++ b/packages/ai-codex/package.json @@ -0,0 +1,59 @@ +{ + "name": "@tanstack/ai-codex", + "version": "0.1.0", + "description": "Codex harness adapter for TanStack AI — run OpenAI Codex as a chat backend with local tool execution and stateful sessions.", + "author": "", + "license": "MIT", + "repository": { + "type": "git", + "url": "git+https://github.com/TanStack/ai.git", + "directory": "packages/ai-codex" + }, + "keywords": [ + "ai", + "ai-sdk", + "typescript", + "tanstack", + "openai", + "codex", + "harness", + "agent", + "adapter", + "chat", + "tool-calling" + ], + "type": "module", + "module": "./dist/esm/index.js", + "types": "./dist/esm/index.d.ts", + "exports": { + ".": { + "types": "./dist/esm/index.d.ts", + "import": "./dist/esm/index.js" + } + }, + "files": [ + "dist", + "src" + ], + "scripts": { + "build": "vite build", + "clean": "premove ./build ./dist", + "lint:fix": "eslint ./src --fix", + 
"test:build": "publint --strict", + "test:eslint": "eslint ./src", + "test:lib": "vitest", + "test:lib:dev": "pnpm test:lib --watch", + "test:types": "tsc" + }, + "dependencies": { + "@modelcontextprotocol/sdk": "^1.29.0", + "@openai/codex-sdk": "^0.139.0" + }, + "peerDependencies": { + "@tanstack/ai": "workspace:^" + }, + "devDependencies": { + "@tanstack/ai": "workspace:*", + "@vitest/coverage-v8": "4.0.14" + } +} diff --git a/packages/ai-codex/src/adapters/text.ts b/packages/ai-codex/src/adapters/text.ts new file mode 100644 index 000000000..9e27963ba --- /dev/null +++ b/packages/ai-codex/src/adapters/text.ts @@ -0,0 +1,366 @@ +import { Codex } from '@openai/codex-sdk' +import { EventType, normalizeSystemPrompts } from '@tanstack/ai' +import { toRunErrorRawEvent } from '@tanstack/ai/adapter-internals' +import { BaseTextAdapter } from '@tanstack/ai/adapters' +import { buildPrompt } from '../messages/prompt' +import { startToolBridge } from '../tools/bridge' +import { + BRIDGED_MCP_SERVER_NAME, + translateThreadEvents, +} from '../stream/translate' +import type { + StructuredOutputOptions, + StructuredOutputResult, +} from '@tanstack/ai/adapters' +import type { + AnyTool, + DefaultMessageMetadataByModality, + Modality, + StreamChunk, + TextOptions, +} from '@tanstack/ai' +import type { + ApprovalMode, + CodexOptions, + ModelReasoningEffort, + SandboxMode, + ThreadOptions, + WebSearchMode, +} from '@openai/codex-sdk' +import type { CodexModel } from '../model-meta' +import type { CodexTextProviderOptions } from '../provider-options' +import type { CodexThreadEvent, CodexUsage } from '../stream/sdk-types' + +type CodexConfigValue = NonNullable[string] + +export interface CodexTextConfig { + /** Working directory for the harness session. Defaults to `process.cwd()`. */ + cwd?: string + /** + * Codex sandbox mode. Defaults to the harness default (`read-only`); set + * `'workspace-write'` to let the harness edit files and run commands + * inside the working directory. 
+ */ + sandboxMode?: SandboxMode + /** + * Codex approval policy. Headless runs have no approval UI, so this + * defaults to `'never'` — the sandbox mode is the safety boundary. + */ + approvalPolicy?: ApprovalMode + /** Model reasoning effort forwarded to the harness. */ + modelReasoningEffort?: ModelReasoningEffort + /** + * Whether to skip the harness's git-repo safety check. Defaults to `true`: + * a server adapter routinely points at scratch directories that aren't + * repositories, and the sandbox mode is the real safety boundary. + */ + skipGitRepoCheck?: boolean + /** Allow network access inside the `workspace-write` sandbox. */ + networkAccessEnabled?: boolean + /** Web search mode forwarded to the harness. */ + webSearchMode?: WebSearchMode + /** Extra writable directories beyond the working directory. */ + additionalDirectories?: Array + /** + * OpenAI API key for the harness subprocess (exported as `CODEX_API_KEY`). + * Falls back to the local `codex login` credentials when omitted. + */ + apiKey?: string + /** Override the Codex backend base URL. */ + baseUrl?: string + /** Path to a Codex executable (defaults to the SDK's bundled binary). */ + codexPathOverride?: string + /** + * Environment variables for the harness subprocess. When set, the + * subprocess does NOT inherit `process.env` (Codex SDK semantics). + */ + env?: Record + /** + * Extra `--config key=value` overrides passed to the Codex CLI, e.g. + * additional `mcp_servers` entries. Merged with (and overridden by) the + * adapter's own bridged-tools server config. 
+ */ + config?: CodexOptions['config'] +} + +function validateTools(tools: Array | undefined): void { + if (!tools || tools.length === 0) return + const unsupported = tools.filter( + (tool) => typeof tool.execute !== 'function' || tool.needsApproval === true, + ) + if (unsupported.length > 0) { + throw new Error( + `Codex harness cannot execute client-side or approval-gated tools: ${unsupported + .map((tool) => tool.name) + .join( + ', ', + )}. Provide server execute() implementations without needsApproval, or run these tools outside the harness.`, + ) + } +} + +export class CodexTextAdapter< + TModel extends CodexModel, +> extends BaseTextAdapter< + TModel, + CodexTextProviderOptions, + ReadonlyArray & readonly ['text'], + DefaultMessageMetadataByModality, + ReadonlyArray, + unknown, + never +> { + readonly name = 'codex' as const + + private readonly adapterConfig: CodexTextConfig + + constructor(config: CodexTextConfig, model: TModel) { + super({}, model) + this.adapterConfig = config + } + + async *chatStream( + options: TextOptions, + ): AsyncIterable { + const { logger } = options + let bridge: Awaited> | undefined + try { + validateTools(options.tools) + + const modelOptions = options.modelOptions + const { prompt, resume } = buildPrompt( + options.messages, + modelOptions?.sessionId, + ) + + if (options.tools && options.tools.length > 0) { + bridge = await startToolBridge(options.tools) + } + + const codex = this.createCodex(bridge?.url) + const threadOptions = this.buildThreadOptions(options) + const thread = + resume !== undefined + ? codex.resumeThread(resume, threadOptions) + : codex.startThread(threadOptions) + + logger.request( + `activity=chat provider=codex model=${this.model} messages=${options.messages.length} tools=${options.tools?.length ?? 0} resume=${resume ?? 'none'}`, + { provider: 'codex', model: this.model }, + ) + + const signal = + options.abortController?.signal ?? options.request?.signal ?? 
undefined + const { events } = await thread.runStreamed( + this.applySystemPrompts(options, prompt), + signal !== undefined ? { signal } : {}, + ) + + yield* translateThreadEvents(events as AsyncIterable, { + model: this.model, + runId: options.runId ?? this.generateId(), + threadId: options.threadId ?? this.generateId(), + ...(options.parentRunId !== undefined && { + parentRunId: options.parentRunId, + }), + genId: () => this.generateId(), + onThreadEvent: (event) => + logger.provider(`provider=codex type=${event.type}`, { + chunk: event, + }), + }) + } catch (error: unknown) { + const err = error as Error & { code?: string } + const rawEvent = toRunErrorRawEvent(error) + logger.errors('codex.chatStream fatal', { + error, + source: 'codex.chatStream', + }) + yield { + type: EventType.RUN_ERROR, + model: options.model, + timestamp: Date.now(), + message: err.message || 'Unknown error occurred', + ...(err.code !== undefined && { code: err.code }), + ...(rawEvent !== undefined && { rawEvent }), + error: { + message: err.message || 'Unknown error occurred', + ...(err.code !== undefined && { code: err.code }), + }, + } + } finally { + await bridge?.close() + } + } + + /** + * Structured output via the harness's native `outputSchema` support: a + * fresh one-shot read-only thread whose final agent message is a JSON + * string conforming to the schema. + */ + async structuredOutput( + options: StructuredOutputOptions, + ): Promise> { + const { chatOptions, outputSchema } = options + const { logger } = chatOptions + + // Fresh one-shot run: deliberately no `resume`, so finalization never + // mutates the caller's interactive session. No bridge either — tools + // are a chat concern. 
+ const { prompt } = buildPrompt(chatOptions.messages, undefined) + + const codex = this.createCodex(undefined) + const thread = codex.startThread({ + ...this.buildThreadOptions(chatOptions), + sandboxMode: 'read-only', + }) + + logger.request( + `activity=structured-output provider=codex model=${this.model}`, + { provider: 'codex', model: this.model }, + ) + + const signal = + chatOptions.abortController?.signal ?? + chatOptions.request?.signal ?? + undefined + const { events } = await thread.runStreamed( + this.applySystemPrompts(chatOptions, prompt), + { + outputSchema, + ...(signal !== undefined && { signal }), + }, + ) + + let rawText = '' + let usage: CodexUsage | undefined + for await (const event of events as AsyncIterable) { + logger.provider(`provider=codex type=${event.type}`, { chunk: event }) + if ( + event.type === 'item.completed' && + event.item.type === 'agent_message' + ) { + rawText = event.item.text + } else if (event.type === 'turn.completed') { + usage = event.usage + } else if (event.type === 'turn.failed') { + throw new Error(event.error?.message ?? 'Codex turn failed') + } else if (event.type === 'error') { + throw new Error(event.message) + } + } + + if (rawText === '') { + throw new Error( + 'Codex run ended without an agent message during structured output generation.', + ) + } + + const promptTokens = usage?.input_tokens ?? 0 + const completionTokens = usage?.output_tokens ?? 0 + return { + data: JSON.parse(rawText), + rawText, + usage: { + promptTokens, + completionTokens, + totalTokens: promptTokens + completionTokens, + }, + } + } + + /** + * Codex threads have no system-prompt channel, so `systemPrompts` from + * `chat()` are prepended to the prompt text as an instruction preamble. 
+ */ + private applySystemPrompts( + options: TextOptions, + prompt: string, + ): string { + const systemPrompts = normalizeSystemPrompts(options.systemPrompts) + .map((systemPrompt) => systemPrompt.content) + .filter((content) => content.trim() !== '') + if (systemPrompts.length === 0) return prompt + return `${systemPrompts.join('\n\n')}\n\n${prompt}` + } + + private createCodex(bridgeUrl: string | undefined): Codex { + const config = this.adapterConfig + const mergedConfig: CodexOptions['config'] = { + ...config.config, + ...(bridgeUrl !== undefined && { + mcp_servers: { + ...(config.config?.mcp_servers as + | Record + | undefined), + [BRIDGED_MCP_SERVER_NAME]: { url: bridgeUrl }, + }, + }), + } + return new Codex({ + ...(config.apiKey !== undefined && { apiKey: config.apiKey }), + ...(config.baseUrl !== undefined && { baseUrl: config.baseUrl }), + ...(config.codexPathOverride !== undefined && { + codexPathOverride: config.codexPathOverride, + }), + ...(config.env !== undefined && { env: config.env }), + ...(Object.keys(mergedConfig).length > 0 && { config: mergedConfig }), + }) + } + + private buildThreadOptions( + options: TextOptions, + ): ThreadOptions { + const config = this.adapterConfig + const modelOptions = options.modelOptions + + const sandboxMode = modelOptions?.sandboxMode ?? config.sandboxMode + const approvalPolicy = + modelOptions?.approvalPolicy ?? config.approvalPolicy ?? 'never' + const modelReasoningEffort = + modelOptions?.modelReasoningEffort ?? config.modelReasoningEffort + const workingDirectory = modelOptions?.workingDirectory ?? config.cwd + const skipGitRepoCheck = + modelOptions?.skipGitRepoCheck ?? config.skipGitRepoCheck ?? 
true + + return { + model: this.model, + approvalPolicy, + skipGitRepoCheck, + ...(sandboxMode !== undefined && { sandboxMode }), + ...(modelReasoningEffort !== undefined && { modelReasoningEffort }), + ...(workingDirectory !== undefined && { workingDirectory }), + ...(config.networkAccessEnabled !== undefined && { + networkAccessEnabled: config.networkAccessEnabled, + }), + ...(config.webSearchMode !== undefined && { + webSearchMode: config.webSearchMode, + }), + ...(config.additionalDirectories !== undefined && { + additionalDirectories: config.additionalDirectories, + }), + } + } +} + +/** + * Creates a Codex text adapter. + * + * Unlike HTTP provider adapters, this is a *harness* adapter: Codex runs its + * own agent loop and executes its own tools (shell commands, file edits, + * web search, ...) locally, server-side, inside its sandbox. Each `chat()` + * call runs one full harness turn; harness tool activity streams back as + * already-resolved tool-call events, and the thread id is surfaced via a + * CUSTOM `codex.session-id` event so follow-up calls can resume the session + * through `modelOptions.sessionId`. + * + * Note: Codex reports assistant text only as completed messages — there are + * no token-level text deltas, so text arrives message-at-a-time while tool + * activity still streams live. 
+ */ +export function codexText( + model: TModel, + config: CodexTextConfig = {}, +): CodexTextAdapter { + return new CodexTextAdapter(config, model) +} diff --git a/packages/ai-codex/src/index.ts b/packages/ai-codex/src/index.ts new file mode 100644 index 000000000..2bfdf23d3 --- /dev/null +++ b/packages/ai-codex/src/index.ts @@ -0,0 +1,21 @@ +export { CodexTextAdapter, codexText } from './adapters/text' +export type { CodexTextConfig } from './adapters/text' +export type { CodexTextProviderOptions } from './provider-options' +export { CODEX_MODELS } from './model-meta' +export type { CodexModel, KnownCodexModel } from './model-meta' +export { + SESSION_ID_EVENT, + BRIDGED_MCP_SERVER_NAME, + translateThreadEvents, + toolNameForItem, +} from './stream/translate' +export type { TranslateContext } from './stream/translate' +export type { + CodexThreadEvent, + CodexThreadItem, + CodexUsage, +} from './stream/sdk-types' +export { buildPrompt } from './messages/prompt' +export type { BuiltPrompt } from './messages/prompt' +export { startToolBridge } from './tools/bridge' +export type { ToolBridgeHandle } from './tools/bridge' diff --git a/packages/ai-codex/src/messages/prompt.ts b/packages/ai-codex/src/messages/prompt.ts new file mode 100644 index 000000000..73b9ac549 --- /dev/null +++ b/packages/ai-codex/src/messages/prompt.ts @@ -0,0 +1,67 @@ +import type { ModelMessage } from '@tanstack/ai' + +export interface BuiltPrompt { + prompt: string + /** Codex thread id to resume, when the caller threaded one through. */ + resume?: string +} + +function extractText(content: ModelMessage['content']): string { + if (content === null) return '' + if (typeof content === 'string') return content + return content + .map((part) => + part.type === 'text' && typeof part.content === 'string' + ? part.content + : '', + ) + .join('') +} + +/** + * Convert TanStack chat history into the Codex SDK's prompt + resume inputs. 
+ * + * With a `sessionId`, the harness already holds the conversation context, so + * only the trailing user message is sent and the thread is resumed. Without + * one, prior turns are flattened into a plain-text transcript preamble (tool + * messages and tool-call-only assistant turns are harness-internal noise and + * are skipped; prompts are text-only in v1). + */ +export function buildPrompt( + messages: Array, + sessionId: string | undefined, +): BuiltPrompt { + const lastMessage = messages.at(-1) + const lastUserText = + lastMessage?.role === 'user' ? extractText(lastMessage.content).trim() : '' + + if (!lastUserText) { + throw new Error( + 'Codex adapter requires a trailing user message with text content.', + ) + } + + if (sessionId !== undefined) { + return { prompt: lastUserText, resume: sessionId } + } + + const priorTurns = messages + .slice(0, -1) + .filter( + (message) => + (message.role === 'user' || message.role === 'assistant') && + extractText(message.content).trim() !== '', + ) + .map( + (message) => + `${message.role === 'user' ? 'User' : 'Assistant'}: ${extractText(message.content).trim()}`, + ) + + if (priorTurns.length === 0) { + return { prompt: lastUserText } + } + + return { + prompt: `Previous conversation:\n${priorTurns.join('\n')}\n\n${lastUserText}`, + } +} diff --git a/packages/ai-codex/src/model-meta.ts b/packages/ai-codex/src/model-meta.ts new file mode 100644 index 000000000..dbf2af0fe --- /dev/null +++ b/packages/ai-codex/src/model-meta.ts @@ -0,0 +1,17 @@ +/** + * Models known to work with Codex. The harness accepts any OpenAI model id + * its backend supports, so this list exists for autocomplete — any string is + * accepted via the `(string & {})` escape hatch in {@link CodexModel}. 
+ */ +export const CODEX_MODELS = [ + 'gpt-5.3-codex', + 'gpt-5.2-codex', + 'gpt-5.1-codex', + 'gpt-5.1-codex-mini', + 'gpt-5.1', +] as const + +export type KnownCodexModel = (typeof CODEX_MODELS)[number] + +/** Any model id accepted by Codex; known ids get autocomplete. */ +export type CodexModel = KnownCodexModel | (string & {}) diff --git a/packages/ai-codex/src/provider-options.ts b/packages/ai-codex/src/provider-options.ts new file mode 100644 index 000000000..819702cf4 --- /dev/null +++ b/packages/ai-codex/src/provider-options.ts @@ -0,0 +1,29 @@ +import type { + ApprovalMode, + ModelReasoningEffort, + SandboxMode, +} from '@openai/codex-sdk' + +/** + * Per-call provider options for the Codex adapter, passed via `modelOptions` + * on `chat()`. + */ +export interface CodexTextProviderOptions { + /** + * Resume an existing Codex thread. The adapter emits the thread id of + * every fresh run via a CUSTOM `codex.session-id` stream event; thread it + * back here to continue that session (only the latest user message is + * sent — the harness already holds the prior context). + */ + sessionId?: string + /** Per-call override of the configured sandbox mode. */ + sandboxMode?: SandboxMode + /** Per-call override of the configured approval policy. */ + approvalPolicy?: ApprovalMode + /** Per-call override of the model reasoning effort. */ + modelReasoningEffort?: ModelReasoningEffort + /** Per-call override of the harness working directory. */ + workingDirectory?: string + /** Per-call override of the git-repo safety check (defaults to skipping). */ + skipGitRepoCheck?: boolean +} diff --git a/packages/ai-codex/src/stream/sdk-types.ts b/packages/ai-codex/src/stream/sdk-types.ts new file mode 100644 index 000000000..1e7d36726 --- /dev/null +++ b/packages/ai-codex/src/stream/sdk-types.ts @@ -0,0 +1,66 @@ +/** + * Structural subset of the `@openai/codex-sdk` event types that the stream + * translator consumes. 
+ * + * These are intentionally defined structurally (rather than imported from the + * Codex SDK) so the translator stays a pure, fixture-testable state machine + * and the package's public types don't depend on the SDK's type exports. + * Unknown item or event types fall through every branch at runtime. + */ + +export interface CodexUsage { + input_tokens?: number + cached_input_tokens?: number + output_tokens?: number + reasoning_output_tokens?: number +} + +export interface CodexMcpToolCallResult { + content?: Array<{ type: string; text?: string; [key: string]: unknown }> + structured_content?: unknown +} + +export type CodexThreadItem = + | { id: string; type: 'agent_message'; text: string } + | { id: string; type: 'reasoning'; text: string } + | { + id: string + type: 'command_execution' + command: string + aggregated_output?: string + exit_code?: number + status: string + } + | { + id: string + type: 'file_change' + changes: Array<{ path: string; kind: string }> + status: string + } + | { + id: string + type: 'mcp_tool_call' + server: string + tool: string + arguments?: unknown + result?: CodexMcpToolCallResult + error?: { message: string } + status: string + } + | { id: string; type: 'web_search'; query: string } + | { + id: string + type: 'todo_list' + items: Array<{ text: string; completed: boolean }> + } + | { id: string; type: 'error'; message: string } + +export type CodexThreadEvent = + | { type: 'thread.started'; thread_id: string } + | { type: 'turn.started' } + | { type: 'turn.completed'; usage?: CodexUsage } + | { type: 'turn.failed'; error?: { message?: string } } + | { type: 'item.started'; item: CodexThreadItem } + | { type: 'item.updated'; item: CodexThreadItem } + | { type: 'item.completed'; item: CodexThreadItem } + | { type: 'error'; message: string } diff --git a/packages/ai-codex/src/stream/translate.ts b/packages/ai-codex/src/stream/translate.ts new file mode 100644 index 000000000..082e26ba3 --- /dev/null +++ 
b/packages/ai-codex/src/stream/translate.ts @@ -0,0 +1,381 @@ +import { EventType, buildBaseUsage } from '@tanstack/ai' +import type { StreamChunk, TokenUsage } from '@tanstack/ai' +import type { CodexThreadEvent, CodexThreadItem, CodexUsage } from './sdk-types' + +/** Name of the CUSTOM event carrying the Codex thread (session) id. */ +export const SESSION_ID_EVENT = 'codex.session-id' + +/** Server name used for bridged TanStack tools. */ +export const BRIDGED_MCP_SERVER_NAME = 'tanstack' + +export interface TranslateContext { + model: string + runId: string + threadId: string + parentRunId?: string + genId: () => string + /** Called as soon as the harness reports its thread id. */ + onSessionId?: (sessionId: string) => void + /** Called for each raw SDK thread event, for logging. */ + onThreadEvent?: (event: CodexThreadEvent) => void +} + +/** + * Resolve the AG-UI tool-call name for a Codex thread item. Bridged TanStack + * tools come back as `mcp_tool_call` items on the `tanstack` server and are + * surfaced under the names the application registered; foreign MCP tools are + * namespaced `mcp__<server>__<tool>`; harness-native items use their item + * type verbatim (`command_execution`, `file_change`, ...). + */ +export function toolNameForItem(item: CodexThreadItem): string { + if (item.type === 'mcp_tool_call') { + return item.server === BRIDGED_MCP_SERVER_NAME + ? item.tool + : `mcp__${item.server}__${item.tool}` + } + return item.type +} + +/** Thread items the translator surfaces as already-resolved tool calls. */ +type CodexToolItem = Extract< + CodexThreadItem, + { + type: + | 'command_execution' + | 'mcp_tool_call' + | 'file_change' + | 'web_search' + | 'todo_list' + } +> + +function toolArgsForItem(item: CodexToolItem): unknown { + switch (item.type) { + case 'command_execution': + return { command: item.command } + case 'mcp_tool_call': + return item.arguments ?? 
{} + case 'file_change': + return { changes: item.changes } + case 'web_search': + return { query: item.query } + case 'todo_list': + return {} + } +} + +function toolResultForItem(item: CodexToolItem): { + content: string + isError: boolean +} { + switch (item.type) { + case 'command_execution': + return { + content: JSON.stringify({ + aggregated_output: item.aggregated_output ?? '', + ...(item.exit_code !== undefined && { exit_code: item.exit_code }), + status: item.status, + }), + isError: item.status === 'failed', + } + case 'mcp_tool_call': { + if (item.error) { + return { content: item.error.message, isError: true } + } + const text = (item.result?.content ?? []) + .map((block) => (typeof block.text === 'string' ? block.text : '')) + .join('') + if (text !== '') { + return { content: text, isError: item.status === 'failed' } + } + if (item.result?.structured_content !== undefined) { + return { + content: JSON.stringify(item.result.structured_content), + isError: item.status === 'failed', + } + } + return { + content: JSON.stringify({ status: item.status }), + isError: item.status === 'failed', + } + } + case 'file_change': + return { + content: JSON.stringify({ changes: item.changes, status: item.status }), + isError: item.status === 'failed', + } + case 'web_search': + return { + content: JSON.stringify({ status: 'completed' }), + isError: false, + } + case 'todo_list': + return { content: JSON.stringify({ items: item.items }), isError: false } + } +} + +function isToolItem(item: CodexThreadItem): item is CodexToolItem { + return ( + item.type === 'command_execution' || + item.type === 'mcp_tool_call' || + item.type === 'file_change' || + item.type === 'web_search' || + item.type === 'todo_list' + ) +} + +function buildUsage(usage: CodexUsage | undefined): TokenUsage | undefined { + if (!usage) return undefined + const promptTokens = usage.input_tokens ?? 0 + const completionTokens = usage.output_tokens ?? 
0 + const result = buildBaseUsage({ + promptTokens, + completionTokens, + totalTokens: promptTokens + completionTokens, + }) + if (usage.cached_input_tokens) { + result.promptTokensDetails = { cachedTokens: usage.cached_input_tokens } + } + if (usage.reasoning_output_tokens) { + result.completionTokensDetails = { + reasoningTokens: usage.reasoning_output_tokens, + } + } + return result +} + +/** + * Translate a Codex SDK thread-event stream into AG-UI StreamChunk events. + * + * The harness runs its own agent loop and executes its own tools, so the + * translation always ends with `finishReason: 'stop'` (or RUN_ERROR) — never + * `'tool_calls'`. Harness tool activity (commands, file changes, MCP calls, + * web searches, todo lists) is emitted as already-resolved + * TOOL_CALL_START/ARGS/END + TOOL_CALL_RESULT sequences so UIs can render it + * while the TanStack engine never tries to execute them. + * + * Codex reports assistant text and reasoning only as completed items (no + * token-level deltas), so each `agent_message` / `reasoning` item becomes a + * single START/CONTENT/END burst. + * + * Invariant: every TOOL_CALL_START is eventually paired with a + * TOOL_CALL_RESULT (synthesized as `{"status":"interrupted"}` when the run + * ends or aborts before the harness reported one) so the engine's + * pending-tool-call scan on the next request never force-executes them. + */ +export async function* translateThreadEvents( + events: AsyncIterable, + ctx: TranslateContext, +): AsyncIterable { + const { model, runId, threadId, genId } = ctx + const now = () => Date.now() + + let runStarted = false + /** Tool calls started but with no result yet. */ + const unresolvedToolCalls = new Set() + /** Item ids that already emitted TOOL_CALL_START/ARGS/END. 
*/ + const openedToolItems = new Set() + + function* startRun(): Generator { + if (runStarted) return + runStarted = true + yield { + type: EventType.RUN_STARTED, + runId, + threadId, + model, + timestamp: now(), + ...(ctx.parentRunId !== undefined && { parentRunId: ctx.parentRunId }), + } + } + + function* synthesizeUnresolvedResults(): Generator { + for (const toolCallId of unresolvedToolCalls) { + yield { + type: EventType.TOOL_CALL_RESULT, + toolCallId, + messageId: genId(), + model, + timestamp: now(), + content: JSON.stringify({ status: 'interrupted' }), + } + } + unresolvedToolCalls.clear() + } + + function* openToolCall(item: CodexToolItem): Generator { + if (openedToolItems.has(item.id)) return + openedToolItems.add(item.id) + const toolCallName = toolNameForItem(item) + const input = toolArgsForItem(item) + const args = JSON.stringify(input) + yield { + type: EventType.TOOL_CALL_START, + toolCallId: item.id, + toolCallName, + toolName: toolCallName, + model, + timestamp: now(), + } + yield { + type: EventType.TOOL_CALL_ARGS, + toolCallId: item.id, + model, + timestamp: now(), + delta: args, + args, + } + yield { + type: EventType.TOOL_CALL_END, + toolCallId: item.id, + toolCallName, + toolName: toolCallName, + model, + timestamp: now(), + input, + } + unresolvedToolCalls.add(item.id) + } + + function* handleItemCompleted(item: CodexThreadItem): Generator { + if (item.type === 'agent_message') { + const messageId = item.id + yield { + type: EventType.TEXT_MESSAGE_START, + messageId, + model, + timestamp: now(), + role: 'assistant', + } + yield { + type: EventType.TEXT_MESSAGE_CONTENT, + messageId, + model, + timestamp: now(), + delta: item.text, + content: item.text, + } + yield { + type: EventType.TEXT_MESSAGE_END, + messageId, + model, + timestamp: now(), + } + } else if (item.type === 'reasoning') { + const reasoningId = item.id + yield { + type: EventType.REASONING_START, + messageId: reasoningId, + model, + timestamp: now(), + } + yield { + type: 
EventType.REASONING_MESSAGE_START, + messageId: reasoningId, + role: 'reasoning' as const, + model, + timestamp: now(), + } + yield { + type: EventType.REASONING_MESSAGE_CONTENT, + messageId: reasoningId, + delta: item.text, + model, + timestamp: now(), + } + yield { + type: EventType.REASONING_MESSAGE_END, + messageId: reasoningId, + model, + timestamp: now(), + } + yield { + type: EventType.REASONING_END, + messageId: reasoningId, + model, + timestamp: now(), + } + } else if (isToolItem(item)) { + yield* openToolCall(item) + unresolvedToolCalls.delete(item.id) + const { content, isError } = toolResultForItem(item) + yield { + type: EventType.TOOL_CALL_RESULT, + toolCallId: item.id, + messageId: genId(), + model, + timestamp: now(), + content, + ...(isError && { state: 'output-error' as const }), + } + } + // `error` items are non-fatal diagnostics; `turn.failed` is the fatal + // signal. They are surfaced via onThreadEvent logging only. + } + + try { + for await (const event of events) { + ctx.onThreadEvent?.(event) + + if (event.type === 'thread.started') { + yield* startRun() + ctx.onSessionId?.(event.thread_id) + yield { + type: EventType.CUSTOM, + model, + timestamp: now(), + name: SESSION_ID_EVENT, + value: { sessionId: event.thread_id }, + } + continue + } + + // Resumed threads don't re-emit thread.started; anything else still + // needs RUN_STARTED first. 
+ yield* startRun() + + if (event.type === 'item.started') { + if (isToolItem(event.item)) { + yield* openToolCall(event.item) + } + } else if (event.type === 'item.completed') { + yield* handleItemCompleted(event.item) + } else if (event.type === 'turn.completed') { + yield* synthesizeUnresolvedResults() + const usage = buildUsage(event.usage) + yield { + type: EventType.RUN_FINISHED, + runId, + threadId, + model, + timestamp: now(), + finishReason: 'stop', + ...(usage !== undefined && { usage }), + } + } else if (event.type === 'turn.failed' || event.type === 'error') { + yield* synthesizeUnresolvedResults() + const message = + event.type === 'turn.failed' + ? (event.error?.message ?? 'Codex turn failed') + : event.message + yield { + type: EventType.RUN_ERROR, + model, + timestamp: now(), + message, + error: { message }, + } + } + // turn.started and item.updated carry no state the chunk stream needs: + // long-running items resolve via item.completed, and intermediate + // updates (e.g. streaming command output) are intentionally dropped. + } + } catch (error) { + // The run is dying (abort or SDK failure). Pair any started tool calls + // with a synthetic result first so the next request's pending-tool-call + // scan doesn't try to execute them, then let the adapter surface the + // error as RUN_ERROR. 
+ yield* synthesizeUnresolvedResults() + throw error + } +} diff --git a/packages/ai-codex/src/tools/bridge.ts b/packages/ai-codex/src/tools/bridge.ts new file mode 100644 index 000000000..daafcc47d --- /dev/null +++ b/packages/ai-codex/src/tools/bridge.ts @@ -0,0 +1,130 @@ +import { createServer } from 'node:http' +import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js' +import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js' +import { + CallToolRequestSchema, + ListToolsRequestSchema, +} from '@modelcontextprotocol/sdk/types.js' +import { BRIDGED_MCP_SERVER_NAME } from '../stream/translate' +import type { AddressInfo } from 'node:net' +import type { AnyTool } from '@tanstack/ai' + +/** A running localhost MCP server exposing TanStack tools to the harness. */ +export interface ToolBridgeHandle { + /** Streamable-HTTP MCP endpoint, e.g. `http://127.0.0.1:54321/mcp`. */ + url: string + /** Stop the HTTP server and drop any open connections. */ + close: () => Promise +} + +function createMcpServer(tools: Array): McpServer { + const instance = new McpServer( + { name: BRIDGED_MCP_SERVER_NAME, version: '1.0.0' }, + { capabilities: { tools: {} } }, + ) + + const toolsByName = new Map(tools.map((tool) => [tool.name, tool])) + + instance.server.setRequestHandler(ListToolsRequestSchema, () => ({ + tools: tools.map((tool) => ({ + name: tool.name, + description: tool.description, + inputSchema: (tool.inputSchema ?? { + type: 'object', + properties: {}, + }) as { type: 'object'; [key: string]: unknown }, + })), + })) + + instance.server.setRequestHandler(CallToolRequestSchema, async (request) => { + const tool = toolsByName.get(request.params.name) + if (!tool?.execute) { + throw new Error(`Unknown tool: ${request.params.name}`) + } + try { + const result: unknown = await tool.execute(request.params.arguments ?? {}) + const text = typeof result === 'string' ? 
result : JSON.stringify(result ?? null) + return { content: [{ type: 'text', text }] } + } catch (error) { + const message = error instanceof Error ? error.message : String(error) + return { + isError: true, + content: [{ type: 'text', text: `Tool execution failed: ${message}` }], + } + } + }) + + return instance +} + +/** + * Expose TanStack tools to the Codex harness as a Streamable-HTTP MCP server + * on an ephemeral localhost port. + * + * Codex runs as a separate subprocess, so unlike the Claude Code adapter + * there is no in-process MCP option — the bridge listens on `127.0.0.1` and + * the adapter points the harness at it via a `mcp_servers.tanstack.url` + * config override. Each request is handled statelessly with a fresh + * `McpServer` + transport pair, which is all the harness's list/call traffic + * needs. + * + * The engine has already converted each tool's schema to JSON Schema before + * the adapter sees it, and JSON Schema is exactly what MCP's `tools/list` + * wants — so the low-level request handlers pass schemas through verbatim + * instead of round-tripping them through zod. + * + * The caller owns the lifecycle: `close()` must run when the chat stream + * ends (the adapter does this in a `finally`) so the port is never leaked. 
+ */ +export async function startToolBridge( + tools: Array, +): Promise { + const httpServer = createServer((req, res) => { + void (async () => { + if (req.method !== 'POST') { + res.writeHead(405).end() + return + } + const chunks: Array = [] + for await (const chunk of req) { + chunks.push(chunk as Buffer) + } + let parsedBody: unknown + try { + parsedBody = JSON.parse(Buffer.concat(chunks).toString('utf8')) + } catch { + res.writeHead(400).end() + return + } + const mcpServer = createMcpServer(tools) + const transport = new StreamableHTTPServerTransport({ + sessionIdGenerator: undefined, + }) + res.on('close', () => { + void transport.close() + void mcpServer.close() + }) + await mcpServer.connect(transport) + await transport.handleRequest(req, res, parsedBody) + })().catch(() => { + if (!res.headersSent) res.writeHead(500) + res.end() + }) + }) + + await new Promise((resolve, reject) => { + httpServer.once('error', reject) + httpServer.listen(0, '127.0.0.1', resolve) + }) + + const { port } = httpServer.address() as AddressInfo + + return { + url: `http://127.0.0.1:${port}/mcp`, + close: () => + new Promise((resolve, reject) => { + httpServer.closeAllConnections() + httpServer.close((error) => (error ? 
reject(error) : resolve())) + }), + } +} diff --git a/packages/ai-codex/tests/bridge.test.ts b/packages/ai-codex/tests/bridge.test.ts new file mode 100644 index 000000000..48acf57aa --- /dev/null +++ b/packages/ai-codex/tests/bridge.test.ts @@ -0,0 +1,108 @@ +import { describe, expect, it } from 'vitest' +import { Client } from '@modelcontextprotocol/sdk/client/index.js' +import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js' +import { startToolBridge } from '../src/tools/bridge' +import type { AnyTool } from '@tanstack/ai' + +function makeTool(overrides: Partial = {}): AnyTool { + return { + name: 'echo', + description: 'Echo the input back', + inputSchema: { + type: 'object', + properties: { value: { type: 'string' } }, + }, + execute: async (args: unknown) => args, + ...overrides, + } as unknown as AnyTool +} + +async function connectClient(url: string): Promise { + const client = new Client({ name: 'test-client', version: '1.0.0' }) + await client.connect(new StreamableHTTPClientTransport(new URL(url))) + return client +} + +describe('startToolBridge', () => { + it('listens on an ephemeral localhost port', async () => { + const bridge = await startToolBridge([makeTool()]) + try { + expect(bridge.url).toMatch(/^http:\/\/127\.0\.0\.1:\d+\/mcp$/) + } finally { + await bridge.close() + } + }) + + it('lists tools with their JSON schemas passed through verbatim', async () => { + const bridge = await startToolBridge([makeTool()]) + try { + const client = await connectClient(bridge.url) + const { tools } = await client.listTools() + expect(tools).toHaveLength(1) + expect(tools[0]).toMatchObject({ + name: 'echo', + description: 'Echo the input back', + inputSchema: { + type: 'object', + properties: { value: { type: 'string' } }, + }, + }) + await client.close() + } finally { + await bridge.close() + } + }) + + it('executes tool calls and returns stringified results', async () => { + const bridge = await startToolBridge([ + 
makeTool({ + execute: async (args: unknown) => ({ + echoed: (args as { value: string }).value, + }), + } as Partial), + ]) + try { + const client = await connectClient(bridge.url) + const result = await client.callTool({ + name: 'echo', + arguments: { value: 'hi' }, + }) + expect(result.content).toEqual([ + { type: 'text', text: JSON.stringify({ echoed: 'hi' }) }, + ]) + await client.close() + } finally { + await bridge.close() + } + }) + + it('returns isError content when the tool throws', async () => { + const bridge = await startToolBridge([ + makeTool({ + execute: async () => { + throw new Error('tool blew up') + }, + } as Partial), + ]) + try { + const client = await connectClient(bridge.url) + const result = await client.callTool({ + name: 'echo', + arguments: {}, + }) + expect(result.isError).toBe(true) + expect(result.content).toEqual([ + { type: 'text', text: 'Tool execution failed: tool blew up' }, + ]) + await client.close() + } finally { + await bridge.close() + } + }) + + it('refuses connections after close()', async () => { + const bridge = await startToolBridge([makeTool()]) + await bridge.close() + await expect(connectClient(bridge.url)).rejects.toThrow() + }) +}) diff --git a/packages/ai-codex/tests/prompt.test.ts b/packages/ai-codex/tests/prompt.test.ts new file mode 100644 index 000000000..6e8dfcdf3 --- /dev/null +++ b/packages/ai-codex/tests/prompt.test.ts @@ -0,0 +1,97 @@ +import { describe, expect, it } from 'vitest' +import { buildPrompt } from '../src/messages/prompt' +import type { ModelMessage } from '@tanstack/ai' + +const user = (content: ModelMessage['content']): ModelMessage => ({ + role: 'user', + content, +}) +const assistant = (content: ModelMessage['content']): ModelMessage => ({ + role: 'assistant', + content, +}) + +describe('buildPrompt', () => { + it('resumes with only the last user message when sessionId is provided', () => { + const result = buildPrompt( + [ + user('first question'), + assistant('first answer'), + 
user('follow-up question'), + ], + 'sess-1', + ) + expect(result).toEqual({ + prompt: 'follow-up question', + resume: 'sess-1', + }) + }) + + it('throws when sessionId is provided but there is no trailing user message', () => { + expect(() => buildPrompt([user('q'), assistant('a')], 'sess-1')).toThrow( + /user message/i, + ) + }) + + it('sends a single user message as-is for a fresh session', () => { + expect(buildPrompt([user('hello')], undefined)).toEqual({ + prompt: 'hello', + }) + }) + + it('flattens prior turns into a transcript preamble for fresh multi-turn history', () => { + const { prompt, resume } = buildPrompt( + [user('What is 2+2?'), assistant('4'), user('And times 3?')], + undefined, + ) + expect(resume).toBeUndefined() + expect(prompt).toBe( + 'Previous conversation:\nUser: What is 2+2?\nAssistant: 4\n\nAnd times 3?', + ) + }) + + it('skips tool messages and assistant tool-call-only turns when flattening', () => { + const messages: Array = [ + user('list files'), + { + role: 'assistant', + content: null, + toolCalls: [ + { + id: 't1', + type: 'function', + function: { name: 'ls', arguments: '{}' }, + }, + ], + } as unknown as ModelMessage, + { role: 'tool', content: 'file-a', toolCallId: 't1' }, + assistant('There is one file.'), + user('thanks, which one?'), + ] + const { prompt } = buildPrompt(messages, undefined) + expect(prompt).toBe( + 'Previous conversation:\nUser: list files\nAssistant: There is one file.\n\nthanks, which one?', + ) + }) + + it('extracts text from content-part arrays and ignores non-text parts', () => { + const { prompt } = buildPrompt( + [ + user([ + { type: 'text', content: 'describe ' }, + { + type: 'image', + source: { type: 'url', url: 'https://x/y.png' }, + } as never, + { type: 'text', content: 'this' }, + ] as ModelMessage['content']), + ], + undefined, + ) + expect(prompt).toBe('describe this') + }) + + it('throws when there is no usable user content at all', () => { + expect(() => buildPrompt([], 
undefined)).toThrow(/user message/i) + }) +}) diff --git a/packages/ai-codex/tests/text-adapter.test.ts b/packages/ai-codex/tests/text-adapter.test.ts new file mode 100644 index 000000000..e43031316 --- /dev/null +++ b/packages/ai-codex/tests/text-adapter.test.ts @@ -0,0 +1,430 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest' +import { Codex } from '@openai/codex-sdk' +import { codexText } from '../src/adapters/text' +import type { CodexThreadEvent } from '../src/stream/sdk-types' +import type { InternalLogger } from '@tanstack/ai/adapter-internals' +import type { StreamChunk, TextOptions } from '@tanstack/ai' + +vi.mock('@openai/codex-sdk', () => ({ + Codex: vi.fn(), +})) + +const codexMock = vi.mocked(Codex) + +const startThreadMock = vi.fn() +const resumeThreadMock = vi.fn() +const runStreamedMock = vi.fn() + +const textTurn: Array = [ + { type: 'thread.started', thread_id: 'sess-1' }, + { type: 'turn.started' }, + { + type: 'item.completed', + item: { id: 'item-1', type: 'agent_message', text: 'hi there' }, + }, + { + type: 'turn.completed', + usage: { + input_tokens: 10, + cached_input_tokens: 0, + output_tokens: 5, + reasoning_output_tokens: 0, + }, + }, +] + +function mockRunReturning(events: Array) { + runStreamedMock.mockImplementation(() => { + async function* generate() { + for (const event of events) yield event + } + return Promise.resolve({ events: generate() }) + }) +} + +const noopLogger = { + request: vi.fn(), + provider: vi.fn(), + output: vi.fn(), + errors: vi.fn(), + middleware: vi.fn(), + tools: vi.fn(), + agentLoop: vi.fn(), + config: vi.fn(), + isEnabled: () => false, +} as unknown as InternalLogger + +function makeOptions( + overrides: Partial>> = {}, +): TextOptions> { + return { + model: 'gpt-5.1-codex', + messages: [{ role: 'user', content: 'hello' }], + logger: noopLogger, + ...overrides, + } as TextOptions> +} + +async function collect( + stream: AsyncIterable, +): Promise> { + const chunks: Array = [] + for await 
(const chunk of stream) chunks.push(chunk) + return chunks +} + +beforeEach(() => { + codexMock.mockReset() + startThreadMock.mockReset() + resumeThreadMock.mockReset() + runStreamedMock.mockReset() + const thread = { runStreamed: runStreamedMock } + startThreadMock.mockReturnValue(thread) + resumeThreadMock.mockReturnValue(thread) + codexMock.mockImplementation(function (this: unknown) { + return { + startThread: startThreadMock, + resumeThread: resumeThreadMock, + } as unknown as Codex + }) +}) + +describe('codexText', () => { + it('creates an adapter with the codex provider name', () => { + const adapter = codexText('gpt-5.1-codex') + expect(adapter.kind).toBe('text') + expect(adapter.name).toBe('codex') + expect(adapter.model).toBe('gpt-5.1-codex') + }) +}) + +describe('chatStream', () => { + it('streams translated AG-UI events for a simple turn', async () => { + mockRunReturning(textTurn) + const adapter = codexText('gpt-5.1-codex') + const chunks = await collect(adapter.chatStream(makeOptions())) + expect(chunks.map((c) => c.type)).toEqual([ + 'RUN_STARTED', + 'CUSTOM', + 'TEXT_MESSAGE_START', + 'TEXT_MESSAGE_CONTENT', + 'TEXT_MESSAGE_END', + 'RUN_FINISHED', + ]) + expect(chunks.at(-1)).toMatchObject({ finishReason: 'stop' }) + }) + + it('starts a fresh thread without a sessionId', async () => { + mockRunReturning(textTurn) + const adapter = codexText('gpt-5.1-codex') + await collect(adapter.chatStream(makeOptions())) + expect(startThreadMock).toHaveBeenCalledTimes(1) + expect(resumeThreadMock).not.toHaveBeenCalled() + expect(runStreamedMock.mock.calls[0]![0]).toBe('hello') + }) + + it('resumes the thread and sends only the trailing user message', async () => { + mockRunReturning(textTurn) + const adapter = codexText('gpt-5.1-codex') + await collect( + adapter.chatStream( + makeOptions({ + messages: [ + { role: 'user', content: 'first' }, + { role: 'assistant', content: 'answer' }, + { role: 'user', content: 'follow-up' }, + ], + modelOptions: { sessionId: 
'sess-prior' }, + }), + ), + ) + expect(resumeThreadMock).toHaveBeenCalledTimes(1) + expect(resumeThreadMock.mock.calls[0]![0]).toBe('sess-prior') + expect(runStreamedMock.mock.calls[0]![0]).toBe('follow-up') + }) + + it('flattens prior turns into the prompt without a sessionId', async () => { + mockRunReturning(textTurn) + const adapter = codexText('gpt-5.1-codex') + await collect( + adapter.chatStream( + makeOptions({ + messages: [ + { role: 'user', content: 'first' }, + { role: 'assistant', content: 'answer' }, + { role: 'user', content: 'follow-up' }, + ], + }), + ), + ) + expect(runStreamedMock.mock.calls[0]![0]).toBe( + 'Previous conversation:\nUser: first\nAssistant: answer\n\nfollow-up', + ) + }) + + it('builds thread options from config with safe defaults', async () => { + mockRunReturning(textTurn) + const adapter = codexText('gpt-5.1-codex', { + cwd: '/workspace', + sandboxMode: 'workspace-write', + }) + await collect(adapter.chatStream(makeOptions())) + expect(startThreadMock.mock.calls[0]![0]).toMatchObject({ + model: 'gpt-5.1-codex', + sandboxMode: 'workspace-write', + workingDirectory: '/workspace', + approvalPolicy: 'never', + skipGitRepoCheck: true, + }) + }) + + it('lets modelOptions override config thread options', async () => { + mockRunReturning(textTurn) + const adapter = codexText('gpt-5.1-codex', { + cwd: '/workspace', + sandboxMode: 'workspace-write', + }) + await collect( + adapter.chatStream( + makeOptions({ + modelOptions: { + sandboxMode: 'read-only', + workingDirectory: '/elsewhere', + }, + }), + ), + ) + expect(startThreadMock.mock.calls[0]![0]).toMatchObject({ + sandboxMode: 'read-only', + workingDirectory: '/elsewhere', + }) + }) + + it('prepends system prompts to the prompt text', async () => { + mockRunReturning(textTurn) + const adapter = codexText('gpt-5.1-codex') + await collect( + adapter.chatStream( + makeOptions({ systemPrompts: ['Be terse.', 'Use tabs.'] }), + ), + ) + expect(runStreamedMock.mock.calls[0]![0]).toBe( + 'Be 
terse.\n\nUse tabs.\n\nhello', + ) + }) + + it('starts a localhost MCP bridge and points codex at it when tools are passed', async () => { + mockRunReturning(textTurn) + const adapter = codexText('gpt-5.1-codex') + await collect( + adapter.chatStream( + makeOptions({ + tools: [ + { + name: 'lookup_user', + description: 'Look up a user', + inputSchema: { type: 'object', properties: {} }, + execute: async () => ({ ok: true }), + } as never, + ], + }), + ), + ) + expect(codexMock).toHaveBeenCalledTimes(1) + const codexOptions = codexMock.mock.calls[0]![0]! + const servers = codexOptions.config!.mcp_servers as Record< + string, + { url: string } + > + expect(servers.tanstack!.url).toMatch(/^http:\/\/127\.0\.0\.1:\d+\/mcp$/) + }) + + it('does not configure the bridge when no tools are passed', async () => { + mockRunReturning(textTurn) + const adapter = codexText('gpt-5.1-codex') + await collect(adapter.chatStream(makeOptions())) + const codexOptions = codexMock.mock.calls[0]![0] ?? {} + expect(codexOptions.config).toBeUndefined() + }) + + it('emits RUN_ERROR for client-side tools (no execute)', async () => { + mockRunReturning(textTurn) + const adapter = codexText('gpt-5.1-codex') + const chunks = await collect( + adapter.chatStream( + makeOptions({ + tools: [ + { + name: 'client_only', + description: 'runs in browser', + inputSchema: { type: 'object', properties: {} }, + } as never, + ], + }), + ), + ) + expect(runStreamedMock).not.toHaveBeenCalled() + expect(chunks.at(-1)).toMatchObject({ type: 'RUN_ERROR' }) + expect((chunks.at(-1) as { message: string }).message).toMatch( + /client-side/i, + ) + }) + + it('emits RUN_ERROR for approval-gated tools', async () => { + mockRunReturning(textTurn) + const adapter = codexText('gpt-5.1-codex') + const chunks = await collect( + adapter.chatStream( + makeOptions({ + tools: [ + { + name: 'needs_ok', + description: 'requires approval', + inputSchema: { type: 'object', properties: {} }, + execute: async () => 'x', + 
needsApproval: true, + } as never, + ], + }), + ), + ) + expect(chunks.at(-1)).toMatchObject({ type: 'RUN_ERROR' }) + }) + + it('passes the abort signal through to runStreamed', async () => { + mockRunReturning(textTurn) + const adapter = codexText('gpt-5.1-codex') + const controller = new AbortController() + await collect( + adapter.chatStream(makeOptions({ abortController: controller })), + ) + expect(runStreamedMock.mock.calls[0]![1]).toMatchObject({ + signal: controller.signal, + }) + }) + + it('forwards apiKey and env to the Codex constructor', async () => { + mockRunReturning(textTurn) + const adapter = codexText('gpt-5.1-codex', { + apiKey: 'sk-test', + env: { PATH: '/usr/bin' }, + }) + await collect(adapter.chatStream(makeOptions())) + expect(codexMock.mock.calls[0]![0]).toMatchObject({ + apiKey: 'sk-test', + env: { PATH: '/usr/bin' }, + }) + }) + + it('emits RUN_ERROR when the SDK throws', async () => { + runStreamedMock.mockImplementation(() => { + throw new Error('spawn failed') + }) + const adapter = codexText('gpt-5.1-codex') + const chunks = await collect(adapter.chatStream(makeOptions())) + expect(chunks.at(-1)).toMatchObject({ + type: 'RUN_ERROR', + message: 'spawn failed', + }) + }) +}) + +describe('structuredOutput', () => { + it('passes the schema as outputSchema and parses the final agent message', async () => { + mockRunReturning([ + { type: 'thread.started', thread_id: 'sess-so' }, + { + type: 'item.completed', + item: { id: 'item-1', type: 'agent_message', text: '{"answer":42}' }, + }, + { + type: 'turn.completed', + usage: { + input_tokens: 7, + cached_input_tokens: 0, + output_tokens: 3, + reasoning_output_tokens: 0, + }, + }, + ]) + const adapter = codexText('gpt-5.1-codex') + const schema = { + type: 'object', + properties: { answer: { type: 'number' } }, + } + const result = await adapter.structuredOutput({ + chatOptions: makeOptions(), + outputSchema: schema, + }) + + expect(result.data).toEqual({ answer: 42 }) + 
expect(result.rawText).toBe('{"answer":42}') + expect(result.usage).toMatchObject({ promptTokens: 7, completionTokens: 3 }) + expect(runStreamedMock.mock.calls[0]![1]).toMatchObject({ + outputSchema: schema, + }) + }) + + it('runs structured output in a fresh read-only thread', async () => { + mockRunReturning([ + { + type: 'item.completed', + item: { id: 'item-1', type: 'agent_message', text: '{}' }, + }, + { + type: 'turn.completed', + usage: { + input_tokens: 1, + cached_input_tokens: 0, + output_tokens: 1, + reasoning_output_tokens: 0, + }, + }, + ]) + const adapter = codexText('gpt-5.1-codex', { + sandboxMode: 'workspace-write', + }) + await adapter.structuredOutput({ + chatOptions: makeOptions({ modelOptions: { sessionId: 'sess-live' } }), + outputSchema: { type: 'object' }, + }) + expect(resumeThreadMock).not.toHaveBeenCalled() + expect(startThreadMock.mock.calls[0]![0]).toMatchObject({ + sandboxMode: 'read-only', + }) + }) + + it('throws a descriptive error when the turn fails', async () => { + mockRunReturning([ + { type: 'turn.failed', error: { message: 'harness exploded' } }, + ]) + const adapter = codexText('gpt-5.1-codex') + await expect( + adapter.structuredOutput({ + chatOptions: makeOptions(), + outputSchema: { type: 'object' }, + }), + ).rejects.toThrow(/harness exploded/) + }) + + it('throws when the run ends without an agent message', async () => { + mockRunReturning([ + { + type: 'turn.completed', + usage: { + input_tokens: 1, + cached_input_tokens: 0, + output_tokens: 0, + reasoning_output_tokens: 0, + }, + }, + ]) + const adapter = codexText('gpt-5.1-codex') + await expect( + adapter.structuredOutput({ + chatOptions: makeOptions(), + outputSchema: { type: 'object' }, + }), + ).rejects.toThrow(/without an agent message/) + }) +}) diff --git a/packages/ai-codex/tests/translate.test.ts b/packages/ai-codex/tests/translate.test.ts new file mode 100644 index 000000000..b76e1d48f --- /dev/null +++ b/packages/ai-codex/tests/translate.test.ts @@ -0,0 
+1,454 @@ +import { describe, expect, it } from 'vitest' +import { + SESSION_ID_EVENT, + toolNameForItem, + translateThreadEvents, +} from '../src/stream/translate' +import type { TranslateContext } from '../src/stream/translate' +import type { CodexThreadEvent } from '../src/stream/sdk-types' +import type { StreamChunk } from '@tanstack/ai' + +function makeCtx(overrides: Partial = {}): TranslateContext { + let id = 0 + return { + model: 'gpt-5.1-codex', + runId: 'run-1', + threadId: 'thread-1', + genId: () => `gen-${++id}`, + ...overrides, + } +} + +async function* fromArray( + events: Array, +): AsyncIterable { + for (const event of events) yield event +} + +async function collect( + events: Array, + ctx: TranslateContext = makeCtx(), +): Promise> { + const chunks: Array = [] + for await (const chunk of translateThreadEvents(fromArray(events), ctx)) { + chunks.push(chunk) + } + return chunks +} + +const started: CodexThreadEvent = { + type: 'thread.started', + thread_id: 'sess-1', +} + +const completedTurn: CodexThreadEvent = { + type: 'turn.completed', + usage: { + input_tokens: 100, + cached_input_tokens: 40, + output_tokens: 20, + reasoning_output_tokens: 5, + }, +} + +describe('translateThreadEvents', () => { + it('translates a simple text turn', async () => { + const chunks = await collect([ + started, + { type: 'turn.started' }, + { + type: 'item.completed', + item: { id: 'item-1', type: 'agent_message', text: 'hi there' }, + }, + completedTurn, + ]) + + expect(chunks.map((c) => c.type)).toEqual([ + 'RUN_STARTED', + 'CUSTOM', + 'TEXT_MESSAGE_START', + 'TEXT_MESSAGE_CONTENT', + 'TEXT_MESSAGE_END', + 'RUN_FINISHED', + ]) + expect(chunks[1]).toMatchObject({ + name: SESSION_ID_EVENT, + value: { sessionId: 'sess-1' }, + }) + expect(chunks[3]).toMatchObject({ delta: 'hi there', content: 'hi there' }) + expect(chunks.at(-1)).toMatchObject({ finishReason: 'stop' }) + }) + + it('reports usage with cache and reasoning details', async () => { + const chunks = await 
collect([started, completedTurn]) + const finished = chunks.at(-1) as unknown as { + usage: Record + } + expect(finished.usage).toMatchObject({ + promptTokens: 100, + completionTokens: 20, + totalTokens: 120, + promptTokensDetails: { cachedTokens: 40 }, + completionTokensDetails: { reasoningTokens: 5 }, + }) + }) + + it('notifies onSessionId and forwards raw events to onThreadEvent', async () => { + const sessionIds: Array = [] + const raw: Array = [] + await collect( + [started, completedTurn], + makeCtx({ + onSessionId: (id) => sessionIds.push(id), + onThreadEvent: (event) => raw.push(event.type), + }), + ) + expect(sessionIds).toEqual(['sess-1']) + expect(raw).toEqual(['thread.started', 'turn.completed']) + }) + + it('starts the run without a session event on resumed threads', async () => { + const chunks = await collect([ + { type: 'turn.started' }, + { + type: 'item.completed', + item: { id: 'item-1', type: 'agent_message', text: 'resumed' }, + }, + completedTurn, + ]) + expect(chunks[0]).toMatchObject({ type: 'RUN_STARTED' }) + expect(chunks.some((c) => c.type === 'CUSTOM')).toBe(false) + }) + + it('translates reasoning items into a reasoning burst', async () => { + const chunks = await collect([ + started, + { + type: 'item.completed', + item: { id: 'item-r', type: 'reasoning', text: 'thinking...' 
}, + }, + completedTurn, + ]) + expect(chunks.map((c) => c.type)).toEqual([ + 'RUN_STARTED', + 'CUSTOM', + 'REASONING_START', + 'REASONING_MESSAGE_START', + 'REASONING_MESSAGE_CONTENT', + 'REASONING_MESSAGE_END', + 'REASONING_END', + 'RUN_FINISHED', + ]) + }) + + it('pairs command executions across item.started and item.completed', async () => { + const chunks = await collect([ + started, + { + type: 'item.started', + item: { + id: 'cmd-1', + type: 'command_execution', + command: 'ls -la', + status: 'in_progress', + }, + }, + { + type: 'item.completed', + item: { + id: 'cmd-1', + type: 'command_execution', + command: 'ls -la', + aggregated_output: 'file.txt', + exit_code: 0, + status: 'completed', + }, + }, + completedTurn, + ]) + + expect(chunks.map((c) => c.type)).toEqual([ + 'RUN_STARTED', + 'CUSTOM', + 'TOOL_CALL_START', + 'TOOL_CALL_ARGS', + 'TOOL_CALL_END', + 'TOOL_CALL_RESULT', + 'RUN_FINISHED', + ]) + expect(chunks[2]).toMatchObject({ + toolCallId: 'cmd-1', + toolCallName: 'command_execution', + }) + expect(chunks[3]).toMatchObject({ + args: JSON.stringify({ command: 'ls -la' }), + }) + const result = chunks[5] as { content: string; state?: string } + expect(JSON.parse(result.content)).toMatchObject({ + aggregated_output: 'file.txt', + exit_code: 0, + status: 'completed', + }) + expect(result.state).toBeUndefined() + }) + + it('marks failed command executions as output-error', async () => { + const chunks = await collect([ + started, + { + type: 'item.completed', + item: { + id: 'cmd-2', + type: 'command_execution', + command: 'false', + aggregated_output: '', + exit_code: 1, + status: 'failed', + }, + }, + completedTurn, + ]) + const result = chunks.find((c) => c.type === 'TOOL_CALL_RESULT') + expect(result).toMatchObject({ state: 'output-error' }) + }) + + it('emits a full tool pair when item.completed arrives without item.started', async () => { + const chunks = await collect([ + started, + { + type: 'item.completed', + item: { + id: 'fc-1', + type: 
'file_change', + changes: [{ path: 'a.ts', kind: 'update' }], + status: 'completed', + }, + }, + completedTurn, + ]) + expect(chunks.map((c) => c.type)).toEqual([ + 'RUN_STARTED', + 'CUSTOM', + 'TOOL_CALL_START', + 'TOOL_CALL_ARGS', + 'TOOL_CALL_END', + 'TOOL_CALL_RESULT', + 'RUN_FINISHED', + ]) + expect(chunks[2]).toMatchObject({ toolCallName: 'file_change' }) + }) + + it('does not duplicate START events when both started and completed fire', async () => { + const chunks = await collect([ + started, + { + type: 'item.started', + item: { + id: 'ws-1', + type: 'web_search', + query: 'tanstack ai', + }, + }, + { + type: 'item.completed', + item: { id: 'ws-1', type: 'web_search', query: 'tanstack ai' }, + }, + completedTurn, + ]) + const startEvents = chunks.filter((c) => c.type === 'TOOL_CALL_START') + expect(startEvents).toHaveLength(1) + }) + + it('strips the tanstack server prefix from bridged MCP tool calls', async () => { + const chunks = await collect([ + started, + { + type: 'item.completed', + item: { + id: 'mcp-1', + type: 'mcp_tool_call', + server: 'tanstack', + tool: 'lookup_user', + arguments: { userId: '7' }, + result: { content: [{ type: 'text', text: '{"name":"Ada"}' }] }, + status: 'completed', + }, + }, + completedTurn, + ]) + expect(chunks.find((c) => c.type === 'TOOL_CALL_START')).toMatchObject({ + toolCallName: 'lookup_user', + }) + expect(chunks.find((c) => c.type === 'TOOL_CALL_RESULT')).toMatchObject({ + content: '{"name":"Ada"}', + }) + }) + + it('namespaces foreign MCP tool calls as mcp__server__tool', async () => { + expect( + toolNameForItem({ + id: 'x', + type: 'mcp_tool_call', + server: 'github', + tool: 'create_issue', + status: 'completed', + }), + ).toBe('mcp__github__create_issue') + }) + + it('surfaces MCP tool errors as output-error results', async () => { + const chunks = await collect([ + started, + { + type: 'item.completed', + item: { + id: 'mcp-2', + type: 'mcp_tool_call', + server: 'tanstack', + tool: 'boom', + error: { 
message: 'kaboom' }, + status: 'failed', + }, + }, + completedTurn, + ]) + expect(chunks.find((c) => c.type === 'TOOL_CALL_RESULT')).toMatchObject({ + content: 'kaboom', + state: 'output-error', + }) + }) + + it('ignores item.updated events', async () => { + const chunks = await collect([ + started, + { + type: 'item.started', + item: { + id: 'todo-1', + type: 'todo_list', + items: [{ text: 'step 1', completed: false }], + }, + }, + { + type: 'item.updated', + item: { + id: 'todo-1', + type: 'todo_list', + items: [{ text: 'step 1', completed: true }], + }, + }, + { + type: 'item.completed', + item: { + id: 'todo-1', + type: 'todo_list', + items: [{ text: 'step 1', completed: true }], + }, + }, + completedTurn, + ]) + expect(chunks.filter((c) => c.type === 'TOOL_CALL_ARGS')).toHaveLength(1) + expect(chunks.filter((c) => c.type === 'TOOL_CALL_RESULT')).toHaveLength(1) + }) + + it('synthesizes interrupted results for unresolved tool calls on turn.completed', async () => { + const chunks = await collect([ + started, + { + type: 'item.started', + item: { + id: 'cmd-9', + type: 'command_execution', + command: 'sleep 100', + status: 'in_progress', + }, + }, + completedTurn, + ]) + const result = chunks.find((c) => c.type === 'TOOL_CALL_RESULT') + expect(result).toMatchObject({ + toolCallId: 'cmd-9', + content: JSON.stringify({ status: 'interrupted' }), + }) + expect(chunks.at(-1)).toMatchObject({ type: 'RUN_FINISHED' }) + }) + + it('maps turn.failed to RUN_ERROR after synthesizing results', async () => { + const chunks = await collect([ + started, + { + type: 'item.started', + item: { + id: 'cmd-8', + type: 'command_execution', + command: 'x', + status: 'in_progress', + }, + }, + { type: 'turn.failed', error: { message: 'model exploded' } }, + ]) + const types: Array = chunks.map((c) => c.type) + expect(types.indexOf('TOOL_CALL_RESULT')).toBeLessThan( + types.indexOf('RUN_ERROR'), + ) + expect(chunks.at(-1)).toMatchObject({ + type: 'RUN_ERROR', + message: 'model 
exploded', + }) + }) + + it('maps stream error events to RUN_ERROR', async () => { + const chunks = await collect([ + started, + { type: 'error', message: 'stream broke' }, + ]) + expect(chunks.at(-1)).toMatchObject({ + type: 'RUN_ERROR', + message: 'stream broke', + }) + }) + + it('synthesizes results then rethrows when the source stream throws', async () => { + async function* failing(): AsyncIterable { + yield started + yield { + type: 'item.started', + item: { + id: 'cmd-7', + type: 'command_execution', + command: 'x', + status: 'in_progress', + }, + } + throw new Error('aborted') + } + + const chunks: Array = [] + await expect(async () => { + for await (const chunk of translateThreadEvents(failing(), makeCtx())) { + chunks.push(chunk) + } + }).rejects.toThrow('aborted') + expect(chunks.at(-1)).toMatchObject({ + type: 'TOOL_CALL_RESULT', + toolCallId: 'cmd-7', + content: JSON.stringify({ status: 'interrupted' }), + }) + }) + + it('ignores non-fatal error items', async () => { + const chunks = await collect([ + started, + { + type: 'item.completed', + item: { id: 'err-1', type: 'error', message: 'transient hiccup' }, + }, + completedTurn, + ]) + expect(chunks.map((c) => c.type)).toEqual([ + 'RUN_STARTED', + 'CUSTOM', + 'RUN_FINISHED', + ]) + }) +}) diff --git a/packages/ai-codex/tsconfig.json b/packages/ai-codex/tsconfig.json new file mode 100644 index 000000000..c38689f4e --- /dev/null +++ b/packages/ai-codex/tsconfig.json @@ -0,0 +1,8 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "dist" + }, + "include": ["src", "tests"], + "exclude": ["node_modules", "dist"] +} diff --git a/packages/ai-codex/vite.config.ts b/packages/ai-codex/vite.config.ts new file mode 100644 index 000000000..11f5b20b7 --- /dev/null +++ b/packages/ai-codex/vite.config.ts @@ -0,0 +1,37 @@ +import { defineConfig, mergeConfig } from 'vitest/config' +import { tanstackViteConfig } from '@tanstack/vite-config' +import packageJson from './package.json' + +const 
config = defineConfig({ + test: { + name: packageJson.name, + dir: './', + watch: false, + + globals: true, + environment: 'node', + include: ['tests/**/*.test.ts'], + coverage: { + provider: 'v8', + reporter: ['text', 'json', 'html', 'lcov'], + exclude: [ + 'node_modules/', + 'dist/', + 'tests/', + '**/*.test.ts', + '**/*.config.ts', + '**/types.ts', + ], + include: ['src/**/*.ts'], + }, + }, +}) + +export default mergeConfig( + config, + tanstackViteConfig({ + entry: ['./src/index.ts'], + srcDir: './src', + cjs: false, + }), +) diff --git a/packages/ai-gemini-cli/README.md b/packages/ai-gemini-cli/README.md new file mode 100644 index 000000000..46876a4dd --- /dev/null +++ b/packages/ai-gemini-cli/README.md @@ -0,0 +1,18 @@ +# @tanstack/ai-gemini-cli + +Gemini CLI harness adapter for [TanStack AI](https://tanstack.com/ai) — run [Gemini CLI](https://github.com/google-gemini/gemini-cli) (via the Agent Client Protocol) as a chat backend with local tool execution, stateful coding sessions, and TanStack tool bridging. + +```typescript +import { chat } from '@tanstack/ai' +import { geminiCliText } from '@tanstack/ai-gemini-cli' + +const stream = chat({ + adapter: geminiCliText('gemini-3-pro-preview', { + cwd: '/path/to/project', + permissionMode: 'acceptEdits', + }), + messages: [{ role: 'user', content: 'Fix the failing test.' }], +}) +``` + +Server-only (Node). Requires the `gemini` CLI to be installed (`npm i -g @google/gemini-cli`) and authenticated. See the [Gemini CLI adapter docs](https://tanstack.com/ai/latest/docs/adapters/gemini-cli) for sessions, tool bridging, permissions, and limitations. 
diff --git a/packages/ai-gemini-cli/package.json b/packages/ai-gemini-cli/package.json new file mode 100644 index 000000000..fdc13adae --- /dev/null +++ b/packages/ai-gemini-cli/package.json @@ -0,0 +1,60 @@ +{ + "name": "@tanstack/ai-gemini-cli", + "version": "0.1.0", + "description": "Gemini CLI harness adapter for TanStack AI — run Gemini CLI as a chat backend with local tool execution and stateful sessions.", + "author": "", + "license": "MIT", + "repository": { + "type": "git", + "url": "git+https://github.com/TanStack/ai.git", + "directory": "packages/ai-gemini-cli" + }, + "keywords": [ + "ai", + "ai-sdk", + "typescript", + "tanstack", + "google", + "gemini", + "gemini-cli", + "harness", + "agent", + "adapter", + "chat", + "tool-calling" + ], + "type": "module", + "module": "./dist/esm/index.js", + "types": "./dist/esm/index.d.ts", + "exports": { + ".": { + "types": "./dist/esm/index.d.ts", + "import": "./dist/esm/index.js" + } + }, + "files": [ + "dist", + "src" + ], + "scripts": { + "build": "vite build", + "clean": "premove ./build ./dist", + "lint:fix": "eslint ./src --fix", + "test:build": "publint --strict", + "test:eslint": "eslint ./src", + "test:lib": "vitest", + "test:lib:dev": "pnpm test:lib --watch", + "test:types": "tsc" + }, + "dependencies": { + "@agentclientprotocol/sdk": "^0.25.0", + "@modelcontextprotocol/sdk": "^1.29.0" + }, + "peerDependencies": { + "@tanstack/ai": "workspace:^" + }, + "devDependencies": { + "@tanstack/ai": "workspace:*", + "@vitest/coverage-v8": "4.0.14" + } +} diff --git a/packages/ai-gemini-cli/src/adapters/text.ts b/packages/ai-gemini-cli/src/adapters/text.ts new file mode 100644 index 000000000..af0b645ff --- /dev/null +++ b/packages/ai-gemini-cli/src/adapters/text.ts @@ -0,0 +1,386 @@ +import { EventType, normalizeSystemPrompts } from '@tanstack/ai' +import { toRunErrorRawEvent } from '@tanstack/ai/adapter-internals' +import { BaseTextAdapter } from '@tanstack/ai/adapters' +import { buildPrompt } from 
'../messages/prompt' +import { startToolBridge } from '../tools/bridge' +import { startAcpSession } from '../process/acp-client' +import { resolvePermission } from '../process/permissions' +import { AsyncQueue } from '../stream/queue' +import { + BRIDGED_MCP_SERVER_NAME, + translateAcpStream, +} from '../stream/translate' +import type { + StructuredOutputOptions, + StructuredOutputResult, +} from '@tanstack/ai/adapters' +import type { + AnyTool, + DefaultMessageMetadataByModality, + Modality, + StreamChunk, + TextOptions, +} from '@tanstack/ai' +import type { AcpSessionHandle } from '../process/acp-client' +import type { + GeminiCliPermissionMode, + PermissionHandler, +} from '../process/permissions' +import type { AcpUsage } from '../stream/acp-types' +import type { AcpStreamEvent } from '../stream/translate' +import type { GeminiCliModel } from '../model-meta' +import type { GeminiCliTextProviderOptions } from '../provider-options' +import type { ToolBridgeHandle } from '../tools/bridge' + +export interface GeminiCliTextConfig { + /** Working directory for the harness session. Defaults to `process.cwd()`. */ + cwd?: string + /** Path to the Gemini CLI executable. Defaults to `gemini` on PATH. */ + executablePath?: string + /** Extra CLI arguments appended after `--acp`. */ + extraArgs?: Array + /** Extra environment variables merged over `process.env`. */ + env?: Record + /** + * Gemini CLI permission mode. Without an explicit mode or a custom + * `onPermissionRequest`, the adapter's default policy auto-allows bridged + * TanStack tools and rejects anything else that would normally prompt — + * set `'acceptEdits'` / `'bypassPermissions'` to let the harness edit + * files and run commands on a headless server. + */ + permissionMode?: GeminiCliPermissionMode + /** Custom permission handler; replaces the adapter's default policy. */ + onPermissionRequest?: PermissionHandler + /** + * ACP auth method to select before starting the session, e.g. 
+ * `'oauth-personal'` (Log in with Google), `'gemini-api-key'`, or + * `'vertex-ai'`. Needed when the installed CLI isn't already authenticated + * for headless use; the agent advertises the available method ids in its + * ACP initialize response. Overridable per call via + * `modelOptions.authMethodId`. + */ + authMethodId?: string +} + +function validateTools(tools: Array | undefined): void { + if (!tools || tools.length === 0) return + const unsupported = tools.filter( + (tool) => typeof tool.execute !== 'function' || tool.needsApproval === true, + ) + if (unsupported.length > 0) { + throw new Error( + `Gemini CLI harness cannot execute client-side or approval-gated tools: ${unsupported + .map((tool) => tool.name) + .join( + ', ', + )}. Provide server execute() implementations without needsApproval, or run these tools outside the harness.`, + ) + } +} + +/** Extract the first JSON object/array from possibly fenced model output. */ +function extractJson(text: string): unknown { + const trimmed = text.trim() + const unfenced = trimmed.startsWith('```') + ? 
trimmed.replace(/^```[a-zA-Z]*\n?/, '').replace(/\n?```$/, '') + : trimmed + try { + return JSON.parse(unfenced) + } catch { + const start = unfenced.search(/[{[]/) + if (start === -1) { + throw new Error( + `Gemini CLI structured output is not valid JSON: ${text.slice(0, 200)}`, + ) + } + const end = Math.max(unfenced.lastIndexOf('}'), unfenced.lastIndexOf(']')) + return JSON.parse(unfenced.slice(start, end + 1)) + } +} + +export class GeminiCliTextAdapter< + TModel extends GeminiCliModel, +> extends BaseTextAdapter< + TModel, + GeminiCliTextProviderOptions, + ReadonlyArray & readonly ['text'], + DefaultMessageMetadataByModality, + ReadonlyArray, + unknown, + never +> { + readonly name = 'gemini-cli' as const + + private readonly adapterConfig: GeminiCliTextConfig + + constructor(config: GeminiCliTextConfig, model: TModel) { + super({}, model) + this.adapterConfig = config + } + + async *chatStream( + options: TextOptions, + ): AsyncIterable { + const { logger } = options + let bridge: ToolBridgeHandle | undefined + let handle: AcpSessionHandle | undefined + const externalSignal = + options.abortController?.signal ?? options.request?.signal ?? undefined + let onAbort: (() => void) | undefined + + try { + validateTools(options.tools) + + const modelOptions = options.modelOptions + const sessionId = modelOptions?.sessionId + // Validates the trailing user message up front (throws before any + // subprocess is spawned) and prepares the resume-path prompt. + const { prompt: resumePrompt } = buildPrompt(options.messages, sessionId) + + if (options.tools && options.tools.length > 0) { + bridge = await startToolBridge(options.tools) + } + const bridgedToolNames = new Set( + (options.tools ?? []).map((tool) => tool.name), + ) + + const queue = new AsyncQueue() + const mode = + modelOptions?.permissionMode ?? + this.adapterConfig.permissionMode ?? + 'default' + const permissionHandler: PermissionHandler = + this.adapterConfig.onPermissionRequest ?? 
+ ((request) => resolvePermission(request, mode, bridgedToolNames)) + + logger.request( + `activity=chat provider=gemini-cli model=${this.model} messages=${options.messages.length} tools=${options.tools?.length ?? 0} resume=${sessionId ?? 'none'}`, + { provider: 'gemini-cli', model: this.model }, + ) + + handle = await startAcpSession({ + ...(this.adapterConfig.executablePath !== undefined && { + executablePath: this.adapterConfig.executablePath, + }), + ...(this.adapterConfig.extraArgs !== undefined && { + extraArgs: this.adapterConfig.extraArgs, + }), + ...(this.adapterConfig.env !== undefined && { + env: this.adapterConfig.env, + }), + ...((modelOptions?.authMethodId ?? this.adapterConfig.authMethodId) !== + undefined && { + authMethodId: + modelOptions?.authMethodId ?? this.adapterConfig.authMethodId, + }), + model: this.model, + cwd: modelOptions?.cwd ?? this.adapterConfig.cwd ?? process.cwd(), + ...(bridge !== undefined && { + mcpServers: [{ name: BRIDGED_MCP_SERVER_NAME, url: bridge.url }], + }), + ...(sessionId !== undefined && { resumeSessionId: sessionId }), + onUpdate: (update) => queue.push({ kind: 'update', update }), + onPermissionRequest: permissionHandler, + }) + const session = handle + + if (externalSignal !== undefined) { + onAbort = () => void session.cancel().catch(() => undefined) + if (externalSignal.aborted) onAbort() + else externalSignal.addEventListener('abort', onAbort, { once: true }) + } + + queue.push({ kind: 'session', sessionId: session.sessionId }) + + // When resume was requested but the CLI couldn't load the session, + // fall back to seeding a fresh session with the whole transcript. + const promptText = this.applySystemPrompts( + options, + session.resumed || sessionId === undefined + ? 
resumePrompt + : buildPrompt(options.messages, undefined).prompt, + ) + + session + .prompt(promptText) + .then(({ stopReason, usage }) => { + queue.push({ + kind: 'done', + stopReason, + ...(usage !== undefined && { usage }), + }) + queue.end() + }) + .catch((error: unknown) => queue.fail(error)) + + yield* translateAcpStream(queue, { + model: this.model, + runId: options.runId ?? this.generateId(), + threadId: options.threadId ?? this.generateId(), + ...(options.parentRunId !== undefined && { + parentRunId: options.parentRunId, + }), + genId: () => this.generateId(), + bridgedToolNames, + onAcpEvent: (event) => + logger.provider(`provider=gemini-cli kind=${event.kind}`, { + chunk: event, + }), + }) + } catch (error: unknown) { + const err = error as Error & { code?: string } + const rawEvent = toRunErrorRawEvent(error) + logger.errors('gemini-cli.chatStream fatal', { + error, + source: 'gemini-cli.chatStream', + }) + yield { + type: EventType.RUN_ERROR, + model: options.model, + timestamp: Date.now(), + message: err.message || 'Unknown error occurred', + ...(err.code !== undefined && { code: err.code }), + ...(rawEvent !== undefined && { rawEvent }), + error: { + message: err.message || 'Unknown error occurred', + ...(err.code !== undefined && { code: err.code }), + }, + } + } finally { + if (externalSignal !== undefined && onAbort !== undefined) { + externalSignal.removeEventListener('abort', onAbort) + } + await handle?.dispose() + await bridge?.close() + } + } + + /** + * Structured output, best-effort: ACP has no native JSON-schema channel, + * so the schema is embedded as a prompt instruction and the final text is + * parsed (stripping markdown fences when present). Runs in a fresh + * session with the default deny-everything permission policy. 
+ */ + async structuredOutput( + options: StructuredOutputOptions, + ): Promise> { + const { chatOptions, outputSchema } = options + const { logger } = chatOptions + + // Fresh one-shot run: deliberately no resume, so finalization never + // mutates the caller's interactive session. No bridge either — tools + // are a chat concern. + const { prompt } = buildPrompt(chatOptions.messages, undefined) + const instruction = `Respond with ONLY a JSON value that conforms to this JSON Schema — no prose, no markdown fences:\n${JSON.stringify(outputSchema)}` + const promptText = this.applySystemPrompts( + chatOptions, + `${prompt}\n\n${instruction}`, + ) + + logger.request( + `activity=structured-output provider=gemini-cli model=${this.model}`, + { provider: 'gemini-cli', model: this.model }, + ) + + let rawText = '' + const handle = await startAcpSession({ + ...(this.adapterConfig.executablePath !== undefined && { + executablePath: this.adapterConfig.executablePath, + }), + ...(this.adapterConfig.extraArgs !== undefined && { + extraArgs: this.adapterConfig.extraArgs, + }), + ...(this.adapterConfig.env !== undefined && { + env: this.adapterConfig.env, + }), + ...((chatOptions.modelOptions?.authMethodId ?? + this.adapterConfig.authMethodId) !== undefined && { + authMethodId: + chatOptions.modelOptions?.authMethodId ?? + this.adapterConfig.authMethodId, + }), + model: this.model, + cwd: + chatOptions.modelOptions?.cwd ?? + this.adapterConfig.cwd ?? 
+ process.cwd(), + onUpdate: (update) => { + if ( + update.sessionUpdate === 'agent_message_chunk' && + typeof update.content.text === 'string' + ) { + rawText += update.content.text + } + }, + onPermissionRequest: (request) => + resolvePermission(request, 'default', undefined), + }) + + let usage: AcpUsage | undefined + try { + const result = await handle.prompt(promptText) + usage = result.usage + if (result.stopReason === 'refusal') { + throw new Error('Gemini CLI refused the structured output request.') + } + } finally { + await handle.dispose() + } + + if (rawText.trim() === '') { + throw new Error( + 'Gemini CLI run ended without a response during structured output generation.', + ) + } + + const promptTokens = usage?.inputTokens ?? 0 + const completionTokens = usage?.outputTokens ?? 0 + return { + data: extractJson(rawText), + rawText, + usage: { + promptTokens, + completionTokens, + totalTokens: usage?.totalTokens ?? promptTokens + completionTokens, + }, + } + } + + /** + * ACP has no system-prompt channel, so `systemPrompts` from `chat()` are + * prepended to the prompt text as an instruction preamble. + */ + private applySystemPrompts( + options: TextOptions, + prompt: string, + ): string { + const systemPrompts = normalizeSystemPrompts(options.systemPrompts) + .map((systemPrompt) => systemPrompt.content) + .filter((content) => content.trim() !== '') + if (systemPrompts.length === 0) return prompt + return `${systemPrompts.join('\n\n')}\n\n${prompt}` + } +} + +/** + * Creates a Gemini CLI text adapter. + * + * Unlike HTTP provider adapters, this is a *harness* adapter: Gemini CLI + * runs its own agent loop and executes its own tools (shell commands, file + * edits, search, ...) locally, server-side. The adapter drives the CLI over + * the Agent Client Protocol (`gemini --acp`), so assistant text and thinking + * stream as true token-level deltas. 
Each `chat()` call runs one full + * harness turn; harness tool activity streams back as already-resolved + * tool-call events, and the session id is surfaced via a CUSTOM + * `gemini-cli.session-id` event so follow-up calls can resume the session + * through `modelOptions.sessionId`. + * + * Requires the `gemini` CLI to be installed and authenticated on the host + * (`npm i -g @google/gemini-cli`). + */ +export function geminiCliText( + model: TModel, + config: GeminiCliTextConfig = {}, +): GeminiCliTextAdapter { + return new GeminiCliTextAdapter(config, model) +} diff --git a/packages/ai-gemini-cli/src/index.ts b/packages/ai-gemini-cli/src/index.ts new file mode 100644 index 000000000..41add73fd --- /dev/null +++ b/packages/ai-gemini-cli/src/index.ts @@ -0,0 +1,36 @@ +export { GeminiCliTextAdapter, geminiCliText } from './adapters/text' +export type { GeminiCliTextConfig } from './adapters/text' +export type { GeminiCliTextProviderOptions } from './provider-options' +export { GEMINI_CLI_MODELS } from './model-meta' +export type { GeminiCliModel, KnownGeminiCliModel } from './model-meta' +export { + SESSION_ID_EVENT, + PLAN_EVENT, + BRIDGED_MCP_SERVER_NAME, + translateAcpStream, + matchBridgedToolName, +} from './stream/translate' +export type { AcpStreamEvent, TranslateContext } from './stream/translate' +export type { + AcpPermissionOption, + AcpPermissionOutcome, + AcpPermissionRequest, + AcpSessionUpdate, + AcpStopReason, + AcpToolCallUpdate, + AcpUsage, +} from './stream/acp-types' +export { resolvePermission } from './process/permissions' +export type { + GeminiCliPermissionMode, + PermissionHandler, +} from './process/permissions' +export { startAcpSession } from './process/acp-client' +export type { + AcpSessionHandle, + StartAcpSessionOptions, +} from './process/acp-client' +export { buildPrompt } from './messages/prompt' +export type { BuiltPrompt } from './messages/prompt' +export { startToolBridge } from './tools/bridge' +export type { ToolBridgeHandle 
} from './tools/bridge' diff --git a/packages/ai-gemini-cli/src/messages/prompt.ts b/packages/ai-gemini-cli/src/messages/prompt.ts new file mode 100644 index 000000000..ad1b069a6 --- /dev/null +++ b/packages/ai-gemini-cli/src/messages/prompt.ts @@ -0,0 +1,67 @@ +import type { ModelMessage } from '@tanstack/ai' + +export interface BuiltPrompt { + prompt: string + /** Gemini CLI session id to resume, when the caller threaded one through. */ + resume?: string +} + +function extractText(content: ModelMessage['content']): string { + if (content === null) return '' + if (typeof content === 'string') return content + return content + .map((part) => + part.type === 'text' && typeof part.content === 'string' + ? part.content + : '', + ) + .join('') +} + +/** + * Convert TanStack chat history into the harness's prompt + resume inputs. + * + * With a `sessionId`, the harness already holds the conversation context, so + * only the trailing user message is sent and the session is resumed. Without + * one, prior turns are flattened into a plain-text transcript preamble (tool + * messages and tool-call-only assistant turns are harness-internal noise and + * are skipped; prompts are text-only in v1). + */ +export function buildPrompt( + messages: Array, + sessionId: string | undefined, +): BuiltPrompt { + const lastMessage = messages.at(-1) + const lastUserText = + lastMessage?.role === 'user' ? extractText(lastMessage.content).trim() : '' + + if (!lastUserText) { + throw new Error( + 'Gemini CLI adapter requires a trailing user message with text content.', + ) + } + + if (sessionId !== undefined) { + return { prompt: lastUserText, resume: sessionId } + } + + const priorTurns = messages + .slice(0, -1) + .filter( + (message) => + (message.role === 'user' || message.role === 'assistant') && + extractText(message.content).trim() !== '', + ) + .map( + (message) => + `${message.role === 'user' ? 
'User' : 'Assistant'}: ${extractText(message.content).trim()}`, + ) + + if (priorTurns.length === 0) { + return { prompt: lastUserText } + } + + return { + prompt: `Previous conversation:\n${priorTurns.join('\n')}\n\n${lastUserText}`, + } +} diff --git a/packages/ai-gemini-cli/src/model-meta.ts b/packages/ai-gemini-cli/src/model-meta.ts new file mode 100644 index 000000000..039769910 --- /dev/null +++ b/packages/ai-gemini-cli/src/model-meta.ts @@ -0,0 +1,20 @@ +/** + * Models known to work with Gemini CLI. The harness accepts any Gemini model + * id (and the `auto` / `pro` / `flash` aliases resolved by the CLI), so this + * list exists for autocomplete — any string is accepted via the + * `(string & {})` escape hatch in {@link GeminiCliModel}. + */ +export const GEMINI_CLI_MODELS = [ + 'gemini-3-pro-preview', + 'gemini-3-flash-preview', + 'gemini-2.5-pro', + 'gemini-2.5-flash', + 'auto', + 'pro', + 'flash', +] as const + +export type KnownGeminiCliModel = (typeof GEMINI_CLI_MODELS)[number] + +/** Any model id accepted by Gemini CLI; known ids get autocomplete. 
*/ +export type GeminiCliModel = KnownGeminiCliModel | (string & {}) diff --git a/packages/ai-gemini-cli/src/process/acp-client.ts b/packages/ai-gemini-cli/src/process/acp-client.ts new file mode 100644 index 000000000..0c9aae2a0 --- /dev/null +++ b/packages/ai-gemini-cli/src/process/acp-client.ts @@ -0,0 +1,257 @@ +import { spawn } from 'node:child_process' +import { Readable, Writable } from 'node:stream' +import { + ClientSideConnection, + PROTOCOL_VERSION, + ndJsonStream, +} from '@agentclientprotocol/sdk' +import type { + Client, + McpServer, + RequestPermissionRequest, + RequestPermissionResponse, + SessionNotification, +} from '@agentclientprotocol/sdk' +import type { ChildProcess } from 'node:child_process' +import type { + AcpPermissionOutcome, + AcpPermissionRequest, + AcpSessionUpdate, + AcpStopReason, + AcpUsage, +} from '../stream/acp-types' + +/** A live ACP session backed by a `gemini --acp` subprocess. */ +export interface AcpSessionHandle { + sessionId: string + /** Whether an existing session was actually resumed via `session/load`. */ + resumed: boolean + /** Run one prompt turn; resolves with the harness's stop reason. */ + prompt: ( + text: string, + ) => Promise<{ stopReason: AcpStopReason; usage?: AcpUsage }> + /** Ask the harness to cancel the in-flight prompt turn. */ + cancel: () => Promise + /** Tear down the subprocess (SIGTERM, then SIGKILL after a grace period). */ + dispose: () => Promise +} + +export interface StartAcpSessionOptions { + /** Path to the Gemini CLI executable. Defaults to `gemini` on PATH. */ + executablePath?: string + /** Extra CLI arguments appended after `--acp`. */ + extraArgs?: Array + /** Model id passed via `-m`. */ + model?: string + /** Working directory for the session (absolute path). */ + cwd: string + /** Extra environment variables merged over `process.env`. */ + env?: Record + /** + * ACP auth method to select (via `authenticate`) before opening a session. 
+ * The agent advertises the available method ids in its `initialize` + * response (e.g. `'oauth-personal'`, `'gemini-api-key'`, `'vertex-ai'`). + * Required when the installed CLI isn't already authenticated for headless + * use — without it, `prompt` fails with an auth error. + */ + authMethodId?: string + /** MCP servers (e.g. the TanStack tool bridge) for the session. */ + mcpServers?: Array<{ name: string; url: string }> + /** Session id to resume via `session/load`, when supported by the CLI. */ + resumeSessionId?: string + onUpdate: (update: AcpSessionUpdate) => void + onPermissionRequest: ( + request: AcpPermissionRequest, + ) => Promise | AcpPermissionOutcome +} + +const KILL_GRACE_MS = 2000 + +function waitForExit(child: ChildProcess): Promise { + return new Promise((resolve) => { + if (child.exitCode !== null || child.signalCode !== null) { + resolve() + return + } + child.once('exit', () => resolve()) + }) +} + +/** + * Spawn `gemini --acp` and drive it over the Agent Client Protocol + * (JSON-RPC 2.0 on stdio). + * + * This module is the only place that touches `@agentclientprotocol/sdk`; the + * rest of the package works with the structural types in `acp-types.ts`. + * + * Resume semantics: when `resumeSessionId` is set and the CLI advertises the + * `loadSession` capability, the session is loaded by id — the CLI streams + * the prior conversation back as `session/update` notifications, which are + * deliberately swallowed (the TanStack client already has that history). + * When loading is unsupported or fails, a fresh session is created and + * `resumed: false` tells the adapter to send the flattened transcript. + */ +export async function startAcpSession( + options: StartAcpSessionOptions, +): Promise { + const args = ['--acp', ...(options.extraArgs ?? [])] + if (options.model !== undefined) { + args.push('-m', options.model) + } + + const child = spawn(options.executablePath ?? 
'gemini', args, { + cwd: options.cwd, + env: { ...process.env, ...options.env }, + stdio: ['pipe', 'pipe', 'pipe'], + }) + + let stderrTail = '' + child.stderr.on('data', (chunk: Buffer) => { + stderrTail = (stderrTail + chunk.toString('utf8')).slice(-4096) + }) + + const spawned = new Promise((resolve, reject) => { + child.once('spawn', () => resolve()) + child.once('error', (error) => reject(error)) + }) + + const exited = waitForExit(child).then(() => { + throw new Error( + `Gemini CLI exited unexpectedly (code ${child.exitCode ?? 'null'}, signal ${child.signalCode ?? 'null'}).${ + stderrTail !== '' ? `\nstderr: ${stderrTail.trim()}` : '' + }`, + ) + }) + + /** Suppressed while session/load replays prior history. */ + let replaying = false + + const client: Client = { + requestPermission: async ( + params: RequestPermissionRequest, + ): Promise => { + const outcome = await options.onPermissionRequest(params) + return { outcome } + }, + sessionUpdate: (params: SessionNotification): Promise => { + if (!replaying) { + options.onUpdate(params.update as AcpSessionUpdate) + } + return Promise.resolve() + }, + } + + const teardown = async (): Promise => { + if (child.exitCode === null && child.signalCode === null) { + child.kill('SIGTERM') + const timer = setTimeout(() => child.kill('SIGKILL'), KILL_GRACE_MS) + await waitForExit(child) + clearTimeout(timer) + } + } + + try { + await spawned + + const connection = new ClientSideConnection( + () => client, + ndJsonStream( + Writable.toWeb(child.stdin) as WritableStream, + Readable.toWeb(child.stdout) as ReadableStream, + ), + ) + + const race = (work: Promise): Promise => + Promise.race([work, exited]) + + const initResult = await race( + connection.initialize({ + protocolVersion: PROTOCOL_VERSION, + clientCapabilities: { + fs: { readTextFile: false, writeTextFile: false }, + }, + }), + ) + + // Select an auth method before opening a session. 
The agent advertises + // its supported methods in the initialize response; pick the requested + // one (failing loudly if it isn't offered) so a headless run never hangs + // on an interactive auth picker. + if (options.authMethodId !== undefined) { + const available = initResult.authMethods ?? [] + if (!available.some((method) => method.id === options.authMethodId)) { + throw new Error( + `Gemini CLI does not advertise the ACP auth method '${options.authMethodId}'. Available: ${ + available.map((method) => method.id).join(', ') || '(none)' + }.`, + ) + } + await race(connection.authenticate({ methodId: options.authMethodId })) + } + + const mcpServers: Array = (options.mcpServers ?? []).map( + (server) => ({ + type: 'http' as const, + name: server.name, + url: server.url, + headers: [], + }), + ) + + let sessionId: string | undefined + let resumed = false + if ( + options.resumeSessionId !== undefined && + initResult.agentCapabilities?.loadSession === true + ) { + // loadSession streams prior history back as session/update + // notifications; swallow them so the chat stream only carries the + // new turn. + replaying = true + try { + await race( + connection.loadSession({ + sessionId: options.resumeSessionId, + cwd: options.cwd, + mcpServers, + }), + ) + sessionId = options.resumeSessionId + resumed = true + } catch { + // Session unknown to this CLI install — fall through to a fresh one. 
+ } finally { + replaying = false + } + } + + if (sessionId === undefined) { + const session = await race( + connection.newSession({ cwd: options.cwd, mcpServers }), + ) + sessionId = session.sessionId + } + + return { + sessionId, + resumed, + prompt: async (text: string) => { + const response = await race( + connection.prompt({ + sessionId, + prompt: [{ type: 'text', text }], + }), + ) + return { + stopReason: response.stopReason, + ...(response.usage != null && { usage: response.usage }), + } + }, + cancel: () => connection.cancel({ sessionId }), + dispose: teardown, + } + } catch (error) { + await teardown() + throw error + } +} diff --git a/packages/ai-gemini-cli/src/process/permissions.ts b/packages/ai-gemini-cli/src/process/permissions.ts new file mode 100644 index 000000000..8ccb7505d --- /dev/null +++ b/packages/ai-gemini-cli/src/process/permissions.ts @@ -0,0 +1,66 @@ +import { matchBridgedToolName } from '../stream/translate' +import type { + AcpPermissionOutcome, + AcpPermissionRequest, +} from '../stream/acp-types' + +/** + * Permission modes for the Gemini CLI adapter, mirroring the Claude Code + * adapter's semantics: + * + * - `'default'`: bridged TanStack tools run; anything else that asks for + * permission is rejected with no prompt (a headless server must never + * hang on an interactive question). + * - `'acceptEdits'`: additionally auto-approves file-mutation tools + * (edit / move / delete kinds). + * - `'bypassPermissions'`: approves everything. + */ +export type GeminiCliPermissionMode = + | 'default' + | 'acceptEdits' + | 'bypassPermissions' + +/** Custom permission handler; replaces the adapter's default policy. 
/** Custom permission handler; replaces the adapter's default policy. */
export type PermissionHandler = (
  request: AcpPermissionRequest,
) => Promise<AcpPermissionOutcome> | AcpPermissionOutcome

/** Kind tag of one option offered by a permission request. */
type PermissionOptionKind = AcpPermissionRequest['options'][number]['kind']

/** Tool-call kinds that `'acceptEdits'` auto-approves as file mutations. */
const EDIT_KINDS: ReadonlySet<string> = new Set(['edit', 'move', 'delete'])

/**
 * Choose an answer from the options the harness offered.
 *
 * @param request - The permission request whose `options` are searched.
 * @param kinds - Option kinds in order of preference; the first kind that the
 *   request actually offers wins.
 * @returns A `selected` outcome for the first matching option, or `cancelled`
 *   when none of the preferred kinds is on offer.
 */
function pickOption(
  request: AcpPermissionRequest,
  kinds: ReadonlyArray<PermissionOptionKind>,
): AcpPermissionOutcome {
  for (const kind of kinds) {
    const option = request.options.find((candidate) => candidate.kind === kind)
    if (option) return { outcome: 'selected', optionId: option.optionId }
  }
  return { outcome: 'cancelled' }
}

/**
 * The adapter's default permission policy. It answers synchronously, so a
 * headless server never waits on a question only an interactive user could
 * answer.
 *
 * Approval rules, checked in this order:
 * 1. the tool-call title matches a bridged TanStack tool — approved in every
 *    mode;
 * 2. `mode` is `'bypassPermissions'` — approved;
 * 3. `mode` is `'acceptEdits'` and the tool-call kind is edit / move / delete
 *    — approved;
 * 4. anything else — rejected.
 *
 * @param request - Permission request received from the harness.
 * @param mode - Effective permission mode for this run.
 * @param bridgedToolNames - Names of the bridged TanStack tools, if any.
 * @returns The selected allow/reject option (the "once" variant is preferred
 *   over "always"), or `cancelled` when the request offers no option of the
 *   wanted polarity.
 */
export function resolvePermission(
  request: AcpPermissionRequest,
  mode: GeminiCliPermissionMode,
  bridgedToolNames: ReadonlySet<string> | undefined,
): AcpPermissionOutcome {
  const isBridgedTool =
    matchBridgedToolName(request.toolCall.title, bridgedToolNames) !== undefined
  const isAcceptedEdit =
    mode === 'acceptEdits' && EDIT_KINDS.has(request.toolCall.kind ?? '')
  const approved =
    isBridgedTool || mode === 'bypassPermissions' || isAcceptedEdit

  return approved
    ? pickOption(request, ['allow_once', 'allow_always'])
    : pickOption(request, ['reject_once', 'reject_always'])
}
The adapter emits the session id + * of every run via a CUSTOM `gemini-cli.session-id` stream event; thread + * it back here to continue that session (only the latest user message is + * sent — the harness already holds the prior context). If the installed + * CLI doesn't support session loading, the adapter falls back to a fresh + * session seeded with the flattened transcript. + */ + sessionId?: string + /** Per-call override of the configured permission mode. */ + permissionMode?: GeminiCliPermissionMode + /** Per-call override of the harness working directory. */ + cwd?: string + /** Per-call override of the configured ACP auth method id. */ + authMethodId?: string +} diff --git a/packages/ai-gemini-cli/src/stream/acp-types.ts b/packages/ai-gemini-cli/src/stream/acp-types.ts new file mode 100644 index 000000000..c59c4aa9e --- /dev/null +++ b/packages/ai-gemini-cli/src/stream/acp-types.ts @@ -0,0 +1,82 @@ +/** + * Structural subset of the Agent Client Protocol (ACP) types that the + * adapter consumes. + * + * These are intentionally defined structurally (rather than imported from + * `@agentclientprotocol/sdk`) so the stream translator stays a pure, + * fixture-testable state machine and the package's public types don't depend + * on the ACP SDK's generated schema types. Unknown update types fall through + * every branch at runtime. + */ + +export type AcpContentBlock = + | { type: 'text'; text: string } + | { type: string; [key: string]: unknown } + +export type AcpToolCallStatus = + | 'pending' + | 'in_progress' + | 'completed' + | 'failed' + +export interface AcpToolCallUpdate { + toolCallId: string + title?: string | null + kind?: string | null + status?: AcpToolCallStatus | null + rawInput?: unknown + rawOutput?: unknown + content?: Array<{ + type: string + content?: AcpContentBlock + [key: string]: unknown + }> | null +} + +/** + * The session-update variants the translator consumes. 
The harness can send + * other update types (`available_commands_update`, `current_mode_update`, + * ...); they fall through every branch and are ignored. + */ +export type AcpSessionUpdate = + | { sessionUpdate: 'agent_message_chunk'; content: AcpContentBlock } + | { sessionUpdate: 'agent_thought_chunk'; content: AcpContentBlock } + | ({ sessionUpdate: 'tool_call' } & AcpToolCallUpdate) + | ({ sessionUpdate: 'tool_call_update' } & AcpToolCallUpdate) + | { sessionUpdate: 'plan'; entries: Array } + | { sessionUpdate: 'available_commands_update' } + | { sessionUpdate: 'current_mode_update' } + | { sessionUpdate: 'user_message_chunk'; content: AcpContentBlock } + +export type AcpStopReason = + | 'end_turn' + | 'max_tokens' + | 'max_turn_requests' + | 'refusal' + | 'cancelled' + | (string & {}) + +/** Experimental per-turn token usage reported by the ACP prompt response. */ +export interface AcpUsage { + inputTokens?: number | null + outputTokens?: number | null + totalTokens?: number | null + cachedReadTokens?: number | null + thoughtTokens?: number | null +} + +export interface AcpPermissionOption { + optionId: string + name: string + kind: 'allow_once' | 'allow_always' | 'reject_once' | 'reject_always' +} + +export interface AcpPermissionRequest { + sessionId: string + toolCall: AcpToolCallUpdate + options: Array +} + +export type AcpPermissionOutcome = + | { outcome: 'cancelled' } + | { outcome: 'selected'; optionId: string } diff --git a/packages/ai-gemini-cli/src/stream/queue.ts b/packages/ai-gemini-cli/src/stream/queue.ts new file mode 100644 index 000000000..0f095feb1 --- /dev/null +++ b/packages/ai-gemini-cli/src/stream/queue.ts @@ -0,0 +1,64 @@ +/** + * Minimal promise-based async queue bridging the ACP connection's + * callback-style `session/update` notifications into the async-iterable + * world the stream translator consumes. 
/**
 * Minimal promise-based async queue bridging the ACP connection's
 * callback-style `session/update` notifications into the async-iterable
 * world the stream translator consumes.
 *
 * Producers call `push()` any number of times and then either `end()` or
 * `fail()`. Consumers read through the async-iterator protocol. Values pushed
 * before a reader asks for them are buffered; readers that ask before a value
 * exists are parked until one arrives. Once the queue is ended or failed,
 * every later `push()`, `end()` and `fail()` is ignored.
 */
export class AsyncQueue<T> implements AsyncIterable<T> {
  /** Values pushed while no reader was waiting, oldest first. */
  private readonly values: Array<T> = []
  /** Readers parked on an empty queue, oldest first. */
  private readonly waiters: Array<{
    resolve: (result: IteratorResult<T>) => void
    reject: (error: unknown) => void
  }> = []
  private ended = false
  private error: unknown = undefined
  private failed = false

  /** Deliver `value` to the oldest parked reader, or buffer it if none waits. */
  push(value: T): void {
    if (this.ended || this.failed) return
    const waiter = this.waiters.shift()
    if (waiter) {
      waiter.resolve({ value, done: false })
    } else {
      this.values.push(value)
    }
  }

  /** Signal normal completion; pending and future reads resolve as done. */
  end(): void {
    if (this.ended || this.failed) return
    this.ended = true
    // splice(0) empties the list before resolving so each waiter settles once.
    for (const waiter of this.waiters.splice(0)) {
      waiter.resolve({ value: undefined, done: true })
    }
  }

  /** Signal failure; pending and future reads reject (after buffered values drain). */
  fail(error: unknown): void {
    if (this.ended || this.failed) return
    this.failed = true
    this.error = error
    for (const waiter of this.waiters.splice(0)) {
      waiter.reject(error)
    }
  }

  /**
   * Create a reader. All readers share the queue's buffer, so each value is
   * delivered to exactly one `next()` call.
   */
  [Symbol.asyncIterator](): AsyncIterator<T> {
    return {
      next: (): Promise<IteratorResult<T>> => {
        // Buffered values always drain first, even after end() / fail().
        if (this.values.length > 0) {
          return Promise.resolve({
            value: this.values.shift() as T,
            done: false,
          })
        }
        if (this.failed) return Promise.reject(this.error)
        if (this.ended) {
          return Promise.resolve({ value: undefined, done: true })
        }
        return new Promise<IteratorResult<T>>((resolve, reject) => {
          this.waiters.push({ resolve, reject })
        })
      },
    }
  }
}
/** Name of the CUSTOM event carrying the Gemini CLI session id. */
export const SESSION_ID_EVENT = 'gemini-cli.session-id'

/** Name of the CUSTOM event carrying the harness's plan updates. */
export const PLAN_EVENT = 'gemini-cli.plan'

/** Server name used for bridged TanStack tools. */
export const BRIDGED_MCP_SERVER_NAME = 'tanstack'

/**
 * Events fed to the translator: the session id once established, every ACP
 * `session/update` notification, and a terminal `done` carrying the prompt
 * response's stop reason (the adapter's async queue produces these).
 */
export type AcpStreamEvent =
  | { kind: 'session'; sessionId: string }
  | { kind: 'update'; update: AcpSessionUpdate }
  | { kind: 'done'; stopReason: AcpStopReason; usage?: AcpUsage }

/** Per-run values the translator stamps onto the chunks it emits. */
export interface TranslateContext {
  model: string
  runId: string
  threadId: string
  parentRunId?: string
  /** Produces the message ids used for text, reasoning and tool results. */
  genId: () => string
  /**
   * Names of bridged TanStack tools, used to surface the harness's MCP tool
   * calls under the names the application registered.
   */
  bridgedToolNames?: ReadonlySet<string>
  /** Called for each raw ACP stream event, for logging. */
  onAcpEvent?: (event: AcpStreamEvent) => void
}

/**
 * Match an ACP tool-call title against the bridged TanStack tool names.
 * Gemini CLI labels MCP tools with the tool name, optionally suffixed with
 * the server it came from (e.g. `lookup_user (tanstack MCP Server)`).
 *
 * @param title - Title reported on the ACP tool call; may be absent.
 * @param bridgedToolNames - Registered bridged tool names, if any.
 * @returns The registered name when `title` equals it or starts with
 *   `"<name> ("`; otherwise `undefined` (also for an empty or missing title,
 *   or when no names are registered).
 */
export function matchBridgedToolName(
  title: string | null | undefined,
  bridgedToolNames: ReadonlySet<string> | undefined,
): string | undefined {
  if (!title || !bridgedToolNames) return undefined
  if (bridgedToolNames.has(title)) return title
  for (const name of bridgedToolNames) {
    // The trailing " (" stops `lookup_user` from matching `lookup_user_v2 (…)`.
    if (title.startsWith(`${name} (`)) return name
  }
  return undefined
}

/**
 * Name under which a harness tool call is surfaced: the bridged TanStack
 * tool name when the title matches one, else the ACP tool kind, else the
 * literal `'tool'`.
 */
function resolveToolName(
  update: AcpToolCallUpdate,
  bridgedToolNames: ReadonlySet<string> | undefined,
): string {
  return (
    matchBridgedToolName(update.title, bridgedToolNames) ??
    update.kind ??
    'tool'
  )
}
/**
 * Render a finished tool call's output as the string carried by
 * TOOL_CALL_RESULT.
 *
 * Sources, in order of preference:
 * 1. `rawOutput` — returned verbatim when it is a string, JSON-encoded
 *    otherwise;
 * 2. the concatenated text of the update's content blocks, when non-empty;
 * 3. a `{"status": ...}` marker built from the update's status (defaulting to
 *    `completed`).
 *
 * A `null` `rawOutput` is treated as absent, the same way `rawInput` is
 * handled when the call is opened, so it no longer masks the content text
 * with the literal string `"null"`. A value `JSON.stringify` cannot encode
 * (it returns `undefined`) also falls through to the next source.
 */
function stringifyToolOutput(update: AcpToolCallUpdate): string {
  if (update.rawOutput !== undefined && update.rawOutput !== null) {
    if (typeof update.rawOutput === 'string') return update.rawOutput
    // JSON.stringify is typed as returning string, but yields undefined for
    // functions and symbols.
    const encoded: string | undefined = JSON.stringify(update.rawOutput)
    if (encoded !== undefined) return encoded
  }
  const text = (update.content ?? [])
    .map((block) =>
      block.content && typeof block.content.text === 'string'
        ? block.content.text
        : '',
    )
    .join('')
  if (text !== '') return text
  return JSON.stringify({ status: update.status ?? 'completed' })
}

/**
 * Convert the ACP prompt response's usage block into the TanStack
 * `TokenUsage` shape.
 *
 * Missing input/output counts are read as 0, and the total defaults to their
 * sum. Cached-read and thought token counts are attached as details only when
 * they are non-zero.
 *
 * @returns `undefined` when the harness reported no usage at all.
 */
function buildUsage(usage: AcpUsage | undefined): TokenUsage | undefined {
  if (!usage) return undefined
  const promptTokens = usage.inputTokens ?? 0
  const completionTokens = usage.outputTokens ?? 0
  const result = buildBaseUsage({
    promptTokens,
    completionTokens,
    totalTokens: usage.totalTokens ?? promptTokens + completionTokens,
  })
  if (usage.cachedReadTokens) {
    result.promptTokensDetails = { cachedTokens: usage.cachedReadTokens }
  }
  if (usage.thoughtTokens) {
    result.completionTokensDetails = { reasoningTokens: usage.thoughtTokens }
  }
  return result
}
/**
 * Translate a Gemini CLI ACP event stream into AG-UI StreamChunk events.
 *
 * The harness runs its own agent loop and executes its own tools, so the
 * translation always ends with `finishReason: 'stop'` (or `'length'` /
 * RUN_ERROR) — never `'tool_calls'`. Harness tool activity is emitted as
 * already-resolved TOOL_CALL_START/ARGS/END + TOOL_CALL_RESULT sequences so
 * UIs can render it, while the TanStack engine never tries to execute them.
 *
 * Invariants kept by this function:
 * - RUN_STARTED is emitted exactly once, before any other chunk.
 * - Text and reasoning messages never overlap each other, and both are closed
 *   before a tool call is opened — including a call first seen through a
 *   terminal `tool_call_update` for an id that had no `tool_call`.
 * - Every TOOL_CALL_START is paired with exactly one TOOL_CALL_RESULT. The
 *   result is synthesized as `{"status":"interrupted"}` when the run ends or
 *   the event source throws before the harness reported one, and repeated
 *   terminal updates for the same call are ignored.
 *
 * @param events - Session, update and done events, in arrival order.
 * @param ctx - Run identifiers, id generator and optional bridged-tool names.
 * @throws Re-throws any error raised by `events`, after closing open messages
 *   and pairing started tool calls with a synthetic result.
 */
export async function* translateAcpStream(
  events: AsyncIterable<AcpStreamEvent>,
  ctx: TranslateContext,
): AsyncIterable<StreamChunk> {
  const { model, runId, threadId, genId } = ctx
  const now = () => Date.now()

  let runStarted = false
  /** Ids of tool calls that were opened but have no result yet. */
  const unresolvedToolCalls = new Set<string>()
  /** Ids of every tool call already opened, so each is announced only once. */
  const knownToolCalls = new Set<string>()
  /** Ids that already received a TOOL_CALL_RESULT (real or synthesized). */
  const resolvedToolCalls = new Set<string>()

  let textMessageId: string | null = null
  let textContent = ''
  let reasoningId: string | null = null

  /** Emit RUN_STARTED the first time anything is about to be emitted. */
  function* startRun(): Generator<StreamChunk> {
    if (runStarted) return
    runStarted = true
    yield {
      type: EventType.RUN_STARTED,
      runId,
      threadId,
      model,
      timestamp: now(),
      ...(ctx.parentRunId !== undefined && { parentRunId: ctx.parentRunId }),
    }
  }

  /** Close the open assistant text message, if any, and reset its buffer. */
  function* closeText(): Generator<StreamChunk> {
    if (textMessageId !== null) {
      yield {
        type: EventType.TEXT_MESSAGE_END,
        messageId: textMessageId,
        model,
        timestamp: now(),
      }
    }
    textMessageId = null
    textContent = ''
  }

  /** Close the open reasoning message and its enclosing reasoning block. */
  function* closeReasoning(): Generator<StreamChunk> {
    if (reasoningId !== null) {
      yield {
        type: EventType.REASONING_MESSAGE_END,
        messageId: reasoningId,
        model,
        timestamp: now(),
      }
      yield {
        type: EventType.REASONING_END,
        messageId: reasoningId,
        model,
        timestamp: now(),
      }
    }
    reasoningId = null
  }

  /** Pair every still-open tool call with an `interrupted` result. */
  function* synthesizeUnresolvedResults(): Generator<StreamChunk> {
    for (const toolCallId of unresolvedToolCalls) {
      resolvedToolCalls.add(toolCallId)
      yield {
        type: EventType.TOOL_CALL_RESULT,
        toolCallId,
        messageId: genId(),
        model,
        timestamp: now(),
        content: JSON.stringify({ status: 'interrupted' }),
      }
    }
    unresolvedToolCalls.clear()
  }

  /** Announce a tool call (START / ARGS / END) the first time its id is seen. */
  function* openToolCall(update: AcpToolCallUpdate): Generator<StreamChunk> {
    if (knownToolCalls.has(update.toolCallId)) return
    knownToolCalls.add(update.toolCallId)
    const toolCallName = resolveToolName(update, ctx.bridgedToolNames)
    // Object inputs are spread next to the title; scalar inputs are wrapped
    // under `input` so the arguments are always a JSON object.
    const input = {
      ...(update.title != null && { title: update.title }),
      ...(update.rawInput !== undefined && update.rawInput !== null
        ? typeof update.rawInput === 'object'
          ? (update.rawInput as Record<string, unknown>)
          : { input: update.rawInput }
        : {}),
    }
    const args = JSON.stringify(input)
    yield {
      type: EventType.TOOL_CALL_START,
      toolCallId: update.toolCallId,
      toolCallName,
      toolName: toolCallName,
      model,
      timestamp: now(),
    }
    yield {
      type: EventType.TOOL_CALL_ARGS,
      toolCallId: update.toolCallId,
      model,
      timestamp: now(),
      delta: args,
      args,
    }
    yield {
      type: EventType.TOOL_CALL_END,
      toolCallId: update.toolCallId,
      toolCallName,
      toolName: toolCallName,
      model,
      timestamp: now(),
      input,
    }
    unresolvedToolCalls.add(update.toolCallId)
  }

  /** Emit the result of a terminal tool update, opening the call if needed. */
  function* resolveToolCall(update: AcpToolCallUpdate): Generator<StreamChunk> {
    // A call gets one result only; ignore repeated terminal updates.
    if (resolvedToolCalls.has(update.toolCallId)) return
    if (!knownToolCalls.has(update.toolCallId)) {
      // Synthetic open for an id that never had a `tool_call`: close open
      // messages first, exactly as the `tool_call` branch does.
      yield* closeText()
      yield* closeReasoning()
    }
    yield* openToolCall(update)
    unresolvedToolCalls.delete(update.toolCallId)
    resolvedToolCalls.add(update.toolCallId)
    yield {
      type: EventType.TOOL_CALL_RESULT,
      toolCallId: update.toolCallId,
      messageId: genId(),
      model,
      timestamp: now(),
      content: stringifyToolOutput(update),
      ...(update.status === 'failed' && { state: 'output-error' as const }),
    }
  }

  /** Translate one ACP `session/update` notification. */
  function* handleUpdate(update: AcpSessionUpdate): Generator<StreamChunk> {
    if (update.sessionUpdate === 'agent_message_chunk') {
      yield* closeReasoning()
      const text =
        typeof update.content.text === 'string' ? update.content.text : ''
      if (text === '') return
      if (textMessageId === null) {
        textMessageId = genId()
        yield {
          type: EventType.TEXT_MESSAGE_START,
          messageId: textMessageId,
          model,
          timestamp: now(),
          role: 'assistant',
        }
      }
      textContent += text
      yield {
        type: EventType.TEXT_MESSAGE_CONTENT,
        messageId: textMessageId,
        model,
        timestamp: now(),
        delta: text,
        content: textContent,
      }
    } else if (update.sessionUpdate === 'agent_thought_chunk') {
      yield* closeText()
      const thought =
        typeof update.content.text === 'string' ? update.content.text : ''
      if (thought === '') return
      if (reasoningId === null) {
        reasoningId = genId()
        yield {
          type: EventType.REASONING_START,
          messageId: reasoningId,
          model,
          timestamp: now(),
        }
        yield {
          type: EventType.REASONING_MESSAGE_START,
          messageId: reasoningId,
          role: 'reasoning' as const,
          model,
          timestamp: now(),
        }
      }
      yield {
        type: EventType.REASONING_MESSAGE_CONTENT,
        messageId: reasoningId,
        delta: thought,
        model,
        timestamp: now(),
      }
    } else if (update.sessionUpdate === 'tool_call') {
      yield* closeText()
      yield* closeReasoning()
      yield* openToolCall(update)
      if (update.status === 'completed' || update.status === 'failed') {
        yield* resolveToolCall(update)
      }
    } else if (update.sessionUpdate === 'tool_call_update') {
      if (update.status === 'completed' || update.status === 'failed') {
        yield* resolveToolCall(update)
      }
      // pending / in_progress updates carry no state the chunk stream needs.
    } else if (update.sessionUpdate === 'plan') {
      yield {
        type: EventType.CUSTOM,
        model,
        timestamp: now(),
        name: PLAN_EVENT,
        value: { entries: update.entries },
      }
    }
    // Other update types (available_commands_update, current_mode_update,
    // user_message_chunk replays, ...) are harness-internal and ignored.
  }

  try {
    for await (const event of events) {
      ctx.onAcpEvent?.(event)

      if (event.kind === 'session') {
        yield* startRun()
        yield {
          type: EventType.CUSTOM,
          model,
          timestamp: now(),
          name: SESSION_ID_EVENT,
          value: { sessionId: event.sessionId },
        }
      } else if (event.kind === 'update') {
        yield* startRun()
        yield* handleUpdate(event.update)
      } else {
        // Terminal `done`: flush everything that is still open, then finish.
        yield* startRun()
        yield* closeText()
        yield* closeReasoning()
        yield* synthesizeUnresolvedResults()

        if (event.stopReason === 'refusal') {
          yield {
            type: EventType.RUN_ERROR,
            model,
            timestamp: now(),
            message: 'Gemini CLI refused the request.',
            code: 'refusal',
            error: {
              message: 'Gemini CLI refused the request.',
              code: 'refusal',
            },
          }
        } else {
          const usage = buildUsage(event.usage)
          const finishReason =
            event.stopReason === 'max_tokens' ||
            event.stopReason === 'max_turn_requests'
              ? ('length' as const)
              : ('stop' as const)
          yield {
            type: EventType.RUN_FINISHED,
            runId,
            threadId,
            model,
            timestamp: now(),
            finishReason,
            ...(usage !== undefined && { usage }),
          }
        }
      }
    }
  } catch (error) {
    // The run is dying (abort, process exit, or connection failure). Close
    // any open message and pair started tool calls with a synthetic result
    // first so the next request's pending-tool-call scan doesn't try to
    // execute them, then let the adapter surface the error as RUN_ERROR.
    yield* closeText()
    yield* closeReasoning()
    yield* synthesizeUnresolvedResults()
    throw error
  }
}
/**
 * Build an MCP server instance that lists and executes the given TanStack
 * tools through the SDK's low-level request handlers.
 *
 * Tool schemas are passed through as-is. A tool without a schema is listed
 * with an empty object schema.
 *
 * @param tools - Tools to expose; looked up by `name` on each call.
 */
function createMcpServer(tools: Array<AnyTool>): McpServer {
  const instance = new McpServer(
    { name: BRIDGED_MCP_SERVER_NAME, version: '1.0.0' },
    { capabilities: { tools: {} } },
  )

  const toolsByName = new Map<string, AnyTool>(
    tools.map((tool): [string, AnyTool] => [tool.name, tool]),
  )

  instance.server.setRequestHandler(ListToolsRequestSchema, () => ({
    tools: tools.map((tool) => ({
      name: tool.name,
      description: tool.description,
      inputSchema: (tool.inputSchema ?? {
        type: 'object',
        properties: {},
      }) as { type: 'object'; [key: string]: unknown },
    })),
  }))

  instance.server.setRequestHandler(CallToolRequestSchema, async (request) => {
    const tool = toolsByName.get(request.params.name)
    if (!tool?.execute) {
      throw new Error(`Unknown tool: ${request.params.name}`)
    }
    try {
      const result: unknown = await tool.execute(request.params.arguments ?? {})
      // JSON.stringify returns undefined for a tool that resolves to
      // undefined, and an MCP text block must carry a string; send "null".
      const serialized: string | undefined =
        typeof result === 'string' ? result : JSON.stringify(result)
      return { content: [{ type: 'text', text: serialized ?? 'null' }] }
    } catch (error) {
      // Tool failures are reported to the harness as an error result.
      const message = error instanceof Error ? error.message : String(error)
      return {
        isError: true,
        content: [{ type: 'text', text: `Tool execution failed: ${message}` }],
      }
    }
  })

  return instance
}

/**
 * Expose TanStack tools to the Gemini CLI harness as a Streamable-HTTP MCP
 * server on an ephemeral localhost port.
 *
 * The bridge listens on `127.0.0.1`. Only POST requests with a JSON body are
 * served (anything else gets 405, unparseable JSON gets 400). Each request is
 * handled statelessly with a fresh `McpServer` + transport pair, which are
 * closed again when the response closes. An unexpected failure while handling
 * a request ends the response with 500.
 *
 * The caller owns the lifecycle: `close()` must run when the chat stream
 * ends so the port is never leaked. `close()` is idempotent — repeated calls
 * return the same promise instead of rejecting because the server is already
 * stopped.
 *
 * @param tools - Tools to expose through the bridge.
 * @returns The bridge's MCP endpoint URL and a `close()` function.
 */
export async function startToolBridge(
  tools: Array<AnyTool>,
): Promise<ToolBridgeHandle> {
  const httpServer = createServer((req, res) => {
    void (async () => {
      if (req.method !== 'POST') {
        res.writeHead(405).end()
        return
      }
      const chunks: Array<Buffer> = []
      for await (const chunk of req) {
        chunks.push(chunk as Buffer)
      }
      let parsedBody: unknown
      try {
        parsedBody = JSON.parse(Buffer.concat(chunks).toString('utf8'))
      } catch {
        res.writeHead(400).end()
        return
      }
      const mcpServer = createMcpServer(tools)
      const transport = new StreamableHTTPServerTransport({
        sessionIdGenerator: undefined,
      })
      res.on('close', () => {
        void transport.close()
        void mcpServer.close()
      })
      await mcpServer.connect(transport)
      await transport.handleRequest(req, res, parsedBody)
    })().catch(() => {
      if (!res.headersSent) res.writeHead(500)
      res.end()
    })
  })

  await new Promise<void>((resolve, reject) => {
    httpServer.once('error', reject)
    httpServer.listen(0, '127.0.0.1', resolve)
  })

  const { port } = httpServer.address() as AddressInfo

  // Remembered so a second close() does not call httpServer.close() again,
  // which would reject with ERR_SERVER_NOT_RUNNING.
  let closing: Promise<void> | undefined

  return {
    url: `http://127.0.0.1:${port}/mcp`,
    close: () => {
      if (closing === undefined) {
        closing = new Promise<void>((resolve, reject) => {
          // Stop accepting new connections, then drop the open ones so the
          // close callback is not held up by keep-alive sockets.
          httpServer.close((error) => (error ? reject(error) : resolve()))
          httpServer.closeAllConnections()
        })
      }
      return closing
    },
  }
}
startToolBridge([ + makeTool({ + execute: async (args: unknown) => ({ + echoed: (args as { value: string }).value, + }), + } as Partial), + ]) + try { + const client = await connectClient(bridge.url) + const result = await client.callTool({ + name: 'echo', + arguments: { value: 'hi' }, + }) + expect(result.content).toEqual([ + { type: 'text', text: JSON.stringify({ echoed: 'hi' }) }, + ]) + await client.close() + } finally { + await bridge.close() + } + }) + + it('returns isError content when the tool throws', async () => { + const bridge = await startToolBridge([ + makeTool({ + execute: async () => { + throw new Error('tool blew up') + }, + } as Partial), + ]) + try { + const client = await connectClient(bridge.url) + const result = await client.callTool({ + name: 'echo', + arguments: {}, + }) + expect(result.isError).toBe(true) + expect(result.content).toEqual([ + { type: 'text', text: 'Tool execution failed: tool blew up' }, + ]) + await client.close() + } finally { + await bridge.close() + } + }) + + it('refuses connections after close()', async () => { + const bridge = await startToolBridge([makeTool()]) + await bridge.close() + await expect(connectClient(bridge.url)).rejects.toThrow() + }) +}) diff --git a/packages/ai-gemini-cli/tests/permissions.test.ts b/packages/ai-gemini-cli/tests/permissions.test.ts new file mode 100644 index 000000000..fd8dfd062 --- /dev/null +++ b/packages/ai-gemini-cli/tests/permissions.test.ts @@ -0,0 +1,103 @@ +import { describe, expect, it } from 'vitest' +import { resolvePermission } from '../src/process/permissions' +import type { AcpPermissionRequest } from '../src/stream/acp-types' + +function makeRequest( + overrides: Partial = {}, +): AcpPermissionRequest { + return { + sessionId: 'sess-1', + toolCall: { + toolCallId: 'tc-1', + title: 'Run shell command', + kind: 'execute', + ...overrides, + }, + options: [ + { optionId: 'allow-once', name: 'Allow once', kind: 'allow_once' }, + { optionId: 'allow-always', name: 'Always 
allow', kind: 'allow_always' }, + { optionId: 'reject-once', name: 'Reject', kind: 'reject_once' }, + ], + } +} + +describe('resolvePermission', () => { + it('rejects harness tools in default mode', () => { + expect(resolvePermission(makeRequest(), 'default', undefined)).toEqual({ + outcome: 'selected', + optionId: 'reject-once', + }) + }) + + it('allows bridged TanStack tools in every mode', () => { + const request = makeRequest({ + title: 'lookup_user (tanstack MCP Server)', + kind: 'other', + }) + const bridged = new Set(['lookup_user']) + for (const mode of [ + 'default', + 'acceptEdits', + 'bypassPermissions', + ] as const) { + expect(resolvePermission(request, mode, bridged)).toEqual({ + outcome: 'selected', + optionId: 'allow-once', + }) + } + }) + + it('allows edit-kind tools only in acceptEdits and bypassPermissions', () => { + const edit = makeRequest({ title: 'Edit file', kind: 'edit' }) + expect(resolvePermission(edit, 'default', undefined)).toEqual({ + outcome: 'selected', + optionId: 'reject-once', + }) + expect(resolvePermission(edit, 'acceptEdits', undefined)).toEqual({ + outcome: 'selected', + optionId: 'allow-once', + }) + expect(resolvePermission(edit, 'bypassPermissions', undefined)).toEqual({ + outcome: 'selected', + optionId: 'allow-once', + }) + }) + + it('treats move and delete as edits', () => { + for (const kind of ['move', 'delete']) { + expect( + resolvePermission(makeRequest({ kind }), 'acceptEdits', undefined), + ).toEqual({ outcome: 'selected', optionId: 'allow-once' }) + } + }) + + it('does not auto-approve execute tools in acceptEdits mode', () => { + expect( + resolvePermission( + makeRequest({ kind: 'execute' }), + 'acceptEdits', + undefined, + ), + ).toEqual({ outcome: 'selected', optionId: 'reject-once' }) + }) + + it('allows everything in bypassPermissions mode', () => { + expect( + resolvePermission(makeRequest(), 'bypassPermissions', undefined), + ).toEqual({ outcome: 'selected', optionId: 'allow-once' }) + }) + + it('falls 
back through option kinds and cancels when nothing matches', () => { + const request: AcpPermissionRequest = { + ...makeRequest(), + options: [{ optionId: 'always', name: 'Always', kind: 'allow_always' }], + } + expect(resolvePermission(request, 'bypassPermissions', undefined)).toEqual({ + outcome: 'selected', + optionId: 'always', + }) + expect(resolvePermission(request, 'default', undefined)).toEqual({ + outcome: 'cancelled', + }) + }) +}) diff --git a/packages/ai-gemini-cli/tests/prompt.test.ts b/packages/ai-gemini-cli/tests/prompt.test.ts new file mode 100644 index 000000000..6e8dfcdf3 --- /dev/null +++ b/packages/ai-gemini-cli/tests/prompt.test.ts @@ -0,0 +1,97 @@ +import { describe, expect, it } from 'vitest' +import { buildPrompt } from '../src/messages/prompt' +import type { ModelMessage } from '@tanstack/ai' + +const user = (content: ModelMessage['content']): ModelMessage => ({ + role: 'user', + content, +}) +const assistant = (content: ModelMessage['content']): ModelMessage => ({ + role: 'assistant', + content, +}) + +describe('buildPrompt', () => { + it('resumes with only the last user message when sessionId is provided', () => { + const result = buildPrompt( + [ + user('first question'), + assistant('first answer'), + user('follow-up question'), + ], + 'sess-1', + ) + expect(result).toEqual({ + prompt: 'follow-up question', + resume: 'sess-1', + }) + }) + + it('throws when sessionId is provided but there is no trailing user message', () => { + expect(() => buildPrompt([user('q'), assistant('a')], 'sess-1')).toThrow( + /user message/i, + ) + }) + + it('sends a single user message as-is for a fresh session', () => { + expect(buildPrompt([user('hello')], undefined)).toEqual({ + prompt: 'hello', + }) + }) + + it('flattens prior turns into a transcript preamble for fresh multi-turn history', () => { + const { prompt, resume } = buildPrompt( + [user('What is 2+2?'), assistant('4'), user('And times 3?')], + undefined, + ) + expect(resume).toBeUndefined() + 
expect(prompt).toBe( + 'Previous conversation:\nUser: What is 2+2?\nAssistant: 4\n\nAnd times 3?', + ) + }) + + it('skips tool messages and assistant tool-call-only turns when flattening', () => { + const messages: Array = [ + user('list files'), + { + role: 'assistant', + content: null, + toolCalls: [ + { + id: 't1', + type: 'function', + function: { name: 'ls', arguments: '{}' }, + }, + ], + } as unknown as ModelMessage, + { role: 'tool', content: 'file-a', toolCallId: 't1' }, + assistant('There is one file.'), + user('thanks, which one?'), + ] + const { prompt } = buildPrompt(messages, undefined) + expect(prompt).toBe( + 'Previous conversation:\nUser: list files\nAssistant: There is one file.\n\nthanks, which one?', + ) + }) + + it('extracts text from content-part arrays and ignores non-text parts', () => { + const { prompt } = buildPrompt( + [ + user([ + { type: 'text', content: 'describe ' }, + { + type: 'image', + source: { type: 'url', url: 'https://x/y.png' }, + } as never, + { type: 'text', content: 'this' }, + ] as ModelMessage['content']), + ], + undefined, + ) + expect(prompt).toBe('describe this') + }) + + it('throws when there is no usable user content at all', () => { + expect(() => buildPrompt([], undefined)).toThrow(/user message/i) + }) +}) diff --git a/packages/ai-gemini-cli/tests/text-adapter.test.ts b/packages/ai-gemini-cli/tests/text-adapter.test.ts new file mode 100644 index 000000000..137f4426b --- /dev/null +++ b/packages/ai-gemini-cli/tests/text-adapter.test.ts @@ -0,0 +1,562 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest' +import { startAcpSession } from '../src/process/acp-client' +import { startToolBridge } from '../src/tools/bridge' +import { geminiCliText } from '../src/adapters/text' +import type { StartAcpSessionOptions } from '../src/process/acp-client' +import type { AcpStopReason, AcpUsage } from '../src/stream/acp-types' +import type { InternalLogger } from '@tanstack/ai/adapter-internals' +import type { 
StreamChunk, TextOptions } from '@tanstack/ai' + +vi.mock('../src/process/acp-client', () => ({ + startAcpSession: vi.fn(), +})) +vi.mock('../src/tools/bridge', () => ({ + startToolBridge: vi.fn(), +})) + +const startAcpSessionMock = vi.mocked(startAcpSession) +const startToolBridgeMock = vi.mocked(startToolBridge) + +const cancelMock = vi.fn() +const disposeMock = vi.fn() +const bridgeCloseMock = vi.fn() + +interface ScriptedTurn { + updates?: Array[0]> + stopReason?: AcpStopReason + usage?: AcpUsage + resumed?: boolean + sessionId?: string + promptError?: Error +} + +/** Captured options from the most recent startAcpSession call. */ +let capturedOptions: StartAcpSessionOptions | undefined + +function scriptSession(turn: ScriptedTurn = {}) { + startAcpSessionMock.mockImplementation((options) => { + capturedOptions = options + return Promise.resolve({ + sessionId: turn.sessionId ?? 'sess-1', + resumed: turn.resumed ?? false, + prompt: (text: string) => { + void text + if (turn.promptError) return Promise.reject(turn.promptError) + for (const update of turn.updates ?? []) { + options.onUpdate(update) + } + return Promise.resolve({ + stopReason: turn.stopReason ?? 
'end_turn', + ...(turn.usage !== undefined && { usage: turn.usage }), + }) + }, + cancel: cancelMock, + dispose: disposeMock, + }) + }) +} + +const noopLogger = { + request: vi.fn(), + provider: vi.fn(), + output: vi.fn(), + errors: vi.fn(), + middleware: vi.fn(), + tools: vi.fn(), + agentLoop: vi.fn(), + config: vi.fn(), + isEnabled: () => false, +} as unknown as InternalLogger + +function makeOptions( + overrides: Partial>> = {}, +): TextOptions> { + return { + model: 'gemini-3-pro-preview', + messages: [{ role: 'user', content: 'hello' }], + logger: noopLogger, + ...overrides, + } as TextOptions> +} + +async function collect( + stream: AsyncIterable, +): Promise> { + const chunks: Array = [] + for await (const chunk of stream) chunks.push(chunk) + return chunks +} + +const textUpdate = (text: string) => ({ + sessionUpdate: 'agent_message_chunk' as const, + content: { type: 'text' as const, text }, +}) + +beforeEach(() => { + startAcpSessionMock.mockReset() + startToolBridgeMock.mockReset() + cancelMock.mockReset() + disposeMock.mockReset() + bridgeCloseMock.mockReset() + capturedOptions = undefined + startToolBridgeMock.mockResolvedValue({ + url: 'http://127.0.0.1:7777/mcp', + close: bridgeCloseMock, + }) + scriptSession({ updates: [textUpdate('hi there')] }) +}) + +describe('geminiCliText', () => { + it('creates an adapter with the gemini-cli provider name', () => { + const adapter = geminiCliText('gemini-3-pro-preview') + expect(adapter.kind).toBe('text') + expect(adapter.name).toBe('gemini-cli') + expect(adapter.model).toBe('gemini-3-pro-preview') + }) +}) + +describe('chatStream', () => { + it('streams translated AG-UI events for a simple turn', async () => { + const adapter = geminiCliText('gemini-3-pro-preview') + const chunks = await collect(adapter.chatStream(makeOptions())) + expect(chunks.map((c) => c.type)).toEqual([ + 'RUN_STARTED', + 'CUSTOM', + 'TEXT_MESSAGE_START', + 'TEXT_MESSAGE_CONTENT', + 'TEXT_MESSAGE_END', + 'RUN_FINISHED', + ]) + 
expect(chunks.at(-1)).toMatchObject({ finishReason: 'stop' }) + }) + + it('spawns with the configured model, cwd, and executable', async () => { + const adapter = geminiCliText('gemini-3-pro-preview', { + cwd: '/workspace', + executablePath: '/opt/bin/gemini', + extraArgs: ['--sandbox'], + }) + await collect(adapter.chatStream(makeOptions())) + expect(capturedOptions).toMatchObject({ + model: 'gemini-3-pro-preview', + cwd: '/workspace', + executablePath: '/opt/bin/gemini', + extraArgs: ['--sandbox'], + }) + }) + + it('passes the configured ACP auth method through to the session', async () => { + const adapter = geminiCliText('gemini-3-pro-preview', { + authMethodId: 'oauth-personal', + }) + await collect(adapter.chatStream(makeOptions())) + expect(capturedOptions).toMatchObject({ authMethodId: 'oauth-personal' }) + }) + + it('lets modelOptions override the configured ACP auth method', async () => { + const adapter = geminiCliText('gemini-3-pro-preview', { + authMethodId: 'oauth-personal', + }) + await collect( + adapter.chatStream( + makeOptions({ modelOptions: { authMethodId: 'gemini-api-key' } }), + ), + ) + expect(capturedOptions).toMatchObject({ authMethodId: 'gemini-api-key' }) + }) + + it('omits authMethodId when none is configured', async () => { + const adapter = geminiCliText('gemini-3-pro-preview') + await collect(adapter.chatStream(makeOptions())) + expect(capturedOptions?.authMethodId).toBeUndefined() + }) + + it('sends only the trailing user message and requests resume with a sessionId', async () => { + scriptSession({ resumed: true, sessionId: 'sess-prior' }) + const promptSpy = vi.fn() + startAcpSessionMock.mockImplementation((options) => { + capturedOptions = options + return Promise.resolve({ + sessionId: 'sess-prior', + resumed: true, + prompt: (text: string) => { + promptSpy(text) + return Promise.resolve({ stopReason: 'end_turn' as const }) + }, + cancel: cancelMock, + dispose: disposeMock, + }) + }) + + const adapter = 
geminiCliText('gemini-3-pro-preview') + await collect( + adapter.chatStream( + makeOptions({ + messages: [ + { role: 'user', content: 'first' }, + { role: 'assistant', content: 'answer' }, + { role: 'user', content: 'follow-up' }, + ], + modelOptions: { sessionId: 'sess-prior' }, + }), + ), + ) + expect(capturedOptions).toMatchObject({ resumeSessionId: 'sess-prior' }) + expect(promptSpy).toHaveBeenCalledWith('follow-up') + }) + + it('falls back to the flattened transcript when resume is unavailable', async () => { + const promptSpy = vi.fn() + startAcpSessionMock.mockImplementation((options) => { + capturedOptions = options + return Promise.resolve({ + sessionId: 'sess-fresh', + resumed: false, + prompt: (text: string) => { + promptSpy(text) + return Promise.resolve({ stopReason: 'end_turn' as const }) + }, + cancel: cancelMock, + dispose: disposeMock, + }) + }) + + const adapter = geminiCliText('gemini-3-pro-preview') + const chunks = await collect( + adapter.chatStream( + makeOptions({ + messages: [ + { role: 'user', content: 'first' }, + { role: 'assistant', content: 'answer' }, + { role: 'user', content: 'follow-up' }, + ], + modelOptions: { sessionId: 'sess-gone' }, + }), + ), + ) + expect(promptSpy).toHaveBeenCalledWith( + 'Previous conversation:\nUser: first\nAssistant: answer\n\nfollow-up', + ) + // The fresh session id is surfaced so the client can re-sync. 
+ expect(chunks.find((c) => c.type === 'CUSTOM')).toMatchObject({ + value: { sessionId: 'sess-fresh' }, + }) + }) + + it('prepends system prompts to the prompt text', async () => { + const promptSpy = vi.fn() + startAcpSessionMock.mockImplementation((options) => { + capturedOptions = options + return Promise.resolve({ + sessionId: 'sess-1', + resumed: false, + prompt: (text: string) => { + promptSpy(text) + return Promise.resolve({ stopReason: 'end_turn' as const }) + }, + cancel: cancelMock, + dispose: disposeMock, + }) + }) + const adapter = geminiCliText('gemini-3-pro-preview') + await collect( + adapter.chatStream( + makeOptions({ systemPrompts: ['Be terse.', 'Use tabs.'] }), + ), + ) + expect(promptSpy).toHaveBeenCalledWith('Be terse.\n\nUse tabs.\n\nhello') + }) + + it('starts the MCP bridge and hands its URL to the session when tools are passed', async () => { + const adapter = geminiCliText('gemini-3-pro-preview') + await collect( + adapter.chatStream( + makeOptions({ + tools: [ + { + name: 'lookup_user', + description: 'Look up a user', + inputSchema: { type: 'object', properties: {} }, + execute: async () => ({ ok: true }), + } as never, + ], + }), + ), + ) + expect(startToolBridgeMock).toHaveBeenCalledTimes(1) + expect(capturedOptions).toMatchObject({ + mcpServers: [{ name: 'tanstack', url: 'http://127.0.0.1:7777/mcp' }], + }) + expect(bridgeCloseMock).toHaveBeenCalledTimes(1) + }) + + it('does not start the bridge when no tools are passed', async () => { + const adapter = geminiCliText('gemini-3-pro-preview') + await collect(adapter.chatStream(makeOptions())) + expect(startToolBridgeMock).not.toHaveBeenCalled() + expect(capturedOptions?.mcpServers).toBeUndefined() + }) + + it('wires the default permission policy through the session options', async () => { + const adapter = geminiCliText('gemini-3-pro-preview', { + permissionMode: 'acceptEdits', + }) + await collect( + adapter.chatStream( + makeOptions({ + tools: [ + { + name: 'lookup_user', + 
description: 'Look up a user', + inputSchema: { type: 'object', properties: {} }, + execute: async () => ({ ok: true }), + } as never, + ], + }), + ), + ) + + const handler = capturedOptions!.onPermissionRequest + const options = [ + { optionId: 'yes', name: 'Allow', kind: 'allow_once' as const }, + { optionId: 'no', name: 'Reject', kind: 'reject_once' as const }, + ] + + await expect( + Promise.resolve( + handler({ + sessionId: 's', + toolCall: { + toolCallId: 't1', + title: 'lookup_user (tanstack MCP Server)', + kind: 'other', + }, + options, + }), + ), + ).resolves.toEqual({ outcome: 'selected', optionId: 'yes' }) + + await expect( + Promise.resolve( + handler({ + sessionId: 's', + toolCall: { toolCallId: 't2', title: 'Edit file', kind: 'edit' }, + options, + }), + ), + ).resolves.toEqual({ outcome: 'selected', optionId: 'yes' }) + + await expect( + Promise.resolve( + handler({ + sessionId: 's', + toolCall: { toolCallId: 't3', title: 'Run command', kind: 'execute' }, + options, + }), + ), + ).resolves.toEqual({ outcome: 'selected', optionId: 'no' }) + }) + + it('lets a custom onPermissionRequest replace the default policy', async () => { + const custom = vi.fn().mockResolvedValue({ outcome: 'cancelled' }) + const adapter = geminiCliText('gemini-3-pro-preview', { + onPermissionRequest: custom, + }) + await collect(adapter.chatStream(makeOptions())) + expect(capturedOptions!.onPermissionRequest).toBe(custom) + }) + + it('cancels the harness turn when the abort signal fires', async () => { + let resolvePrompt: (() => void) | undefined + startAcpSessionMock.mockImplementation((options) => { + capturedOptions = options + return Promise.resolve({ + sessionId: 'sess-1', + resumed: false, + prompt: () => + new Promise((resolve) => { + resolvePrompt = () => resolve({ stopReason: 'cancelled' as const }) + }), + cancel: cancelMock.mockImplementation(() => { + resolvePrompt?.() + return Promise.resolve() + }), + dispose: disposeMock, + }) + }) + + const controller = new 
AbortController() + const adapter = geminiCliText('gemini-3-pro-preview') + const collected = collect( + adapter.chatStream(makeOptions({ abortController: controller })), + ) + // Give the stream a beat to start the session, then abort. + await new Promise((resolve) => setTimeout(resolve, 0)) + controller.abort() + const chunks = await collected + expect(cancelMock).toHaveBeenCalled() + expect(chunks.at(-1)).toMatchObject({ + type: 'RUN_FINISHED', + finishReason: 'stop', + }) + }) + + it('disposes the session after the stream completes', async () => { + const adapter = geminiCliText('gemini-3-pro-preview') + await collect(adapter.chatStream(makeOptions())) + expect(disposeMock).toHaveBeenCalledTimes(1) + }) + + it('emits RUN_ERROR and disposes when the prompt fails', async () => { + scriptSession({ promptError: new Error('connection lost') }) + const adapter = geminiCliText('gemini-3-pro-preview') + const chunks = await collect(adapter.chatStream(makeOptions())) + expect(chunks.at(-1)).toMatchObject({ + type: 'RUN_ERROR', + message: 'connection lost', + }) + expect(disposeMock).toHaveBeenCalledTimes(1) + }) + + it('emits RUN_ERROR when the CLI cannot be spawned', async () => { + startAcpSessionMock.mockRejectedValue(new Error('gemini not found')) + const adapter = geminiCliText('gemini-3-pro-preview') + const chunks = await collect(adapter.chatStream(makeOptions())) + expect(chunks.at(-1)).toMatchObject({ + type: 'RUN_ERROR', + message: 'gemini not found', + }) + }) + + it('emits RUN_ERROR for client-side tools (no execute)', async () => { + const adapter = geminiCliText('gemini-3-pro-preview') + const chunks = await collect( + adapter.chatStream( + makeOptions({ + tools: [ + { + name: 'client_only', + description: 'runs in browser', + inputSchema: { type: 'object', properties: {} }, + } as never, + ], + }), + ), + ) + expect(startAcpSessionMock).not.toHaveBeenCalled() + expect(chunks.at(-1)).toMatchObject({ type: 'RUN_ERROR' }) + expect((chunks.at(-1) as { 
message: string }).message).toMatch( + /client-side/i, + ) + }) + + it('emits RUN_ERROR for approval-gated tools', async () => { + const adapter = geminiCliText('gemini-3-pro-preview') + const chunks = await collect( + adapter.chatStream( + makeOptions({ + tools: [ + { + name: 'needs_ok', + description: 'requires approval', + inputSchema: { type: 'object', properties: {} }, + execute: async () => 'x', + needsApproval: true, + } as never, + ], + }), + ), + ) + expect(chunks.at(-1)).toMatchObject({ type: 'RUN_ERROR' }) + }) +}) + +describe('structuredOutput', () => { + it('embeds the schema in the prompt and parses the JSON response', async () => { + const promptSpy = vi.fn() + startAcpSessionMock.mockImplementation((options) => { + capturedOptions = options + return Promise.resolve({ + sessionId: 'sess-so', + resumed: false, + prompt: (text: string) => { + promptSpy(text) + options.onUpdate(textUpdate('{"answer":42}')) + return Promise.resolve({ + stopReason: 'end_turn' as const, + usage: { inputTokens: 7, outputTokens: 3, totalTokens: 10 }, + }) + }, + cancel: cancelMock, + dispose: disposeMock, + }) + }) + const adapter = geminiCliText('gemini-3-pro-preview') + const result = await adapter.structuredOutput({ + chatOptions: makeOptions(), + outputSchema: { + type: 'object', + properties: { answer: { type: 'number' } }, + }, + }) + expect(result.data).toEqual({ answer: 42 }) + expect(result.rawText).toBe('{"answer":42}') + expect(result.usage).toMatchObject({ + promptTokens: 7, + completionTokens: 3, + totalTokens: 10, + }) + expect(promptSpy.mock.calls[0]![0]).toContain('JSON Schema') + expect(promptSpy.mock.calls[0]![0]).toContain('"answer"') + expect(disposeMock).toHaveBeenCalledTimes(1) + }) + + it('strips markdown fences from the response', async () => { + scriptSession({ + updates: [textUpdate('```json\n{"answer":7}\n```')], + }) + const adapter = geminiCliText('gemini-3-pro-preview') + const result = await adapter.structuredOutput({ + chatOptions: 
makeOptions(), + outputSchema: { type: 'object' }, + }) + expect(result.data).toEqual({ answer: 7 }) + }) + + it('extracts JSON embedded in prose', async () => { + scriptSession({ + updates: [textUpdate('Sure! Here you go: {"answer":1} Hope that helps.')], + }) + const adapter = geminiCliText('gemini-3-pro-preview') + const result = await adapter.structuredOutput({ + chatOptions: makeOptions(), + outputSchema: { type: 'object' }, + }) + expect(result.data).toEqual({ answer: 1 }) + }) + + it('throws when the harness refuses', async () => { + scriptSession({ + updates: [textUpdate('no')], + stopReason: 'refusal', + }) + const adapter = geminiCliText('gemini-3-pro-preview') + await expect( + adapter.structuredOutput({ + chatOptions: makeOptions(), + outputSchema: { type: 'object' }, + }), + ).rejects.toThrow(/refused/) + }) + + it('throws when the run produces no text', async () => { + scriptSession({ updates: [] }) + const adapter = geminiCliText('gemini-3-pro-preview') + await expect( + adapter.structuredOutput({ + chatOptions: makeOptions(), + outputSchema: { type: 'object' }, + }), + ).rejects.toThrow(/without a response/) + }) +}) diff --git a/packages/ai-gemini-cli/tests/translate.test.ts b/packages/ai-gemini-cli/tests/translate.test.ts new file mode 100644 index 000000000..d93a7c7b7 --- /dev/null +++ b/packages/ai-gemini-cli/tests/translate.test.ts @@ -0,0 +1,435 @@ +import { describe, expect, it } from 'vitest' +import { + PLAN_EVENT, + SESSION_ID_EVENT, + matchBridgedToolName, + translateAcpStream, +} from '../src/stream/translate' +import type { AcpStreamEvent, TranslateContext } from '../src/stream/translate' +import type { StreamChunk } from '@tanstack/ai' + +function makeCtx(overrides: Partial = {}): TranslateContext { + let id = 0 + return { + model: 'gemini-3-pro-preview', + runId: 'run-1', + threadId: 'thread-1', + genId: () => `gen-${++id}`, + ...overrides, + } +} + +async function* fromArray( + events: Array, +): AsyncIterable { + for (const event of 
events) yield event +} + +async function collect( + events: Array, + ctx: TranslateContext = makeCtx(), +): Promise> { + const chunks: Array = [] + for await (const chunk of translateAcpStream(fromArray(events), ctx)) { + chunks.push(chunk) + } + return chunks +} + +const session: AcpStreamEvent = { kind: 'session', sessionId: 'sess-1' } +const done: AcpStreamEvent = { kind: 'done', stopReason: 'end_turn' } + +function text(value: string): AcpStreamEvent { + return { + kind: 'update', + update: { + sessionUpdate: 'agent_message_chunk', + content: { type: 'text', text: value }, + }, + } +} + +function thought(value: string): AcpStreamEvent { + return { + kind: 'update', + update: { + sessionUpdate: 'agent_thought_chunk', + content: { type: 'text', text: value }, + }, + } +} + +describe('translateAcpStream', () => { + it('translates streamed text deltas into one accumulated message', async () => { + const chunks = await collect([session, text('Hel'), text('lo'), done]) + expect(chunks.map((c) => c.type)).toEqual([ + 'RUN_STARTED', + 'CUSTOM', + 'TEXT_MESSAGE_START', + 'TEXT_MESSAGE_CONTENT', + 'TEXT_MESSAGE_CONTENT', + 'TEXT_MESSAGE_END', + 'RUN_FINISHED', + ]) + expect(chunks[1]).toMatchObject({ + name: SESSION_ID_EVENT, + value: { sessionId: 'sess-1' }, + }) + expect(chunks[3]).toMatchObject({ delta: 'Hel', content: 'Hel' }) + expect(chunks[4]).toMatchObject({ delta: 'lo', content: 'Hello' }) + expect(chunks.at(-1)).toMatchObject({ finishReason: 'stop' }) + }) + + it('translates thought chunks into reasoning events', async () => { + const chunks = await collect([ + session, + thought('hmm '), + thought('ok'), + text('answer'), + done, + ]) + expect(chunks.map((c) => c.type)).toEqual([ + 'RUN_STARTED', + 'CUSTOM', + 'REASONING_START', + 'REASONING_MESSAGE_START', + 'REASONING_MESSAGE_CONTENT', + 'REASONING_MESSAGE_CONTENT', + 'REASONING_MESSAGE_END', + 'REASONING_END', + 'TEXT_MESSAGE_START', + 'TEXT_MESSAGE_CONTENT', + 'TEXT_MESSAGE_END', + 'RUN_FINISHED', + ]) + 
}) + + it('closes an open text message when a tool call interleaves, then reopens', async () => { + const chunks = await collect([ + session, + text('Let me check. '), + { + kind: 'update', + update: { + sessionUpdate: 'tool_call', + toolCallId: 'tc-1', + title: 'Reading file', + kind: 'read', + status: 'in_progress', + rawInput: { path: 'a.ts' }, + }, + }, + { + kind: 'update', + update: { + sessionUpdate: 'tool_call_update', + toolCallId: 'tc-1', + status: 'completed', + rawOutput: 'contents', + }, + }, + text('Done.'), + done, + ]) + expect(chunks.map((c) => c.type)).toEqual([ + 'RUN_STARTED', + 'CUSTOM', + 'TEXT_MESSAGE_START', + 'TEXT_MESSAGE_CONTENT', + 'TEXT_MESSAGE_END', + 'TOOL_CALL_START', + 'TOOL_CALL_ARGS', + 'TOOL_CALL_END', + 'TOOL_CALL_RESULT', + 'TEXT_MESSAGE_START', + 'TEXT_MESSAGE_CONTENT', + 'TEXT_MESSAGE_END', + 'RUN_FINISHED', + ]) + expect(chunks[5]).toMatchObject({ + toolCallId: 'tc-1', + toolCallName: 'read', + }) + expect(chunks[6]).toMatchObject({ + args: JSON.stringify({ title: 'Reading file', path: 'a.ts' }), + }) + expect(chunks[8]).toMatchObject({ content: 'contents' }) + }) + + it('marks failed tool calls as output-error', async () => { + const chunks = await collect([ + session, + { + kind: 'update', + update: { + sessionUpdate: 'tool_call', + toolCallId: 'tc-2', + kind: 'execute', + status: 'in_progress', + }, + }, + { + kind: 'update', + update: { + sessionUpdate: 'tool_call_update', + toolCallId: 'tc-2', + status: 'failed', + rawOutput: { error: 'denied' }, + }, + }, + done, + ]) + expect(chunks.find((c) => c.type === 'TOOL_CALL_RESULT')).toMatchObject({ + state: 'output-error', + content: JSON.stringify({ error: 'denied' }), + }) + }) + + it('resolves a tool_call that arrives already completed', async () => { + const chunks = await collect([ + session, + { + kind: 'update', + update: { + sessionUpdate: 'tool_call', + toolCallId: 'tc-3', + kind: 'search', + status: 'completed', + content: [ + { type: 'content', content: { type: 
'text', text: 'found it' } }, + ], + }, + }, + done, + ]) + expect(chunks.map((c) => c.type)).toEqual([ + 'RUN_STARTED', + 'CUSTOM', + 'TOOL_CALL_START', + 'TOOL_CALL_ARGS', + 'TOOL_CALL_END', + 'TOOL_CALL_RESULT', + 'RUN_FINISHED', + ]) + expect(chunks[5]).toMatchObject({ content: 'found it' }) + }) + + it('opens a synthetic pair for a tool_call_update with an unknown id', async () => { + const chunks = await collect([ + session, + { + kind: 'update', + update: { + sessionUpdate: 'tool_call_update', + toolCallId: 'tc-mystery', + status: 'completed', + rawOutput: 'late result', + }, + }, + done, + ]) + expect(chunks.map((c) => c.type)).toEqual([ + 'RUN_STARTED', + 'CUSTOM', + 'TOOL_CALL_START', + 'TOOL_CALL_ARGS', + 'TOOL_CALL_END', + 'TOOL_CALL_RESULT', + 'RUN_FINISHED', + ]) + }) + + it('surfaces bridged TanStack tool calls under their registered names', async () => { + const chunks = await collect( + [ + session, + { + kind: 'update', + update: { + sessionUpdate: 'tool_call', + toolCallId: 'tc-4', + title: 'lookup_user (tanstack MCP Server)', + kind: 'other', + status: 'completed', + rawOutput: '{"name":"Ada"}', + }, + }, + done, + ], + makeCtx({ bridgedToolNames: new Set(['lookup_user']) }), + ) + expect(chunks.find((c) => c.type === 'TOOL_CALL_START')).toMatchObject({ + toolCallName: 'lookup_user', + }) + }) + + it('emits plan updates as CUSTOM events', async () => { + const chunks = await collect([ + session, + { + kind: 'update', + update: { + sessionUpdate: 'plan', + entries: [{ content: 'step 1', status: 'pending' }], + }, + }, + done, + ]) + expect(chunks[2]).toMatchObject({ + type: 'CUSTOM', + name: PLAN_EVENT, + value: { entries: [{ content: 'step 1', status: 'pending' }] }, + }) + }) + + it('maps max_tokens and max_turn_requests to finishReason length', async () => { + for (const stopReason of ['max_tokens', 'max_turn_requests'] as const) { + const chunks = await collect([session, { kind: 'done', stopReason }]) + expect(chunks.at(-1)).toMatchObject({ + 
type: 'RUN_FINISHED', + finishReason: 'length', + }) + } + }) + + it('maps cancelled to a normal stop', async () => { + const chunks = await collect([ + session, + { kind: 'done', stopReason: 'cancelled' }, + ]) + expect(chunks.at(-1)).toMatchObject({ + type: 'RUN_FINISHED', + finishReason: 'stop', + }) + }) + + it('maps refusal to RUN_ERROR', async () => { + const chunks = await collect([ + session, + { kind: 'done', stopReason: 'refusal' }, + ]) + expect(chunks.at(-1)).toMatchObject({ type: 'RUN_ERROR', code: 'refusal' }) + }) + + it('reports usage from the prompt response when present', async () => { + const chunks = await collect([ + session, + { + kind: 'done', + stopReason: 'end_turn', + usage: { + inputTokens: 50, + outputTokens: 10, + totalTokens: 60, + cachedReadTokens: 20, + thoughtTokens: 4, + }, + }, + ]) + const finished = chunks.at(-1) as unknown as { + usage: Record + } + expect(finished.usage).toMatchObject({ + promptTokens: 50, + completionTokens: 10, + totalTokens: 60, + promptTokensDetails: { cachedTokens: 20 }, + completionTokensDetails: { reasoningTokens: 4 }, + }) + }) + + it('omits usage when the harness reports none', async () => { + const chunks = await collect([session, done]) + expect( + (chunks.at(-1) as unknown as { usage?: unknown }).usage, + ).toBeUndefined() + }) + + it('closes open messages and synthesizes results before finishing', async () => { + const chunks = await collect([ + session, + text('working...'), + { + kind: 'update', + update: { + sessionUpdate: 'tool_call', + toolCallId: 'tc-5', + kind: 'execute', + status: 'in_progress', + }, + }, + done, + ]) + const types: Array = chunks.map((c) => c.type) + expect(types.indexOf('TOOL_CALL_RESULT')).toBeGreaterThan(-1) + expect(chunks.find((c) => c.type === 'TOOL_CALL_RESULT')).toMatchObject({ + content: JSON.stringify({ status: 'interrupted' }), + }) + expect(types.at(-1)).toBe('RUN_FINISHED') + }) + + it('synthesizes results then rethrows when the source stream throws', async 
() => { + async function* failing(): AsyncIterable { + yield session + yield { + kind: 'update', + update: { + sessionUpdate: 'tool_call', + toolCallId: 'tc-6', + kind: 'execute', + status: 'in_progress', + }, + } + throw new Error('process died') + } + + const chunks: Array = [] + await expect(async () => { + for await (const chunk of translateAcpStream(failing(), makeCtx())) { + chunks.push(chunk) + } + }).rejects.toThrow('process died') + expect(chunks.at(-1)).toMatchObject({ + type: 'TOOL_CALL_RESULT', + toolCallId: 'tc-6', + content: JSON.stringify({ status: 'interrupted' }), + }) + }) + + it('ignores harness-internal update types', async () => { + const chunks = await collect([ + session, + { + kind: 'update', + update: { sessionUpdate: 'available_commands_update' }, + }, + { kind: 'update', update: { sessionUpdate: 'current_mode_update' } }, + done, + ]) + expect(chunks.map((c) => c.type)).toEqual([ + 'RUN_STARTED', + 'CUSTOM', + 'RUN_FINISHED', + ]) + }) +}) + +describe('matchBridgedToolName', () => { + const names = new Set(['lookup_user', 'get_weather']) + + it('matches exact tool names', () => { + expect(matchBridgedToolName('lookup_user', names)).toBe('lookup_user') + }) + + it('matches server-suffixed titles', () => { + expect( + matchBridgedToolName('get_weather (tanstack MCP Server)', names), + ).toBe('get_weather') + }) + + it('returns undefined for unrelated titles', () => { + expect(matchBridgedToolName('Run shell command', names)).toBeUndefined() + expect(matchBridgedToolName(undefined, names)).toBeUndefined() + expect(matchBridgedToolName('lookup_user', undefined)).toBeUndefined() + }) +}) diff --git a/packages/ai-gemini-cli/tsconfig.json b/packages/ai-gemini-cli/tsconfig.json new file mode 100644 index 000000000..c38689f4e --- /dev/null +++ b/packages/ai-gemini-cli/tsconfig.json @@ -0,0 +1,8 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "dist" + }, + "include": ["src", "tests"], + "exclude": ["node_modules", 
"dist"] +} diff --git a/packages/ai-gemini-cli/vite.config.ts b/packages/ai-gemini-cli/vite.config.ts new file mode 100644 index 000000000..11f5b20b7 --- /dev/null +++ b/packages/ai-gemini-cli/vite.config.ts @@ -0,0 +1,37 @@ +import { defineConfig, mergeConfig } from 'vitest/config' +import { tanstackViteConfig } from '@tanstack/vite-config' +import packageJson from './package.json' + +const config = defineConfig({ + test: { + name: packageJson.name, + dir: './', + watch: false, + + globals: true, + environment: 'node', + include: ['tests/**/*.test.ts'], + coverage: { + provider: 'v8', + reporter: ['text', 'json', 'html', 'lcov'], + exclude: [ + 'node_modules/', + 'dist/', + 'tests/', + '**/*.test.ts', + '**/*.config.ts', + '**/types.ts', + ], + include: ['src/**/*.ts'], + }, + }, +}) + +export default mergeConfig( + config, + tanstackViteConfig({ + entry: ['./src/index.ts'], + srcDir: './src', + cjs: false, + }), +) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 07b67f744..667bdf441 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -494,6 +494,12 @@ importers: '@tanstack/ai-client': specifier: workspace:* version: link:../../packages/ai-client + '@tanstack/ai-codex': + specifier: workspace:* + version: link:../../packages/ai-codex + '@tanstack/ai-gemini-cli': + specifier: workspace:* + version: link:../../packages/ai-gemini-cli '@tanstack/ai-react': specifier: workspace:* version: link:../../packages/ai-react @@ -1230,6 +1236,22 @@ importers: specifier: ^4.21.0 version: 4.21.0 + packages/ai-codex: + dependencies: + '@modelcontextprotocol/sdk': + specifier: ^1.29.0 + version: 1.29.0(zod@4.3.6) + '@openai/codex-sdk': + specifier: ^0.139.0 + version: 0.139.0 + devDependencies: + '@tanstack/ai': + specifier: workspace:* + version: link:../ai + '@vitest/coverage-v8': + specifier: 4.0.14 + version: 
4.0.14(vitest@4.0.14(@opentelemetry/api@1.9.1)(@types/node@24.10.3)(happy-dom@20.0.11)(jiti@2.6.1)(jsdom@27.3.0(postcss@8.5.15))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) + packages/ai-devtools: dependencies: '@tanstack/ai': @@ -1346,6 +1368,22 @@ importers: specifier: ^4.2.0 version: 4.3.6 + packages/ai-gemini-cli: + dependencies: + '@agentclientprotocol/sdk': + specifier: ^0.25.0 + version: 0.25.0(zod@4.3.6) + '@modelcontextprotocol/sdk': + specifier: ^1.29.0 + version: 1.29.0(zod@4.3.6) + devDependencies: + '@tanstack/ai': + specifier: workspace:* + version: link:../ai + '@vitest/coverage-v8': + specifier: 4.0.14 + version: 4.0.14(vitest@4.0.14(@opentelemetry/api@1.9.1)(@types/node@24.10.3)(happy-dom@20.0.11)(jiti@2.6.1)(jsdom@27.3.0(postcss@8.5.15))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) + packages/ai-grok: dependencies: '@tanstack/ai-utils': @@ -2203,6 +2241,11 @@ packages: '@ag-ui/core@0.0.52': resolution: {integrity: sha512-Xo0bUaNV56EqylzcrAuhUkQX7et7+SZIrqZZtEByGwEq/I1EHny6ZMkWHLkKR7UNi0FJZwJyhKYmKJS3B2SEgA==} + '@agentclientprotocol/sdk@0.25.0': + resolution: {integrity: sha512-wU1VgXNtMvdVotX49txc3WJUDV+/QbLpsgjMvFhlRmp37osdLbI7L7y+iwAlQATwfjLxcv1r1p3ZxZBcXlGhcQ==} + peerDependencies: + zod: ^3.25.0 || ^4.0.0 + '@anthropic-ai/claude-agent-sdk-darwin-arm64@0.3.176': resolution: {integrity: sha512-QZLVv9Hlo5W7YEV23eTsAKYQTkA1V7TG4Z5oFESgvkVfx02TxguZKtUbqlpmzZ9JqXRu+qMY9iIpIgbI3PwRJw==} cpu: [arm64] @@ -4536,6 +4579,51 @@ packages: resolution: {integrity: sha512-T8TbSnGsxo6TDBJx/Sgv/BlVJL3tshxZP7Aq5R1mSnM5OcHY2dQaxLMu2+E8u3gN0MLOzdjurqN4ZRVuzQycOQ==} engines: {node: '>=8.0'} + '@openai/codex-sdk@0.139.0': + resolution: {integrity: sha512-r4lDckaVx4mVZy1v/7ykEhkeWjVfM/4oGJfhG0AP4+zN3Sa+jcf5hdY4EHfJasofBcp0tIF/7JCKHpv534R+tw==} + engines: {node: '>=18'} + + '@openai/codex@0.139.0': + resolution: {integrity: sha512-wr2fRE+fzW0CjEbfFsLh1ftarVEcw0CMLWS7QyA0nyOz5qacQPVq3cq2+/U7oEbwm1TOqoi0Fm1nxniB5FkpmA==} + engines: 
{node: '>=16'} + hasBin: true + + '@openai/codex@0.139.0-darwin-arm64': + resolution: {integrity: sha512-o+0ZKWwgDFMMLO7rwinzO0PQsgK+Vme1pMN2GeAxsX29ZgGZcyPICfpJbeGSUO1mb2a36Skjx6nfdRnxMY0r7w==} + engines: {node: '>=16'} + cpu: [arm64] + os: [darwin] + + '@openai/codex@0.139.0-darwin-x64': + resolution: {integrity: sha512-9gkBWzu6DB2rqU4DbpxD3DE5bofGpsK46Lp0h0I+bKWc2IIcxvSi8K2utKmBLoJCbKrn4JQu7dFNGRqEfENung==} + engines: {node: '>=16'} + cpu: [x64] + os: [darwin] + + '@openai/codex@0.139.0-linux-arm64': + resolution: {integrity: sha512-tBQE5lZciRHeWZGuURgjP9S717MvTIpQMc593+DNxY2LQxozkngOkzFSQd1+/UmQKGrCqdFLu5irIwPXpSZyEw==} + engines: {node: '>=16'} + cpu: [arm64] + os: [linux] + + '@openai/codex@0.139.0-linux-x64': + resolution: {integrity: sha512-14UgzDS+X4crkvdt6S02A/ZZOrS8ZyWiuTRpguCtnhNamb7unSuDxy86BWgpAl3sqiTaN2CP8VLyp2ohQ8Nbzw==} + engines: {node: '>=16'} + cpu: [x64] + os: [linux] + + '@openai/codex@0.139.0-win32-arm64': + resolution: {integrity: sha512-nlwRjsYotH1Rtqu/Q0VwQbIeO2UX1mkHK84Ov9qn/hl29QqqoBtno0tRyqIPbkXFIVQuWiAYXlV3ugLwH5fTrQ==} + engines: {node: '>=16'} + cpu: [arm64] + os: [win32] + + '@openai/codex@0.139.0-win32-x64': + resolution: {integrity: sha512-lQrVLNz+90wdvWVNFDvCkHQRiAK9ZllmkTka3c8eqSDqdJk35Gpgppfv9Xtw5M2ZBtTq0sBdWBiCMyzGDBSpmQ==} + engines: {node: '>=16'} + cpu: [x64] + os: [win32] + '@openrouter/sdk@0.12.35': resolution: {integrity: sha512-s4QVLLnG1AmfW3TjnnHUqGfsCkzwVK+kboGcZmKbde09m1DPqgzl4RUFt/HJ5v97MX8aEaN0UG3mKv2S+qj2Gw==} @@ -10060,10 +10148,6 @@ packages: resolution: {integrity: sha512-tAAg/72/VxOUW7RQSX1pIxJVucYKcjFjfvj60L57jrZpYCHC3XN0WCQ3sNYL4Gmvv+7GPvTAjc+KSdeNuE8oWQ==} engines: {node: '>=12.22.0'} - ip-address@10.1.0: - resolution: {integrity: sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q==} - engines: {node: '>= 12'} - ip-address@10.2.0: resolution: {integrity: sha512-/+S6j4E9AHvW9SWMSEY9Xfy66O5PWvVEJ08O0y5JGyEKQpojb0K0GKpz/v5HJ/G0vi3D2sjGK78119oXZeE0qA==} engines: {node: 
'>= 12'} @@ -14082,6 +14166,10 @@ snapshots: dependencies: zod: 3.25.76 + '@agentclientprotocol/sdk@0.25.0(zod@4.3.6)': + dependencies: + zod: 4.3.6 + '@anthropic-ai/claude-agent-sdk-darwin-arm64@0.3.176': optional: true @@ -16457,6 +16545,37 @@ snapshots: '@oozcitak/util@8.3.8': {} + '@openai/codex-sdk@0.139.0': + dependencies: + '@openai/codex': 0.139.0 + + '@openai/codex@0.139.0': + optionalDependencies: + '@openai/codex-darwin-arm64': '@openai/codex@0.139.0-darwin-arm64' + '@openai/codex-darwin-x64': '@openai/codex@0.139.0-darwin-x64' + '@openai/codex-linux-arm64': '@openai/codex@0.139.0-linux-arm64' + '@openai/codex-linux-x64': '@openai/codex@0.139.0-linux-x64' + '@openai/codex-win32-arm64': '@openai/codex@0.139.0-win32-arm64' + '@openai/codex-win32-x64': '@openai/codex@0.139.0-win32-x64' + + '@openai/codex@0.139.0-darwin-arm64': + optional: true + + '@openai/codex@0.139.0-darwin-x64': + optional: true + + '@openai/codex@0.139.0-linux-arm64': + optional: true + + '@openai/codex@0.139.0-linux-x64': + optional: true + + '@openai/codex@0.139.0-win32-arm64': + optional: true + + '@openai/codex@0.139.0-win32-x64': + optional: true + '@openrouter/sdk@0.12.35': dependencies: zod: 4.3.6 @@ -22971,8 +23090,6 @@ snapshots: transitivePeerDependencies: - supports-color - ip-address@10.1.0: {} - ip-address@10.2.0: {} ipaddr.js@1.9.1: {} @@ -26310,7 +26427,7 @@ snapshots: socks@2.8.7: dependencies: - ip-address: 10.1.0 + ip-address: 10.2.0 smart-buffer: 4.2.0 solid-js@1.9.10: From d61590ee3c5338fadd17e491f8aef3e2c85649b8 Mon Sep 17 00:00:00 2001 From: Jack Herrington Date: Fri, 12 Jun 2026 20:45:19 -0700 Subject: [PATCH 06/12] feat: add @tanstack/ai-opencode harness adapter Add the @tanstack/ai-opencode package, an OpenCode harness adapter that drives OpenCode (via @opencode-ai/sdk) as a TanStack AI chat backend with local tool execution, token-level streaming, stateful sessions, and TanStack tool bridging over a localhost MCP server. 
Wires the adapter into the ts-react-coding-agent example, adds the OpenCode adapter docs page, and anchors the OpenCode.md gitignore entry so it no longer shadows the docs page on case-insensitive filesystems. Co-authored-by: Cursor --- .changeset/ai-opencode-initial.md | 5 + .gitignore | 4 + docs/adapters/opencode.md | 186 ++++++++ docs/config.json | 5 + examples/ts-react-coding-agent/README.md | 38 +- examples/ts-react-coding-agent/package.json | 1 + .../src/lib/agent-status.ts | 11 + .../ts-react-coding-agent/src/lib/agents.ts | 42 +- .../src/routes/api.chat.ts | 9 + packages/ai-opencode/README.md | 18 + packages/ai-opencode/package.json | 58 +++ packages/ai-opencode/src/adapters/text.ts | 407 +++++++++++++++++ packages/ai-opencode/src/index.ts | 40 ++ packages/ai-opencode/src/messages/prompt.ts | 67 +++ packages/ai-opencode/src/model-meta.ts | 24 + .../ai-opencode/src/process/permissions.ts | 83 ++++ packages/ai-opencode/src/process/server.ts | 252 +++++++++++ packages/ai-opencode/src/provider-options.ts | 19 + packages/ai-opencode/src/stream/queue.ts | 64 +++ packages/ai-opencode/src/stream/sdk-types.ts | 104 +++++ packages/ai-opencode/src/stream/translate.ts | 417 ++++++++++++++++++ packages/ai-opencode/src/tools/bridge.ts | 129 ++++++ packages/ai-opencode/tests/bridge.test.ts | 105 +++++ .../ai-opencode/tests/permissions.test.ts | 102 +++++ packages/ai-opencode/tests/prompt.test.ts | 88 ++++ .../ai-opencode/tests/text-adapter.test.ts | 402 +++++++++++++++++ packages/ai-opencode/tests/translate.test.ts | 403 +++++++++++++++++ packages/ai-opencode/tsconfig.json | 8 + packages/ai-opencode/vite.config.ts | 37 ++ pnpm-lock.yaml | 26 ++ 30 files changed, 3137 insertions(+), 17 deletions(-) create mode 100644 .changeset/ai-opencode-initial.md create mode 100644 docs/adapters/opencode.md create mode 100644 packages/ai-opencode/README.md create mode 100644 packages/ai-opencode/package.json create mode 100644 packages/ai-opencode/src/adapters/text.ts create mode 
100644 packages/ai-opencode/src/index.ts create mode 100644 packages/ai-opencode/src/messages/prompt.ts create mode 100644 packages/ai-opencode/src/model-meta.ts create mode 100644 packages/ai-opencode/src/process/permissions.ts create mode 100644 packages/ai-opencode/src/process/server.ts create mode 100644 packages/ai-opencode/src/provider-options.ts create mode 100644 packages/ai-opencode/src/stream/queue.ts create mode 100644 packages/ai-opencode/src/stream/sdk-types.ts create mode 100644 packages/ai-opencode/src/stream/translate.ts create mode 100644 packages/ai-opencode/src/tools/bridge.ts create mode 100644 packages/ai-opencode/tests/bridge.test.ts create mode 100644 packages/ai-opencode/tests/permissions.test.ts create mode 100644 packages/ai-opencode/tests/prompt.test.ts create mode 100644 packages/ai-opencode/tests/text-adapter.test.ts create mode 100644 packages/ai-opencode/tests/translate.test.ts create mode 100644 packages/ai-opencode/tsconfig.json create mode 100644 packages/ai-opencode/vite.config.ts diff --git a/.changeset/ai-opencode-initial.md b/.changeset/ai-opencode-initial.md new file mode 100644 index 000000000..66cd96e33 --- /dev/null +++ b/.changeset/ai-opencode-initial.md @@ -0,0 +1,5 @@ +--- +'@tanstack/ai-opencode': minor +--- + +New `@tanstack/ai-opencode` package: an OpenCode harness adapter that drives [OpenCode](https://opencode.ai) (via `@opencode-ai/sdk`) as a TanStack AI chat backend. OpenCode owns the agent loop and executes its built-in tools (shell, file edits, search) locally; assistant text and thinking stream as token-level deltas, and tool activity streams back as resolved tool-call events. TanStack `toolDefinition()` server tools are bridged into the harness via a localhost MCP server, sessions are stateful and resumable, and OpenCode permission requests are answered by a configurable `permissionMode` (`default` / `acceptEdits` / `bypassPermissions` or a custom handler). 
Server-only (Node); requires the `opencode` CLI to be installed and authenticated on the host. diff --git a/.gitignore b/.gitignore index 6678fb779..92054517b 100644 --- a/.gitignore +++ b/.gitignore @@ -78,3 +78,7 @@ solo.yml # Agent scratch output (gap-analysis reports, triage notes — generated locally) .agent/gap-analysis/ .agent/triage/ + +/OpenCode.md +.agentsroom/ +.opencode/ diff --git a/docs/adapters/opencode.md b/docs/adapters/opencode.md new file mode 100644 index 000000000..ff2fa70e6 --- /dev/null +++ b/docs/adapters/opencode.md @@ -0,0 +1,186 @@ +--- +title: OpenCode +id: opencode-adapter +order: 14 +description: "Use OpenCode as a chat backend in TanStack AI — agent harness with local tool execution, token-level streaming, stateful sessions, and tool bridging via @tanstack/ai-opencode." +keywords: + - tanstack ai + - opencode + - opencode sdk + - harness + - agent + - coding agent + - adapter +--- + +The OpenCode adapter runs [OpenCode](https://opencode.ai) as a chat backend, driving it over its local HTTP server (`@opencode-ai/sdk`). Unlike HTTP provider adapters, this is a **harness adapter**: OpenCode runs its own agent loop and executes its own tools — shell commands, file reads and edits, search — locally on your server. Each `chat()` call runs one full harness turn; assistant text and reasoning stream as true token-level deltas, and the harness's tool activity streams back as already-resolved tool-call events your UI can render. + +> **Server-only.** The adapter spawns (or attaches to) an `opencode serve` process, so it only works in a Node.js server environment — never in the browser. Treat it like giving OpenCode a shell on the machine it runs on, and configure permissions accordingly. 
+ +## Installation + +```bash +npm install @tanstack/ai-opencode +``` + +The `opencode` CLI must be installed and its providers authenticated on the host: + +```bash +npm install -g opencode-ai +opencode auth login +``` + +A runnable demo lives at [`examples/ts-react-coding-agent`](https://github.com/TanStack/ai/tree/main/examples/ts-react-coding-agent) — session resume, the harness tool timeline, permission modes, and tool bridging, wired into a React app. + +## Models + +OpenCode is provider-agnostic: it resolves any `provider/model` id its configured providers support. Address models as `provider/model` (the adapter splits on the first `/`): + +```typescript +import { chat } from "@tanstack/ai"; +import { opencodeText } from "@tanstack/ai-opencode"; + +const stream = chat({ + adapter: opencodeText("anthropic/claude-sonnet-4-5", { + directory: "/path/to/project", + permissionMode: "acceptEdits", + }), + messages: [{ role: "user", content: "Fix the failing test in utils.test.ts" }], +}); +``` + +## Configuration + +| Option | Description | +| --------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `directory` | Working directory for the harness session. Defaults to `process.cwd()`. | +| `baseUrl` | Attach to an already-running `opencode serve` (e.g. `http://127.0.0.1:4096`) instead of spawning a new server per turn. | +| `hostname` | Hostname for the spawned server. Defaults to the SDK default (`127.0.0.1`). | +| `port` | Port for the spawned server. Defaults to the SDK default (`4096`). | +| `permissionMode` | `'default'` (bridged tools run, everything else that prompts is rejected), `'acceptEdits'` (also auto-approves file edits), or `'bypassPermissions'` (allow all). | +| `onPermissionRequest` | Custom permission handler; replaces the default policy entirely. 
| +| `config` | Extra OpenCode config merged with the adapter's MCP and permission config. | + +Per-call overrides — `sessionId`, `permissionMode`, `directory` — go through `modelOptions`. + +## Permissions + +OpenCode asks for permission before mutating files or running commands. A headless server has no one to answer those prompts, so the adapter applies a policy automatically — it never hangs a turn: + +- **`'default'`** — bridged TanStack tools run; anything else that would prompt (edits, shell, web fetch) is rejected. +- **`'acceptEdits'`** — additionally auto-approves file-mutation requests (edit / write / patch). +- **`'bypassPermissions'`** — approves everything. Only use this against a sandbox or scratch directory. + +Provide `onPermissionRequest` to implement your own policy (e.g. allow-list specific commands). + +## Stateful Sessions + +OpenCode sessions are stateful — the harness keeps the full working context (files read, commands run, conclusions reached) between turns. The adapter surfaces the session id of every fresh run as a custom stream event named `opencode.session-id`; thread it back via `modelOptions.sessionId` to resume. When resuming, only the latest user message is sent — the harness already holds the prior context. + +Server endpoint: + +```typescript +import { + chat, + chatParamsFromRequest, + toServerSentEventsResponse, +} from "@tanstack/ai"; +import { opencodeText } from "@tanstack/ai-opencode"; + +export async function POST(request: Request) { + const params = await chatParamsFromRequest(request); + + // Extra fields the client puts in the connection `body` arrive here. + const sessionId = + typeof params.forwardedProps.sessionId === "string" + ? 
params.forwardedProps.sessionId + : undefined; + + const stream = chat({ + adapter: opencodeText("anthropic/claude-sonnet-4-5", { + directory: "/path/to/project", + permissionMode: "acceptEdits", + }), + messages: params.messages, + modelOptions: { sessionId }, + }); + + return toServerSentEventsResponse(stream); +} +``` + +Client (React) — capture the session id from the custom event and send it back on subsequent requests: + +```typescript +import { useState } from "react"; +import { useChat } from "@tanstack/ai-react"; +import { fetchServerSentEvents } from "@tanstack/ai-client"; + +function CodingAssistant() { + const [sessionId, setSessionId] = useState<string | undefined>(undefined); + + const { messages, sendMessage } = useChat({ + connection: fetchServerSentEvents("/api/chat", () => ({ + body: { sessionId }, + })), + onCustomEvent: (name, value) => { + if ( + name === "opencode.session-id" && + typeof value === "object" && + value !== null && + "sessionId" in value && + typeof value.sessionId === "string" + ) { + setSessionId(value.sessionId); + } + }, + }); + + // ... render messages; harness tool activity (bash, edit, read, ...) + // arrives as regular tool-call parts with results. +} +``` + +Sessions live on the server that ran them, so resuming only works against the same server instance (or a shared `baseUrl`). + +## Tools + +Two kinds of tools flow through this adapter: + +1. **Built-in harness tools** are executed by OpenCode itself and stream back as tool-call events with results already attached: `bash`, `edit`, `write`, `read`, `grep`, and the agent's running todo plan (surfaced as an `opencode.todo` custom event). Your code never executes them. + +2. **Your TanStack tools** are bridged *into* the harness: the adapter starts a short-lived Streamable-HTTP MCP server on `127.0.0.1` for the duration of the turn and registers it with OpenCode.
Define tools as usual with `toolDefinition().server()`; tool-call events come back under the names you registered (OpenCode prefixes MCP tools `tanstack_…` internally, which the adapter strips). + +```typescript +import { z } from "zod"; +import { chat, toolDefinition } from "@tanstack/ai"; +import { opencodeText } from "@tanstack/ai-opencode"; + +const lookupTicket = toolDefinition({ + name: "lookup_ticket", + description: "Look up an issue ticket by id", + inputSchema: z.object({ ticketId: z.string() }), +}).server(async ({ ticketId }) => { + return { ticketId, status: "open", title: "Crash on startup" }; +}); + +const stream = chat({ + adapter: opencodeText("anthropic/claude-sonnet-4-5"), + messages: [{ role: "user", content: "What's the status of ticket T-123?" }], + tools: [lookupTicket], +}); +``` + +**Client-side and approval-gated tools are not supported.** The harness executes tools inside a live process, which cannot pause across HTTP requests to wait for a browser round-trip or a human approval. Passing a tool without a server `execute()` implementation — or one marked `needsApproval` — fails fast with a descriptive error. Run those tools outside the harness with a regular provider adapter. + +## Structured Output + +`structuredOutput()` is best-effort: OpenCode's prompt API has no native JSON-schema channel, so the schema is embedded as a prompt instruction in a fresh, one-shot session and the final text is parsed (markdown fences are stripped when present). It works for finalization after a chat, but a plain provider adapter (e.g. `@tanstack/ai-openai`) is the better choice when structured extraction is the primary job — it's faster, deterministic, and doesn't spawn a harness. + +## Limitations + +- **Server-only (Node).** The adapter spawns or attaches to an `opencode serve` process. +- **The harness owns the agent loop.** TanStack's agent-loop strategies and per-iteration middleware don't apply inside a harness turn. 
+- **No sampling controls.** `temperature`-style options don't exist here. +- **Sessions are server-local.** Resume requires hitting the same server instance (or a shared `baseUrl`). +- **Cold starts.** Spawning a server per turn adds first-token latency; point the adapter at a long-lived `baseUrl` to avoid it. diff --git a/docs/config.json b/docs/config.json index 05ab94858..ab8e11e07 100644 --- a/docs/config.json +++ b/docs/config.json @@ -461,6 +461,11 @@ "label": "Gemini CLI", "to": "adapters/gemini-cli", "addedAt": "2026-06-12" + }, + { + "label": "OpenCode", + "to": "adapters/opencode", + "addedAt": "2026-06-12" } ] }, diff --git a/examples/ts-react-coding-agent/README.md b/examples/ts-react-coding-agent/README.md index 98bc24bdc..b40587d90 100644 --- a/examples/ts-react-coding-agent/README.md +++ b/examples/ts-react-coding-agent/README.md @@ -3,9 +3,10 @@ A React (TanStack Start) app that drives **coding-agent harnesses** through TanStack AI — [Claude Code](https://docs.anthropic.com/en/docs/claude-code) via `@tanstack/ai-claude-code`, [Codex](https://developers.openai.com/codex) -via `@tanstack/ai-codex`, and +via `@tanstack/ai-codex`, [Gemini CLI](https://github.com/google-gemini/gemini-cli) via -`@tanstack/ai-gemini-cli`, switchable from a dropdown. +`@tanstack/ai-gemini-cli`, and [OpenCode](https://opencode.ai) via +`@tanstack/ai-opencode`, switchable from a dropdown. Unlike a normal chat example, the agent here runs its own loop server-side and executes its own tools — reading, searching, and (in Edit mode) editing @@ -16,24 +17,25 @@ timeline of resolved tool calls. 
- **Session resume** — the server emits the harness session id via a `.session-id` custom event (`claude-code.session-id`, - `codex.session-id`, `gemini-cli.session-id`); the client pins it and sends + `codex.session-id`, `gemini-cli.session-id`, `opencode.session-id`); the + client pins it and sends it back through `forwardedProps` → `modelOptions.sessionId`, so follow-ups continue the same stateful session. Switching agents resets the session. - **Harness tool timeline** — built-in tools (Read, Grep, Edit, command_execution, ...) arrive as already-resolved tool-call parts and render with their inputs/outputs. Note that Codex streams text - message-at-a-time (its SDK has no token deltas), while Claude Code and - Gemini CLI stream token-by-token. + message-at-a-time (its SDK has no token deltas), while Claude Code, + Gemini CLI, and OpenCode stream token-by-token. - **Permission modes** — a Read-only/Edit toggle maps to each harness's knobs: `disallowedTools` vs `permissionMode: 'acceptEdits'` for Claude Code, `sandboxMode: 'read-only'` vs `'workspace-write'` for Codex, and - the default-deny vs `acceptEdits` permission policy for Gemini CLI. With - Claude Code and Gemini CLI, ask it to run a shell command and watch the - denial show up in the timeline. + the default-deny vs `acceptEdits` permission policy for Gemini CLI and + OpenCode. With Claude Code, Gemini CLI, and OpenCode, ask it to run a + shell command and watch the denial show up in the timeline. - **Tool bridging** — `lookup_style_guide` is an ordinary TanStack server tool the harness calls from inside its own loop (in-process MCP for - Claude Code; a localhost Streamable-HTTP MCP bridge for Codex and - Gemini CLI). + Claude Code; a localhost Streamable-HTTP MCP bridge for Codex, + Gemini CLI, and OpenCode). - **Sandboxed cwd** — the agent only works inside `workspace/`. 
## Running @@ -89,6 +91,19 @@ GEMINI_ACP_AUTH_METHOD=oauth-personal GEMINI_CLI_TRUST_WORKSPACE=true pnpm dev To use an API key instead, set `GEMINI_API_KEY` and `GEMINI_ACP_AUTH_METHOD=gemini-api-key`. +**OpenCode** ([docs](https://opencode.ai/docs)) + +```bash +npm i -g opencode-ai # install the CLI +opencode auth login # authenticate a provider (interactive) +# …or set the provider API key in the server env (this example uses Anthropic): +export ANTHROPIC_API_KEY=sk-ant-… +``` + +The adapter spawns `opencode serve` per turn, so the CLI must be on `PATH`. The +example drives the `anthropic/claude-sonnet-4-5` model; point it at a different +`provider/model` in `src/routes/api.chat.ts` to use another provider. + ### 2. Install and run ```bash @@ -125,6 +140,9 @@ configured when: - **Gemini CLI** — `GEMINI_API_KEY` or `GEMINI_ACP_AUTH_METHOD` is set (a cached Google login alone isn't enough for headless ACP, so it isn't counted). +- **OpenCode** — a provider key (`ANTHROPIC_API_KEY` / `OPENAI_API_KEY` / + `GEMINI_API_KEY`) is set, or an `opencode auth login` credential file + (`~/.local/share/opencode/auth.json`) exists. Detection runs at server startup time per request to the loader, so set your env vars / log in **before** `pnpm dev` (or restart it after). 
diff --git a/examples/ts-react-coding-agent/package.json b/examples/ts-react-coding-agent/package.json index a6b6555a4..2d8e74c09 100644 --- a/examples/ts-react-coding-agent/package.json +++ b/examples/ts-react-coding-agent/package.json @@ -16,6 +16,7 @@ "@tanstack/ai-client": "workspace:*", "@tanstack/ai-codex": "workspace:*", "@tanstack/ai-gemini-cli": "workspace:*", + "@tanstack/ai-opencode": "workspace:*", "@tanstack/ai-react": "workspace:*", "@tanstack/nitro-v2-vite-plugin": "^1.154.7", "@tanstack/react-router": "^1.158.4", diff --git a/examples/ts-react-coding-agent/src/lib/agent-status.ts b/examples/ts-react-coding-agent/src/lib/agent-status.ts index 3a28cae1b..d2db68e9c 100644 --- a/examples/ts-react-coding-agent/src/lib/agent-status.ts +++ b/examples/ts-react-coding-agent/src/lib/agent-status.ts @@ -43,10 +43,21 @@ export const getAgentConfigFn = createServerFn({ method: 'GET' }).handler( const geminiCli = Boolean(env.GEMINI_API_KEY) || Boolean(env.GEMINI_ACP_AUTH_METHOD) + // OpenCode resolves any configured provider — count a provider API key in + // the environment or an `opencode auth login` credential file. + const opencode = + Boolean(env.ANTHROPIC_API_KEY) || + Boolean(env.OPENAI_API_KEY) || + Boolean(env.GEMINI_API_KEY) || + (await fileExists( + path.join(home, '.local', 'share', 'opencode', 'auth.json'), + )) + return { 'claude-code': claudeCode, codex, 'gemini-cli': geminiCli, + opencode, } }, ) diff --git a/examples/ts-react-coding-agent/src/lib/agents.ts b/examples/ts-react-coding-agent/src/lib/agents.ts index 319a7e6cd..00aaf9bad 100644 --- a/examples/ts-react-coding-agent/src/lib/agents.ts +++ b/examples/ts-react-coding-agent/src/lib/agents.ts @@ -3,21 +3,28 @@ * * Each entry maps to a harness adapter on the server (see * `src/routes/api.chat.ts`): Claude Code (`@tanstack/ai-claude-code`), - * Codex (`@tanstack/ai-codex`), and Gemini CLI (`@tanstack/ai-gemini-cli`). 
+ * Codex (`@tanstack/ai-codex`), Gemini CLI (`@tanstack/ai-gemini-cli`), and + * OpenCode (`@tanstack/ai-opencode`). */ export const AGENTS = [ { id: 'claude-code', label: 'Claude Code' }, { id: 'codex', label: 'Codex' }, { id: 'gemini-cli', label: 'Gemini CLI' }, + { id: 'opencode', label: 'OpenCode' }, ] as const /** Agent ids with a working adapter behind them. */ -export type AgentId = 'claude-code' | 'codex' | 'gemini-cli' +export type AgentId = 'claude-code' | 'codex' | 'gemini-cli' | 'opencode' export const DEFAULT_AGENT: AgentId = 'claude-code' export function isAgentId(value: unknown): value is AgentId { - return value === 'claude-code' || value === 'codex' || value === 'gemini-cli' + return ( + value === 'claude-code' || + value === 'codex' || + value === 'gemini-cli' || + value === 'opencode' + ) } /** A single, optionally command-bearing step in an agent's setup guide. */ @@ -102,16 +109,37 @@ export const AGENT_SETUP: Record = { ], docsUrl: 'https://github.com/google-gemini/gemini-cli', }, + opencode: { + label: 'OpenCode', + summary: + 'Drives OpenCode through @tanstack/ai-opencode. Needs the opencode CLI installed and a provider authenticated on the server.', + steps: [ + { + text: 'Install the OpenCode CLI:', + code: 'npm i -g opencode-ai', + }, + { + text: 'Authenticate a provider once (interactive):', + code: 'opencode auth login', + }, + { + text: '…or set the provider API key in the server environment instead:', + code: 'export ANTHROPIC_API_KEY=sk-ant-…', + }, + { text: 'Restart the dev server so it picks up new credentials.' }, + ], + docsUrl: 'https://opencode.ai/docs', + }, } /** * What the agent is allowed to do in the workspace: * - `read-only`: it can read and search, but file edits and shell commands * are blocked. 
- * - `edit`: file edits are auto-approved; with Claude Code and Gemini CLI, - * shell commands still get denied by each adapter's default permission - * policy (a deliberate demo of the permission system), while Codex - * sandboxes them inside the workspace instead. + * - `edit`: file edits are auto-approved; with Claude Code, Gemini CLI, and + * OpenCode, shell commands still get denied by each adapter's default + * permission policy (a deliberate demo of the permission system), while + * Codex sandboxes them inside the workspace instead. */ export type AgentMode = 'read-only' | 'edit' diff --git a/examples/ts-react-coding-agent/src/routes/api.chat.ts b/examples/ts-react-coding-agent/src/routes/api.chat.ts index 63b08b416..6e4896936 100644 --- a/examples/ts-react-coding-agent/src/routes/api.chat.ts +++ b/examples/ts-react-coding-agent/src/routes/api.chat.ts @@ -8,6 +8,7 @@ import { import { claudeCodeText } from '@tanstack/ai-claude-code' import { codexText } from '@tanstack/ai-codex' import { geminiCliText } from '@tanstack/ai-gemini-cli' +import { opencodeText } from '@tanstack/ai-opencode' import { isAgentId, isAgentMode } from '@/lib/agents' import { lookupStyleGuide } from '@/lib/style-guide-tool' import type { AgentId, AgentMode } from '@/lib/agents' @@ -61,6 +62,14 @@ function createAdapter( authMethodId: process.env.GEMINI_ACP_AUTH_METHOD, }), }) + case 'opencode': + return opencodeText('anthropic/claude-sonnet-4-5', { + directory: cwd, + // Edit mode auto-approves file edits; shell commands still get + // rejected by the adapter's default permission policy, same demo + // as Claude Code and Gemini CLI above. + permissionMode: mode === 'edit' ? 
'acceptEdits' : 'default', + }) } } diff --git a/packages/ai-opencode/README.md b/packages/ai-opencode/README.md new file mode 100644 index 000000000..80f12e243 --- /dev/null +++ b/packages/ai-opencode/README.md @@ -0,0 +1,18 @@ +# @tanstack/ai-opencode + +OpenCode harness adapter for [TanStack AI](https://tanstack.com/ai) — run [OpenCode](https://opencode.ai) (via `@opencode-ai/sdk`) as a chat backend with local tool execution, token-level streaming, stateful sessions, and TanStack tool bridging. + +```typescript +import { chat } from '@tanstack/ai' +import { opencodeText } from '@tanstack/ai-opencode' + +const stream = chat({ + adapter: opencodeText('anthropic/claude-sonnet-4-5', { + directory: '/path/to/project', + permissionMode: 'acceptEdits', + }), + messages: [{ role: 'user', content: 'Fix the failing test.' }], +}) +``` + +Server-only (Node); requires the `opencode` CLI installed and authenticated. See the [OpenCode adapter docs](https://tanstack.com/ai/latest/docs/adapters/opencode) for sessions, tool bridging, permissions, and limitations. 
diff --git a/packages/ai-opencode/package.json b/packages/ai-opencode/package.json new file mode 100644 index 000000000..f3a85c28c --- /dev/null +++ b/packages/ai-opencode/package.json @@ -0,0 +1,58 @@ +{ + "name": "@tanstack/ai-opencode", + "version": "0.1.0", + "description": "OpenCode harness adapter for TanStack AI — run OpenCode as a chat backend with local tool execution and stateful sessions.", + "author": "", + "license": "MIT", + "repository": { + "type": "git", + "url": "git+https://github.com/TanStack/ai.git", + "directory": "packages/ai-opencode" + }, + "keywords": [ + "ai", + "ai-sdk", + "typescript", + "tanstack", + "opencode", + "harness", + "agent", + "adapter", + "chat", + "tool-calling" + ], + "type": "module", + "module": "./dist/esm/index.js", + "types": "./dist/esm/index.d.ts", + "exports": { + ".": { + "types": "./dist/esm/index.d.ts", + "import": "./dist/esm/index.js" + } + }, + "files": [ + "dist", + "src" + ], + "scripts": { + "build": "vite build", + "clean": "premove ./build ./dist", + "lint:fix": "eslint ./src --fix", + "test:build": "publint --strict", + "test:eslint": "eslint ./src", + "test:lib": "vitest", + "test:lib:dev": "pnpm test:lib --watch", + "test:types": "tsc" + }, + "dependencies": { + "@modelcontextprotocol/sdk": "^1.29.0", + "@opencode-ai/sdk": "^1.17.4" + }, + "peerDependencies": { + "@tanstack/ai": "workspace:^" + }, + "devDependencies": { + "@tanstack/ai": "workspace:*", + "@vitest/coverage-v8": "4.0.14" + } +} diff --git a/packages/ai-opencode/src/adapters/text.ts b/packages/ai-opencode/src/adapters/text.ts new file mode 100644 index 000000000..0d287d82b --- /dev/null +++ b/packages/ai-opencode/src/adapters/text.ts @@ -0,0 +1,407 @@ +import { EventType, normalizeSystemPrompts } from '@tanstack/ai' +import { toRunErrorRawEvent } from '@tanstack/ai/adapter-internals' +import { BaseTextAdapter } from '@tanstack/ai/adapters' +import { buildPrompt } from '../messages/prompt' +import { startToolBridge } from 
'../tools/bridge' +import { startOpencodeSession } from '../process/server' +import { resolvePermission } from '../process/permissions' +import { AsyncQueue } from '../stream/queue' +import { + BRIDGED_MCP_SERVER_NAME, + translateOpencodeStream, +} from '../stream/translate' +import type { + StructuredOutputOptions, + StructuredOutputResult, +} from '@tanstack/ai/adapters' +import type { + AnyTool, + DefaultMessageMetadataByModality, + Modality, + StreamChunk, + TextOptions, +} from '@tanstack/ai' +import type { Config } from '@opencode-ai/sdk' +import type { OpencodeSessionHandle } from '../process/server' +import type { + OpencodePermissionMode, + PermissionHandler, +} from '../process/permissions' +import type { OpencodeStreamEvent } from '../stream/sdk-types' +import type { OpencodeModel } from '../model-meta' +import type { OpencodeTextProviderOptions } from '../provider-options' +import type { ToolBridgeHandle } from '../tools/bridge' + +export interface OpencodeTextConfig { + /** Working directory for the harness session. Defaults to `process.cwd()`. */ + directory?: string + /** + * Attach to an already-running `opencode serve` instead of spawning a new + * server for each turn (e.g. `http://127.0.0.1:4096`). When omitted, the + * adapter boots and tears down its own server per turn. + */ + baseUrl?: string + /** Hostname for the spawned server. Defaults to the SDK default (`127.0.0.1`). */ + hostname?: string + /** Port for the spawned server. Defaults to the SDK default (`4096`). */ + port?: number + /** + * OpenCode permission mode. Without an explicit mode or a custom + * `onPermissionRequest`, the adapter's default policy auto-allows bridged + * TanStack tools and rejects anything else that would normally prompt — + * set `'acceptEdits'` / `'bypassPermissions'` to let the harness edit files + * and run commands on a headless server. + */ + permissionMode?: OpencodePermissionMode + /** Custom permission handler; replaces the adapter's default policy. 
*/ + onPermissionRequest?: PermissionHandler + /** Extra OpenCode config merged with the adapter's mcp/permission config. */ + config?: Config +} + +function validateTools(tools: Array | undefined): void { + if (!tools || tools.length === 0) return + const unsupported = tools.filter( + (tool) => typeof tool.execute !== 'function' || tool.needsApproval === true, + ) + if (unsupported.length > 0) { + throw new Error( + `OpenCode harness cannot execute client-side or approval-gated tools: ${unsupported + .map((tool) => tool.name) + .join( + ', ', + )}. Provide server execute() implementations without needsApproval, or run these tools outside the harness.`, + ) + } +} + +/** Split a `provider/model` id into its provider and model halves. */ +function splitModel(model: string): { providerID: string; modelID: string } { + const slash = model.indexOf('/') + if (slash <= 0 || slash === model.length - 1) { + throw new Error( + `OpenCode models must be addressed as "provider/model" (e.g. "anthropic/claude-sonnet-4-5"); received "${model}".`, + ) + } + return { providerID: model.slice(0, slash), modelID: model.slice(slash + 1) } +} + +/** Baseline server permission config for a mode (the dynamic policy still runs). */ +function permissionConfig( + mode: OpencodePermissionMode, +): NonNullable { + switch (mode) { + case 'bypassPermissions': + return { edit: 'allow', bash: 'allow', webfetch: 'allow' } + case 'acceptEdits': + return { edit: 'allow', bash: 'ask', webfetch: 'ask' } + case 'default': + return { edit: 'ask', bash: 'ask', webfetch: 'ask' } + } +} + +/** Extract the first JSON object/array from possibly fenced model output. */ +function extractJson(text: string): unknown { + const trimmed = text.trim() + const unfenced = trimmed.startsWith('```') + ? 
trimmed.replace(/^```[a-zA-Z]*\n?/, '').replace(/\n?```$/, '') + : trimmed + try { + return JSON.parse(unfenced) + } catch { + const start = unfenced.search(/[{[]/) + if (start === -1) { + throw new Error( + `OpenCode structured output is not valid JSON: ${text.slice(0, 200)}`, + ) + } + const end = Math.max(unfenced.lastIndexOf('}'), unfenced.lastIndexOf(']')) + return JSON.parse(unfenced.slice(start, end + 1)) + } +} + +export class OpencodeTextAdapter< + TModel extends OpencodeModel, +> extends BaseTextAdapter< + TModel, + OpencodeTextProviderOptions, + ReadonlyArray & readonly ['text'], + DefaultMessageMetadataByModality, + ReadonlyArray, + unknown, + never +> { + readonly name = 'opencode' as const + + private readonly adapterConfig: OpencodeTextConfig + + constructor(config: OpencodeTextConfig, model: TModel) { + super({}, model) + this.adapterConfig = config + } + + async *chatStream( + options: TextOptions, + ): AsyncIterable { + const { logger } = options + let bridge: ToolBridgeHandle | undefined + let handle: OpencodeSessionHandle | undefined + const externalSignal = + options.abortController?.signal ?? options.request?.signal ?? undefined + let onAbort: (() => void) | undefined + + try { + validateTools(options.tools) + + const modelOptions = options.modelOptions + const sessionId = modelOptions?.sessionId + // Validates the trailing user message up front (throws before any + // server is spawned) and prepares the resume-path prompt. + const { prompt: resumePrompt } = buildPrompt(options.messages, sessionId) + const { providerID, modelID } = splitModel(this.model) + + if (options.tools && options.tools.length > 0) { + bridge = await startToolBridge(options.tools) + } + const bridgedToolNames = new Set( + (options.tools ?? []).map((tool) => tool.name), + ) + + const queue = new AsyncQueue() + const mode = + modelOptions?.permissionMode ?? + this.adapterConfig.permissionMode ?? 
+ 'default' + const permissionHandler: PermissionHandler = + this.adapterConfig.onPermissionRequest ?? + ((request) => resolvePermission(request, mode, bridgedToolNames)) + + logger.request( + `activity=chat provider=opencode model=${this.model} messages=${options.messages.length} tools=${options.tools?.length ?? 0} resume=${sessionId ?? 'none'}`, + { provider: 'opencode', model: this.model }, + ) + + handle = await startOpencodeSession({ + ...(this.adapterConfig.baseUrl !== undefined && { + baseUrl: this.adapterConfig.baseUrl, + }), + ...(this.adapterConfig.hostname !== undefined && { + hostname: this.adapterConfig.hostname, + }), + ...(this.adapterConfig.port !== undefined && { + port: this.adapterConfig.port, + }), + ...(this.adapterConfig.config !== undefined && { + config: this.adapterConfig.config, + }), + directory: + modelOptions?.directory ?? + this.adapterConfig.directory ?? + process.cwd(), + providerID, + modelID, + permission: permissionConfig(mode), + ...(bridge !== undefined && { + mcpServers: [{ name: BRIDGED_MCP_SERVER_NAME, url: bridge.url }], + }), + ...(sessionId !== undefined && { resumeSessionId: sessionId }), + onEvent: (event) => queue.push({ kind: 'event', event }), + onPermissionRequest: permissionHandler, + onError: (error) => queue.fail(error), + }) + const session = handle + + if (externalSignal !== undefined) { + onAbort = () => void session.abort().catch(() => undefined) + if (externalSignal.aborted) onAbort() + else externalSignal.addEventListener('abort', onAbort, { once: true }) + } + + queue.push({ kind: 'session', sessionId: session.sessionId }) + + // When resume was requested but the server no longer has the session, + // fall back to seeding a fresh session with the whole transcript. + const promptText = this.applySystemPrompts( + options, + session.resumed || sessionId === undefined + ? 
resumePrompt + : buildPrompt(options.messages, undefined).prompt, + ) + + session + .prompt(promptText) + .then(({ message }) => { + queue.push({ kind: 'done', message }) + queue.end() + }) + .catch((error: unknown) => queue.fail(error)) + + yield* translateOpencodeStream(queue, { + model: this.model, + runId: options.runId ?? this.generateId(), + threadId: options.threadId ?? this.generateId(), + ...(options.parentRunId !== undefined && { + parentRunId: options.parentRunId, + }), + genId: () => this.generateId(), + bridgedToolNames, + onStreamEvent: (event) => + logger.provider(`provider=opencode kind=${event.kind}`, { + chunk: event, + }), + }) + } catch (error: unknown) { + const err = error as Error & { code?: string } + const rawEvent = toRunErrorRawEvent(error) + logger.errors('opencode.chatStream fatal', { + error, + source: 'opencode.chatStream', + }) + yield { + type: EventType.RUN_ERROR, + model: options.model, + timestamp: Date.now(), + message: err.message || 'Unknown error occurred', + ...(err.code !== undefined && { code: err.code }), + ...(rawEvent !== undefined && { rawEvent }), + error: { + message: err.message || 'Unknown error occurred', + ...(err.code !== undefined && { code: err.code }), + }, + } + } finally { + if (externalSignal !== undefined && onAbort !== undefined) { + externalSignal.removeEventListener('abort', onAbort) + } + await handle?.dispose() + await bridge?.close() + } + } + + /** + * Structured output, best-effort: OpenCode's typed prompt API has no native + * JSON-schema channel, so the schema is embedded as a prompt instruction in + * a fresh one-shot session and the final text is parsed (markdown fences are + * stripped when present). Runs with the default deny-everything permission + * policy. 
+ */ + async structuredOutput( + options: StructuredOutputOptions, + ): Promise> { + const { chatOptions, outputSchema } = options + const { logger } = chatOptions + + // Fresh one-shot run: deliberately no resume, so finalization never + // mutates the caller's interactive session. No bridge either — tools are + // a chat concern. + const { prompt } = buildPrompt(chatOptions.messages, undefined) + const { providerID, modelID } = splitModel(this.model) + const instruction = `Respond with ONLY a JSON value that conforms to this JSON Schema — no prose, no markdown fences:\n${JSON.stringify(outputSchema)}` + const promptText = this.applySystemPrompts( + chatOptions, + `${prompt}\n\n${instruction}`, + ) + + logger.request( + `activity=structured-output provider=opencode model=${this.model}`, + { provider: 'opencode', model: this.model }, + ) + + const handle = await startOpencodeSession({ + ...(this.adapterConfig.baseUrl !== undefined && { + baseUrl: this.adapterConfig.baseUrl, + }), + ...(this.adapterConfig.hostname !== undefined && { + hostname: this.adapterConfig.hostname, + }), + ...(this.adapterConfig.port !== undefined && { + port: this.adapterConfig.port, + }), + ...(this.adapterConfig.config !== undefined && { + config: this.adapterConfig.config, + }), + directory: + chatOptions.modelOptions?.directory ?? + this.adapterConfig.directory ?? + process.cwd(), + providerID, + modelID, + permission: permissionConfig('default'), + onEvent: () => undefined, + onPermissionRequest: (request) => + resolvePermission(request, 'default', undefined), + }) + + let rawText = '' + let usage: { input?: number; output?: number } | undefined + try { + const result = await handle.prompt(promptText) + rawText = result.text + usage = result.message.tokens + if (result.message.error) { + throw new Error( + result.message.error.data?.message ?? 
result.message.error.name, + ) + } + } finally { + await handle.dispose() + } + + if (rawText.trim() === '') { + throw new Error( + 'OpenCode run ended without a response during structured output generation.', + ) + } + + const promptTokens = usage?.input ?? 0 + const completionTokens = usage?.output ?? 0 + return { + data: extractJson(rawText), + rawText, + usage: { + promptTokens, + completionTokens, + totalTokens: promptTokens + completionTokens, + }, + } + } + + /** + * OpenCode prompts have no separate system-prompt channel here, so + * `systemPrompts` from `chat()` are prepended to the prompt text as an + * instruction preamble. + */ + private applySystemPrompts( + options: TextOptions, + prompt: string, + ): string { + const systemPrompts = normalizeSystemPrompts(options.systemPrompts) + .map((systemPrompt) => systemPrompt.content) + .filter((content) => content.trim() !== '') + if (systemPrompts.length === 0) return prompt + return `${systemPrompts.join('\n\n')}\n\n${prompt}` + } +} + +/** + * Creates an OpenCode text adapter. + * + * Unlike HTTP provider adapters, this is a *harness* adapter: OpenCode runs + * its own agent loop and executes its own tools (shell commands, file edits, + * search, ...) locally, server-side. The adapter drives OpenCode over its + * HTTP server (`@opencode-ai/sdk`), so assistant text and reasoning stream as + * true token-level deltas. Each `chat()` call runs one full harness turn; + * harness tool activity streams back as already-resolved tool-call events, and + * the session id is surfaced via a CUSTOM `opencode.session-id` event so + * follow-up calls can resume the session through `modelOptions.sessionId`. + * + * Models are addressed as `provider/model` (e.g. + * `anthropic/claude-sonnet-4-5`). Requires the `opencode` CLI to be installed + * and its providers authenticated on the host (`npm i -g opencode-ai`). 
+ */ +export function opencodeText( + model: TModel, + config: OpencodeTextConfig = {}, +): OpencodeTextAdapter { + return new OpencodeTextAdapter(config, model) +} diff --git a/packages/ai-opencode/src/index.ts b/packages/ai-opencode/src/index.ts new file mode 100644 index 000000000..f6d73fd14 --- /dev/null +++ b/packages/ai-opencode/src/index.ts @@ -0,0 +1,40 @@ +export { OpencodeTextAdapter, opencodeText } from './adapters/text' +export type { OpencodeTextConfig } from './adapters/text' +export type { OpencodeTextProviderOptions } from './provider-options' +export { OPENCODE_MODELS } from './model-meta' +export type { OpencodeModel, KnownOpencodeModel } from './model-meta' +export { + SESSION_ID_EVENT, + TODO_EVENT, + BRIDGED_MCP_SERVER_NAME, + translateOpencodeStream, + resolveToolName, +} from './stream/translate' +export type { TranslateContext } from './stream/translate' +export type { + OpencodeAssistantMessage, + OpencodeEvent, + OpencodePart, + OpencodeStreamEvent, + OpencodeTokens, + OpencodeToolState, +} from './stream/sdk-types' +export { + resolvePermission, + matchBridgedToolName, +} from './process/permissions' +export type { + OpencodePermissionMode, + OpencodePermissionRequest, + OpencodePermissionResponse, + PermissionHandler, +} from './process/permissions' +export { startOpencodeSession } from './process/server' +export type { + OpencodeSessionHandle, + StartOpencodeSessionOptions, +} from './process/server' +export { buildPrompt } from './messages/prompt' +export type { BuiltPrompt } from './messages/prompt' +export { startToolBridge } from './tools/bridge' +export type { ToolBridgeHandle } from './tools/bridge' diff --git a/packages/ai-opencode/src/messages/prompt.ts b/packages/ai-opencode/src/messages/prompt.ts new file mode 100644 index 000000000..bb2824e17 --- /dev/null +++ b/packages/ai-opencode/src/messages/prompt.ts @@ -0,0 +1,67 @@ +import type { ModelMessage } from '@tanstack/ai' + +export interface BuiltPrompt { + prompt: string + 
/** OpenCode session id to resume, when the caller threaded one through. */ + resume?: string +} + +function extractText(content: ModelMessage['content']): string { + if (content === null) return '' + if (typeof content === 'string') return content + return content + .map((part) => + part.type === 'text' && typeof part.content === 'string' + ? part.content + : '', + ) + .join('') +} + +/** + * Convert TanStack chat history into the OpenCode prompt + resume inputs. + * + * With a `sessionId`, the harness already holds the conversation context, so + * only the trailing user message is sent and the session is resumed. Without + * one, prior turns are flattened into a plain-text transcript preamble (tool + * messages and tool-call-only assistant turns are harness-internal noise and + * are skipped; prompts are text-only in v1). + */ +export function buildPrompt( + messages: Array, + sessionId: string | undefined, +): BuiltPrompt { + const lastMessage = messages.at(-1) + const lastUserText = + lastMessage?.role === 'user' ? extractText(lastMessage.content).trim() : '' + + if (!lastUserText) { + throw new Error( + 'OpenCode adapter requires a trailing user message with text content.', + ) + } + + if (sessionId !== undefined) { + return { prompt: lastUserText, resume: sessionId } + } + + const priorTurns = messages + .slice(0, -1) + .filter( + (message) => + (message.role === 'user' || message.role === 'assistant') && + extractText(message.content).trim() !== '', + ) + .map( + (message) => + `${message.role === 'user' ? 
'User' : 'Assistant'}: ${extractText(message.content).trim()}`, + ) + + if (priorTurns.length === 0) { + return { prompt: lastUserText } + } + + return { + prompt: `Previous conversation:\n${priorTurns.join('\n')}\n\n${lastUserText}`, + } +} diff --git a/packages/ai-opencode/src/model-meta.ts b/packages/ai-opencode/src/model-meta.ts new file mode 100644 index 000000000..95a4e8efd --- /dev/null +++ b/packages/ai-opencode/src/model-meta.ts @@ -0,0 +1,24 @@ +/** + * Models known to work with OpenCode. OpenCode is provider-agnostic — it + * resolves any `provider/model` id its configured providers support (via the + * Vercel AI SDK + Models.dev), so this list exists for autocomplete. Any + * string is accepted via the `(string & {})` escape hatch in + * {@link OpencodeModel}. + * + * Models are addressed as `provider_id/model_id` (e.g. + * `anthropic/claude-sonnet-4-5`); the adapter splits on the first `/`. + */ +export const OPENCODE_MODELS = [ + 'anthropic/claude-opus-4-5', + 'anthropic/claude-sonnet-4-5', + 'openai/gpt-5.2', + 'openai/gpt-5.1-codex', + 'google/gemini-3-pro-preview', + 'opencode/claude-sonnet-4-5', + 'opencode/gpt-5.1-codex', +] as const + +export type KnownOpencodeModel = (typeof OPENCODE_MODELS)[number] + +/** Any `provider/model` id accepted by OpenCode; known ids get autocomplete. */ +export type OpencodeModel = KnownOpencodeModel | (string & {}) diff --git a/packages/ai-opencode/src/process/permissions.ts b/packages/ai-opencode/src/process/permissions.ts new file mode 100644 index 000000000..a68b6b525 --- /dev/null +++ b/packages/ai-opencode/src/process/permissions.ts @@ -0,0 +1,83 @@ +/** + * Permission modes for the OpenCode adapter, mirroring the Claude Code and + * Gemini CLI adapters' semantics: + * + * - `'default'`: bridged TanStack tools run; anything else that asks for + * permission is rejected with no prompt (a headless server must never hang + * on an interactive question). 
+ * - `'acceptEdits'`: additionally auto-approves file-mutation requests + * (edit / write / patch). + * - `'bypassPermissions'`: approves everything. + */ +export type OpencodePermissionMode = + | 'default' + | 'acceptEdits' + | 'bypassPermissions' + +/** Structural subset of an OpenCode `permission.updated` payload. */ +export interface OpencodePermissionRequest { + id: string + sessionID: string + /** Permission category, e.g. `'edit'`, `'bash'`, `'webfetch'`, a tool id. */ + type: string + title: string + /** Tool call id this permission gates, when it gates a tool. */ + callID?: string +} + +/** OpenCode permission reply: allow once, allow always, or reject. */ +export type OpencodePermissionResponse = 'once' | 'always' | 'reject' + +/** Custom permission handler; replaces the adapter's default policy. */ +export type PermissionHandler = ( + request: OpencodePermissionRequest, +) => Promise | OpencodePermissionResponse + +/** Permission categories treated as file mutations for `'acceptEdits'`. */ +const EDIT_TYPES = new Set(['edit', 'write', 'patch']) + +/** + * Decide whether an OpenCode permission request targets one of the bridged + * TanStack tools. OpenCode names MCP tools `_` (e.g. + * `tanstack_lookup_user`), so a request is bridged when its type or title is + * a registered tool name, or carries the `tanstack` server prefix. + */ +export function matchBridgedToolName( + request: OpencodePermissionRequest, + bridgedToolNames: ReadonlySet | undefined, +): boolean { + if (!bridgedToolNames || bridgedToolNames.size === 0) return false + for (const field of [request.type, request.title]) { + if (typeof field !== 'string' || field === '') continue + if (bridgedToolNames.has(field)) return true + if (field.startsWith('tanstack_') && bridgedToolNames.has(field.slice(9))) { + return true + } + if (field.startsWith('tanstack.') && bridgedToolNames.has(field.slice(9))) { + return true + } + } + return false +} + +/** + * The adapter's default permission policy. 
Always answers immediately — never + * hangs a headless server on a question only an interactive user could + * answer. + */ +export function resolvePermission( + request: OpencodePermissionRequest, + mode: OpencodePermissionMode, + bridgedToolNames: ReadonlySet | undefined, +): OpencodePermissionResponse { + if (matchBridgedToolName(request, bridgedToolNames)) { + return 'once' + } + if (mode === 'bypassPermissions') { + return 'once' + } + if (mode === 'acceptEdits' && EDIT_TYPES.has(request.type)) { + return 'once' + } + return 'reject' +} diff --git a/packages/ai-opencode/src/process/server.ts b/packages/ai-opencode/src/process/server.ts new file mode 100644 index 000000000..53e34b505 --- /dev/null +++ b/packages/ai-opencode/src/process/server.ts @@ -0,0 +1,252 @@ +import { createOpencode, createOpencodeClient } from '@opencode-ai/sdk' +import type { Config, Event, OpencodeClient, Part } from '@opencode-ai/sdk' +import type { + OpencodeAssistantMessage, + OpencodeEvent, +} from '../stream/sdk-types' +import type { + OpencodePermissionRequest, + OpencodePermissionResponse, +} from './permissions' + +/** A live OpenCode session backed by an `opencode serve` HTTP server. */ +export interface OpencodeSessionHandle { + sessionId: string + /** Whether an existing session was actually resumed. */ + resumed: boolean + /** + * Run one prompt turn. Resolves with the final assistant message (finish + * reason, token usage, error) and its concatenated text once the harness + * goes idle. Streaming deltas arrive via `onEvent` while this is pending. + */ + prompt: ( + text: string, + ) => Promise<{ message: OpencodeAssistantMessage; text: string }> + /** Ask the harness to abort the in-flight prompt turn. */ + abort: () => Promise + /** Tear down the event subscription and (if owned) the server. */ + dispose: () => Promise +} + +export interface StartOpencodeSessionOptions { + /** Connect to an already-running server instead of spawning one. 
*/ + baseUrl?: string + /** Hostname for the spawned server. Defaults to the SDK default. */ + hostname?: string + /** Port for the spawned server. Defaults to the SDK default. */ + port?: number + /** Working directory for the session (absolute path). */ + directory: string + /** Provider id (the part before `/` in the model id). */ + providerID: string + /** Model id (the part after `/` in the model id). */ + modelID: string + /** Extra OpenCode config merged with the adapter's mcp/permission config. */ + config?: Config + /** Baseline permission policy applied to the spawned server. */ + permission?: Config['permission'] + /** MCP servers (e.g. the TanStack tool bridge) for the session. */ + mcpServers?: Array<{ name: string; url: string }> + /** Session id to resume; falls back to a fresh session when not found. */ + resumeSessionId?: string + onEvent: (event: OpencodeEvent) => void + onPermissionRequest: ( + request: OpencodePermissionRequest, + ) => Promise | OpencodePermissionResponse + /** Called when the event subscription fails mid-turn. */ + onError?: (error: unknown) => void +} + +/** Locate the session id an OpenCode event belongs to, when it carries one. */ +function sessionIdOf(event: Event): string | undefined { + const props = event.properties as { sessionID?: string } | undefined + if (props?.sessionID !== undefined) return props.sessionID + if (event.type === 'message.part.updated') { + return event.properties.part.sessionID + } + if (event.type === 'message.updated') { + return event.properties.info.sessionID + } + if (event.type === 'permission.updated') { + return event.properties.sessionID + } + return undefined +} + +function buildConfig(options: StartOpencodeSessionOptions): Config { + const mcp: NonNullable = { ...options.config?.mcp } + for (const server of options.mcpServers ?? 
[]) { + mcp[server.name] = { type: 'remote', url: server.url, enabled: true } + } + return { + ...options.config, + ...(Object.keys(mcp).length > 0 && { mcp }), + ...(options.permission !== undefined && { permission: options.permission }), + } +} + +/** + * Boot (or attach to) an OpenCode HTTP server, resolve a session, and wire its + * event subscription + permission replies. + * + * This module is the only place that touches `@opencode-ai/sdk`; the rest of + * the package works with the structural types in `sdk-types.ts`. + * + * Resume semantics: when `resumeSessionId` is set and the server still knows + * the session (same machine, same data dir), it is reused. Otherwise a fresh + * session is created and `resumed: false` tells the adapter to send the + * flattened transcript. + */ +export async function startOpencodeSession( + options: StartOpencodeSessionOptions, +): Promise { + const { directory } = options + + let client: OpencodeClient + let ownedServer: { close: () => void } | undefined + + if (options.baseUrl !== undefined) { + client = createOpencodeClient({ baseUrl: options.baseUrl, directory }) + } else { + const config = buildConfig(options) + const result = await createOpencode({ + ...(options.hostname !== undefined && { hostname: options.hostname }), + ...(options.port !== undefined && { port: options.port }), + ...(Object.keys(config).length > 0 && { config }), + }) + client = result.client + ownedServer = result.server + } + + // Mutated from several closures (the subscription loop, dispose, teardown); + // a holder object keeps reads typed as `boolean` rather than being + // flow-narrowed to a literal across those boundaries. + const lifecycle = { disposed: false } + + const teardown = async (): Promise => { + if (lifecycle.disposed) return + lifecycle.disposed = true + ownedServer?.close() + await Promise.resolve() + } + + try { + // Resolve the session before subscribing so the event filter has an id. 
+ let sessionId: string | undefined + let resumed = false + if (options.resumeSessionId !== undefined) { + const existing = await client.session.get({ + path: { id: options.resumeSessionId }, + query: { directory }, + }) + if (existing.data) { + sessionId = options.resumeSessionId + resumed = true + } + } + if (sessionId === undefined) { + const created = await client.session.create({ + query: { directory }, + body: {}, + throwOnError: true, + }) + sessionId = created.data.id + } + const resolvedSessionId = sessionId + + const handlePermission = async ( + permission: Extract< + Event, + { type: 'permission.updated' } + >['properties'], + ): Promise => { + try { + const response = await options.onPermissionRequest({ + id: permission.id, + sessionID: permission.sessionID, + type: permission.type, + title: permission.title, + ...(permission.callID !== undefined && { callID: permission.callID }), + }) + await client.postSessionIdPermissionsPermissionId({ + path: { id: permission.sessionID, permissionID: permission.id }, + query: { directory }, + body: { response }, + throwOnError: true, + }) + } catch (error) { + if (!lifecycle.disposed) options.onError?.(error) + } + } + + const subscription = await client.event.subscribe() + const stream = subscription.stream + + void (async () => { + try { + for await (const event of stream) { + if (lifecycle.disposed) break + const sid = sessionIdOf(event) + if (sid !== undefined && sid !== resolvedSessionId) continue + if (event.type === 'permission.updated') { + void handlePermission(event.properties) + continue + } + // The SDK event union is a structural superset of the subset the + // translator consumes; unknown event types match no translator + // branch and are ignored. 
+ options.onEvent(event as OpencodeEvent) + } + } catch (error) { + if (!lifecycle.disposed) options.onError?.(error) + } + })() + + return { + sessionId: resolvedSessionId, + resumed, + prompt: async (text: string) => { + const result = await client.session.prompt({ + path: { id: resolvedSessionId }, + query: { directory }, + body: { + model: { providerID: options.providerID, modelID: options.modelID }, + parts: [{ type: 'text', text }], + }, + throwOnError: true, + }) + const data = result.data + const message = data.info as OpencodeAssistantMessage + const responseText = data.parts + .filter((part): part is Extract => + part.type === 'text', + ) + .map((part) => part.text) + .join('') + return { message, text: responseText } + }, + abort: async () => { + try { + await client.session.abort({ + path: { id: resolvedSessionId }, + query: { directory }, + }) + } catch { + // Best-effort: the turn may already be finishing. + } + }, + dispose: async () => { + lifecycle.disposed = true + try { + await stream.return(undefined) + } catch { + // Ignore: stream may already be closed. + } + ownedServer?.close() + }, + } + } catch (error) { + await teardown() + throw error + } +} diff --git a/packages/ai-opencode/src/provider-options.ts b/packages/ai-opencode/src/provider-options.ts new file mode 100644 index 000000000..79c65e3f5 --- /dev/null +++ b/packages/ai-opencode/src/provider-options.ts @@ -0,0 +1,19 @@ +import type { OpencodePermissionMode } from './process/permissions' + +/** + * Per-call provider options for the OpenCode adapter, passed via + * `modelOptions` on `chat()`. + */ +export interface OpencodeTextProviderOptions { + /** + * Resume an existing OpenCode session. The adapter emits the session id of + * every fresh run via a CUSTOM `opencode.session-id` stream event; thread + * it back here to continue that session (only the latest user message is + * sent — the harness already holds the prior context). 
+ */ + sessionId?: string + /** Per-call override of the configured permission mode. */ + permissionMode?: OpencodePermissionMode + /** Per-call override of the harness working directory. */ + directory?: string +} diff --git a/packages/ai-opencode/src/stream/queue.ts b/packages/ai-opencode/src/stream/queue.ts new file mode 100644 index 000000000..f0f37c5e9 --- /dev/null +++ b/packages/ai-opencode/src/stream/queue.ts @@ -0,0 +1,64 @@ +/** + * Minimal promise-based async queue bridging the OpenCode event + * subscription's callback-style notifications into the async-iterable world + * the stream translator consumes. + */ +export class AsyncQueue implements AsyncIterable { + private readonly values: Array = [] + private readonly waiters: Array<{ + resolve: (result: IteratorResult) => void + reject: (error: unknown) => void + }> = [] + private ended = false + private error: unknown = undefined + private failed = false + + push(value: T): void { + if (this.ended || this.failed) return + const waiter = this.waiters.shift() + if (waiter) { + waiter.resolve({ value, done: false }) + } else { + this.values.push(value) + } + } + + /** Signal normal completion; pending and future reads resolve as done. */ + end(): void { + if (this.ended || this.failed) return + this.ended = true + for (const waiter of this.waiters.splice(0)) { + waiter.resolve({ value: undefined, done: true }) + } + } + + /** Signal failure; pending and future reads reject (after buffered values drain). 
*/ + fail(error: unknown): void { + if (this.ended || this.failed) return + this.failed = true + this.error = error + for (const waiter of this.waiters.splice(0)) { + waiter.reject(error) + } + } + + [Symbol.asyncIterator](): AsyncIterator { + return { + next: (): Promise> => { + if (this.values.length > 0) { + return Promise.resolve({ + value: this.values.shift() as T, + done: false, + }) + } + if (this.failed) return Promise.reject(this.error) + if (this.ended) { + return Promise.resolve({ value: undefined, done: true }) + } + return new Promise((resolve, reject) => { + this.waiters.push({ resolve, reject }) + }) + }, + } + } +} diff --git a/packages/ai-opencode/src/stream/sdk-types.ts b/packages/ai-opencode/src/stream/sdk-types.ts new file mode 100644 index 000000000..8deb3ce87 --- /dev/null +++ b/packages/ai-opencode/src/stream/sdk-types.ts @@ -0,0 +1,104 @@ +/** + * Structural subset of the `@opencode-ai/sdk` event types that the stream + * translator consumes. + * + * These are intentionally defined structurally (rather than imported from the + * OpenCode SDK) so the translator stays a pure, fixture-testable state machine + * and the package's public types don't depend on the SDK's generated schema + * types. Unknown part or event types fall through every branch at runtime. + */ + +export interface OpencodeTokens { + input?: number + output?: number + reasoning?: number + cache?: { read?: number; write?: number } +} + +/** Error payload attached to a failed assistant message. */ +export interface OpencodeMessageError { + name: string + data?: { message?: string } +} + +/** + * The final assistant message of a turn, returned by the blocking prompt + * call. Carries the finish reason, token usage, and any fatal error. 
+ */ +export interface OpencodeAssistantMessage { + id: string + role: 'assistant' + finish?: string + error?: OpencodeMessageError + tokens?: OpencodeTokens + cost?: number +} + +export type OpencodeToolState = + | { status: 'pending'; input?: Record } + | { + status: 'running' + input?: Record + title?: string + } + | { + status: 'completed' + input?: Record + output: string + title?: string + } + | { status: 'error'; input?: Record; error: string } + +/** + * The OpenCode message-part kinds the translator understands. The trailing + * catch-all member keeps the union open to other kinds (file, step-start, + * step-finish, snapshot, patch, agent, ...); the translator dispatches via + * the `is*Part` type guards, so those kinds simply match no guard. + */ +export type OpencodePart = + | { id: string; sessionID?: string; type: 'text'; text: string } + | { id: string; sessionID?: string; type: 'reasoning'; text: string } + | { + id: string + sessionID?: string + type: 'tool' + callID: string + tool: string + state: OpencodeToolState + } + | { id: string; sessionID?: string; type: string } + +/** + * The OpenCode events the translator understands. This is a closed + * discriminated union (so `event.type` narrows cleanly); the server forwards + * raw SDK events cast to this type, and any event whose `type` isn't listed + * here simply matches no branch and is ignored at runtime. 
+ */ +export type OpencodeEvent = + | { + type: 'message.part.updated' + properties: { part: OpencodePart; delta?: string } + } + | { + type: 'message.updated' + properties: { info: { sessionID?: string } } + } + | { type: 'session.idle'; properties: { sessionID: string } } + | { + type: 'session.error' + properties: { sessionID?: string; error?: OpencodeMessageError } + } + | { + type: 'todo.updated' + properties: { sessionID: string; todos: Array } + } + +/** + * Events fed to the translator: the session id once established, every + * session-scoped OpenCode event, and a terminal `done` carrying the final + * assistant message (the adapter's async queue produces these). + */ +export type OpencodeStreamEvent = + | { kind: 'session'; sessionId: string } + | { kind: 'event'; event: OpencodeEvent } + | { kind: 'done'; message: OpencodeAssistantMessage } diff --git a/packages/ai-opencode/src/stream/translate.ts b/packages/ai-opencode/src/stream/translate.ts new file mode 100644 index 000000000..3c679fb9e --- /dev/null +++ b/packages/ai-opencode/src/stream/translate.ts @@ -0,0 +1,417 @@ +import { EventType, buildBaseUsage } from '@tanstack/ai' +import type { StreamChunk, TokenUsage } from '@tanstack/ai' +import type { + OpencodeAssistantMessage, + OpencodeEvent, + OpencodePart, + OpencodeStreamEvent, + OpencodeTokens, +} from './sdk-types' + +/** Name of the CUSTOM event carrying the OpenCode session id. */ +export const SESSION_ID_EVENT = 'opencode.session-id' + +/** Name of the CUSTOM event carrying the harness's todo list updates. */ +export const TODO_EVENT = 'opencode.todo' + +/** Server name used for bridged TanStack tools. */ +export const BRIDGED_MCP_SERVER_NAME = 'tanstack' + +export interface TranslateContext { + model: string + runId: string + threadId: string + parentRunId?: string + genId: () => string + /** + * Names of bridged TanStack tools, used to surface the harness's MCP tool + * calls under the names the application registered. 
/**
 * Resolve the AG-UI tool-call name for an OpenCode tool part.
 *
 * OpenCode names MCP tools `<server>_<tool>`, so bridged TanStack tools
 * arrive as `tanstack_<name>` and are surfaced under the names the
 * application registered; everything else (built-in `read`, `edit`, `bash`,
 * ... and foreign MCP tools) uses the harness tool name verbatim.
 *
 * @param tool - Tool name as reported by the harness.
 * @param bridgedToolNames - Names of the bridged TanStack tools, if any.
 * @returns The registered name when `tool` is a prefixed bridged tool,
 *   otherwise `tool` unchanged.
 */
export function resolveToolName(
  tool: string,
  bridgedToolNames: ReadonlySet<string> | undefined,
): string {
  if (!bridgedToolNames || bridgedToolNames.size === 0) return tool
  // Already one of the registered names: nothing to strip.
  if (bridgedToolNames.has(tool)) return tool

  const bridgedPrefix = 'tanstack_'
  if (!tool.startsWith(bridgedPrefix)) return tool

  // Strip the prefix by its own length (no separate magic number to keep in
  // sync), and only when the remainder is a name the application registered.
  const registeredName = tool.slice(bridgedPrefix.length)
  return bridgedToolNames.has(registeredName) ? registeredName : tool
}
/**
 * Translate an OpenCode event stream into AG-UI StreamChunk events.
 *
 * The harness runs its own agent loop and executes its own tools, so the
 * translation always ends with `finishReason: 'stop'` (or `'length'` /
 * RUN_ERROR) — never `'tool_calls'`. Harness tool activity is emitted as
 * already-resolved TOOL_CALL_START/ARGS/END + TOOL_CALL_RESULT sequences so
 * UIs can render it, while the TanStack engine never tries to execute them.
 *
 * OpenCode delivers true token-level deltas for both assistant text and
 * reasoning via `message.part.updated` events (a `delta` string when
 * incremental, otherwise the full part text, from which the delta is
 * derived). The final assistant message — finish reason, token usage, and any
 * fatal error — arrives as the terminal `done` event.
 *
 * Invariant: every TOOL_CALL_START is eventually paired with a
 * TOOL_CALL_RESULT (synthesized as `{"status":"interrupted"}` when the run
 * ends or aborts before the harness reported one) so the engine's
 * pending-tool-call scan on the next request never force-executes them. The
 * invariant also holds when `events` ends without a `done` event or throws.
 *
 * @param events - Session id, OpenCode events and the terminal `done`
 *   message, in arrival order.
 * @param ctx - Run identity (`model`, `runId`, `threadId`, optional
 *   `parentRunId`), the id generator for synthesized message ids, the bridged
 *   tool names, and an optional per-event logging hook.
 * @returns The translated chunk stream. An error thrown by `events` is
 *   rethrown after open messages and tool calls have been closed.
 */
export async function* translateOpencodeStream(
  events: AsyncIterable<OpencodeStreamEvent>,
  ctx: TranslateContext,
): AsyncIterable<StreamChunk> {
  const { model, runId, threadId, genId } = ctx
  const now = () => Date.now()

  let runStarted = false
  /** Tool calls started but with no result yet, keyed by callID. */
  const unresolvedToolCalls = new Set<string>()
  /** Tool call ids that already emitted TOOL_CALL_START/ARGS/END. */
  const openedToolCalls = new Set<string>()
  /** Tool call ids that already emitted a TOOL_CALL_RESULT. */
  const resolvedToolCalls = new Set<string>()

  /** Accumulated text per text/reasoning part id, for delta derivation. */
  const textAccumulators = new Map<string, string>()
  let openTextId: string | null = null
  let openReasoningId: string | null = null

  /** Emit RUN_STARTED once, before the first chunk of the run. */
  function* startRun(): Generator<StreamChunk> {
    if (runStarted) return
    runStarted = true
    yield {
      type: EventType.RUN_STARTED,
      runId,
      threadId,
      model,
      timestamp: now(),
      ...(ctx.parentRunId !== undefined && { parentRunId: ctx.parentRunId }),
    }
  }

  /** End the currently open text message, if any. */
  function* closeText(): Generator<StreamChunk> {
    if (openTextId !== null) {
      yield {
        type: EventType.TEXT_MESSAGE_END,
        messageId: openTextId,
        model,
        timestamp: now(),
      }
      openTextId = null
    }
  }

  /** End the currently open reasoning message, if any. */
  function* closeReasoning(): Generator<StreamChunk> {
    if (openReasoningId !== null) {
      yield {
        type: EventType.REASONING_MESSAGE_END,
        messageId: openReasoningId,
        model,
        timestamp: now(),
      }
      yield {
        type: EventType.REASONING_END,
        messageId: openReasoningId,
        model,
        timestamp: now(),
      }
      openReasoningId = null
    }
  }

  /** Pair every started-but-unresolved tool call with an interrupted result. */
  function* synthesizeUnresolvedResults(): Generator<StreamChunk> {
    for (const toolCallId of unresolvedToolCalls) {
      yield {
        type: EventType.TOOL_CALL_RESULT,
        toolCallId,
        messageId: genId(),
        model,
        timestamp: now(),
        content: JSON.stringify({ status: 'interrupted' }),
      }
    }
    unresolvedToolCalls.clear()
  }

  /** Close any open message and pair every started tool call with a result. */
  function* flushOpenState(): Generator<StreamChunk> {
    yield* closeText()
    yield* closeReasoning()
    yield* synthesizeUnresolvedResults()
  }

  /**
   * Record an update for a text/reasoning part and return the newly added
   * text: the event's `delta` when it is a non-empty string, otherwise the
   * part of `fullText` beyond what was already accumulated (or all of
   * `fullText` when it does not extend the accumulated text).
   */
  function takeDelta(
    partId: string,
    fullText: string,
    delta: string | undefined,
  ): string {
    const previous = textAccumulators.get(partId) ?? ''
    if (typeof delta === 'string' && delta !== '') {
      textAccumulators.set(partId, previous + delta)
      return delta
    }
    textAccumulators.set(partId, fullText)
    return fullText.startsWith(previous)
      ? fullText.slice(previous.length)
      : fullText
  }

  /** Emit TEXT_MESSAGE_* chunks for an assistant text part update. */
  function* handleTextPart(
    part: TextPart,
    delta: string | undefined,
  ): Generator<StreamChunk> {
    yield* closeReasoning()

    const deltaText = takeDelta(part.id, part.text, delta)
    if (deltaText === '') return

    if (openTextId !== part.id) {
      yield* closeText()
      openTextId = part.id
      yield {
        type: EventType.TEXT_MESSAGE_START,
        messageId: part.id,
        model,
        timestamp: now(),
        role: 'assistant',
      }
    }
    yield {
      type: EventType.TEXT_MESSAGE_CONTENT,
      messageId: part.id,
      model,
      timestamp: now(),
      delta: deltaText,
      content: textAccumulators.get(part.id) ?? deltaText,
    }
  }

  /** Emit REASONING_* chunks for a reasoning part update. */
  function* handleReasoningPart(
    part: ReasoningPart,
    delta: string | undefined,
  ): Generator<StreamChunk> {
    yield* closeText()

    const deltaText = takeDelta(part.id, part.text, delta)
    if (deltaText === '') return

    if (openReasoningId !== part.id) {
      yield* closeReasoning()
      openReasoningId = part.id
      yield {
        type: EventType.REASONING_START,
        messageId: part.id,
        model,
        timestamp: now(),
      }
      yield {
        type: EventType.REASONING_MESSAGE_START,
        messageId: part.id,
        role: 'reasoning' as const,
        model,
        timestamp: now(),
      }
    }
    yield {
      type: EventType.REASONING_MESSAGE_CONTENT,
      messageId: part.id,
      delta: deltaText,
      model,
      timestamp: now(),
    }
  }

  /**
   * Emit TOOL_CALL_START/ARGS/END the first time a call id is seen. The args
   * are whatever `input` the part carries at that moment (`{}` when absent).
   */
  function* openToolCall(part: ToolPart): Generator<StreamChunk> {
    if (openedToolCalls.has(part.callID)) return
    openedToolCalls.add(part.callID)
    const toolCallName = resolveToolName(part.tool, ctx.bridgedToolNames)
    const input = part.state.input ?? {}
    const args = JSON.stringify(input)
    yield {
      type: EventType.TOOL_CALL_START,
      toolCallId: part.callID,
      toolCallName,
      toolName: toolCallName,
      model,
      timestamp: now(),
    }
    yield {
      type: EventType.TOOL_CALL_ARGS,
      toolCallId: part.callID,
      model,
      timestamp: now(),
      delta: args,
      args,
    }
    yield {
      type: EventType.TOOL_CALL_END,
      toolCallId: part.callID,
      toolCallName,
      toolName: toolCallName,
      model,
      timestamp: now(),
      input,
    }
    unresolvedToolCalls.add(part.callID)
  }

  /** Open the tool call if needed and emit its result once it has settled. */
  function* handleToolPart(part: ToolPart): Generator<StreamChunk> {
    yield* closeText()
    yield* closeReasoning()
    yield* openToolCall(part)

    const state = part.state
    if (state.status !== 'completed' && state.status !== 'error') return
    // Later updates of a settled call must not emit a second result.
    if (resolvedToolCalls.has(part.callID)) return
    resolvedToolCalls.add(part.callID)
    unresolvedToolCalls.delete(part.callID)

    const isError = state.status === 'error'
    yield {
      type: EventType.TOOL_CALL_RESULT,
      toolCallId: part.callID,
      messageId: genId(),
      model,
      timestamp: now(),
      content: isError ? state.error : state.output,
      ...(isError && { state: 'output-error' as const }),
    }
  }

  /** Dispatch one OpenCode event to the matching part/todo handler. */
  function* handleEvent(event: OpencodeEvent): Generator<StreamChunk> {
    if (event.type === 'message.part.updated') {
      const { part, delta } = event.properties
      if (isTextPart(part)) {
        yield* handleTextPart(part, delta)
      } else if (isReasoningPart(part)) {
        yield* handleReasoningPart(part, delta)
      } else if (isToolPart(part)) {
        yield* handleToolPart(part)
      }
      // Other part kinds (file, step-start/finish, snapshot, ...) carry no
      // state the chunk stream needs.
    } else if (event.type === 'todo.updated') {
      yield {
        type: EventType.CUSTOM,
        model,
        timestamp: now(),
        name: TODO_EVENT,
        value: { todos: event.properties.todos },
      }
    }
    // Every other event type (session.idle, session.error, message.updated)
    // is ignored here; the run's outcome is read from the terminal `done`
    // event.
  }

  /** Close everything and emit RUN_ERROR or RUN_FINISHED for the final message. */
  function* finish(message: OpencodeAssistantMessage): Generator<StreamChunk> {
    yield* startRun()
    yield* flushOpenState()

    const error = messageError(message)
    if (error) {
      yield {
        type: EventType.RUN_ERROR,
        model,
        timestamp: now(),
        message: error.message,
        error,
      }
      return
    }

    const usage = buildUsage(message.tokens)
    const finishReason = message.finish === 'length' ? 'length' : 'stop'
    yield {
      type: EventType.RUN_FINISHED,
      runId,
      threadId,
      model,
      timestamp: now(),
      finishReason,
      ...(usage !== undefined && { usage }),
    }
  }

  try {
    for await (const streamEvent of events) {
      ctx.onStreamEvent?.(streamEvent)

      if (streamEvent.kind === 'session') {
        yield* startRun()
        yield {
          type: EventType.CUSTOM,
          model,
          timestamp: now(),
          name: SESSION_ID_EVENT,
          value: { sessionId: streamEvent.sessionId },
        }
      } else if (streamEvent.kind === 'event') {
        yield* startRun()
        yield* handleEvent(streamEvent.event)
      } else {
        yield* finish(streamEvent.message)
      }
    }
    // The source ended. After a `done` event nothing is left open and this is
    // a no-op; if the source ended without one, this keeps the pairing
    // invariant instead of leaving messages open and tool calls unresolved.
    yield* flushOpenState()
  } catch (error) {
    // The run is dying (abort, server exit, or connection failure). Close any
    // open message and pair started tool calls with a synthetic result first
    // so the next request's pending-tool-call scan doesn't try to execute
    // them, then let the adapter surface the error as RUN_ERROR.
    yield* flushOpenState()
    throw error
  }
}
/**
 * Build an MCP server that exposes `tools` through low-level `tools/list`
 * and `tools/call` request handlers.
 *
 * Listing passes each tool's `inputSchema` through as-is (an empty object
 * schema when the tool has none). Calling runs the tool's `execute` with the
 * request arguments (`{}` when absent) and returns the result as a single
 * text content item: strings verbatim, everything else JSON-encoded.
 *
 * @param tools - Tools to expose; looked up by `name` on each call.
 * @returns The configured, not yet connected, server instance.
 */
function createMcpServer(tools: Array<AnyTool>): McpServer {
  const instance = new McpServer(
    { name: BRIDGED_MCP_SERVER_NAME, version: '1.0.0' },
    { capabilities: { tools: {} } },
  )

  const toolsByName = new Map(tools.map((tool) => [tool.name, tool]))

  instance.server.setRequestHandler(ListToolsRequestSchema, () => ({
    tools: tools.map((tool) => ({
      name: tool.name,
      description: tool.description,
      inputSchema: (tool.inputSchema ?? {
        type: 'object',
        properties: {},
      }) as { type: 'object'; [key: string]: unknown },
    })),
  }))

  instance.server.setRequestHandler(CallToolRequestSchema, async (request) => {
    const tool = toolsByName.get(request.params.name)
    // A tool without `execute` cannot be run here, so it is reported the same
    // way as a name that was never registered.
    if (!tool?.execute) {
      throw new Error(`Unknown tool: ${request.params.name}`)
    }
    try {
      const result: unknown = await tool.execute(request.params.arguments ?? {})
      // JSON.stringify returns undefined (not a string) for `undefined`,
      // functions and symbols; fall back to 'null' so `text` is always a
      // string.
      const text =
        typeof result === 'string' ? result : (JSON.stringify(result) ?? 'null')
      return { content: [{ type: 'text', text }] }
    } catch (error) {
      // Covers both a throwing tool and a result that cannot be serialized.
      const message = error instanceof Error ? error.message : String(error)
      return {
        isError: true,
        content: [{ type: 'text', text: `Tool execution failed: ${message}` }],
      }
    }
  })

  return instance
}
/**
 * Expose TanStack tools to the OpenCode harness as a Streamable-HTTP MCP
 * server on an ephemeral localhost port.
 *
 * OpenCode runs as a separate server process, so the bridge listens on
 * `127.0.0.1` and the adapter registers it as a `remote` MCP server in the
 * harness config (`mcp.tanstack.url`). Each request is handled statelessly
 * with a fresh `McpServer` + transport pair, which is all the harness's
 * list/call traffic needs.
 *
 * The engine has already converted each tool's schema to JSON Schema before
 * the adapter sees it, and JSON Schema is exactly what MCP's `tools/list`
 * wants — so the low-level request handlers pass schemas through verbatim
 * instead of round-tripping them through zod.
 *
 * The caller owns the lifecycle: `close()` must run when the chat stream
 * ends (the adapter does this in a `finally`) so the port is never leaked.
 * Calling `close()` more than once is safe; every call shares the first
 * shutdown.
 *
 * @param tools - Tools to expose to the harness.
 * @returns The bridge URL and a `close()` that stops the HTTP server.
 */
export async function startToolBridge(
  tools: Array<AnyTool>,
): Promise<ToolBridgeHandle> {
  const httpServer = createServer((req, res) => {
    void (async () => {
      if (req.method !== 'POST') {
        // A 405 response must tell the client which methods are allowed.
        res.writeHead(405, { Allow: 'POST' }).end()
        return
      }
      const chunks: Array<Buffer> = []
      for await (const chunk of req) {
        chunks.push(chunk as Buffer)
      }
      let parsedBody: unknown
      try {
        parsedBody = JSON.parse(Buffer.concat(chunks).toString('utf8'))
      } catch {
        res.writeHead(400).end()
        return
      }
      // Fresh server + transport per request: no session state is kept.
      const mcpServer = createMcpServer(tools)
      const transport = new StreamableHTTPServerTransport({
        sessionIdGenerator: undefined,
      })
      res.on('close', () => {
        void transport.close()
        void mcpServer.close()
      })
      await mcpServer.connect(transport)
      await transport.handleRequest(req, res, parsedBody)
    })().catch(() => {
      // Best effort: answer 500 if nothing was sent yet, then end the response.
      if (!res.headersSent) res.writeHead(500)
      res.end()
    })
  })

  await new Promise<void>((resolve, reject) => {
    httpServer.once('error', reject)
    httpServer.listen(0, '127.0.0.1', resolve)
  })

  const { port } = httpServer.address() as AddressInfo

  // Memoized so a repeated close() does not call `httpServer.close()` on a
  // server that is no longer running, which would reject.
  let closing: Promise<void> | undefined

  return {
    url: `http://127.0.0.1:${port}/mcp`,
    close: () => {
      if (closing === undefined) {
        closing = new Promise<void>((resolve, reject) => {
          httpServer.closeAllConnections()
          httpServer.close((error) => (error ? reject(error) : resolve()))
        })
      }
      return closing
    },
  }
}
startToolBridge([ + makeTool({ + execute: async (args: unknown) => ({ + echoed: (args as { value: string }).value, + }), + } as Partial), + ]) + try { + const client = await connectClient(bridge.url) + const result = await client.callTool({ + name: 'echo', + arguments: { value: 'hi' }, + }) + expect(result.content).toEqual([ + { type: 'text', text: JSON.stringify({ echoed: 'hi' }) }, + ]) + await client.close() + } finally { + await bridge.close() + } + }) + + it('returns isError content when the tool throws', async () => { + const bridge = await startToolBridge([ + makeTool({ + execute: async () => { + throw new Error('tool blew up') + }, + } as Partial), + ]) + try { + const client = await connectClient(bridge.url) + const result = await client.callTool({ name: 'echo', arguments: {} }) + expect(result.isError).toBe(true) + expect(result.content).toEqual([ + { type: 'text', text: 'Tool execution failed: tool blew up' }, + ]) + await client.close() + } finally { + await bridge.close() + } + }) + + it('refuses connections after close()', async () => { + const bridge = await startToolBridge([makeTool()]) + await bridge.close() + await expect(connectClient(bridge.url)).rejects.toThrow() + }) +}) diff --git a/packages/ai-opencode/tests/permissions.test.ts b/packages/ai-opencode/tests/permissions.test.ts new file mode 100644 index 000000000..de5e0dd0f --- /dev/null +++ b/packages/ai-opencode/tests/permissions.test.ts @@ -0,0 +1,102 @@ +import { describe, expect, it } from 'vitest' +import { + matchBridgedToolName, + resolvePermission, +} from '../src/process/permissions' +import type { OpencodePermissionRequest } from '../src/process/permissions' + +function request( + overrides: Partial = {}, +): OpencodePermissionRequest { + return { + id: 'perm-1', + sessionID: 'sess-1', + type: 'bash', + title: 'Run a command', + ...overrides, + } +} + +describe('matchBridgedToolName', () => { + const bridged = new Set(['lookup_user']) + + it('returns false without bridged tools', 
() => { + expect(matchBridgedToolName(request({ type: 'lookup_user' }), undefined)).toBe( + false, + ) + expect(matchBridgedToolName(request({ type: 'lookup_user' }), new Set())).toBe( + false, + ) + }) + + it('matches a bare registered tool name in type or title', () => { + expect(matchBridgedToolName(request({ type: 'lookup_user' }), bridged)).toBe( + true, + ) + expect( + matchBridgedToolName( + request({ type: 'tool', title: 'lookup_user' }), + bridged, + ), + ).toBe(true) + }) + + it('matches the tanstack_ and tanstack. server prefixes', () => { + expect( + matchBridgedToolName(request({ type: 'tanstack_lookup_user' }), bridged), + ).toBe(true) + expect( + matchBridgedToolName(request({ type: 'tanstack.lookup_user' }), bridged), + ).toBe(true) + }) + + it('does not match foreign tools', () => { + expect( + matchBridgedToolName(request({ type: 'github_create_issue' }), bridged), + ).toBe(false) + }) +}) + +describe('resolvePermission', () => { + const bridged = new Set(['lookup_user']) + + it('always allows bridged tools regardless of mode', () => { + for (const mode of ['default', 'acceptEdits', 'bypassPermissions'] as const) { + expect( + resolvePermission(request({ type: 'tanstack_lookup_user' }), mode, bridged), + ).toBe('once') + } + }) + + it('rejects everything else in default mode', () => { + expect(resolvePermission(request({ type: 'bash' }), 'default', bridged)).toBe( + 'reject', + ) + expect(resolvePermission(request({ type: 'edit' }), 'default', bridged)).toBe( + 'reject', + ) + expect( + resolvePermission(request({ type: 'webfetch' }), 'default', bridged), + ).toBe('reject') + }) + + it('auto-approves file mutations only in acceptEdits mode', () => { + for (const type of ['edit', 'write', 'patch']) { + expect(resolvePermission(request({ type }), 'acceptEdits', bridged)).toBe( + 'once', + ) + } + expect(resolvePermission(request({ type: 'bash' }), 'acceptEdits', bridged)).toBe( + 'reject', + ) + }) + + it('approves everything in bypassPermissions 
mode', () => { + expect( + resolvePermission(request({ type: 'bash' }), 'bypassPermissions', bridged), + ).toBe('once') + expect( + resolvePermission(request({ type: 'webfetch' }), 'bypassPermissions', undefined), + ).toBe('once') + }) +}) diff --git a/packages/ai-opencode/tests/prompt.test.ts b/packages/ai-opencode/tests/prompt.test.ts new file mode 100644 index 000000000..74ced6564 --- /dev/null +++ b/packages/ai-opencode/tests/prompt.test.ts @@ -0,0 +1,88 @@ +import { describe, expect, it } from 'vitest' +import { buildPrompt } from '../src/messages/prompt' +import type { ModelMessage } from '@tanstack/ai' + +const user = (content: ModelMessage['content']): ModelMessage => ({ + role: 'user', + content, +}) +const assistant = (content: ModelMessage['content']): ModelMessage => ({ + role: 'assistant', + content, +}) + +describe('buildPrompt', () => { + it('resumes with only the last user message when sessionId is provided', () => { + const result = buildPrompt( + [ + user('first question'), + assistant('first answer'), + user('follow-up question'), + ], + 'sess-1', + ) + expect(result).toEqual({ prompt: 'follow-up question', resume: 'sess-1' }) + }) + + it('throws when sessionId is provided but there is no trailing user message', () => { + expect(() => buildPrompt([user('q'), assistant('a')], 'sess-1')).toThrow( + /user message/i, + ) + }) + + it('sends a single user message as-is for a fresh session', () => { + expect(buildPrompt([user('hello')], undefined)).toEqual({ prompt: 'hello' }) + }) + + it('flattens prior turns into a transcript preamble for fresh multi-turn history', () => { + const { prompt, resume } = buildPrompt( + [user('What is 2+2?'), assistant('4'), user('And times 3?')], + undefined, + ) + expect(resume).toBeUndefined() + expect(prompt).toBe( + 'Previous conversation:\nUser: What is 2+2?\nAssistant: 4\n\nAnd times 3?', + ) + }) + + it('skips tool messages and assistant tool-call-only turns when flattening', () => { + const messages: Array = [ + 
user('list files'), + { + role: 'assistant', + content: null, + toolCalls: [ + { id: 't1', type: 'function', function: { name: 'ls', arguments: '{}' } }, + ], + } as unknown as ModelMessage, + { role: 'tool', content: 'file-a', toolCallId: 't1' }, + assistant('There is one file.'), + user('thanks, which one?'), + ] + const { prompt } = buildPrompt(messages, undefined) + expect(prompt).toBe( + 'Previous conversation:\nUser: list files\nAssistant: There is one file.\n\nthanks, which one?', + ) + }) + + it('extracts text from content-part arrays and ignores non-text parts', () => { + const { prompt } = buildPrompt( + [ + user([ + { type: 'text', content: 'describe ' }, + { + type: 'image', + source: { type: 'url', url: 'https://x/y.png' }, + } as never, + { type: 'text', content: 'this' }, + ] as ModelMessage['content']), + ], + undefined, + ) + expect(prompt).toBe('describe this') + }) + + it('throws when there is no usable user content at all', () => { + expect(() => buildPrompt([], undefined)).toThrow(/user message/i) + }) +}) diff --git a/packages/ai-opencode/tests/text-adapter.test.ts b/packages/ai-opencode/tests/text-adapter.test.ts new file mode 100644 index 000000000..e3267daa4 --- /dev/null +++ b/packages/ai-opencode/tests/text-adapter.test.ts @@ -0,0 +1,402 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest' +import { opencodeText } from '../src/adapters/text' +import { startOpencodeSession } from '../src/process/server' +import { startToolBridge } from '../src/tools/bridge' +import type { + OpencodeAssistantMessage, + OpencodeEvent, +} from '../src/stream/sdk-types' +import type { StartOpencodeSessionOptions } from '../src/process/server' +import type { InternalLogger } from '@tanstack/ai/adapter-internals' +import type { StreamChunk, TextOptions } from '@tanstack/ai' + +vi.mock('../src/process/server', () => ({ startOpencodeSession: vi.fn() })) +vi.mock('../src/tools/bridge', () => ({ startToolBridge: vi.fn() })) + +const startSessionMock = 
vi.mocked(startOpencodeSession) +const bridgeMock = vi.mocked(startToolBridge) +const promptMock = vi.fn() +const abortMock = vi.fn() +const disposeMock = vi.fn() +const bridgeCloseMock = vi.fn() + +let captured: StartOpencodeSessionOptions | undefined + +const MODEL = 'anthropic/claude-sonnet-4-5' + +function textTurn(text = 'hi there'): { + message: OpencodeAssistantMessage + text: string +} { + captured?.onEvent({ + type: 'message.part.updated', + properties: { + part: { id: 'p1', sessionID: 's', type: 'text', text }, + delta: text, + }, + } as OpencodeEvent) + return { + message: { + id: 'm1', + role: 'assistant', + finish: 'stop', + tokens: { input: 10, output: 5 }, + }, + text, + } +} + +const noopLogger = { + request: vi.fn(), + provider: vi.fn(), + output: vi.fn(), + errors: vi.fn(), + middleware: vi.fn(), + tools: vi.fn(), + agentLoop: vi.fn(), + config: vi.fn(), + isEnabled: () => false, +} as unknown as InternalLogger + +function makeOptions( + overrides: Partial>> = {}, +): TextOptions> { + return { + model: MODEL, + messages: [{ role: 'user', content: 'hello' }], + logger: noopLogger, + ...overrides, + } as TextOptions> +} + +async function collect( + stream: AsyncIterable, +): Promise> { + const chunks: Array = [] + for await (const chunk of stream) chunks.push(chunk) + return chunks +} + +beforeEach(() => { + captured = undefined + startSessionMock.mockReset() + bridgeMock.mockReset() + promptMock.mockReset() + abortMock.mockReset() + disposeMock.mockReset() + bridgeCloseMock.mockReset() + + abortMock.mockResolvedValue(undefined) + disposeMock.mockResolvedValue(undefined) + bridgeCloseMock.mockResolvedValue(undefined) + bridgeMock.mockResolvedValue({ + url: 'http://127.0.0.1:54321/mcp', + close: bridgeCloseMock, + }) + promptMock.mockImplementation(() => Promise.resolve(textTurn())) + + startSessionMock.mockImplementation( + async (options: StartOpencodeSessionOptions) => { + captured = options + return { + sessionId: options.resumeSessionId ?? 
'sess-new', + resumed: options.resumeSessionId !== undefined, + prompt: promptMock, + abort: abortMock, + dispose: disposeMock, + } + }, + ) +}) + +describe('opencodeText', () => { + it('creates an adapter with the opencode provider name', () => { + const adapter = opencodeText(MODEL) + expect(adapter.kind).toBe('text') + expect(adapter.name).toBe('opencode') + expect(adapter.model).toBe(MODEL) + }) +}) + +describe('chatStream', () => { + it('streams translated AG-UI events for a simple turn', async () => { + const chunks = await collect(opencodeText(MODEL).chatStream(makeOptions())) + expect(chunks.map((c) => c.type)).toEqual([ + 'RUN_STARTED', + 'CUSTOM', + 'TEXT_MESSAGE_START', + 'TEXT_MESSAGE_CONTENT', + 'TEXT_MESSAGE_END', + 'RUN_FINISHED', + ]) + expect(chunks.at(-1)).toMatchObject({ finishReason: 'stop' }) + }) + + it('splits the provider/model id for the session', async () => { + await collect(opencodeText(MODEL).chatStream(makeOptions())) + expect(captured).toMatchObject({ + providerID: 'anthropic', + modelID: 'claude-sonnet-4-5', + }) + }) + + it('rejects a model id without a provider prefix', async () => { + const chunks = await collect( + opencodeText('claude-sonnet-4-5').chatStream(makeOptions()), + ) + expect(startSessionMock).not.toHaveBeenCalled() + expect(chunks.at(-1)).toMatchObject({ type: 'RUN_ERROR' }) + expect((chunks.at(-1) as { message: string }).message).toMatch( + /provider\/model/, + ) + }) + + it('starts a fresh session without a sessionId', async () => { + await collect(opencodeText(MODEL).chatStream(makeOptions())) + expect(captured?.resumeSessionId).toBeUndefined() + expect(promptMock.mock.calls[0]![0]).toBe('hello') + }) + + it('resumes the session and sends only the trailing user message', async () => { + await collect( + opencodeText(MODEL).chatStream( + makeOptions({ + messages: [ + { role: 'user', content: 'first' }, + { role: 'assistant', content: 'answer' }, + { role: 'user', content: 'follow-up' }, + ], + modelOptions: { 
sessionId: 'sess-prior' }, + }), + ), + ) + expect(captured?.resumeSessionId).toBe('sess-prior') + expect(promptMock.mock.calls[0]![0]).toBe('follow-up') + }) + + it('flattens prior turns into the prompt without a sessionId', async () => { + await collect( + opencodeText(MODEL).chatStream( + makeOptions({ + messages: [ + { role: 'user', content: 'first' }, + { role: 'assistant', content: 'answer' }, + { role: 'user', content: 'follow-up' }, + ], + }), + ), + ) + expect(promptMock.mock.calls[0]![0]).toBe( + 'Previous conversation:\nUser: first\nAssistant: answer\n\nfollow-up', + ) + }) + + it('uses the configured directory and a default permission policy', async () => { + await collect( + opencodeText(MODEL, { directory: '/workspace' }).chatStream(makeOptions()), + ) + expect(captured?.directory).toBe('/workspace') + expect(captured?.permission).toMatchObject({ + edit: 'ask', + bash: 'ask', + webfetch: 'ask', + }) + }) + + it('lets modelOptions.directory override the configured directory', async () => { + await collect( + opencodeText(MODEL, { directory: '/workspace' }).chatStream( + makeOptions({ modelOptions: { directory: '/elsewhere' } }), + ), + ) + expect(captured?.directory).toBe('/elsewhere') + }) + + it('opens permissions for bypassPermissions mode', async () => { + await collect( + opencodeText(MODEL, { permissionMode: 'bypassPermissions' }).chatStream( + makeOptions(), + ), + ) + expect(captured?.permission).toMatchObject({ + edit: 'allow', + bash: 'allow', + webfetch: 'allow', + }) + }) + + it('prepends system prompts to the prompt text', async () => { + await collect( + opencodeText(MODEL).chatStream( + makeOptions({ systemPrompts: ['Be terse.', 'Use tabs.'] }), + ), + ) + expect(promptMock.mock.calls[0]![0]).toBe('Be terse.\n\nUse tabs.\n\nhello') + }) + + it('starts a localhost MCP bridge and registers it when tools are passed', async () => { + await collect( + opencodeText(MODEL).chatStream( + makeOptions({ + tools: [ + { + name: 'lookup_user', + 
description: 'Look up a user', + inputSchema: { type: 'object', properties: {} }, + execute: async () => ({ ok: true }), + } as never, + ], + }), + ), + ) + expect(bridgeMock).toHaveBeenCalledTimes(1) + expect(captured?.mcpServers).toEqual([ + { name: 'tanstack', url: 'http://127.0.0.1:54321/mcp' }, + ]) + expect(bridgeCloseMock).toHaveBeenCalledTimes(1) + }) + + it('does not start a bridge when no tools are passed', async () => { + await collect(opencodeText(MODEL).chatStream(makeOptions())) + expect(bridgeMock).not.toHaveBeenCalled() + expect(captured?.mcpServers).toBeUndefined() + }) + + it('emits RUN_ERROR for client-side tools (no execute)', async () => { + const chunks = await collect( + opencodeText(MODEL).chatStream( + makeOptions({ + tools: [ + { + name: 'client_only', + description: 'runs in browser', + inputSchema: { type: 'object', properties: {} }, + } as never, + ], + }), + ), + ) + expect(startSessionMock).not.toHaveBeenCalled() + expect(chunks.at(-1)).toMatchObject({ type: 'RUN_ERROR' }) + expect((chunks.at(-1) as { message: string }).message).toMatch(/client-side/i) + }) + + it('emits RUN_ERROR for approval-gated tools', async () => { + const chunks = await collect( + opencodeText(MODEL).chatStream( + makeOptions({ + tools: [ + { + name: 'needs_ok', + description: 'requires approval', + inputSchema: { type: 'object', properties: {} }, + execute: async () => 'x', + needsApproval: true, + } as never, + ], + }), + ), + ) + expect(chunks.at(-1)).toMatchObject({ type: 'RUN_ERROR' }) + }) + + it('aborts the session when the external signal fires', async () => { + const controller = new AbortController() + controller.abort() + await collect( + opencodeText(MODEL).chatStream(makeOptions({ abortController: controller })), + ) + expect(abortMock).toHaveBeenCalledTimes(1) + }) + + it('emits RUN_ERROR when the prompt turn rejects', async () => { + promptMock.mockRejectedValueOnce(new Error('boom')) + const chunks = await 
collect(opencodeText(MODEL).chatStream(makeOptions())) + expect(chunks.at(-1)).toMatchObject({ type: 'RUN_ERROR', message: 'boom' }) + expect(disposeMock).toHaveBeenCalledTimes(1) + }) + + it('emits RUN_ERROR when starting the session throws', async () => { + startSessionMock.mockRejectedValueOnce(new Error('serve failed')) + const chunks = await collect(opencodeText(MODEL).chatStream(makeOptions())) + expect(chunks.at(-1)).toMatchObject({ + type: 'RUN_ERROR', + message: 'serve failed', + }) + }) +}) + +describe('structuredOutput', () => { + it('parses the final message text and reports usage', async () => { + promptMock.mockResolvedValueOnce({ + message: { + id: 'm1', + role: 'assistant', + finish: 'stop', + tokens: { input: 7, output: 3 }, + }, + text: '{"answer":42}', + }) + const result = await opencodeText(MODEL).structuredOutput({ + chatOptions: makeOptions(), + outputSchema: { type: 'object', properties: { answer: { type: 'number' } } }, + }) + expect(result.data).toEqual({ answer: 42 }) + expect(result.rawText).toBe('{"answer":42}') + expect(result.usage).toMatchObject({ promptTokens: 7, completionTokens: 3 }) + }) + + it('strips markdown fences from the model output', async () => { + promptMock.mockResolvedValueOnce({ + message: { id: 'm1', role: 'assistant', finish: 'stop' }, + text: '```json\n{"ok":true}\n```', + }) + const result = await opencodeText(MODEL).structuredOutput({ + chatOptions: makeOptions(), + outputSchema: { type: 'object' }, + }) + expect(result.data).toEqual({ ok: true }) + }) + + it('runs a fresh session even when a sessionId is supplied', async () => { + promptMock.mockResolvedValueOnce({ + message: { id: 'm1', role: 'assistant', finish: 'stop' }, + text: '{}', + }) + await opencodeText(MODEL).structuredOutput({ + chatOptions: makeOptions({ modelOptions: { sessionId: 'sess-live' } }), + outputSchema: { type: 'object' }, + }) + expect(captured?.resumeSessionId).toBeUndefined() + }) + + it('throws a descriptive error when the message 
carries an error', async () => { + promptMock.mockResolvedValueOnce({ + message: { + id: 'm1', + role: 'assistant', + error: { name: 'ProviderAuthError', data: { message: 'no key' } }, + }, + text: '', + }) + await expect( + opencodeText(MODEL).structuredOutput({ + chatOptions: makeOptions(), + outputSchema: { type: 'object' }, + }), + ).rejects.toThrow(/no key/) + }) + + it('throws when the run ends without any text', async () => { + promptMock.mockResolvedValueOnce({ + message: { id: 'm1', role: 'assistant', finish: 'stop' }, + text: ' ', + }) + await expect( + opencodeText(MODEL).structuredOutput({ + chatOptions: makeOptions(), + outputSchema: { type: 'object' }, + }), + ).rejects.toThrow(/without a response/) + }) +}) diff --git a/packages/ai-opencode/tests/translate.test.ts b/packages/ai-opencode/tests/translate.test.ts new file mode 100644 index 000000000..c73573057 --- /dev/null +++ b/packages/ai-opencode/tests/translate.test.ts @@ -0,0 +1,403 @@ +import { describe, expect, it } from 'vitest' +import { + SESSION_ID_EVENT, + TODO_EVENT, + resolveToolName, + translateOpencodeStream, +} from '../src/stream/translate' +import type { TranslateContext } from '../src/stream/translate' +import type { + OpencodeAssistantMessage, + OpencodeStreamEvent, +} from '../src/stream/sdk-types' +import type { StreamChunk } from '@tanstack/ai' + +function makeCtx(overrides: Partial = {}): TranslateContext { + let id = 0 + return { + model: 'anthropic/claude-sonnet-4-5', + runId: 'run-1', + threadId: 'thread-1', + genId: () => `gen-${++id}`, + ...overrides, + } +} + +async function* fromArray( + events: Array, +): AsyncIterable { + for (const event of events) yield event +} + +async function collect( + events: Array, + ctx: TranslateContext = makeCtx(), +): Promise> { + const chunks: Array = [] + for await (const chunk of translateOpencodeStream(fromArray(events), ctx)) { + chunks.push(chunk) + } + return chunks +} + +const session: OpencodeStreamEvent = { kind: 'session', 
sessionId: 'sess-1' } + +function done( + overrides: Partial = {}, +): OpencodeStreamEvent { + return { + kind: 'done', + message: { id: 'msg-1', role: 'assistant', finish: 'stop', ...overrides }, + } +} + +function textPart( + id: string, + text: string, + delta?: string, +): OpencodeStreamEvent { + return { + kind: 'event', + event: { + type: 'message.part.updated', + properties: { + part: { id, sessionID: 'sess-1', type: 'text', text }, + ...(delta !== undefined && { delta }), + }, + }, + } +} + +describe('translateOpencodeStream', () => { + it('translates a simple text turn', async () => { + const chunks = await collect([ + session, + textPart('part-1', 'hi there', 'hi there'), + done(), + ]) + + expect(chunks.map((c) => c.type)).toEqual([ + 'RUN_STARTED', + 'CUSTOM', + 'TEXT_MESSAGE_START', + 'TEXT_MESSAGE_CONTENT', + 'TEXT_MESSAGE_END', + 'RUN_FINISHED', + ]) + expect(chunks[1]).toMatchObject({ + name: SESSION_ID_EVENT, + value: { sessionId: 'sess-1' }, + }) + expect(chunks[3]).toMatchObject({ delta: 'hi there', content: 'hi there' }) + expect(chunks.at(-1)).toMatchObject({ finishReason: 'stop' }) + }) + + it('accumulates incremental text deltas', async () => { + const chunks = await collect([ + session, + textPart('part-1', 'Hel', 'Hel'), + textPart('part-1', 'Hello', 'lo'), + done(), + ]) + const contents = chunks.filter((c) => c.type === 'TEXT_MESSAGE_CONTENT') + expect(contents).toHaveLength(2) + expect(contents[0]).toMatchObject({ delta: 'Hel', content: 'Hel' }) + expect(contents[1]).toMatchObject({ delta: 'lo', content: 'Hello' }) + // A single START/END pair for the one part id. 
+ expect(chunks.filter((c) => c.type === 'TEXT_MESSAGE_START')).toHaveLength(1) + expect(chunks.filter((c) => c.type === 'TEXT_MESSAGE_END')).toHaveLength(1) + }) + + it('derives the delta from full-text snapshots when no delta is given', async () => { + const chunks = await collect([ + session, + textPart('part-1', 'Hel'), + textPart('part-1', 'Hello'), + done(), + ]) + const contents = chunks.filter((c) => c.type === 'TEXT_MESSAGE_CONTENT') + expect(contents[0]).toMatchObject({ delta: 'Hel' }) + expect(contents[1]).toMatchObject({ delta: 'lo', content: 'Hello' }) + }) + + it('reports usage with cache and reasoning details', async () => { + const chunks = await collect([ + session, + done({ + tokens: { + input: 100, + output: 20, + reasoning: 5, + cache: { read: 40, write: 0 }, + }, + }), + ]) + const finished = chunks.at(-1) as unknown as { + usage: Record + } + expect(finished.usage).toMatchObject({ + promptTokens: 100, + completionTokens: 20, + totalTokens: 120, + promptTokensDetails: { cachedTokens: 40 }, + completionTokensDetails: { reasoningTokens: 5 }, + }) + }) + + it('maps a length finish to finishReason length', async () => { + const chunks = await collect([session, done({ finish: 'length' })]) + expect(chunks.at(-1)).toMatchObject({ finishReason: 'length' }) + }) + + it('translates a reasoning part into a reasoning sequence', async () => { + const chunks = await collect([ + session, + { + kind: 'event', + event: { + type: 'message.part.updated', + properties: { + part: { + id: 'r-1', + sessionID: 'sess-1', + type: 'reasoning', + text: 'thinking', + }, + delta: 'thinking', + }, + }, + }, + done(), + ]) + expect(chunks.map((c) => c.type)).toEqual([ + 'RUN_STARTED', + 'CUSTOM', + 'REASONING_START', + 'REASONING_MESSAGE_START', + 'REASONING_MESSAGE_CONTENT', + 'REASONING_MESSAGE_END', + 'REASONING_END', + 'RUN_FINISHED', + ]) + }) + + function toolEvent( + callID: string, + tool: string, + state: Record, + ): OpencodeStreamEvent { + return { + kind: 
'event', + event: { + type: 'message.part.updated', + properties: { + part: { + id: `part-${callID}`, + sessionID: 'sess-1', + type: 'tool', + callID, + tool, + state: state as never, + }, + }, + }, + } + } + + it('pairs a tool call across running and completed states', async () => { + const chunks = await collect([ + session, + toolEvent('call-1', 'bash', { + status: 'running', + input: { command: 'ls' }, + }), + toolEvent('call-1', 'bash', { + status: 'completed', + input: { command: 'ls' }, + output: 'file.txt', + title: 'ls', + }), + done(), + ]) + expect(chunks.map((c) => c.type)).toEqual([ + 'RUN_STARTED', + 'CUSTOM', + 'TOOL_CALL_START', + 'TOOL_CALL_ARGS', + 'TOOL_CALL_END', + 'TOOL_CALL_RESULT', + 'RUN_FINISHED', + ]) + expect(chunks[2]).toMatchObject({ toolCallId: 'call-1', toolCallName: 'bash' }) + expect(chunks[3]).toMatchObject({ args: JSON.stringify({ command: 'ls' }) }) + expect(chunks[5]).toMatchObject({ content: 'file.txt' }) + expect((chunks[5] as { state?: string }).state).toBeUndefined() + }) + + it('marks tool errors as output-error', async () => { + const chunks = await collect([ + session, + toolEvent('call-2', 'bash', { + status: 'error', + input: { command: 'false' }, + error: 'exit 1', + }), + done(), + ]) + expect(chunks.find((c) => c.type === 'TOOL_CALL_RESULT')).toMatchObject({ + content: 'exit 1', + state: 'output-error', + }) + }) + + it('does not duplicate START events across repeated tool updates', async () => { + const chunks = await collect([ + session, + toolEvent('call-3', 'read', { status: 'pending', input: {} }), + toolEvent('call-3', 'read', { status: 'running', input: { path: 'a' } }), + toolEvent('call-3', 'read', { + status: 'completed', + input: { path: 'a' }, + output: 'data', + title: 'read a', + }), + done(), + ]) + expect(chunks.filter((c) => c.type === 'TOOL_CALL_START')).toHaveLength(1) + expect(chunks.filter((c) => c.type === 'TOOL_CALL_RESULT')).toHaveLength(1) + }) + + it('surfaces bridged MCP tool calls under 
the registered name', async () => { + const chunks = await collect( + [ + session, + toolEvent('call-4', 'tanstack_lookup_user', { + status: 'completed', + input: { userId: '7' }, + output: '{"name":"Ada"}', + title: 'lookup_user', + }), + done(), + ], + makeCtx({ bridgedToolNames: new Set(['lookup_user']) }), + ) + expect(chunks.find((c) => c.type === 'TOOL_CALL_START')).toMatchObject({ + toolCallName: 'lookup_user', + }) + expect(chunks.find((c) => c.type === 'TOOL_CALL_RESULT')).toMatchObject({ + content: '{"name":"Ada"}', + }) + }) + + it('synthesizes interrupted results for unresolved tool calls on done', async () => { + const chunks = await collect([ + session, + toolEvent('call-9', 'bash', { + status: 'running', + input: { command: 'sleep 100' }, + }), + done(), + ]) + const result = chunks.find((c) => c.type === 'TOOL_CALL_RESULT') + expect(result).toMatchObject({ + toolCallId: 'call-9', + content: JSON.stringify({ status: 'interrupted' }), + }) + expect(chunks.at(-1)).toMatchObject({ type: 'RUN_FINISHED' }) + }) + + it('maps a message error to RUN_ERROR', async () => { + const chunks = await collect([ + session, + done({ + finish: undefined, + error: { name: 'ProviderAuthError', data: { message: 'no key' } }, + }), + ]) + expect(chunks.at(-1)).toMatchObject({ + type: 'RUN_ERROR', + message: 'no key', + }) + }) + + it('emits a todo CUSTOM event', async () => { + const chunks = await collect([ + session, + { + kind: 'event', + event: { + type: 'todo.updated', + properties: { + sessionID: 'sess-1', + todos: [{ content: 'step 1', status: 'pending' }], + }, + }, + }, + done(), + ]) + expect(chunks.find((c) => c.type === 'CUSTOM' && c.name === TODO_EVENT)).toBeDefined() + }) + + it('forwards raw stream events to onStreamEvent', async () => { + const kinds: Array = [] + await collect( + [session, textPart('p', 'hi', 'hi'), done()], + makeCtx({ onStreamEvent: (event) => kinds.push(event.kind) }), + ) + expect(kinds).toEqual(['session', 'event', 'done']) + }) + + 
it('synthesizes results then rethrows when the source stream throws', async () => { + async function* failing(): AsyncIterable { + yield session + yield { + kind: 'event', + event: { + type: 'message.part.updated', + properties: { + part: { + id: 'p-c', + sessionID: 'sess-1', + type: 'tool', + callID: 'call-7', + tool: 'bash', + state: { status: 'running', input: {} } as never, + }, + }, + }, + } + throw new Error('aborted') + } + + const chunks: Array = [] + await expect(async () => { + for await (const chunk of translateOpencodeStream(failing(), makeCtx())) { + chunks.push(chunk) + } + }).rejects.toThrow('aborted') + expect(chunks.at(-1)).toMatchObject({ + type: 'TOOL_CALL_RESULT', + toolCallId: 'call-7', + content: JSON.stringify({ status: 'interrupted' }), + }) + }) +}) + +describe('resolveToolName', () => { + it('returns the tool name verbatim without bridged names', () => { + expect(resolveToolName('bash', undefined)).toBe('bash') + expect(resolveToolName('edit', new Set())).toBe('edit') + }) + + it('strips the tanstack_ prefix for bridged tools', () => { + const bridged = new Set(['lookup_user']) + expect(resolveToolName('tanstack_lookup_user', bridged)).toBe('lookup_user') + expect(resolveToolName('lookup_user', bridged)).toBe('lookup_user') + }) + + it('leaves foreign tool names untouched', () => { + expect(resolveToolName('github_create_issue', new Set(['lookup_user']))).toBe( + 'github_create_issue', + ) + }) +}) diff --git a/packages/ai-opencode/tsconfig.json b/packages/ai-opencode/tsconfig.json new file mode 100644 index 000000000..c38689f4e --- /dev/null +++ b/packages/ai-opencode/tsconfig.json @@ -0,0 +1,8 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "dist" + }, + "include": ["src", "tests"], + "exclude": ["node_modules", "dist"] +} diff --git a/packages/ai-opencode/vite.config.ts b/packages/ai-opencode/vite.config.ts new file mode 100644 index 000000000..11f5b20b7 --- /dev/null +++ 
b/packages/ai-opencode/vite.config.ts @@ -0,0 +1,37 @@ +import { defineConfig, mergeConfig } from 'vitest/config' +import { tanstackViteConfig } from '@tanstack/vite-config' +import packageJson from './package.json' + +const config = defineConfig({ + test: { + name: packageJson.name, + dir: './', + watch: false, + + globals: true, + environment: 'node', + include: ['tests/**/*.test.ts'], + coverage: { + provider: 'v8', + reporter: ['text', 'json', 'html', 'lcov'], + exclude: [ + 'node_modules/', + 'dist/', + 'tests/', + '**/*.test.ts', + '**/*.config.ts', + '**/types.ts', + ], + include: ['src/**/*.ts'], + }, + }, +}) + +export default mergeConfig( + config, + tanstackViteConfig({ + entry: ['./src/index.ts'], + srcDir: './src', + cjs: false, + }), +) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 667bdf441..c30e85a5b 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -500,6 +500,9 @@ importers: '@tanstack/ai-gemini-cli': specifier: workspace:* version: link:../../packages/ai-gemini-cli + '@tanstack/ai-opencode': + specifier: workspace:* + version: link:../../packages/ai-opencode '@tanstack/ai-react': specifier: workspace:* version: link:../../packages/ai-react @@ -1554,6 +1557,22 @@ importers: specifier: ^4.2.0 version: 4.3.6 + packages/ai-opencode: + dependencies: + '@modelcontextprotocol/sdk': + specifier: ^1.29.0 + version: 1.29.0(zod@4.3.6) + '@opencode-ai/sdk': + specifier: ^1.17.4 + version: 1.17.4 + devDependencies: + '@tanstack/ai': + specifier: workspace:* + version: link:../ai + '@vitest/coverage-v8': + specifier: 4.0.14 + version: 4.0.14(vitest@4.0.14(@opentelemetry/api@1.9.1)(@types/node@24.10.3)(happy-dom@20.0.11)(jiti@2.6.1)(jsdom@27.3.0(postcss@8.5.15))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) + packages/ai-openrouter: dependencies: '@openrouter/sdk': @@ -4624,6 +4643,9 @@ packages: cpu: [x64] os: [win32] + '@opencode-ai/sdk@1.17.4': + resolution: {integrity: 
sha512-OdHkBoNIQOjQsPnFtkcxAp9HsLAKn/MQiq7wIQURkRARmw8yhFqGiMPYbNm+UfLn6bMkx97rdPkWQARejMyiXQ==} + '@openrouter/sdk@0.12.35': resolution: {integrity: sha512-s4QVLLnG1AmfW3TjnnHUqGfsCkzwVK+kboGcZmKbde09m1DPqgzl4RUFt/HJ5v97MX8aEaN0UG3mKv2S+qj2Gw==} @@ -16576,6 +16598,10 @@ snapshots: '@openai/codex@0.139.0-win32-x64': optional: true + '@opencode-ai/sdk@1.17.4': + dependencies: + cross-spawn: 7.0.6 + '@openrouter/sdk@0.12.35': dependencies: zod: 4.3.6 From 04deb99344e4315407d69db67a195b0689027260 Mon Sep 17 00:00:00 2001 From: Jack Herrington Date: Mon, 15 Jun 2026 07:35:17 -0700 Subject: [PATCH 07/12] feat: add coco CLI dev-overlay coding agent + bundled sample-app MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Coco wraps a project's dev server with a reverse proxy that injects a Shadow-DOM AI chat panel into every HTML response. The panel drives the existing CLI-agent adapters (Claude Code, Codex, Gemini CLI, OpenCode) with cwd pinned to the project, and forwards the current route plus the currently picked element to the server as page context. Adds a bundled sample-app under examples/coco/sample-apps/simple-app (a freshly scaffolded TanStack Start app) so the example is exercisable out of the box. The sample-app is intentionally NOT a workspace package — pnpm scripts in coco's package.json handle install, dev, and a git-restore-based reset that leaves node_modules intact. 
Co-authored-by: Cursor --- examples/coco/README.md | 160 ++++++ examples/coco/package.json | 37 ++ .../coco/sample-apps/simple-app/.cta.json | 18 + .../coco/sample-apps/simple-app/.gitignore | 13 + .../simple-app/.vscode/settings.json | 11 + .../coco/sample-apps/simple-app/README.md | 207 ++++++++ .../coco/sample-apps/simple-app/package.json | 50 ++ .../sample-apps/simple-app/public/favicon.ico | Bin 0 -> 3870 bytes .../sample-apps/simple-app/public/logo192.png | Bin 0 -> 5347 bytes .../sample-apps/simple-app/public/logo512.png | Bin 0 -> 9664 bytes .../simple-app/public/manifest.json | 25 + .../sample-apps/simple-app/public/robots.txt | 3 + .../simple-app/src/components/Footer.tsx | 44 ++ .../simple-app/src/components/Header.tsx | 78 +++ .../simple-app/src/components/ThemeToggle.tsx | 81 +++ .../sample-apps/simple-app/src/router.tsx | 19 + .../simple-app/src/routes/__root.tsx | 61 +++ .../simple-app/src/routes/about.tsx | 23 + .../simple-app/src/routes/index.tsx | 87 ++++ .../sample-apps/simple-app/src/styles.css | 464 ++++++++++++++++++ .../coco/sample-apps/simple-app/tsconfig.json | 28 ++ .../sample-apps/simple-app/tsr.config.json | 3 + .../sample-apps/simple-app/vite.config.ts | 21 + examples/coco/scripts/reset-sample.mjs | 53 ++ examples/coco/src/agent-status.ts | 53 ++ examples/coco/src/agents.ts | 124 +++++ examples/coco/src/chat-handler.ts | 166 +++++++ examples/coco/src/cli.ts | 176 +++++++ examples/coco/src/client/chat.ts | 103 ++++ examples/coco/src/client/context.ts | 204 ++++++++ examples/coco/src/client/index.ts | 119 +++++ examples/coco/src/client/panel.ts | 368 ++++++++++++++ examples/coco/src/client/styles.ts | 305 ++++++++++++ examples/coco/src/dev-runner.ts | 158 ++++++ examples/coco/src/proxy.ts | 292 +++++++++++ examples/coco/tsconfig.json | 21 + examples/coco/vite.config.ts | 29 ++ pnpm-lock.yaml | 47 ++ 38 files changed, 3651 insertions(+) create mode 100644 examples/coco/README.md create mode 100644 examples/coco/package.json create mode 
100644 examples/coco/sample-apps/simple-app/.cta.json create mode 100644 examples/coco/sample-apps/simple-app/.gitignore create mode 100644 examples/coco/sample-apps/simple-app/.vscode/settings.json create mode 100644 examples/coco/sample-apps/simple-app/README.md create mode 100644 examples/coco/sample-apps/simple-app/package.json create mode 100644 examples/coco/sample-apps/simple-app/public/favicon.ico create mode 100644 examples/coco/sample-apps/simple-app/public/logo192.png create mode 100644 examples/coco/sample-apps/simple-app/public/logo512.png create mode 100644 examples/coco/sample-apps/simple-app/public/manifest.json create mode 100644 examples/coco/sample-apps/simple-app/public/robots.txt create mode 100644 examples/coco/sample-apps/simple-app/src/components/Footer.tsx create mode 100644 examples/coco/sample-apps/simple-app/src/components/Header.tsx create mode 100644 examples/coco/sample-apps/simple-app/src/components/ThemeToggle.tsx create mode 100644 examples/coco/sample-apps/simple-app/src/router.tsx create mode 100644 examples/coco/sample-apps/simple-app/src/routes/__root.tsx create mode 100644 examples/coco/sample-apps/simple-app/src/routes/about.tsx create mode 100644 examples/coco/sample-apps/simple-app/src/routes/index.tsx create mode 100644 examples/coco/sample-apps/simple-app/src/styles.css create mode 100644 examples/coco/sample-apps/simple-app/tsconfig.json create mode 100644 examples/coco/sample-apps/simple-app/tsr.config.json create mode 100644 examples/coco/sample-apps/simple-app/vite.config.ts create mode 100644 examples/coco/scripts/reset-sample.mjs create mode 100644 examples/coco/src/agent-status.ts create mode 100644 examples/coco/src/agents.ts create mode 100644 examples/coco/src/chat-handler.ts create mode 100644 examples/coco/src/cli.ts create mode 100644 examples/coco/src/client/chat.ts create mode 100644 examples/coco/src/client/context.ts create mode 100644 examples/coco/src/client/index.ts create mode 100644 
examples/coco/src/client/panel.ts create mode 100644 examples/coco/src/client/styles.ts create mode 100644 examples/coco/src/dev-runner.ts create mode 100644 examples/coco/src/proxy.ts create mode 100644 examples/coco/tsconfig.json create mode 100644 examples/coco/vite.config.ts diff --git a/examples/coco/README.md b/examples/coco/README.md new file mode 100644 index 000000000..080998912 --- /dev/null +++ b/examples/coco/README.md @@ -0,0 +1,160 @@ +# Coco — CLI dev-overlay coding agent + +Coco is a drop-in command-line tool that wraps your project's dev server with +an in-page AI coding-agent chat panel. Run `coco` inside any web project, open +the URL it prints, and a floating chat appears on top of your running app. The +chat drives a real coding agent — [Claude Code], [Codex], [Gemini CLI], or +[OpenCode] — pointed at the project's working directory, so the agent edits +the live code and the dev server's HMR reloads the page. + +[Claude Code]: https://docs.anthropic.com/en/docs/claude-code +[Codex]: https://developers.openai.com/codex +[Gemini CLI]: https://github.com/google-gemini/gemini-cli +[OpenCode]: https://opencode.ai + +Coco is framework-agnostic: it reverse-proxies the dev server and injects a +small `' + +const injectIntoHtml = (html: string): string => { + if (html.includes('/__coco/client.js')) return html // already there + const lower = html.toLowerCase() + const idx = lower.lastIndexOf('') + if (idx >= 0) { + return html.slice(0, idx) + INJECTED_TAG + html.slice(idx) + } + // Fall back to appending if there's no . 
+ return html + INJECTED_TAG +} + +const sendJson = (res: ServerResponse, status: number, value: unknown) => { + const body = JSON.stringify(value) + res.writeHead(status, { + 'Content-Type': 'application/json; charset=utf-8', + 'Content-Length': Buffer.byteLength(body), + 'Cache-Control': 'no-store', + }) + res.end(body) +} + +const send404 = (res: ServerResponse, msg = 'Not found') => { + res.writeHead(404, { 'Content-Type': 'text/plain; charset=utf-8' }) + res.end(msg) +} + +const send500 = (res: ServerResponse, err: unknown) => { + if (res.headersSent) { + res.end() + return + } + const body = err instanceof Error ? err.message : String(err) + res.writeHead(500, { 'Content-Type': 'text/plain; charset=utf-8' }) + res.end(body) +} + +/** + * Build an absolute URL for the incoming Node request so we can construct a + * `Request` from it. + */ +const requestUrl = (req: IncomingMessage): string => { + const host = req.headers.host ?? 'localhost' + return `http://${host}${req.url ?? '/'}` +} + +/** + * Convert a Node IncomingMessage into a Web `Request`. The body is taken + * directly from the readable stream for POSTs. + */ +const toFetchRequest = (req: IncomingMessage): Request => { + const headers = new Headers() + for (const [key, value] of Object.entries(req.headers)) { + if (value === undefined) continue + if (Array.isArray(value)) for (const v of value) headers.append(key, v) + else headers.set(key, value) + } + const method = (req.method ?? 'GET').toUpperCase() + const hasBody = method !== 'GET' && method !== 'HEAD' + const init: RequestInit & { duplex?: 'half' } = { + method, + headers, + } + if (hasBody) { + init.body = Readable.toWeb(req) as ReadableStream + init.duplex = 'half' + } + return new Request(requestUrl(req), init) +} + +/** + * Pipe a Web `Response` back into a Node ServerResponse. Streams the body so + * SSE flows in real-time. 
+ */ +const pipeFetchResponse = async ( + response: Response, + res: ServerResponse, +): Promise => { + const headers: Record = {} + response.headers.forEach((value, key) => { + headers[key] = value + }) + res.writeHead(response.status, headers) + if (!response.body) { + res.end() + return + } + const reader = response.body.getReader() + const onClose = () => { + reader.cancel().catch(() => undefined) + } + res.once('close', onClose) + try { + for (;;) { + const { value, done } = await reader.read() + if (done) break + if (value) res.write(value) + } + } finally { + res.off('close', onClose) + res.end() + } +} + +/** Serve the built panel bundle. */ +const serveClientBundle = async ( + res: ServerResponse, + bundlePath: string, +): Promise => { + try { + const buf = await fs.readFile(bundlePath) + res.writeHead(200, { + 'Content-Type': 'application/javascript; charset=utf-8', + 'Content-Length': buf.length, + 'Cache-Control': 'no-store', + }) + res.end(buf) + } catch { + res.writeHead(503, { 'Content-Type': 'text/plain; charset=utf-8' }) + res.end( + 'Coco panel bundle is missing. Run `pnpm --filter coco build` and reload.', + ) + } +} + +/** + * Strip `accept-encoding` so the dev server returns HTML uncompressed and we + * can inject a script without round-tripping through gzip/br. + */ +const stripAcceptEncoding = (req: IncomingMessage) => { + delete req.headers['accept-encoding'] +} + +/** + * Build and start Coco's proxy server. Returns a promise that resolves to a + * stop function. + */ +export const startProxyServer = async ( + options: ProxyOptions, +): Promise<{ url: string; stop: () => Promise }> => { + const { target, port, projectCwd, clientBundlePath } = options + + const proxy = httpProxy.createProxyServer({ + target, + changeOrigin: true, + ws: true, + selfHandleResponse: true, + // Preserve original Host so dev servers that gate on it (e.g. Vite's + // allowed-hosts) accept the request. 
+ autoRewrite: true, + }) + + proxy.on('error', (err, _req, resOrSocket) => { + if (resOrSocket && 'writeHead' in resOrSocket) { + try { + const res = resOrSocket as ServerResponse + if (!res.headersSent) { + res.writeHead(502, { 'Content-Type': 'text/plain; charset=utf-8' }) + } + res.end(`Coco proxy error: ${err.message}`) + } catch { + // ignore + } + } else if (resOrSocket && 'destroy' in resOrSocket) { + ;(resOrSocket as { destroy: () => void }).destroy() + } + }) + + // Custom response handling: buffer HTML to inject the panel script; + // stream everything else straight through. + proxy.on('proxyRes', (proxyRes, _req, res) => { + const contentType = String(proxyRes.headers['content-type'] ?? '') + const isHtml = contentType.includes('text/html') + + const status = proxyRes.statusCode ?? 502 + const headers = { ...proxyRes.headers } + + if (isHtml) { + const chunks: Array = [] + proxyRes.on('data', (chunk: Buffer) => chunks.push(chunk)) + proxyRes.on('end', () => { + const original = Buffer.concat(chunks).toString('utf8') + const injected = injectIntoHtml(original) + const body = Buffer.from(injected, 'utf8') + delete headers['content-length'] + delete headers['content-encoding'] + headers['content-length'] = String(body.length) + res.writeHead(status, headers) + res.end(body) + }) + proxyRes.on('error', (err) => send500(res, err)) + } else { + res.writeHead(status, headers) + proxyRes.pipe(res) + } + }) + + const server = http.createServer(async (req, res) => { + try { + const url = new URL(req.url ?? 
'/', 'http://localhost') + const pathname = url.pathname + + if (pathname === '/__coco/client.js') { + await serveClientBundle(res, clientBundlePath) + return + } + if (pathname === '/__coco/api/agents' && req.method === 'GET') { + sendJson(res, 200, await detectAgentConfig()) + return + } + if (pathname === '/__coco/api/chat' && req.method === 'POST') { + const fetchReq = toFetchRequest(req) + const fetchRes = await handleChat(fetchReq, projectCwd) + await pipeFetchResponse(fetchRes, res) + return + } + if (pathname.startsWith('/__coco/')) { + send404(res, 'Unknown __coco endpoint') + return + } + + stripAcceptEncoding(req) + proxy.web(req, res, { target }) + } catch (err) { + send500(res, err) + } + }) + + server.on('upgrade', (req, socket, head) => { + if (req.url?.startsWith('/__coco/')) { + socket.destroy() + return + } + proxy.ws(req, socket, head, { target }) + }) + + await new Promise((resolve, reject) => { + server.once('error', reject) + server.listen(port, () => { + server.off('error', reject) + resolve() + }) + }) + + const url = `http://localhost:${port}` + + const stop = () => + new Promise((resolve) => { + server.close(() => resolve()) + proxy.close() + }) + + // touch the path so a bare `--target` ENOENT bundle path produces a + // friendlier error at request time rather than at startup. 
+ void path.basename(clientBundlePath) + + return { url, stop } +} diff --git a/examples/coco/tsconfig.json b/examples/coco/tsconfig.json new file mode 100644 index 000000000..37659fa43 --- /dev/null +++ b/examples/coco/tsconfig.json @@ -0,0 +1,21 @@ +{ + "include": ["src/**/*.ts", "vite.config.ts"], + "compilerOptions": { + "target": "ES2022", + "module": "ESNext", + "lib": ["ES2022", "DOM", "DOM.Iterable"], + "types": ["node", "vite/client"], + + "moduleResolution": "bundler", + "allowImportingTsExtensions": true, + "verbatimModuleSyntax": false, + "noEmit": true, + + "skipLibCheck": true, + "strict": true, + "noUnusedLocals": true, + "noUnusedParameters": true, + "noFallthroughCasesInSwitch": true, + "noUncheckedSideEffectImports": true + } +} diff --git a/examples/coco/vite.config.ts b/examples/coco/vite.config.ts new file mode 100644 index 000000000..cf60d642c --- /dev/null +++ b/examples/coco/vite.config.ts @@ -0,0 +1,29 @@ +import { defineConfig } from 'vite' +import { fileURLToPath } from 'node:url' + +/** + * Builds the in-page panel as a single IIFE bundle. The proxy serves it at + * `/__coco/client.js`; the panel runs inside the host page's window but inside + * a Shadow DOM root to keep CSS/DOM isolated from the host app. + */ +export default defineConfig({ + build: { + target: 'es2022', + outDir: 'dist/client', + emptyOutDir: true, + sourcemap: true, + lib: { + entry: fileURLToPath(new URL('./src/client/index.ts', import.meta.url)), + formats: ['iife'], + name: 'CocoPanel', + fileName: () => 'client.js', + }, + rollupOptions: { + output: { + // Avoid creating a separate CSS file — we inline styles into the + // Shadow DOM via a string constant. 
+ assetFileNames: 'client.[ext]', + }, + }, + }, +}) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index c30e85a5b..8706e6a54 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -105,6 +105,46 @@ importers: specifier: ^4.0.14 version: 4.1.4(@opentelemetry/api@1.9.1)(@types/node@24.10.3)(happy-dom@20.0.11)(jsdom@27.3.0(postcss@8.5.15))(vite@7.3.3(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) + examples/coco: + dependencies: + '@tanstack/ai': + specifier: workspace:* + version: link:../../packages/ai + '@tanstack/ai-claude-code': + specifier: workspace:* + version: link:../../packages/ai-claude-code + '@tanstack/ai-client': + specifier: workspace:* + version: link:../../packages/ai-client + '@tanstack/ai-codex': + specifier: workspace:* + version: link:../../packages/ai-codex + '@tanstack/ai-gemini-cli': + specifier: workspace:* + version: link:../../packages/ai-gemini-cli + '@tanstack/ai-opencode': + specifier: workspace:* + version: link:../../packages/ai-opencode + http-proxy: + specifier: ^1.18.1 + version: 1.18.1 + tsx: + specifier: ^4.21.0 + version: 4.21.0 + devDependencies: + '@types/http-proxy': + specifier: ^1.17.17 + version: 1.17.17 + '@types/node': + specifier: ^24.10.1 + version: 24.10.3 + typescript: + specifier: 5.9.3 + version: 5.9.3 + vite: + specifier: ^7.3.3 + version: 7.3.3(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2) + examples/ts-code-mode-web: dependencies: '@radix-ui/react-collapsible': @@ -7489,6 +7529,9 @@ packages: '@types/hast@3.0.4': resolution: {integrity: sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==} + '@types/http-proxy@1.17.17': + resolution: {integrity: sha512-ED6LB+Z1AVylNTu7hdzuBqOgMnvG/ld6wGCG8wFnAzKX5uyW2K3WD52v0gnLCTK/VLpXtKckgWuyScYK6cSPaw==} + '@types/istanbul-lib-coverage@2.0.6': resolution: {integrity: 
sha512-2QF/t/auWm0lsy8XtKVPG19v3sSOQlJe/YHZgfjb/KBBHOGSV+J2q/S671rcq9uTBrLAXmZpqJiaQbMT+zNU1w==} @@ -19915,6 +19958,10 @@ snapshots: dependencies: '@types/unist': 3.0.3 + '@types/http-proxy@1.17.17': + dependencies: + '@types/node': 24.10.3 + '@types/istanbul-lib-coverage@2.0.6': {} '@types/istanbul-lib-report@3.0.3': From 7d97d9e45edd178620c9c65e36eb0c34508e2ad6 Mon Sep 17 00:00:00 2001 From: Jack Herrington Date: Mon, 15 Jun 2026 07:37:19 -0700 Subject: [PATCH 08/12] chore(coco/sample-app): ignore pnpm-lock and generated routeTree So `pnpm --filter coco sample:reset` (which is `git restore` + `git clean`) preserves them across resets, instead of forcing a full reinstall and route-tree regenerate on every reset. Co-authored-by: Cursor --- examples/coco/sample-apps/simple-app/.gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/examples/coco/sample-apps/simple-app/.gitignore b/examples/coco/sample-apps/simple-app/.gitignore index 8b25bb54e..99e9e164b 100644 --- a/examples/coco/sample-apps/simple-app/.gitignore +++ b/examples/coco/sample-apps/simple-app/.gitignore @@ -11,3 +11,8 @@ dist-ssr .vinxi __unconfig* todos.json + +# Auto-generated; ignored so `sample:reset` (git clean) leaves them in +# place, avoiding a full reinstall / route-tree regenerate on every reset. +pnpm-lock.yaml +src/routeTree.gen.ts From 025441f7921225fcf4b432ccc9e58567228c5d5e Mon Sep 17 00:00:00 2001 From: Jack Herrington Date: Mon, 15 Jun 2026 12:58:43 -0700 Subject: [PATCH 09/12] fix(ai-client): stub missing methods on NoOpChatDevtoolsBridge The no-op devtools bridge that ChatClient falls back to (when no `devtoolsBridgeFactory` is supplied) was missing `mountWithTools`, `notifyToolsChanged`, and `recordStreamId`. 
`ChatClient.sendMessage` calls `mountDevtools()` which sets `devtoolsMounted = true` BEFORE invoking `mountWithTools`, so the first send died with `TypeError: this.devtoolsBridge.mountWithTools is not a function`, the promise rejected (often silently), and every subsequent send short-circuited past the broken line and worked. Symptom in the wild: "first message does nothing, the second one works". Also fix the structural-parity check that was supposed to prevent this drift. `const x: Missing = undefined as never` always typechecks (because `never` is assignable to anything), so the original assertion was a no-op. Replaced with `type _AssertBridgeParity<T extends never> = T`, which now fails the build the next time the real bridge grows a public method the no-op doesn't stub. Co-authored-by: Cursor --- .changeset/ai-client-noop-bridge-mount.md | 7 +++++ packages/ai-client/src/devtools-noop.ts | 31 ++++++++++++++++------- 2 files changed, 29 insertions(+), 9 deletions(-) create mode 100644 .changeset/ai-client-noop-bridge-mount.md diff --git a/.changeset/ai-client-noop-bridge-mount.md b/.changeset/ai-client-noop-bridge-mount.md new file mode 100644 index 000000000..5c2068ad5 --- /dev/null +++ b/.changeset/ai-client-noop-bridge-mount.md @@ -0,0 +1,7 @@ +--- +'@tanstack/ai-client': patch +--- + +Fix `NoOpChatDevtoolsBridge` missing `mountWithTools`, `notifyToolsChanged`, and `recordStreamId` — the first call to `ChatClient.sendMessage` (with the default no-op devtools factory) threw `this.devtoolsBridge.mountWithTools is not a function` and silently rejected. `mountDevtools()` sets `devtoolsMounted = true` *before* invoking `mountWithTools`, so the failure was non-obvious: the first send died inside the bridge call, while every subsequent send short-circuited past the broken line and worked normally. + +Also fix the structural-parity check that was supposed to prevent this drift.
`const x: Missing = undefined as never` always typechecks (because `never` is assignable to anything), so the original check was a no-op. Replaced with `type _AssertBridgeParity<T extends never> = T`, which now fails the build the next time the real bridge grows a public method the no-op doesn't stub. diff --git a/packages/ai-client/src/devtools-noop.ts b/packages/ai-client/src/devtools-noop.ts index 7d02c8a26..c22eda692 100644 --- a/packages/ai-client/src/devtools-noop.ts +++ b/packages/ai-client/src/devtools-noop.ts @@ -73,6 +73,9 @@ export class NoOpChatDevtoolsBridge { // chat-specific surface setCurrentStreamId(_streamId: string | null): void {} + recordStreamId(_streamId: string): void {} + mountWithTools(_initialMessageCount: number): void {} + notifyToolsChanged(): void {} getCurrentStreamId(): string | null { return null } @@ -150,9 +153,15 @@ export class NoOpVideoDevtoolsBridge< // Compile-time parity checks. If a public method is added to the real // bridge class without a matching stub on the no-op, the corresponding -// `Exclude<...>` will resolve to a non-`never` union and the `as never` -// assignment below will fail to typecheck — surfacing the drift at build -// time instead of as a runtime TypeError later. +// `Exclude<...>` resolves to a non-`never` union, the conditional below +// resolves to a string-literal type, and the assignment to `never` fails +// to typecheck — surfacing the drift at build time instead of as a +// runtime TypeError later. +// +// (An earlier version did `const x: Missing = undefined as never` which +// is the *wrong* direction: `never` is assignable to everything, so the +// check was a no-op and silently let new public methods slip through.)
+type _AssertBridgeParity = T type _ChatBridgeMissing = Exclude< keyof ChatDevtoolsBridge, keyof NoOpChatDevtoolsBridge @@ -165,12 +174,16 @@ type _VideoBridgeMissing = Exclude< keyof VideoDevtoolsBridge, keyof NoOpVideoDevtoolsBridge > -const _chatBridgeParity: _ChatBridgeMissing = undefined as never -const _generationBridgeParity: _GenerationBridgeMissing = undefined as never -const _videoBridgeParity: _VideoBridgeMissing = undefined as never -void _chatBridgeParity -void _generationBridgeParity -void _videoBridgeParity +type _ChatBridgeParity = _AssertBridgeParity<_ChatBridgeMissing> +type _GenerationBridgeParity = _AssertBridgeParity<_GenerationBridgeMissing> +type _VideoBridgeParity = _AssertBridgeParity<_VideoBridgeMissing> +// Reference the aliases so they aren't pruned as unused types — the +// generic constraint on `_AssertBridgeParity` does the real check. +export type { + _ChatBridgeParity, + _GenerationBridgeParity, + _VideoBridgeParity, +} // =========================================================================== // Factories — these are what the clients call when no real factory was From 26db95f6688282b3acddd2396f0ef25edb7ccf12 Mon Sep 17 00:00:00 2001 From: Jack Herrington Date: Mon, 15 Jun 2026 12:59:00 -0700 Subject: [PATCH 10/12] feat(coco): visible in-flight UI and remove silent send gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The panel now makes it obvious when a turn is in progress so a quiet agent (or a network/server failure) can't masquerade as "nothing happened": - A `status` state machine (idle → sending → streaming) drives: - a spinning ring + green pulsing dot on the 🥥 launcher (visible even when the panel is closed), - a "Coco is calling the agent…" / "Coco is working…" pill with three bouncing dots in the messages area, - a status bar at the bottom of the panel ("Sending to claude-code…" → "Streaming from claude-code…" → "Ready (claude-code, edit)" with a colored dot), - the Send 
button disables and shows "…" while non-idle. - Removed the silent "not configured" gate. Previously, pressing Send before the async `/__coco/api/agents` check returned (or if it errored) would silently open the setup dialog instead of sending. `configured` is now `null` until the fetch lands, the panel never blocks sends on it, and a `null` fetch result no longer falsely shows every agent as unconfigured. The setup hint only appears once we *know* the agent is missing credentials. - Surface `sendMessage` rejections via `console.error('[coco] …')` and the in-panel error band instead of swallowing them, plus a debug log on every submit so the path can be traced from the browser console. Co-authored-by: Cursor --- examples/coco/src/client/chat.ts | 11 ++- examples/coco/src/client/index.ts | 70 ++++++++++++------- examples/coco/src/client/panel.ts | 105 ++++++++++++++++++++++------- examples/coco/src/client/styles.ts | 75 +++++++++++++++++++++ 4 files changed, 210 insertions(+), 51 deletions(-) diff --git a/examples/coco/src/client/chat.ts b/examples/coco/src/client/chat.ts index c1c9a65d2..acc97ea86 100644 --- a/examples/coco/src/client/chat.ts +++ b/examples/coco/src/client/chat.ts @@ -29,13 +29,17 @@ export interface CocoChatCallbacks { onMessages: (messages: Array) => void onLoading: (loading: boolean) => void onError: (error: string | null) => void + /** Emitted as soon as `sendMessage` is invoked, before any network I/O. 
*/ + onSubmit?: () => void } export class CocoChat { private readonly client: ChatClient + private readonly callbacks: CocoChatCallbacks private fwd: ForwardedProps constructor(agent: AgentId, mode: AgentMode, callbacks: CocoChatCallbacks) { + this.callbacks = callbacks this.fwd = { agentId: agent, mode } this.client = new ChatClient({ connection: fetchServerSentEvents('/__coco/api/chat'), @@ -65,7 +69,12 @@ export class CocoChat { send(text: string) { this.applyForwardedProps() - void this.client.sendMessage(text).catch(() => undefined) + console.debug('[coco] send →', { text, forwardedProps: this.fwd }) + this.callbacks.onSubmit?.() + void this.client.sendMessage(text).catch((err) => { + console.error('[coco] sendMessage failed:', err) + this.callbacks.onError(err instanceof Error ? err.message : String(err)) + }) } clear() { diff --git a/examples/coco/src/client/index.ts b/examples/coco/src/client/index.ts index 92a554474..6b857c65e 100644 --- a/examples/coco/src/client/index.ts +++ b/examples/coco/src/client/index.ts @@ -16,18 +16,17 @@ import { DEFAULT_AGENT, type AgentId, type AgentMode } from '../agents.ts' const MOUNT_FLAG = '__coco_mounted__' -const fetchAgentConfig = async (): Promise => { +const fetchAgentConfig = async (): Promise => { try { const res = await fetch('/__coco/api/agents', { cache: 'no-store' }) if (!res.ok) throw new Error(`HTTP ${res.status}`) return (await res.json()) as AgentConfigMap - } catch { - return { - 'claude-code': false, - codex: false, - 'gemini-cli': false, - opencode: false, - } + } catch (err) { + console.warn( + '[coco] failed to fetch /__coco/api/agents; sends will still work but the "needs setup" hint is unavailable.', + err, + ) + return null } } @@ -42,23 +41,20 @@ const main = () => { let mode: AgentMode = 'edit' const panel = new Panel({ - send: (text) => { - const isConfigured = panel.getState().configured[agent] - if (!isConfigured) { - panel.setState({ setupOpen: agent }) - return - } - chat.send(text) - }, + 
send: (text) => chat.send(text), newSession: () => { chat.clear() - panel.setState({ error: null }) + panel.setState({ error: null, status: 'idle' }) }, selectAgent: (id) => { agent = id chat.setAgent(id) - const configured = panel.getState().configured[id] - panel.setState({ agent: id, setupOpen: configured ? null : id }) + const cfg = panel.getState().configured + const known = cfg !== null + panel.setState({ + agent: id, + setupOpen: known && !cfg[id] ? id : null, + }) }, selectMode: (m) => { mode = m @@ -91,9 +87,31 @@ const main = () => { }) const chat = new CocoChat(agent, mode, { - onMessages: (messages) => panel.setState({ messages }), - onLoading: (isLoading) => panel.setState({ isLoading }), - onError: (error) => panel.setState({ error }), + onSubmit: () => panel.setState({ status: 'sending', error: null }), + onMessages: (messages) => { + // The first chunk that creates an assistant message means the stream + // has started — flip from sending → streaming. + const cur = panel.getState() + const patch: Parameters[0] = { messages } + if ( + cur.status === 'sending' && + messages.some((m) => m.role === 'assistant') + ) { + patch.status = 'streaming' + } + panel.setState(patch) + }, + onLoading: (isLoading) => { + const patch: Parameters[0] = { isLoading } + if (!isLoading) patch.status = 'idle' + else if (panel.getState().status === 'idle') patch.status = 'sending' + panel.setState(patch) + }, + onError: (error) => + panel.setState({ + error, + status: error ? 'idle' : panel.getState().status, + }), }) // Initial route + watcher. @@ -102,8 +120,12 @@ const main = () => { panel.setState({ route }) }) - // Fetch agent-config from the server and surface it in the panel. - void fetchAgentConfig().then((configured) => panel.setState({ configured })) + // Fetch agent-config from the server and surface it in the panel. 
If the + // fetch fails we leave `configured` as `null` so the panel hides the + // setup hint instead of falsely claiming everything is unconfigured. + void fetchAgentConfig().then((configured) => { + if (configured) panel.setState({ configured }) + }) document.body.appendChild(panel.hostElement) diff --git a/examples/coco/src/client/panel.ts b/examples/coco/src/client/panel.ts index f8c3ca104..fb70b902a 100644 --- a/examples/coco/src/client/panel.ts +++ b/examples/coco/src/client/panel.ts @@ -36,12 +36,24 @@ export interface PanelState { open: boolean agent: AgentId mode: AgentMode - configured: AgentConfigMap + /** + * Per-agent "credentials are configured" hint. `null` when we haven't + * fetched `/__coco/api/agents` yet (or it errored); when null the panel + * shows a soft "(checking…)" and DOES NOT block sends — we let the + * server's response speak for itself. + */ + configured: AgentConfigMap | null route: string selected: SelectedElement | null picking: boolean messages: Array isLoading: boolean + /** + * High-level status for the in-flight indicator. `idle` means nothing is + * happening; `sending` is the moment between submit and the first server + * chunk; `streaming` is once we've started receiving chunks. + */ + status: 'idle' | 'sending' | 'streaming' error: string | null setupOpen: AgentId | null } @@ -125,17 +137,13 @@ export class Panel { open: false, agent: DEFAULT_AGENT, mode: 'edit', - configured: { - 'claude-code': false, - codex: false, - 'gemini-cli': false, - opencode: false, - }, + configured: null, route: window.location.pathname, selected: null, picking: false, messages: [], isLoading: false, + status: 'idle', error: null, setupOpen: null, } @@ -176,18 +184,24 @@ export class Panel { const s = this.state const launcherHtml = ` - + ` const setupHtml = s.setupOpen ? this.renderSetup(s.setupOpen) : '' - const isConfigured = s.configured[s.agent] - const notice = !isConfigured - ? `
- ⚠️ ${escapeHtml(AGENT_SETUP[s.agent].label)} isn't configured. - -
` - : '' + // `configured === null` means we haven't successfully fetched the agent + // status yet — show a soft pending hint but never block the user. + const cfgKnown = s.configured !== null + const isConfigured = cfgKnown && s.configured![s.agent] + const notice = + cfgKnown && !isConfigured + ? `
+ ⚠️ ${escapeHtml(AGENT_SETUP[s.agent].label)} isn't configured. Sending anyway will likely fail. + +
` + : '' const chips: Array = [] chips.push( @@ -201,23 +215,55 @@ export class Panel { } const errorHtml = s.error - ? `
${escapeHtml(s.error)}
` + ? `
⚠ ${escapeHtml(s.error)}
` : '' + const statusLabel = s.error + ? `Error — see message above` + : s.status === 'sending' + ? `Sending to ${s.agent}…` + : s.status === 'streaming' + ? `Streaming from ${s.agent}…` + : !cfgKnown + ? 'Checking agent status…' + : `Ready (${s.agent}, ${s.mode})` + const statusClass = s.error + ? 'error' + : s.status === 'idle' + ? 'idle' + : s.status + const statusBarHtml = `
${escapeHtml(statusLabel)}
` + + const thinkingHtml = + s.status !== 'idle' + ? `
+ + ${ + s.status === 'sending' + ? 'Coco is calling the agent…' + : 'Coco is working…' + } +
` + : '' + const messagesHtml = - s.messages.length === 0 + s.messages.length === 0 && s.status === 'idle' ? `
Ask Coco to change something in this page. Try “make the heading larger” or click 🎯 to point at an element.
` - : s.messages.map(renderMessage).join('') + : s.messages.map(renderMessage).join('') + thinkingHtml const panelHtml = `