From 74dec5ceb7f8d1ce00c720eb3adb7f3b2d5b089e Mon Sep 17 00:00:00 2001 From: Jacky Date: Tue, 21 Apr 2026 11:56:32 +0800 Subject: [PATCH] feat: native Anthropic passthrough for Claude models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Route Claude model requests directly to Copilot's native /v1/messages endpoint instead of translating through OpenAI /chat/completions. Benefits: - Eliminates Anthropic↔OpenAI translation overhead for Claude models - Enables native thinking (adaptive), cache_control, output_config - Streaming SSE directly piped (zero translation latency) - Non-Claude models (GPT etc.) keep existing OpenAI translation path New files: - src/services/copilot/create-messages.ts - Native /v1/messages client - src/lib/sanitize.ts - Copilot backend compatibility sanitizer - Strips unsupported fields (context_management, budget_tokens, etc.) - opus: thinking.enabled→adaptive, effort→medium - Model name normalization (claude-sonnet-4-* → claude-sonnet-4) Modified: - src/routes/messages/handler.ts - Route Claude vs non-Claude models All 42 existing + new tests pass. 
--- bun.lock | 1 + src/lib/sanitize.ts | 119 +++++++++++++++ src/routes/messages/handler.ts | 77 +++++++++- src/services/copilot/create-messages.ts | 68 +++++++++ tests/sanitize.test.ts | 192 ++++++++++++++++++++++++ 5 files changed, 453 insertions(+), 4 deletions(-) create mode 100644 src/lib/sanitize.ts create mode 100644 src/services/copilot/create-messages.ts create mode 100644 tests/sanitize.test.ts diff --git a/bun.lock b/bun.lock index 20e895e7f..9ece87578 100644 --- a/bun.lock +++ b/bun.lock @@ -1,5 +1,6 @@ { "lockfileVersion": 1, + "configVersion": 0, "workspaces": { "": { "name": "copilot-api", diff --git a/src/lib/sanitize.ts b/src/lib/sanitize.ts new file mode 100644 index 000000000..f7a28be38 --- /dev/null +++ b/src/lib/sanitize.ts @@ -0,0 +1,119 @@ +import consola from "consola" + +import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types" + +/** + * Sanitize an Anthropic /v1/messages payload for Copilot backend compatibility. + * + * Based on live probe results from tests/copilot-native-probes.ts: + * - All models: strip context_management, budget_tokens, defer_loading, tool_reference + * - opus: thinking.enabled → adaptive, effort only accepts "medium" + */ + +export function sanitizeForCopilotBackend( + payload: Record, +): Record { + return sanitizePayload( + payload as unknown as AnthropicMessagesPayload, + ) as unknown as Record +} + +export function sanitizePayload( + payload: AnthropicMessagesPayload, +): AnthropicMessagesPayload { + // Deep clone to avoid mutating the original + const sanitized = structuredClone(payload) as AnthropicMessagesPayload + & Record + + // 1. Normalize model name (Copilot uses short names) + sanitized.model = normalizeModelName(sanitized.model) + + // 2. Strip context_management (rejected by all models) + if ("context_management" in sanitized) { + consola.debug("Stripping context_management from request") + delete sanitized.context_management + } + + // 3. 
Handle thinking block + if (sanitized.thinking) { + const isOpus = isOpusModel(sanitized.model) + + if (isOpus) { + // opus models: thinking.enabled → adaptive + consola.debug("opus: Converting thinking to adaptive") + sanitized.thinking = { type: "adaptive" } as typeof sanitized.thinking + } + + // All models: strip budget_tokens from thinking + if ("budget_tokens" in sanitized.thinking) { + consola.debug("Stripping budget_tokens from thinking") + const { budget_tokens: _, ...rest } = sanitized.thinking as Record< + string, + unknown + > + sanitized.thinking = rest as typeof sanitized.thinking + } + } + + // 4. Handle output_config.effort for opus + if ("output_config" in sanitized && isOpusModel(sanitized.model)) { + const outputConfig = sanitized.output_config as + | Record + | undefined + if ( + outputConfig + && typeof outputConfig.effort === "string" + && outputConfig.effort !== "medium" + ) { + consola.debug( + `opus: Normalizing effort "${outputConfig.effort}" to "medium"`, + ) + outputConfig.effort = "medium" + } + } + + // 5. Strip defer_loading from tool definitions + if (sanitized.tools) { + for (const tool of sanitized.tools) { + const t = tool as Record + if ("defer_loading" in t) { + delete t.defer_loading + } + } + } + + // 6. 
Filter tool_reference content blocks from messages + for (const msg of sanitized.messages) { + if (Array.isArray(msg.content)) { + msg.content = (msg.content as Array>).filter( + (block) => block.type !== "tool_reference", + ) as typeof msg.content + } + } + + return sanitized +} + +function normalizeModelName(model: string): string { + if (model.startsWith("claude-sonnet-4-")) { + return model.replace(/^claude-sonnet-4-.*/, "claude-sonnet-4") + } else if (model.startsWith("claude-opus-4-")) { + return model.replace(/^claude-opus-4-.*/, "claude-opus-4") + } + return model +} + +function isOpusModel(model: string): boolean { + return model.includes("opus") +} + +/** + * Check if a model name is a Claude model (should use native Anthropic passthrough) + */ +export function isClaude(model: string): boolean { + return isClaudeModel(model) +} + +export function isClaudeModel(model: string): boolean { + return model.startsWith("claude-") || model.startsWith("claude_") +} diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index 85dbf6243..26f30c31b 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -5,12 +5,14 @@ import { streamSSE } from "hono/streaming" import { awaitApproval } from "~/lib/approval" import { checkRateLimit } from "~/lib/rate-limit" +import { sanitizeForCopilotBackend, isClaude } from "~/lib/sanitize" import { state } from "~/lib/state" import { createChatCompletions, type ChatCompletionChunk, type ChatCompletionResponse, } from "~/services/copilot/create-chat-completions" +import { createMessages } from "~/services/copilot/create-messages" import { type AnthropicMessagesPayload, @@ -28,16 +30,83 @@ export async function handleCompletion(c: Context) { const anthropicPayload = await c.req.json() consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload)) + if (state.manualApprove) { + await awaitApproval() + } + + // Route Claude models to native Anthropic passthrough + if 
(isClaude(anthropicPayload.model)) { + return handleNativeAnthropic(c, anthropicPayload) + } + + // Non-Claude models: use existing OpenAI translation path + return handleOpenAITranslation(c, anthropicPayload) +} + +/** + * Native Anthropic passthrough for Claude models. + * Sends requests directly to Copilot's /v1/messages endpoint. + * Responses are in Anthropic format already - no translation needed. + */ +async function handleNativeAnthropic( + c: Context, + payload: AnthropicMessagesPayload, +) { + consola.debug("Using native Anthropic passthrough for model:", payload.model) + + const sanitized = sanitizeForCopilotBackend( + payload as unknown as Record, + ) + consola.debug("Sanitized payload:", JSON.stringify(sanitized).slice(0, 500)) + + const response = await createMessages(sanitized) + + if (!payload.stream) { + // Non-streaming: Copilot returns Anthropic JSON directly + const body = await response.json() + consola.debug( + "Native non-streaming response:", + JSON.stringify(body).slice(-400), + ) + return c.json(body) + } + + // Streaming: Copilot returns Anthropic SSE format - pipe through directly + consola.debug("Native streaming response - piping SSE directly") + + // Set SSE headers + c.header("Content-Type", "text/event-stream") + c.header("Cache-Control", "no-cache") + c.header("Connection", "keep-alive") + + // Pipe the upstream SSE response body directly to the client + if (!response.body) { + return c.text("No response body", 500) + } + + return new Response(response.body, { + status: 200, + headers: { + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache", + Connection: "keep-alive", + }, + }) +} + +/** + * Existing OpenAI translation path for non-Claude models (GPT, etc.) 
+ */ +async function handleOpenAITranslation( + c: Context, + anthropicPayload: AnthropicMessagesPayload, +) { const openAIPayload = translateToOpenAI(anthropicPayload) consola.debug( "Translated OpenAI request payload:", JSON.stringify(openAIPayload), ) - if (state.manualApprove) { - await awaitApproval() - } - const response = await createChatCompletions(openAIPayload) if (isNonStreaming(response)) { diff --git a/src/services/copilot/create-messages.ts b/src/services/copilot/create-messages.ts new file mode 100644 index 000000000..ab29fc2f6 --- /dev/null +++ b/src/services/copilot/create-messages.ts @@ -0,0 +1,68 @@ +import consola from "consola" + +import { copilotHeaders, copilotBaseUrl } from "~/lib/api-config" +import { HTTPError } from "~/lib/error" +import { state } from "~/lib/state" + +/** + * Send an Anthropic /v1/messages request directly to the Copilot native endpoint. + * For Claude models, this avoids the OpenAI translation layer entirely. + */ +export const createMessages = async ( + payload: Record, +): Promise => { + if (!state.copilotToken) throw new Error("Copilot token not found") + + const enableVision = hasImageContent(payload) + + const isAgentCall = hasAgentMessages(payload) + + const headers: Record = { + ...copilotHeaders(state, enableVision), + "X-Initiator": isAgentCall ? 
"agent" : "user", + } + + const url = `${copilotBaseUrl(state)}/v1/messages` + consola.debug("Native Anthropic request to:", url) + + const response = await fetch(url, { + method: "POST", + headers, + body: JSON.stringify(payload), + }) + + if (!response.ok) { + const errorBody = await response.text().catch(() => "") + consola.error( + "Failed to create native messages", + response.status, + errorBody, + ) + throw new HTTPError("Failed to create native messages", response) + } + + return response +} + +function hasImageContent(payload: Record): boolean { + const messages = payload.messages as + | Array> + | undefined + if (!messages) return false + return messages.some((msg) => { + if (!Array.isArray(msg.content)) return false + return msg.content.some( + (block: Record) => block.type === "image", + ) + }) +} + +function hasAgentMessages(payload: Record): boolean { + const messages = payload.messages as + | Array> + | undefined + if (!messages) return false + return messages.some((msg) => + ["assistant", "tool"].includes(msg.role as string), + ) +} diff --git a/tests/sanitize.test.ts b/tests/sanitize.test.ts new file mode 100644 index 000000000..b2e62259d --- /dev/null +++ b/tests/sanitize.test.ts @@ -0,0 +1,192 @@ +import { test, expect, describe } from "bun:test" + +import type { AnthropicMessagesPayload } from "../src/routes/messages/anthropic-types" + +import { sanitizePayload, isClaudeModel } from "../src/lib/sanitize" + +const basePayload: AnthropicMessagesPayload = { + model: "claude-sonnet-4", + max_tokens: 1024, + messages: [{ role: "user", content: "Hello" }], +} + +describe("isClaudeModel", () => { + test("returns true for claude models", () => { + expect(isClaudeModel("claude-sonnet-4")).toBe(true) + expect(isClaudeModel("claude-opus-4")).toBe(true) + expect(isClaudeModel("claude-haiku-3")).toBe(true) + expect(isClaudeModel("claude-sonnet-4-20250514")).toBe(true) + }) + + test("returns false for non-claude models", () => { + 
      expect(isClaudeModel("gpt-4o")).toBe(false)
      expect(isClaudeModel("o1-mini")).toBe(false)
      expect(isClaudeModel("gemini-pro")).toBe(false)
  })
})

describe("sanitizePayload", () => {
  // Dated model names must collapse to the short form Copilot accepts.
  describe("model name normalization", () => {
    test("normalizes claude-sonnet-4-* to claude-sonnet-4", () => {
      const result = sanitizePayload({
        ...basePayload,
        model: "claude-sonnet-4-20250514",
      })
      expect(result.model).toBe("claude-sonnet-4")
    })

    test("normalizes claude-opus-4-* to claude-opus-4", () => {
      const result = sanitizePayload({
        ...basePayload,
        model: "claude-opus-4-20250514",
      })
      expect(result.model).toBe("claude-opus-4")
    })

    test("preserves models without version suffixes", () => {
      const result = sanitizePayload({
        ...basePayload,
        model: "claude-sonnet-4",
      })
      expect(result.model).toBe("claude-sonnet-4")
    })

    test("preserves non-claude models", () => {
      const result = sanitizePayload({ ...basePayload, model: "gpt-4o" })
      expect(result.model).toBe("gpt-4o")
    })
  })

  // context_management is rejected by the Copilot backend for all models.
  describe("context_management stripping", () => {
    test("strips context_management from payload", () => {
      const payload = {
        ...basePayload,
        context_management: { some: "value" },
      } as AnthropicMessagesPayload & { context_management: unknown }
      const result = sanitizePayload(payload)
      expect(
        (result as Record<string, unknown>).context_management,
      ).toBeUndefined()
    })
  })

  describe("thinking sanitization", () => {
    test("strips budget_tokens from thinking", () => {
      const result = sanitizePayload({
        ...basePayload,
        thinking: { type: "enabled", budget_tokens: 5000 },
      })
      expect(result.thinking).toEqual({ type: "enabled" })
    })

    test("converts enabled to adaptive for opus models", () => {
      const result = sanitizePayload({
        ...basePayload,
        model: "claude-opus-4-20250514",
        thinking: { type: "enabled", budget_tokens: 5000 },
      })
      expect(result.thinking).toEqual({ type: "adaptive" })
    })

    test("preserves thinking type for non-opus models", () => {
      const result = sanitizePayload({
        ...basePayload,
        model: "claude-sonnet-4",
        thinking: { type: "enabled" },
      })
      expect(result.thinking).toEqual({ type: "enabled" })
    })
  })

  describe("tools sanitization", () => {
    test("strips defer_loading from tools", () => {
      const payload = {
        ...basePayload,
        tools: [
          // Cast extracts the element type of AnthropicMessagesPayload["tools"].
          {
            name: "get_weather",
            input_schema: { type: "object" },
            defer_loading: true,
          } as unknown as AnthropicMessagesPayload["tools"] extends (
            Array<infer T> | undefined
          ) ?
            T
          : never,
        ],
      }
      const result = sanitizePayload(
        payload as unknown as AnthropicMessagesPayload,
      )
      expect(result.tools?.[0]).toEqual({
        name: "get_weather",
        input_schema: { type: "object" },
      })
    })
  })

  describe("tool_reference filtering", () => {
    test("filters tool_reference blocks from messages", () => {
      const result = sanitizePayload({
        ...basePayload,
        messages: [
          {
            role: "user",
            content: [
              { type: "text", text: "Hello" },
              {
                type: "tool_reference" as "text",
                tool_use_id: "abc",
              },
            ],
          },
        ],
      })
      const content = result.messages[0].content as Array<{ type: string }>
      expect(content).toHaveLength(1)
      expect(content[0].type).toBe("text")
    })

    test("preserves string content messages", () => {
      const result = sanitizePayload({
        ...basePayload,
        messages: [{ role: "user", content: "Hello" }],
      })
      expect(result.messages[0].content).toBe("Hello")
    })
  })

  // opus only accepts effort "medium"; anything else is clamped.
  describe("output_config normalization for opus", () => {
    test("normalizes non-medium effort to medium for opus", () => {
      const payload = {
        ...basePayload,
        model: "claude-opus-4-20250514",
        output_config: { effort: "high" },
      } as unknown as AnthropicMessagesPayload
      const result = sanitizePayload(payload)
      expect((result as Record<string, unknown>).output_config).toEqual({
        effort: "medium",
      })
    })

    test("does not modify medium effort for opus", () => {
      const payload = {
        ...basePayload,
        model: "claude-opus-4-20250514",
        output_config: { effort: "medium" },
      } as unknown as AnthropicMessagesPayload
      const result = sanitizePayload(payload)
      // effort is already medium, so it should not be modified
      expect((result as Record<string, unknown>).output_config).toEqual({
        effort: "medium",
      })
    })
  })

  describe("does not mutate original payload", () => {
    test("returns a new object", () => {
      const original = { ...basePayload }
      const result = sanitizePayload(original)
      expect(result).not.toBe(original)
    })
  })
})