From 74dec5ceb7f8d1ce00c720eb3adb7f3b2d5b089e Mon Sep 17 00:00:00 2001 From: Jacky Date: Tue, 21 Apr 2026 11:56:32 +0800 Subject: [PATCH] feat: native Anthropic passthrough for Claude models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Route Claude model requests directly to Copilot's native /v1/messages endpoint instead of translating through OpenAI /chat/completions. Benefits: - Eliminates Anthropic↔OpenAI translation overhead for Claude models - Enables native thinking (adaptive), cache_control, output_config - Streaming SSE directly piped (zero translation latency) - Non-Claude models (GPT etc.) keep existing OpenAI translation path New files: - src/services/copilot/create-messages.ts - Native /v1/messages client - src/lib/sanitize.ts - Copilot backend compatibility sanitizer - Strips unsupported fields (context_management, budget_tokens, etc.) - opus: thinking.enabled→adaptive, effort→medium - Model name normalization (claude-sonnet-4-* → claude-sonnet-4) Modified: - src/routes/messages/handler.ts - Route Claude vs non-Claude models All 42 existing + new tests pass. 
--- bun.lock | 1 + src/lib/sanitize.ts | 119 +++++++++++++++ src/routes/messages/handler.ts | 77 +++++++++- src/services/copilot/create-messages.ts | 68 +++++++++ tests/sanitize.test.ts | 192 ++++++++++++++++++++++++ 5 files changed, 453 insertions(+), 4 deletions(-) create mode 100644 src/lib/sanitize.ts create mode 100644 src/services/copilot/create-messages.ts create mode 100644 tests/sanitize.test.ts diff --git a/bun.lock b/bun.lock index 20e895e7f..9ece87578 100644 --- a/bun.lock +++ b/bun.lock @@ -1,5 +1,6 @@ { "lockfileVersion": 1, + "configVersion": 0, "workspaces": { "": { "name": "copilot-api", diff --git a/src/lib/sanitize.ts b/src/lib/sanitize.ts new file mode 100644 index 000000000..f7a28be38 --- /dev/null +++ b/src/lib/sanitize.ts @@ -0,0 +1,119 @@ +import consola from "consola" + +import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types" + +/** + * Sanitize an Anthropic /v1/messages payload for Copilot backend compatibility. + * + * Based on live probe results from tests/copilot-native-probes.ts: + * - All models: strip context_management, budget_tokens, defer_loading, tool_reference + * - opus: thinking.enabled → adaptive, effort only accepts "medium" + */ + +export function sanitizeForCopilotBackend( + payload: Record, +): Record { + return sanitizePayload( + payload as unknown as AnthropicMessagesPayload, + ) as unknown as Record +} + +export function sanitizePayload( + payload: AnthropicMessagesPayload, +): AnthropicMessagesPayload { + // Deep clone to avoid mutating the original + const sanitized = structuredClone(payload) as AnthropicMessagesPayload + & Record + + // 1. Normalize model name (Copilot uses short names) + sanitized.model = normalizeModelName(sanitized.model) + + // 2. Strip context_management (rejected by all models) + if ("context_management" in sanitized) { + consola.debug("Stripping context_management from request") + delete sanitized.context_management + } + + // 3. 
Handle thinking block + if (sanitized.thinking) { + const isOpus = isOpusModel(sanitized.model) + + if (isOpus) { + // opus models: thinking.enabled → adaptive + consola.debug("opus: Converting thinking to adaptive") + sanitized.thinking = { type: "adaptive" } as typeof sanitized.thinking + } + + // All models: strip budget_tokens from thinking + if ("budget_tokens" in sanitized.thinking) { + consola.debug("Stripping budget_tokens from thinking") + const { budget_tokens: _, ...rest } = sanitized.thinking as Record< + string, + unknown + > + sanitized.thinking = rest as typeof sanitized.thinking + } + } + + // 4. Handle output_config.effort for opus + if ("output_config" in sanitized && isOpusModel(sanitized.model)) { + const outputConfig = sanitized.output_config as + | Record + | undefined + if ( + outputConfig + && typeof outputConfig.effort === "string" + && outputConfig.effort !== "medium" + ) { + consola.debug( + `opus: Normalizing effort "${outputConfig.effort}" to "medium"`, + ) + outputConfig.effort = "medium" + } + } + + // 5. Strip defer_loading from tool definitions + if (sanitized.tools) { + for (const tool of sanitized.tools) { + const t = tool as Record + if ("defer_loading" in t) { + delete t.defer_loading + } + } + } + + // 6. 
Filter tool_reference content blocks from messages + for (const msg of sanitized.messages) { + if (Array.isArray(msg.content)) { + msg.content = (msg.content as Array>).filter( + (block) => block.type !== "tool_reference", + ) as typeof msg.content + } + } + + return sanitized +} + +function normalizeModelName(model: string): string { + if (model.startsWith("claude-sonnet-4-")) { + return model.replace(/^claude-sonnet-4-.*/, "claude-sonnet-4") + } else if (model.startsWith("claude-opus-4-")) { + return model.replace(/^claude-opus-4-.*/, "claude-opus-4") + } + return model +} + +function isOpusModel(model: string): boolean { + return model.includes("opus") +} + +/** + * Check if a model name is a Claude model (should use native Anthropic passthrough) + */ +export function isClaude(model: string): boolean { + return isClaudeModel(model) +} + +export function isClaudeModel(model: string): boolean { + return model.startsWith("claude-") || model.startsWith("claude_") +} diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index 85dbf6243..26f30c31b 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -5,12 +5,14 @@ import { streamSSE } from "hono/streaming" import { awaitApproval } from "~/lib/approval" import { checkRateLimit } from "~/lib/rate-limit" +import { sanitizeForCopilotBackend, isClaude } from "~/lib/sanitize" import { state } from "~/lib/state" import { createChatCompletions, type ChatCompletionChunk, type ChatCompletionResponse, } from "~/services/copilot/create-chat-completions" +import { createMessages } from "~/services/copilot/create-messages" import { type AnthropicMessagesPayload, @@ -28,16 +30,83 @@ export async function handleCompletion(c: Context) { const anthropicPayload = await c.req.json() consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload)) + if (state.manualApprove) { + await awaitApproval() + } + + // Route Claude models to native Anthropic passthrough + if 
(isClaude(anthropicPayload.model)) { + return handleNativeAnthropic(c, anthropicPayload) + } + + // Non-Claude models: use existing OpenAI translation path + return handleOpenAITranslation(c, anthropicPayload) +} + +/** + * Native Anthropic passthrough for Claude models. + * Sends requests directly to Copilot's /v1/messages endpoint. + * Responses are in Anthropic format already - no translation needed. + */ +async function handleNativeAnthropic( + c: Context, + payload: AnthropicMessagesPayload, +) { + consola.debug("Using native Anthropic passthrough for model:", payload.model) + + const sanitized = sanitizeForCopilotBackend( + payload as unknown as Record, + ) + consola.debug("Sanitized payload:", JSON.stringify(sanitized).slice(0, 500)) + + const response = await createMessages(sanitized) + + if (!payload.stream) { + // Non-streaming: Copilot returns Anthropic JSON directly + const body = await response.json() + consola.debug( + "Native non-streaming response:", + JSON.stringify(body).slice(-400), + ) + return c.json(body) + } + + // Streaming: Copilot returns Anthropic SSE format - pipe through directly + consola.debug("Native streaming response - piping SSE directly") + + // Set SSE headers + c.header("Content-Type", "text/event-stream") + c.header("Cache-Control", "no-cache") + c.header("Connection", "keep-alive") + + // Pipe the upstream SSE response body directly to the client + if (!response.body) { + return c.text("No response body", 500) + } + + return new Response(response.body, { + status: 200, + headers: { + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache", + Connection: "keep-alive", + }, + }) +} + +/** + * Existing OpenAI translation path for non-Claude models (GPT, etc.) 
+ */ +async function handleOpenAITranslation( + c: Context, + anthropicPayload: AnthropicMessagesPayload, +) { const openAIPayload = translateToOpenAI(anthropicPayload) consola.debug( "Translated OpenAI request payload:", JSON.stringify(openAIPayload), ) - if (state.manualApprove) { - await awaitApproval() - } - const response = await createChatCompletions(openAIPayload) if (isNonStreaming(response)) { diff --git a/src/services/copilot/create-messages.ts b/src/services/copilot/create-messages.ts new file mode 100644 index 000000000..ab29fc2f6 --- /dev/null +++ b/src/services/copilot/create-messages.ts @@ -0,0 +1,68 @@ +import consola from "consola" + +import { copilotHeaders, copilotBaseUrl } from "~/lib/api-config" +import { HTTPError } from "~/lib/error" +import { state } from "~/lib/state" + +/** + * Send an Anthropic /v1/messages request directly to the Copilot native endpoint. + * For Claude models, this avoids the OpenAI translation layer entirely. + */ +export const createMessages = async ( + payload: Record, +): Promise => { + if (!state.copilotToken) throw new Error("Copilot token not found") + + const enableVision = hasImageContent(payload) + + const isAgentCall = hasAgentMessages(payload) + + const headers: Record = { + ...copilotHeaders(state, enableVision), + "X-Initiator": isAgentCall ? 
"agent" : "user", + } + + const url = `${copilotBaseUrl(state)}/v1/messages` + consola.debug("Native Anthropic request to:", url) + + const response = await fetch(url, { + method: "POST", + headers, + body: JSON.stringify(payload), + }) + + if (!response.ok) { + const errorBody = await response.text().catch(() => "") + consola.error( + "Failed to create native messages", + response.status, + errorBody, + ) + throw new HTTPError("Failed to create native messages", response) + } + + return response +} + +function hasImageContent(payload: Record): boolean { + const messages = payload.messages as + | Array> + | undefined + if (!messages) return false + return messages.some((msg) => { + if (!Array.isArray(msg.content)) return false + return msg.content.some( + (block: Record) => block.type === "image", + ) + }) +} + +function hasAgentMessages(payload: Record): boolean { + const messages = payload.messages as + | Array> + | undefined + if (!messages) return false + return messages.some((msg) => + ["assistant", "tool"].includes(msg.role as string), + ) +} diff --git a/tests/sanitize.test.ts b/tests/sanitize.test.ts new file mode 100644 index 000000000..b2e62259d --- /dev/null +++ b/tests/sanitize.test.ts @@ -0,0 +1,192 @@ +import { test, expect, describe } from "bun:test" + +import type { AnthropicMessagesPayload } from "../src/routes/messages/anthropic-types" + +import { sanitizePayload, isClaudeModel } from "../src/lib/sanitize" + +const basePayload: AnthropicMessagesPayload = { + model: "claude-sonnet-4", + max_tokens: 1024, + messages: [{ role: "user", content: "Hello" }], +} + +describe("isClaudeModel", () => { + test("returns true for claude models", () => { + expect(isClaudeModel("claude-sonnet-4")).toBe(true) + expect(isClaudeModel("claude-opus-4")).toBe(true) + expect(isClaudeModel("claude-haiku-3")).toBe(true) + expect(isClaudeModel("claude-sonnet-4-20250514")).toBe(true) + }) + + test("returns false for non-claude models", () => { + 
      expect(isClaudeModel("gpt-4o")).toBe(false)
      expect(isClaudeModel("o1-mini")).toBe(false)
      expect(isClaudeModel("gemini-pro")).toBe(false)
  })
})

describe("sanitizePayload", () => {
  // Dated model names must collapse to the short form Copilot accepts.
  describe("model name normalization", () => {
    test("normalizes claude-sonnet-4-* to claude-sonnet-4", () => {
      const result = sanitizePayload({
        ...basePayload,
        model: "claude-sonnet-4-20250514",
      })
      expect(result.model).toBe("claude-sonnet-4")
    })

    test("normalizes claude-opus-4-* to claude-opus-4", () => {
      const result = sanitizePayload({
        ...basePayload,
        model: "claude-opus-4-20250514",
      })
      expect(result.model).toBe("claude-opus-4")
    })

    test("preserves models without version suffixes", () => {
      const result = sanitizePayload({
        ...basePayload,
        model: "claude-sonnet-4",
      })
      expect(result.model).toBe("claude-sonnet-4")
    })

    test("preserves non-claude models", () => {
      const result = sanitizePayload({ ...basePayload, model: "gpt-4o" })
      expect(result.model).toBe("gpt-4o")
    })
  })

  // context_management is rejected by the Copilot backend for all models.
  describe("context_management stripping", () => {
    test("strips context_management from payload", () => {
      const payload = {
        ...basePayload,
        context_management: { some: "value" },
      } as AnthropicMessagesPayload & { context_management: unknown }
      const result = sanitizePayload(payload)
      expect(
        (result as Record<string, unknown>).context_management,
      ).toBeUndefined()
    })
  })

  describe("thinking sanitization", () => {
    test("strips budget_tokens from thinking", () => {
      const result = sanitizePayload({
        ...basePayload,
        thinking: { type: "enabled", budget_tokens: 5000 },
      })
      expect(result.thinking).toEqual({ type: "enabled" })
    })

    test("converts enabled to adaptive for opus models", () => {
      const result = sanitizePayload({
        ...basePayload,
        model: "claude-opus-4-20250514",
        thinking: { type: "enabled", budget_tokens: 5000 },
      })
      expect(result.thinking).toEqual({ type: "adaptive" })
    })

    test("preserves thinking type for non-opus models", () => {
      const result = sanitizePayload({
        ...basePayload,
        model: "claude-sonnet-4",
        thinking: { type: "enabled" },
      })
      expect(result.thinking).toEqual({ type: "enabled" })
    })
  })

  describe("tools sanitization", () => {
    test("strips defer_loading from tools", () => {
      const payload = {
        ...basePayload,
        tools: [
          // Cast extracts the element type of AnthropicMessagesPayload["tools"].
          {
            name: "get_weather",
            input_schema: { type: "object" },
            defer_loading: true,
          } as unknown as AnthropicMessagesPayload["tools"] extends (
            Array<infer T> | undefined
          ) ?
            T
          : never,
        ],
      }
      const result = sanitizePayload(
        payload as unknown as AnthropicMessagesPayload,
      )
      expect(result.tools?.[0]).toEqual({
        name: "get_weather",
        input_schema: { type: "object" },
      })
    })
  })

  describe("tool_reference filtering", () => {
    test("filters tool_reference blocks from messages", () => {
      const result = sanitizePayload({
        ...basePayload,
        messages: [
          {
            role: "user",
            content: [
              { type: "text", text: "Hello" },
              {
                type: "tool_reference" as "text",
                tool_use_id: "abc",
              },
            ],
          },
        ],
      })
      const content = result.messages[0].content as Array<{ type: string }>
      expect(content).toHaveLength(1)
      expect(content[0].type).toBe("text")
    })

    test("preserves string content messages", () => {
      const result = sanitizePayload({
        ...basePayload,
        messages: [{ role: "user", content: "Hello" }],
      })
      expect(result.messages[0].content).toBe("Hello")
    })
  })

  // opus only accepts effort "medium"; anything else is clamped.
  describe("output_config normalization for opus", () => {
    test("normalizes non-medium effort to medium for opus", () => {
      const payload = {
        ...basePayload,
        model: "claude-opus-4-20250514",
        output_config: { effort: "high" },
      } as unknown as AnthropicMessagesPayload
      const result = sanitizePayload(payload)
      expect((result as Record<string, unknown>).output_config).toEqual({
        effort: "medium",
      })
    })

    test("does not modify medium effort for opus", () => {
      const payload = {
        ...basePayload,
        model: "claude-opus-4-20250514",
        output_config: { effort: "medium" },
      } as unknown as AnthropicMessagesPayload
      const result = sanitizePayload(payload)
      // effort is already medium, so it should not be modified
      expect((result as Record<string, unknown>).output_config).toEqual({
        effort: "medium",
      })
    })
  })

  describe("does not mutate original payload", () => {
    test("returns a new object", () => {
      const original = { ...basePayload }
      const result = sanitizePayload(original)
      expect(result).not.toBe(original)
    })
  })
})