ericc-ch · jackylam0812 · Apr 21, 2026
diff --git a/bun.lock b/bun.lock
diff --git a/src/lib/sanitize.ts b/src/lib/sanitize.ts
@@ -0,0 +1,119 @@
+import consola from "consola"
+
+import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types"
+
+/**
+ * Sanitize an Anthropic /v1/messages payload for Copilot backend compatibility.
+ *
+ * Based on live probe results from tests/copilot-native-probes.ts:
+ * - All models: strip context_management, budget_tokens, defer_loading, tool_reference
+ * - opus: thinking.enabled → adaptive, effort only accepts "medium"
+ */
+
+/** Untyped wrapper around sanitizePayload for callers holding raw JSON. */
+export function sanitizeForCopilotBackend(
+  payload: Record<string, unknown>,
+): Record<string, unknown> {
+  const typed = payload as unknown as AnthropicMessagesPayload
+  return sanitizePayload(typed) as unknown as Record<string, unknown>
+}
+
+/**
+ * Return a Copilot-compatible deep copy of `payload`; the input is untouched.
+ * An explicit `thinking: { type: "disabled" }` is kept as-is for opus models.
+ */
+export function sanitizePayload(
+  payload: AnthropicMessagesPayload,
+): AnthropicMessagesPayload {
+  // Deep clone to avoid mutating the original
+  const sanitized = structuredClone(payload) as AnthropicMessagesPayload
+    & Record<string, unknown>
+
+  // 1. Normalize model name (Copilot uses short names)
+  sanitized.model = normalizeModelName(sanitized.model)
+
+  // 2. Strip context_management (rejected by all models)
+  if ("context_management" in sanitized) {
+    consola.debug("Stripping context_management from request")
+    delete sanitized.context_management
+  }
+
+  // 3. Handle thinking block
+  if (sanitized.thinking) {
+    const thinking = sanitized.thinking as Record<string, unknown>
+
+    // opus models: thinking.enabled → adaptive. An explicit "disabled" is
+    // left alone, otherwise a client opt-out would switch thinking back on.
+    if (isOpusModel(sanitized.model) && thinking.type !== "disabled") {
+      consola.debug("opus: Converting thinking to adaptive")
+      sanitized.thinking = { type: "adaptive" } as typeof sanitized.thinking
+    }
+
+    // All models: strip budget_tokens from thinking
+    if ("budget_tokens" in sanitized.thinking) {
+      consola.debug("Stripping budget_tokens from thinking")
+      // In-place delete is safe: `sanitized` is our own deep clone
+      delete (sanitized.thinking as Record<string, unknown>).budget_tokens
+    }
+  }
+
+  // 4. Handle output_config.effort for opus
+  if ("output_config" in sanitized && isOpusModel(sanitized.model)) {
+    const outputConfig = sanitized.output_config as
+      | Record<string, unknown>
+      | undefined
+    if (
+      outputConfig
+      && typeof outputConfig.effort === "string"
+      && outputConfig.effort !== "medium"
+    ) {
+      consola.debug(
+        `opus: Normalizing effort "${outputConfig.effort}" to "medium"`,
+      )
+      outputConfig.effort = "medium"
+    }
+  }
+
+  // 5. Strip defer_loading from tool definitions
+  if (sanitized.tools) {
+    for (const tool of sanitized.tools) {
+      // delete is a no-op when the key is absent, so no `in` check is needed
+      delete (tool as Record<string, unknown>).defer_loading
+    }
+  }
+
+  // 6. Filter tool_reference content blocks from messages
+  for (const msg of sanitized.messages) {
+    if (Array.isArray(msg.content)) {
+      msg.content = (msg.content as Array<Record<string, unknown>>).filter(
+        (block) => block.type !== "tool_reference",
+      ) as typeof msg.content
+    }
+  }
+
+  return sanitized
+}
+
+/**
+ * Collapse a suffixed sonnet-4 / opus-4 model id (e.g. a dated variant) to
+ * the bare family id; every other model id is returned unchanged.
+ */
+function normalizeModelName(model: string): string {
+  const suffixedFamily = /^(claude-(?:sonnet|opus)-4)-.*/
+  return model.replace(suffixedFamily, "$1")
+}
+
+function isOpusModel(model: string): boolean {
+  return /opus/.test(model) // substring match, case-sensitive
+}
+
+/** Check if a model name is a Claude model (should use native Anthropic
+ * passthrough). Thin alias that delegates to isClaudeModel.
+ */
+export function isClaude(model: string): boolean {
+  return isClaudeModel(model)
+}
+
+export function isClaudeModel(model: string): boolean {
+  return /^claude[-_]/.test(model) // "claude-" or "claude_" prefix
+}
diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
@@ -5,12 +5,14 @@ import { streamSSE } from "hono/streaming"
 
 import { awaitApproval } from "~/lib/approval"
 import { checkRateLimit } from "~/lib/rate-limit"
+import { sanitizeForCopilotBackend, isClaude } from "~/lib/sanitize"
 import { state } from "~/lib/state"
 import {
   createChatCompletions,
   type ChatCompletionChunk,
   type ChatCompletionResponse,
 } from "~/services/copilot/create-chat-completions"
+import { createMessages } from "~/services/copilot/create-messages"
 
 import {
   type AnthropicMessagesPayload,
@@ -28,16 +30,83 @@ export async function handleCompletion(c: Context) {
   const anthropicPayload = await c.req.json<AnthropicMessagesPayload>()
   consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload))
 
+  if (state.manualApprove) {
+    await awaitApproval()
+  }
+
+  // Route Claude models to native Anthropic passthrough
+  if (isClaude(anthropicPayload.model)) {
+    return handleNativeAnthropic(c, anthropicPayload)
+  }
+
+  // Non-Claude models: use existing OpenAI translation path
+  return handleOpenAITranslation(c, anthropicPayload)
+}
+
+/**
+ * Native Anthropic passthrough for Claude models.
+ * Sends requests directly to Copilot's /v1/messages endpoint.
+ * Responses are in Anthropic format already - no translation needed.
+ *
+ * Streaming replies are returned as a raw SSE Response wrapping the upstream
+ * body; a missing upstream body yields a plain-text 500.
+ */
+async function handleNativeAnthropic(
+  c: Context,
+  payload: AnthropicMessagesPayload,
+) {
+  consola.debug("Using native Anthropic passthrough for model:", payload.model)
+
+  const sanitized = sanitizeForCopilotBackend(
+    payload as unknown as Record<string, unknown>,
+  )
+  consola.debug("Sanitized payload:", JSON.stringify(sanitized).slice(0, 500))
+
+  const response = await createMessages(sanitized)
+
+  if (!payload.stream) {
+    // Non-streaming: Copilot returns Anthropic JSON directly
+    const body = await response.json()
+    consola.debug(
+      "Native non-streaming response:",
+      JSON.stringify(body).slice(-400),
+    )
+    return c.json(body)
+  }
+
+  // Streaming: Copilot returns Anthropic SSE format - pipe through directly
+  consola.debug("Native streaming response - piping SSE directly")
+
+  // Bail out before any SSE header is involved, so the error reply below is
+  // not tagged with event-stream headers.
+  if (!response.body) {
+    return c.text("No response body", 500)
+  }
+
+  // Pipe the upstream SSE body to the client; headers travel on the Response
+  return new Response(response.body, {
+    status: 200,
+    headers: {
+      "Content-Type": "text/event-stream",
+      "Cache-Control": "no-cache",
+      Connection: "keep-alive",
+    },
+  })
+}
+
+/**
+ * Existing OpenAI translation path for non-Claude models (GPT, etc.)
+ */
+async function handleOpenAITranslation(
+  c: Context,
+  anthropicPayload: AnthropicMessagesPayload,
+) {
   const openAIPayload = translateToOpenAI(anthropicPayload)
   consola.debug(
     "Translated OpenAI request payload:",
     JSON.stringify(openAIPayload),
   )
 
-  if (state.manualApprove) {
-    await awaitApproval()
-  }
-
   const response = await createChatCompletions(openAIPayload)
 
   if (isNonStreaming(response)) {

diff --git a/src/services/copilot/create-messages.ts b/src/services/copilot/create-messages.ts
@@ -0,0 +1,68 @@
+import consola from "consola"
+
+import { copilotHeaders, copilotBaseUrl } from "~/lib/api-config"
+import { HTTPError } from "~/lib/error"
+import { state } from "~/lib/state"
+
+/**
+ * Send an Anthropic /v1/messages request directly to the Copilot native endpoint.
+ * For Claude models, this avoids the OpenAI translation layer entirely.
+ * @throws Error if the Copilot token is missing; HTTPError on a non-OK reply.
+ */
+export const createMessages = async (
+  payload: Record<string, unknown>,
+): Promise<Response> => {
+  if (!state.copilotToken) throw new Error("Copilot token not found")
+
+  const enableVision = hasImageContent(payload)
+  const isAgentCall = hasAgentMessages(payload)
+  const headers: Record<string, string> = {
+    ...copilotHeaders(state, enableVision),
+    "X-Initiator": isAgentCall ? "agent" : "user",
+  }
+
+  const url = `${copilotBaseUrl(state)}/v1/messages`
+  consola.debug("Native Anthropic request to:", url)
+
+  const response = await fetch(url, {
+    method: "POST",
+    headers,
+    body: JSON.stringify(payload),
+  })
+
+  if (!response.ok) {
+    // Read a clone so the Response handed to HTTPError keeps an unread body
+    const errorBody = await response.clone().text().catch(() => "")
+    consola.error(
+      "Failed to create native messages",
+      response.status,
+      errorBody,
+    )
+    throw new HTTPError("Failed to create native messages", response)
+  }
+
+  return response
+}
+
+/** True when any message has a top-level content block of type "image". */
+function hasImageContent(payload: Record<string, unknown>): boolean {
+  type Loose = Record<string, unknown>
+  const list = payload.messages as Array<Loose> | undefined
+  if (!list) return false
+
+  const carriesImage = ({ content }: Loose): boolean =>
+    Array.isArray(content)
+    && content.some((block: Loose) => block.type === "image")
+
+  return list.some(carriesImage)
+}
+
+/**
+ * True when any message has role "assistant" or "tool".
+ */
+function hasAgentMessages(payload: Record<string, unknown>): boolean {
+  const list = payload.messages as Array<Record<string, unknown>> | undefined
+  if (!list) return false
+
+  return list.some(({ role }) => role === "assistant" || role === "tool")
+}