ericc-ch · caozhiyuan · Nov 19, 2025 · Nov 19, 2025 · Nov 19, 2025 · Nov 19, 2025
diff --git a/src/lib/api-config.ts b/src/lib/api-config.ts
@@ -7,11 +7,11 @@ export const standardHeaders = () => ({
   accept: "application/json",
 })
 
-const COPILOT_VERSION = "0.26.7"
+const COPILOT_VERSION = "0.35.0"
 const EDITOR_PLUGIN_VERSION = `copilot-chat/${COPILOT_VERSION}`
 const USER_AGENT = `GitHubCopilotChat/${COPILOT_VERSION}`
 
-const API_VERSION = "2025-04-01"
+const API_VERSION = "2025-10-01"
 
 export const copilotBaseUrl = (state: State) =>
   state.accountType === "individual" ?
@@ -25,7 +25,7 @@ export const copilotHeaders = (state: State, vision: boolean = false) => {
     "editor-version": `vscode/${state.vsCodeVersion}`,
     "editor-plugin-version": EDITOR_PLUGIN_VERSION,
     "user-agent": USER_AGENT,
-    "openai-intent": "conversation-panel",
+    "openai-intent": "conversation-agent",
     "x-github-api-version": API_VERSION,
     "x-request-id": randomUUID(),
     "x-vscode-user-agent-library-version": "electron-fetch",

diff --git a/src/lib/tokenizer.ts b/src/lib/tokenizer.ts
@@ -73,6 +73,9 @@ const calculateMessageTokens = (
   const tokensPerName = 1
   let tokens = tokensPerMessage
   for (const [key, value] of Object.entries(message)) {
+    if (key === "reasoning_opaque") {
+      continue
+    }
     if (typeof value === "string") {
       tokens += encoder.encode(value).length
     }

diff --git a/src/routes/messages/anthropic-types.ts b/src/routes/messages/anthropic-types.ts
@@ -56,6 +56,7 @@ export interface AnthropicToolUseBlock {
 export interface AnthropicThinkingBlock {
   type: "thinking"
   thinking: string
+  signature: string
 }
 
 export type AnthropicUserContentBlock =
@@ -196,6 +197,7 @@ export interface AnthropicStreamState {
   messageStartSent: boolean
   contentBlockIndex: number
   contentBlockOpen: boolean
+  thinkingBlockOpen: boolean
   toolCalls: {
     [openAIToolIndex: number]: {
       id: string

diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
@@ -60,6 +60,7 @@ export async function handleCompletion(c: Context) {
       contentBlockIndex: 0,
       contentBlockOpen: false,
       toolCalls: {},
+      thinkingBlockOpen: false,
     }
 
     for await (const rawEvent of response) {

diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts
@@ -1,3 +1,6 @@
+import type { Model } from "~/services/copilot/get-models"
+
+import { state } from "~/lib/state"
 import {
   type ChatCompletionResponse,
   type ChatCompletionsPayload,
@@ -11,7 +14,6 @@ import {
 import {
   type AnthropicAssistantContentBlock,
   type AnthropicAssistantMessage,
-  type AnthropicMessage,
   type AnthropicMessagesPayload,
   type AnthropicResponse,
   type AnthropicTextBlock,
@@ -29,11 +31,15 @@ import { mapOpenAIStopReasonToAnthropic } from "./utils"
 export function translateToOpenAI(
   payload: AnthropicMessagesPayload,
 ): ChatCompletionsPayload {
+  const modelId = translateModelName(payload.model)
+  const model = state.models?.data.find((m) => m.id === modelId)
+  const thinkingBudget = getThinkingBudget(payload, model)
   return {
-    model: translateModelName(payload.model),
+    model: modelId,
     messages: translateAnthropicMessagesToOpenAI(
-      payload.messages,
-      payload.system,
+      payload,
+      modelId,
+      thinkingBudget,
     ),
     max_tokens: payload.max_tokens,
     stop: payload.stop_sequences,
@@ -43,45 +49,110 @@ export function translateToOpenAI(
     user: payload.metadata?.user_id,
     tools: translateAnthropicToolsToOpenAI(payload.tools),
     tool_choice: translateAnthropicToolChoiceToOpenAI(payload.tool_choice),
+    thinking_budget: thinkingBudget,
   }
 }
 
+function getThinkingBudget(
+  payload: AnthropicMessagesPayload,
+  model: Model | undefined,
+): number | undefined {
+  const thinking = payload.thinking
+  if (model && thinking) {
+    const maxThinkingBudget = Math.min(
+      model.capabilities.supports.max_thinking_budget ?? 0,
+      (model.capabilities.limits.max_output_tokens ?? 0) - 1,
-    const maxThinkingBudget = Math.min(
-      model.capabilities.supports.max_thinking_budget ?? 0,
-      (model.capabilities.limits.max_output_tokens ?? 0) - 1,
+    const maxTokensLimit =
+      model.capabilities.limits.max_output_tokens &&
+      model.capabilities.limits.max_output_tokens > 1
+        ? model.capabilities.limits.max_output_tokens - 1
+        : 0
+
+    const maxThinkingBudget = Math.min(
+      model.capabilities.supports.max_thinking_budget ?? 0,
+      maxTokensLimit,
-    const maxThinkingBudget = Math.min(
-      model.capabilities.supports.max_thinking_budget ?? 0,
-      (model.capabilities.limits.max_output_tokens ?? 0) - 1,
+    const maxTokensLimit =
+      model.capabilities.limits.max_output_tokens &&
+      model.capabilities.limits.max_output_tokens > 1
+        ? model.capabilities.limits.max_output_tokens - 1
+        : 0
+
+    const maxThinkingBudget = Math.min(
+      model.capabilities.supports.max_thinking_budget ?? 0,
+      maxTokensLimit,
+    )
+    if (maxThinkingBudget > 0 && thinking.budget_tokens !== undefined) {
+      const budgetTokens = Math.min(thinking.budget_tokens, maxThinkingBudget)
+      return Math.max(
+        budgetTokens,
+        model.capabilities.supports.min_thinking_budget ?? 1024,
+      )
-      const budgetTokens = Math.min(thinking.budget_tokens, maxThinkingBudget)
-      return Math.max(
-        budgetTokens,
-        model.capabilities.supports.min_thinking_budget ?? 1024,
-      )
+      const minThinkingBudget =
+        model.capabilities.supports.min_thinking_budget ?? 1024
+      // If the minimum required budget exceeds the maximum allowed, the
+      // constraints cannot be satisfied; fall back to no thinking budget.
+      if (minThinkingBudget > maxThinkingBudget) {
+        return undefined
+      }
+      const budgetTokens = Math.min(thinking.budget_tokens, maxThinkingBudget)
+      return Math.max(budgetTokens, minThinkingBudget)
-      const budgetTokens = Math.min(thinking.budget_tokens, maxThinkingBudget)
-      return Math.max(
-        budgetTokens,
-        model.capabilities.supports.min_thinking_budget ?? 1024,
-      )
+      const minThinkingBudget =
+        model.capabilities.supports.min_thinking_budget ?? 1024
+      // If the minimum required budget exceeds the maximum allowed, the
+      // constraints cannot be satisfied; fall back to no thinking budget.
+      if (minThinkingBudget > maxThinkingBudget) {
+        return undefined
+      }
+      const budgetTokens = Math.min(thinking.budget_tokens, maxThinkingBudget)
+      return Math.max(budgetTokens, minThinkingBudget)
+    }
+  }
+  return undefined
+}
+
 function translateModelName(model: string): string {
   // Subagent requests use a specific model number which Copilot doesn't support
   if (model.startsWith("claude-sonnet-4-")) {
     return model.replace(/^claude-sonnet-4-.*/, "claude-sonnet-4")
-  } else if (model.startsWith("claude-opus-")) {
+  } else if (model.startsWith("claude-opus-4-")) {
     return model.replace(/^claude-opus-4-.*/, "claude-opus-4")
-  } else if (model.startsWith("claude-opus-4-")) {
-    return model.replace(/^claude-opus-4-.*/, "claude-opus-4")
+  } else if (model.startsWith("claude-opus-")) {
+    return model.replace(/^(claude-opus-\d+).*/, "$1")
-  } else if (model.startsWith("claude-opus-4-")) {
-    return model.replace(/^claude-opus-4-.*/, "claude-opus-4")
+  } else if (model.startsWith("claude-opus-")) {
+    return model.replace(/^(claude-opus-\d+).*/, "$1")
   }
   return model
 }
 
 function translateAnthropicMessagesToOpenAI(
-  anthropicMessages: Array<AnthropicMessage>,
-  system: string | Array<AnthropicTextBlock> | undefined,
+  payload: AnthropicMessagesPayload,
+  modelId: string,
+  thinkingBudget: number | undefined,
 ): Array<Message> {
-  const systemMessages = handleSystemPrompt(system)
-
-  const otherMessages = anthropicMessages.flatMap((message) =>
+  const systemMessages = handleSystemPrompt(
+    payload.system,
+    modelId,
+    thinkingBudget,
+  )
+  const otherMessages = payload.messages.flatMap((message) =>
     message.role === "user" ?
       handleUserMessage(message)
-    : handleAssistantMessage(message),
+    : handleAssistantMessage(message, modelId),
   )
-
+  if (modelId.startsWith("claude") && thinkingBudget) {
+    const reminder =
+      "<system-reminder>you MUST follow interleaved_thinking_protocol</system-reminder>"
+    const firstUserIndex = otherMessages.findIndex((m) => m.role === "user")
+    if (firstUserIndex !== -1) {
+      const userMessage = otherMessages[firstUserIndex]
+      if (typeof userMessage.content === "string") {
+        userMessage.content = reminder + "\n\n" + userMessage.content
+      } else if (Array.isArray(userMessage.content)) {
+        userMessage.content = [
+          { type: "text", text: reminder },
+          ...userMessage.content,
+        ] as Array<ContentPart>
+      }
+    }
+  }
   return [...systemMessages, ...otherMessages]
 }
 
 function handleSystemPrompt(
   system: string | Array<AnthropicTextBlock> | undefined,
+  modelId: string,
+  thinkingBudget: number | undefined,
 ): Array<Message> {
   if (!system) {
     return []
   }
 
+  let extraPrompt = ""
+  if (modelId.startsWith("claude") && thinkingBudget) {
+    extraPrompt = `
+<interleaved_thinking_protocol>
+ABSOLUTE REQUIREMENT - NON-NEGOTIABLE:
+The current thinking_mode is interleaved, Whenever you have the result of a function call, think carefully , MUST output a thinking block
-The current thinking_mode is interleaved, Whenever you have the result of a function call, think carefully , MUST output a thinking block
+The current thinking_mode is interleaved, Whenever you have the result of a function call, think carefully, MUST output a thinking block
-The current thinking_mode is interleaved, Whenever you have the result of a function call, think carefully , MUST output a thinking block
+The current thinking_mode is interleaved, Whenever you have the result of a function call, think carefully, MUST output a thinking block
+RULES:
+Tool result → thinking block (ALWAYS, no exceptions)
+This is NOT optional - it is a hard requirement
+The thinking block must contain substantive reasoning (minimum 3-5 sentences)
+Think about: what the results mean, what to do next, how to answer the user
+NEVER skip this step, even if the result seems simple or obvious
+</interleaved_thinking_protocol>`
+  }
+
   if (typeof system === "string") {
-    return [{ role: "system", content: system }]
+    return [{ role: "system", content: system + extraPrompt }]
   } else {
-    const systemText = system.map((block) => block.text).join("\n\n")
+    const systemText = system
+      .map((block, index) => {
+        if (index === 0) {
+          return block.text + extraPrompt
+        }
+        return block.text
+      })
+      .join("\n\n")
     return [{ role: "system", content: systemText }]
   }
 }
@@ -125,6 +196,7 @@ function handleUserMessage(message: AnthropicUserMessage): Array<Message> {
 
 function handleAssistantMessage(
   message: AnthropicAssistantMessage,
+  modelId: string,
 ): Array<Message> {
   if (!Array.isArray(message.content)) {
     return [
@@ -139,25 +211,40 @@ function handleAssistantMessage(
     (block): block is AnthropicToolUseBlock => block.type === "tool_use",
   )
 
-  const textBlocks = message.content.filter(
-    (block): block is AnthropicTextBlock => block.type === "text",
-  )
-
-  const thinkingBlocks = message.content.filter(
+  let thinkingBlocks = message.content.filter(
     (block): block is AnthropicThinkingBlock => block.type === "thinking",
   )
 
-  // Combine text and thinking blocks, as OpenAI doesn't have separate thinking blocks
-  const allTextContent = [
-    ...textBlocks.map((b) => b.text),
-    ...thinkingBlocks.map((b) => b.thinking),
-  ].join("\n\n")
+  if (modelId.startsWith("claude")) {
+    thinkingBlocks = thinkingBlocks.filter(
+      (b) =>
+        b.thinking
+        && b.thinking.length > 0
+        && b.signature
+        && b.signature.length > 0
+        // gpt signature has @ in it, so filter those out for claude models
+        && !b.signature.includes("@"),
-        && b.signature.length > 0
-        // gpt signature has @ in it, so filter those out for claude models
-        && !b.signature.includes("@"),
+        && b.signature.length > 0,
-        && b.signature.length > 0
-        // gpt signature has @ in it, so filter those out for claude models
-        && !b.signature.includes("@"),
+        && b.signature.length > 0,
+    )
+  }
+
+  const thinkingContents = thinkingBlocks
+    .filter((b) => b.thinking && b.thinking.length > 0)
+    .map((b) => b.thinking)
-  if (modelId.startsWith("claude")) {
-    thinkingBlocks = thinkingBlocks.filter(
-      (b) =>
-        b.thinking
-        && b.thinking.length > 0
-        && b.signature
-        && b.signature.length > 0
-        // gpt signature has @ in it, so filter those out for claude models
-        && !b.signature.includes("@"),
-    )
-  }
-
-  const thinkingContents = thinkingBlocks
-    .filter((b) => b.thinking && b.thinking.length > 0)
-    .map((b) => b.thinking)
+  // First, ensure all thinking blocks have non-empty thinking content
+  thinkingBlocks = thinkingBlocks.filter(
+    (b) => b.thinking && b.thinking.length > 0,
+  )
+
+  if (modelId.startsWith("claude")) {
+    thinkingBlocks = thinkingBlocks.filter(
+      (b) =>
+        b.signature
+        && b.signature.length > 0
+        // gpt signature has @ in it, so filter those out for claude models
+        && !b.signature.includes("@"),
+    )
+  }
+
+  const thinkingContents = thinkingBlocks.map((b) => b.thinking)
-  if (modelId.startsWith("claude")) {
-    thinkingBlocks = thinkingBlocks.filter(
-      (b) =>
-        b.thinking
-        && b.thinking.length > 0
-        && b.signature
-        && b.signature.length > 0
-        // gpt signature has @ in it, so filter those out for claude models
-        && !b.signature.includes("@"),
-    )
-  }
-
-  const thinkingContents = thinkingBlocks
-    .filter((b) => b.thinking && b.thinking.length > 0)
-    .map((b) => b.thinking)
+  // First, ensure all thinking blocks have non-empty thinking content
+  thinkingBlocks = thinkingBlocks.filter(
+    (b) => b.thinking && b.thinking.length > 0,
+  )
+
+  if (modelId.startsWith("claude")) {
+    thinkingBlocks = thinkingBlocks.filter(
+      (b) =>
+        b.signature
+        && b.signature.length > 0
+        // gpt signature has @ in it, so filter those out for claude models
+        && !b.signature.includes("@"),
+    )
+  }
+
+  const thinkingContents = thinkingBlocks.map((b) => b.thinking)
+
+  const allThinkingContent =
+    thinkingContents.length > 0 ? thinkingContents.join("\n\n") : undefined
+
+  const signature = thinkingBlocks.find(
+    (b) => b.signature && b.signature.length > 0,
+  )?.signature
 
   return toolUseBlocks.length > 0 ?
       [
         {
           role: "assistant",
-          content: allTextContent || null,
+          content: mapContent(message.content),
+          reasoning_text: allThinkingContent,
+          reasoning_opaque: signature,
           tool_calls: toolUseBlocks.map((toolUse) => ({
             id: toolUse.id,
             type: "function",
@@ -172,6 +259,8 @@ function handleAssistantMessage(
         {
           role: "assistant",
           content: mapContent(message.content),
+          reasoning_text: allThinkingContent,
+          reasoning_opaque: signature,
         },
       ]
 }
@@ -191,11 +280,8 @@ function mapContent(
   const hasImage = content.some((block) => block.type === "image")
   if (!hasImage) {
     return content
-      .filter(
-        (block): block is AnthropicTextBlock | AnthropicThinkingBlock =>
-          block.type === "text" || block.type === "thinking",
-      )
-      .map((block) => (block.type === "text" ? block.text : block.thinking))
+      .filter((block): block is AnthropicTextBlock => block.type === "text")
+      .map((block) => block.text)
       .join("\n\n")
   }
 
@@ -204,12 +290,6 @@ function mapContent(
     switch (block.type) {
       case "text": {
         contentParts.push({ type: "text", text: block.text })
-
-        break
-      }
-      case "thinking": {
-        contentParts.push({ type: "text", text: block.thinking })
-
         break
       }
       case "image": {
@@ -219,7 +299,6 @@ function mapContent(
             url: `data:${block.source.media_type};base64,${block.source.data}`,
           },
         })
-
         break
       }
       // No default
@@ -282,34 +361,32 @@ export function translateToAnthropic(
   response: ChatCompletionResponse,
 ): AnthropicResponse {
   // Merge content from all choices
-  const allTextBlocks: Array<AnthropicTextBlock> = []
-  const allToolUseBlocks: Array<AnthropicToolUseBlock> = []
-  let stopReason: "stop" | "length" | "tool_calls" | "content_filter" | null =
-    null // default
-  stopReason = response.choices[0]?.finish_reason ?? stopReason
+  const assistantContentBlocks: Array<AnthropicAssistantContentBlock> = []
+  let stopReason = response.choices[0]?.finish_reason ?? null
 
   // Process all choices to extract text and tool use blocks
   for (const choice of response.choices) {
     const textBlocks = getAnthropicTextBlocks(choice.message.content)
+    const thinkBlocks = getAnthropicThinkBlocks(
+      choice.message.reasoning_text,
+      choice.message.reasoning_opaque,
+    )
     const toolUseBlocks = getAnthropicToolUseBlocks(choice.message.tool_calls)
 
-    allTextBlocks.push(...textBlocks)
-    allToolUseBlocks.push(...toolUseBlocks)
+    assistantContentBlocks.push(...thinkBlocks, ...textBlocks, ...toolUseBlocks)
-    assistantContentBlocks.push(...thinkBlocks, ...textBlocks, ...toolUseBlocks)
+    assistantContentBlocks.push(...textBlocks, ...thinkBlocks, ...toolUseBlocks)
-    assistantContentBlocks.push(...thinkBlocks, ...textBlocks, ...toolUseBlocks)
+    assistantContentBlocks.push(...textBlocks, ...thinkBlocks, ...toolUseBlocks)
 
     // Use the finish_reason from the first choice, or prioritize tool_calls
     if (choice.finish_reason === "tool_calls" || stopReason === "stop") {
       stopReason = choice.finish_reason
     }
   }
 
-  // Note: GitHub Copilot doesn't generate thinking blocks, so we don't include them in responses
-
   return {
     id: response.id,
     type: "message",
     role: "assistant",
     model: response.model,
-    content: [...allTextBlocks, ...allToolUseBlocks],
+    content: assistantContentBlocks,
     stop_reason: mapOpenAIStopReasonToAnthropic(stopReason),
     stop_sequence: null,
     usage: {
@@ -329,7 +406,7 @@ export function translateToAnthropic(
 function getAnthropicTextBlocks(
   messageContent: Message["content"],
 ): Array<AnthropicTextBlock> {
-  if (typeof messageContent === "string") {
+  if (typeof messageContent === "string" && messageContent.length > 0) {
     return [{ type: "text", text: messageContent }]
   }
 
@@ -342,6 +419,31 @@ function getAnthropicTextBlocks(
   return []
 }
 
+function getAnthropicThinkBlocks(
+  reasoningText: string | null | undefined,
+  reasoningOpaque: string | null | undefined,
+): Array<AnthropicThinkingBlock> {
+  if (reasoningText && reasoningText.length > 0) {
+    return [
+      {
+        type: "thinking",
+        thinking: reasoningText,
+        signature: reasoningOpaque || "",
-  if (reasoningText && reasoningText.length > 0) {
-    return [
-      {
-        type: "thinking",
-        thinking: reasoningText,
-        signature: reasoningOpaque || "",
+  if (
+    reasoningText &&
+    reasoningText.length > 0 &&
+    reasoningOpaque &&
+    reasoningOpaque.length > 0
+  ) {
+    return [
+      {
+        type: "thinking",
+        thinking: reasoningText,
+        signature: reasoningOpaque,
-  if (reasoningText && reasoningText.length > 0) {
-    return [
-      {
-        type: "thinking",
-        thinking: reasoningText,
-        signature: reasoningOpaque || "",
+  if (
+    reasoningText &&
+    reasoningText.length > 0 &&
+    reasoningOpaque &&
+    reasoningOpaque.length > 0
+  ) {
+    return [
+      {
+        type: "thinking",
+        thinking: reasoningText,
+        signature: reasoningOpaque,
+      },
+    ]
+  }
+  if (reasoningOpaque && reasoningOpaque.length > 0) {
+    return [
+      {
+        type: "thinking",
+        thinking: "",
+        signature: reasoningOpaque,
+      },
+    ]
+  }
+  return []
+}
+
 function getAnthropicToolUseBlocks(
   toolCalls: Array<ToolCall> | undefined,
 ): Array<AnthropicToolUseBlock> {