Merge pull request #18 from caozhiyuan/feature/chat-completions-reasoning

cuipengfei · web-flow · commit b85665e1925c · 2025-12-12T22:46:35.000+08:00
Feature/chat completions reasoning
diff --git a/src/lib/api-config.ts b/src/lib/api-config.ts
@@ -7,11 +7,11 @@ export const standardHeaders = () => ({
   accept: "application/json",
 })
 
-const COPILOT_VERSION = "0.26.7"
+const COPILOT_VERSION = "0.35.0"
 const EDITOR_PLUGIN_VERSION = `copilot-chat/${COPILOT_VERSION}`
 const USER_AGENT = `GitHubCopilotChat/${COPILOT_VERSION}`
 
-const API_VERSION = "2025-04-01"
+const API_VERSION = "2025-10-01"
 
 export const copilotBaseUrl = (state: State) =>
   state.accountType === "individual" ?
diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts
@@ -1,3 +1,6 @@
+import type { Model } from "~/services/copilot/get-models"
+
+import { state } from "~/lib/state"
 import {
   type ChatCompletionResponse,
   type ChatCompletionsPayload,
@@ -29,11 +32,15 @@ import { mapOpenAIStopReasonToAnthropic } from "./utils"
 export function translateToOpenAI(
   payload: AnthropicMessagesPayload,
 ): ChatCompletionsPayload {
+  const modelId = translateModelName(payload.model)
+  const model = state.models?.data.find((m) => m.id === modelId)
+  const thinkingBudget = getThinkingBudget(payload, model)
   return {
-    model: translateModelName(payload.model),
+    model: modelId,
     messages: translateAnthropicMessagesToOpenAI(
       payload.messages,
       payload.system,
+      modelId,
     ),
     max_tokens: payload.max_tokens,
     stop: payload.stop_sequences,
@@ -43,14 +50,36 @@ export function translateToOpenAI(
     user: payload.metadata?.user_id,
     tools: translateAnthropicToolsToOpenAI(payload.tools),
     tool_choice: translateAnthropicToolChoiceToOpenAI(payload.tool_choice),
+    thinking_budget: thinkingBudget,
   }
 }
 
+function getThinkingBudget(
+  payload: AnthropicMessagesPayload,
+  model: Model | undefined,
+): number | undefined {
+  const thinking = payload.thinking
+  if (model && thinking) {
+    const maxThinkingBudget = Math.min(
+      model.capabilities.supports.max_thinking_budget ?? 0,
+      (model.capabilities.limits.max_output_tokens ?? 0) - 1,
+    )
+    if (maxThinkingBudget > 0 && thinking.budget_tokens !== undefined) {
+      const budgetTokens = Math.min(thinking.budget_tokens, maxThinkingBudget)
+      return Math.max(
+        budgetTokens,
+        model.capabilities.supports.min_thinking_budget ?? 1024,
+      )
+    }
+  }
+  return undefined
+}
+
 function translateModelName(model: string): string {
   // Subagent requests use a specific model number which Copilot doesn't support
   if (model.startsWith("claude-sonnet-4-")) {
     return model.replace(/^claude-sonnet-4-.*/, "claude-sonnet-4")
-  } else if (model.startsWith("claude-opus-")) {
+  } else if (model.startsWith("claude-opus-4-")) {
     return model.replace(/^claude-opus-4-.*/, "claude-opus-4")
   }
   return model
@@ -59,13 +88,14 @@ function translateModelName(model: string): string {
 function translateAnthropicMessagesToOpenAI(
   anthropicMessages: Array<AnthropicMessage>,
   system: string | Array<AnthropicTextBlock> | undefined,
+  modelId: string,
 ): Array<Message> {
   const systemMessages = handleSystemPrompt(system)
 
   const otherMessages = anthropicMessages.flatMap((message) =>
     message.role === "user" ?
       handleUserMessage(message)
-    : handleAssistantMessage(message),
+    : handleAssistantMessage(message, modelId),
   )
 
   return [...systemMessages, ...otherMessages]
@@ -125,6 +155,7 @@ function handleUserMessage(message: AnthropicUserMessage): Array<Message> {
 
 function handleAssistantMessage(
   message: AnthropicAssistantMessage,
+  modelId: string,
 ): Array<Message> {
   if (!Array.isArray(message.content)) {
     return [
@@ -139,14 +170,28 @@ function handleAssistantMessage(
     (block): block is AnthropicToolUseBlock => block.type === "tool_use",
   )
 
-  const thinkingBlocks = message.content.filter(
+  let thinkingBlocks = message.content.filter(
     (block): block is AnthropicThinkingBlock => block.type === "thinking",
   )
 
-  const allThinkingContent = thinkingBlocks
+  if (modelId.startsWith("claude")) {
+    thinkingBlocks = thinkingBlocks.filter(
+      (b) =>
+        b.thinking
+        && b.thinking.length > 0
+        && b.signature
+        && b.signature.length > 0
+        // GPT signatures contain "@", so drop those thinking blocks for Claude models
+        && !b.signature.includes("@"),
+    )
+  }
+
+  const thinkingContents = thinkingBlocks
     .filter((b) => b.thinking && b.thinking.length > 0)
     .map((b) => b.thinking)
-    .join("\n\n")
+
+  const allThinkingContent =
+    thinkingContents.length > 0 ? thinkingContents.join("\n\n") : undefined
 
   const signature = thinkingBlocks.find(
     (b) => b.signature && b.signature.length > 0,
@@ -281,13 +326,13 @@ export function translateToAnthropic(
   // Process all choices to extract text and tool use blocks
   for (const choice of response.choices) {
     const textBlocks = getAnthropicTextBlocks(choice.message.content)
-    const thingBlocks = getAnthropicThinkBlocks(
+    const thinkBlocks = getAnthropicThinkBlocks(
       choice.message.reasoning_text,
       choice.message.reasoning_opaque,
     )
     const toolUseBlocks = getAnthropicToolUseBlocks(choice.message.tool_calls)
 
-    assistantContentBlocks.push(...thingBlocks, ...textBlocks, ...toolUseBlocks)
+    assistantContentBlocks.push(...thinkBlocks, ...textBlocks, ...toolUseBlocks)
 
     // Use the finish_reason from the first choice, or prioritize tool_calls
     if (choice.finish_reason === "tool_calls" || stopReason === "stop") {
diff --git a/src/routes/messages/stream-translation.ts b/src/routes/messages/stream-translation.ts
@@ -212,6 +212,30 @@ function handleContent(
       },
     })
   }
+
+  // For Claude models: a delta with empty content that carries reasoning_opaque emits the signature and closes the block
+  if (
+    delta.content === ""
+    && delta.reasoning_opaque
+    && delta.reasoning_opaque.length > 0
+  ) {
+    events.push(
+      {
+        type: "content_block_delta",
+        index: state.contentBlockIndex,
+        delta: {
+          type: "signature_delta",
+          signature: delta.reasoning_opaque,
+        },
+      },
+      {
+        type: "content_block_stop",
+        index: state.contentBlockIndex,
+      },
+    )
+    state.contentBlockIndex++
+    state.thinkingBlockOpen = false
+  }
 }
 
 function handleMessageStart(
@@ -313,25 +337,6 @@ function handleThinkingText(
         thinking: delta.reasoning_text,
       },
     })
-
-    if (delta.reasoning_opaque && delta.reasoning_opaque.length > 0) {
-      events.push(
-        {
-          type: "content_block_delta",
-          index: state.contentBlockIndex,
-          delta: {
-            type: "signature_delta",
-            signature: delta.reasoning_opaque,
-          },
-        },
-        {
-          type: "content_block_stop",
-          index: state.contentBlockIndex,
-        },
-      )
-      state.contentBlockIndex++
-      state.thinkingBlockOpen = false
-    }
   }
 }
 
diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts
@@ -152,6 +152,7 @@ export interface ChatCompletionsPayload {
     | { type: "function"; function: { name: string } }
     | null
   user?: string | null
+  thinking_budget?: number
 }
 
 export interface Tool {
diff --git a/src/services/copilot/get-models.ts b/src/services/copilot/get-models.ts
@@ -25,6 +25,8 @@ interface ModelLimits {
 }
 
 interface ModelSupports {
+  max_thinking_budget?: number
+  min_thinking_budget?: number
   tool_calls?: boolean
   parallel_tool_calls?: boolean
   dimensions?: boolean
diff --git a/src/services/get-vscode-version.ts b/src/services/get-vscode-version.ts
@@ -1,4 +1,4 @@
-const FALLBACK = "1.104.3"
+const FALLBACK = "1.107.0"
 
 export async function getVSCodeVersion() {
   const controller = new AbortController()

Original file line number	Diff line number	Diff line change
`@@ -152,6 +152,7 @@ export interface ChatCompletionsPayload {`
`152`	`152`	`\| { type: "function"; function: { name: string } }`
`153`	`153`	`\| null`
`154`	`154`	`user?: string \| null`
	`155`	`+ thinking_budget?: number`
`155`	`156`	`}`
`156`	`157`
`157`	`158`	`export interface Tool {`
Original file line number	Diff line number	Diff line change
`@@ -25,6 +25,8 @@ interface ModelLimits {`
`25`	`25`	`}`
`26`	`26`
`27`	`27`	`interface ModelSupports {`
	`28`	`+ max_thinking_budget?: number`
	`29`	`+ min_thinking_budget?: number`
`28`	`30`	`tool_calls?: boolean`
`29`	`31`	`parallel_tool_calls?: boolean`
`30`	`32`	`dimensions?: boolean`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-const FALLBACK = "1.104.3"`
	`1`	`+const FALLBACK = "1.107.0"`
`2`	`2`
`3`	`3`	`export async function getVSCodeVersion() {`
`4`	`4`	`const controller = new AbortController()`