-
-
Notifications
You must be signed in to change notification settings - Fork 638
feature about chat completions reasoning, support gemini-3-pro thinking and support claude model enable thinking and interleaved thinking #163
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 13 commits
29668ce
a2467d3
58f7a45
3fa5519
dfb40d2
7657d87
7f8187b
cbe12eb
ebcacb2
0d6f7aa
dcafbe1
5175245
dd80c8d
e45c6db
0afccfa
8191930
67b357a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -1,3 +1,6 @@ | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| import type { Model } from "~/services/copilot/get-models" | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| import { state } from "~/lib/state" | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| import { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| type ChatCompletionResponse, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| type ChatCompletionsPayload, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
@@ -11,7 +14,6 @@ import { | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| import { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| type AnthropicAssistantContentBlock, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| type AnthropicAssistantMessage, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| type AnthropicMessage, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| type AnthropicMessagesPayload, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| type AnthropicResponse, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| type AnthropicTextBlock, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
@@ -29,11 +31,15 @@ import { mapOpenAIStopReasonToAnthropic } from "./utils" | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| export function translateToOpenAI( | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| payload: AnthropicMessagesPayload, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| ): ChatCompletionsPayload { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| const modelId = translateModelName(payload.model) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| const model = state.models?.data.find((m) => m.id === modelId) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| const thinkingBudget = getThinkingBudget(payload, model) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| return { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| model: translateModelName(payload.model), | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| model: modelId, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| messages: translateAnthropicMessagesToOpenAI( | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| payload.messages, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| payload.system, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| payload, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| modelId, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| thinkingBudget, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| ), | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| max_tokens: payload.max_tokens, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| stop: payload.stop_sequences, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
@@ -43,45 +49,96 @@ export function translateToOpenAI( | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| user: payload.metadata?.user_id, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| tools: translateAnthropicToolsToOpenAI(payload.tools), | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| tool_choice: translateAnthropicToolChoiceToOpenAI(payload.tool_choice), | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| thinking_budget: thinkingBudget, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| function getThinkingBudget( | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| payload: AnthropicMessagesPayload, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| model: Model | undefined, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| ): number | undefined { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| const thinking = payload.thinking | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| if (model && thinking) { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| const maxThinkingBudget = Math.min( | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| model.capabilities.supports.max_thinking_budget ?? 0, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| (model.capabilities.limits.max_output_tokens ?? 0) - 1, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Comment on lines
+62
to
+64
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| const maxThinkingBudget = Math.min( | |
| model.capabilities.supports.max_thinking_budget ?? 0, | |
| (model.capabilities.limits.max_output_tokens ?? 0) - 1, | |
| const maxTokensLimit = | |
| model.capabilities.limits.max_output_tokens && | |
| model.capabilities.limits.max_output_tokens > 1 | |
| ? model.capabilities.limits.max_output_tokens - 1 | |
| : 0 | |
| const maxThinkingBudget = Math.min( | |
| model.capabilities.supports.max_thinking_budget ?? 0, | |
| maxTokensLimit, |
Copilot
AI
Dec 31, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The thinking budget calculation could return values that don't satisfy the minimum requirement. When thinking.budget_tokens is less than min_thinking_budget, the Math.max ensures the minimum is met. However, this could exceed maxThinkingBudget if the minimum is larger than the maximum. Consider validating that min_thinking_budget <= maxThinkingBudget before the calculation, or returning undefined if the constraints cannot be satisfied.
| const budgetTokens = Math.min(thinking.budget_tokens, maxThinkingBudget) | |
| return Math.max( | |
| budgetTokens, | |
| model.capabilities.supports.min_thinking_budget ?? 1024, | |
| ) | |
| const minThinkingBudget = | |
| model.capabilities.supports.min_thinking_budget ?? 1024 | |
| // If the minimum required budget exceeds the maximum allowed, the | |
| // constraints cannot be satisfied; fall back to no thinking budget. | |
| if (minThinkingBudget > maxThinkingBudget) { | |
| return undefined | |
| } | |
| const budgetTokens = Math.min(thinking.budget_tokens, maxThinkingBudget) | |
| return Math.max(budgetTokens, minThinkingBudget) |
Copilot
AI
Dec 31, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The new thinking budget calculation logic (getThinkingBudget function) and interleaved thinking prompt injection for Claude models lack test coverage. These are significant new features that handle complex logic including min/max budget constraints and model-specific behavior. Consider adding tests that verify: 1) budget calculation with various model capabilities, 2) the system prompt injection for Claude models with thinking enabled, 3) the system-reminder message insertion.
Copilot
AI
Dec 31, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The model name translation for "claude-opus-4-" now requires the prefix to be exactly "claude-opus-4-" (with the trailing dash), while the previous version matched "claude-opus-" (without the "4"). This change appears intentional but may break compatibility with "claude-opus-3-" or other opus model variants that were previously supported. Please verify this is the intended behavior or restore support for all opus model versions.
| } else if (model.startsWith("claude-opus-4-")) { | |
| return model.replace(/^claude-opus-4-.*/, "claude-opus-4") | |
| } else if (model.startsWith("claude-opus-")) { | |
| return model.replace(/^(claude-opus-\d+).*/, "$1") |
Copilot
AI
Dec 31, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The system-reminder message is placed immediately after system messages and before all other messages. This placement may break the expected message order when there are existing user/assistant message exchanges. The reminder should ideally be placed at the end of the messages array to avoid disrupting the conversation flow, or inserted more strategically based on the context. Consider moving it to the end or documenting why this specific placement is required.
| return [...systemMessages, thinkingMessage, ...otherMessages] | |
| return [...systemMessages, ...otherMessages, thinkingMessage] |
Copilot
AI
Jan 4, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The system prompt injection logic (lines 102-117 and 131-143) only activates when thinkingBudget is truthy. However, getThinkingBudget returns undefined in several cases: when model is not found, when payload.thinking is not provided, when thinking.budget_tokens is undefined, or when maxThinkingBudget is 0 or negative. This means the interleaved thinking protocol instructions won't be injected unless all these conditions are met. Consider whether the protocol instructions should be injected whenever payload.thinking exists, regardless of budget calculation success, or document this behavior clearly so users understand when thinking protocol is enabled.
Copilot
AI
Jan 4, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The new thinking budget calculation logic (lines 56-75) and system prompt injection logic (lines 102-117, 131-143) lack test coverage. These are critical features that manipulate model behavior and user inputs. Consider adding tests that verify: 1) thinking budget is correctly calculated when thinking.budget_tokens is provided, 2) thinking budget respects min/max boundaries from model capabilities, 3) system prompt injection happens only for Claude models with thinking budget, 4) the interleaved thinking protocol reminder is correctly prepended to the first user message.
Copilot
AI
Jan 4, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The thinking blocks filtering logic for Claude models includes a check for signatures that contain "@" (line 226) with the comment "gpt signature has @ in it, so filter those out for claude models". However, this heuristic approach is brittle - there's no guarantee that all GPT signatures will contain "@" or that Claude signatures will never contain "@". Consider using a more robust approach, such as checking the model ID of the original message source or using a dedicated signature format field to distinguish between model types.
| && b.signature.length > 0 | |
| // gpt signature has @ in it, so filter those out for claude models | |
| && !b.signature.includes("@"), | |
| && b.signature.length > 0, |
Copilot
AI
Dec 31, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The thinking blocks are filtered twice - once on line 200-202 to extract all thinking blocks, then again on lines 216-218 to filter those with non-empty thinking content. This redundant filtering is inefficient. Consider combining these filters or restructuring the logic to avoid processing the same blocks multiple times.
Copilot
AI
Jan 4, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The filtering on line 231 checks if b.thinking && b.thinking.length > 0, which is redundant for Claude models because the same check was already done on lines 221-222. While this doesn't cause incorrect behavior, it adds unnecessary processing. Consider restructuring to avoid double filtering - for example, apply the thinking content filter before the Claude-specific signature filter, or ensure thinking blocks always have valid thinking content when they're created.
| if (modelId.startsWith("claude")) { | |
| thinkingBlocks = thinkingBlocks.filter( | |
| (b) => | |
| b.thinking | |
| && b.thinking.length > 0 | |
| && b.signature | |
| && b.signature.length > 0 | |
| // gpt signature has @ in it, so filter those out for claude models | |
| && !b.signature.includes("@"), | |
| ) | |
| } | |
| const thinkingContents = thinkingBlocks | |
| .filter((b) => b.thinking && b.thinking.length > 0) | |
| .map((b) => b.thinking) | |
| // First, ensure all thinking blocks have non-empty thinking content | |
| thinkingBlocks = thinkingBlocks.filter( | |
| (b) => b.thinking && b.thinking.length > 0, | |
| ) | |
| if (modelId.startsWith("claude")) { | |
| thinkingBlocks = thinkingBlocks.filter( | |
| (b) => | |
| b.signature | |
| && b.signature.length > 0 | |
| // gpt signature has @ in it, so filter those out for claude models | |
| && !b.signature.includes("@"), | |
| ) | |
| } | |
| const thinkingContents = thinkingBlocks.map((b) => b.thinking) |
Copilot
AI
Dec 31, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The thinking blocks are placed before text blocks in the response (line 362), which means the thinking content will always appear first in the response regardless of its original position. This may not accurately represent the interleaved thinking flow if text was generated before thinking or if there were multiple rounds of thinking and text. Consider tracking the original order of blocks or documenting why thinking blocks must always come first in the response.
| assistantContentBlocks.push(...thinkBlocks, ...textBlocks, ...toolUseBlocks) | |
| assistantContentBlocks.push(...textBlocks, ...thinkBlocks, ...toolUseBlocks) |
Copilot
AI
Jan 4, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The signature field in AnthropicThinkingBlock is now required (line 59 in anthropic-types.ts), but when reasoningOpaque is not provided, it defaults to an empty string (line 431). This is a breaking API change that could affect API consumers. Consider: 1) making the signature field optional to maintain backwards compatibility, 2) documenting that signature can be an empty string and what that means semantically, or 3) only including thinking blocks when both thinking and signature are non-empty to avoid exposing incomplete thinking blocks.
| if (reasoningText && reasoningText.length > 0) { | |
| return [ | |
| { | |
| type: "thinking", | |
| thinking: reasoningText, | |
| signature: reasoningOpaque || "", | |
| if ( | |
| reasoningText && | |
| reasoningText.length > 0 && | |
| reasoningOpaque && | |
| reasoningOpaque.length > 0 | |
| ) { | |
| return [ | |
| { | |
| type: "thinking", | |
| thinking: reasoningText, | |
| signature: reasoningOpaque, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The refactored copilotBaseUrl function now unconditionally uses the template
https://api.${state.accountType}.githubcopilot.com, which will result inhttps://api.individual.githubcopilot.comfor individual accounts. The previous implementation usedhttps://api.githubcopilot.com(without the subdomain) for individual accounts. This is a breaking change that may cause API requests to fail for individual account users. Please verify that the GitHub Copilot API supports the new URL format for individual accounts, or restore the conditional logic.