fix(copilot): lower static Claude context limits and expose them to Claude Code

kunish · kunish · commit 578c31266063 · 2026-04-08T17:02:53.000+08:00
The Copilot API enforces per-account prompt token limits (128K individual,
168K business) that are lower than the total context window (200K). When
the dynamic /models API fetch fails or returns no capabilities.limits,
the static fallback of 200K exceeds the real enforced limit, causing
intermittent "prompt token count exceeds the limit" errors.

Two complementary fixes:

1. Lower static Copilot Claude model ContextLength from 200000 to 128000
   via the new defaultCopilotClaudeContextLength constant (the conservative
   default matching defaultCopilotContextLength). Dynamic API limits
   override this when available.

2. Add context_length and max_completion_tokens to Claude-format model
   responses (each emitted only when its value is positive) so Claude Code
   CLI can learn the actual Copilot limit instead of relying on its
   built-in 1M context configuration.
diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go
@@ -323,6 +323,13 @@ func LookupStaticModelInfo(modelID string) *ModelInfo {
 	return nil
 }
 
+// defaultCopilotClaudeContextLength is the conservative prompt token limit for
+// Claude models accessed via the GitHub Copilot API. Individual accounts are
+// capped at 128K; business accounts at 168K. It is only a safe fallback: when
+// the dynamic /models API fetch succeeds and reports capabilities.limits, the
+// real per-account limit overrides this value.
+const defaultCopilotClaudeContextLength = 128000
+
 // GetGitHubCopilotModels returns the available models for GitHub Copilot.
 // These models are available through the GitHub Copilot API at api.githubcopilot.com.
 func GetGitHubCopilotModels() []*ModelInfo {
@@ -534,7 +541,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Type:                "github-copilot",
 			DisplayName:         "Claude Haiku 4.5",
 			Description:         "Anthropic Claude Haiku 4.5 via GitHub Copilot",
-			ContextLength:       200000,
+			ContextLength:       defaultCopilotClaudeContextLength,
 			MaxCompletionTokens: 64000,
 			SupportedEndpoints:  []string{"/chat/completions"},
 		},
@@ -546,7 +553,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Type:                "github-copilot",
 			DisplayName:         "Claude Opus 4.1",
 			Description:         "Anthropic Claude Opus 4.1 via GitHub Copilot",
-			ContextLength:       200000,
+			ContextLength:       defaultCopilotClaudeContextLength,
 			MaxCompletionTokens: 32000,
 			SupportedEndpoints:  []string{"/chat/completions"},
 		},
@@ -558,7 +565,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Type:                "github-copilot",
 			DisplayName:         "Claude Opus 4.5",
 			Description:         "Anthropic Claude Opus 4.5 via GitHub Copilot",
-			ContextLength:       200000,
+			ContextLength:       defaultCopilotClaudeContextLength,
 			MaxCompletionTokens: 64000,
 			SupportedEndpoints:  []string{"/chat/completions"},
 			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
@@ -571,7 +578,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Type:                "github-copilot",
 			DisplayName:         "Claude Opus 4.6",
 			Description:         "Anthropic Claude Opus 4.6 via GitHub Copilot",
-			ContextLength:       200000,
+			ContextLength:       defaultCopilotClaudeContextLength,
 			MaxCompletionTokens: 64000,
 			SupportedEndpoints:  []string{"/chat/completions"},
 			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
@@ -584,7 +591,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Type:                "github-copilot",
 			DisplayName:         "Claude Sonnet 4",
 			Description:         "Anthropic Claude Sonnet 4 via GitHub Copilot",
-			ContextLength:       200000,
+			ContextLength:       defaultCopilotClaudeContextLength,
 			MaxCompletionTokens: 64000,
 			SupportedEndpoints:  []string{"/chat/completions"},
 			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
@@ -597,7 +604,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Type:                "github-copilot",
 			DisplayName:         "Claude Sonnet 4.5",
 			Description:         "Anthropic Claude Sonnet 4.5 via GitHub Copilot",
-			ContextLength:       200000,
+			ContextLength:       defaultCopilotClaudeContextLength,
 			MaxCompletionTokens: 64000,
 			SupportedEndpoints:  []string{"/chat/completions"},
 			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
@@ -610,7 +617,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Type:                "github-copilot",
 			DisplayName:         "Claude Sonnet 4.6",
 			Description:         "Anthropic Claude Sonnet 4.6 via GitHub Copilot",
-			ContextLength:       200000,
+			ContextLength:       defaultCopilotClaudeContextLength,
 			MaxCompletionTokens: 64000,
 			SupportedEndpoints:  []string{"/chat/completions"},
 			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
diff --git a/internal/registry/model_registry.go b/internal/registry/model_registry.go
@@ -1177,6 +1177,16 @@ func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string)
 				"dynamic_allowed": model.Thinking.DynamicAllowed,
 			}
 		}
+		// Include context limits so Claude Code can manage conversation
+		// context correctly, especially for Copilot-proxied models whose
+		// real prompt limit (128K-168K) is much lower than the 1M window
+		// that Claude Code may assume for Opus 4.6 with 1M context enabled.
+		if model.ContextLength > 0 {
+			result["context_length"] = model.ContextLength
+		}
+		if model.MaxCompletionTokens > 0 {
+			result["max_completion_tokens"] = model.MaxCompletionTokens
+		}
 		return result
 
 	case "gemini":