Commit d0d283c
Remove maxContextWindowTokens and refine token limit docs
Remove maxContextWindowTokens from all SDKs: it is an internal runtime fallback that should not be exposed as public SDK API.

Refine doc comments for maxOutputTokens and maxPromptTokens to explain what happens when each limit is hit:

- maxOutputTokens: sent as max_tokens per LLM request; the model stops generating and returns a truncated response when hit.
- maxPromptTokens: used by the runtime to trigger conversation compaction before sending a request when the prompt exceeds this limit.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 7bafc5f commit d0d283c

5 files changed

Lines changed: 25 additions & 37 deletions
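
To make the runtime behavior described in the commit message concrete, here is a minimal Python sketch of how the two remaining limits act. count_tokens, compact_history, and build_request are hypothetical stand-ins invented for this sketch, not SDK or runtime API; the real compaction logic is internal to the runtime.

# Illustrative sketch only: these helpers are hypothetical stand-ins for
# runtime internals, not public SDK API.

def count_tokens(parts: list[str]) -> int:
    # Crude whitespace token count, good enough for the sketch.
    return sum(len(p.split()) for p in parts)

def compact_history(parts: list[str]) -> list[str]:
    # Stand-in for conversation compaction: drop the oldest history entry
    # while keeping the system message at index 0.
    return parts[:1] + parts[2:]

def build_request(parts: list[str], max_prompt_tokens: int, max_output_tokens: int) -> dict:
    # maxPromptTokens: compact the conversation until the prompt (system
    # message, history, tool definitions, user message) fits under the limit.
    while count_tokens(parts) > max_prompt_tokens and len(parts) > 2:
        parts = compact_history(parts)
    # maxOutputTokens: forwarded as max_tokens on the LLM API request; when
    # the model hits it, generation stops and the response comes back truncated.
    return {"messages": parts, "max_tokens": max_output_tokens}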


dotnet/src/Types.cs

Lines changed: 6 additions & 11 deletions
@@ -1117,26 +1117,21 @@ public class ProviderConfig
     public AzureOptions? Azure { get; set; }

     /// <summary>
-    /// Overrides the maximum number of output tokens the model can generate.
-    /// When set, takes precedence over the default limit resolved from the model's capability catalog entry.
+    /// Maximum number of tokens the model can generate in a single response.
+    /// Sent as <c>max_tokens</c> per LLM API request. When hit, the model stops
+    /// generating and returns a truncated response.
     /// </summary>
     [JsonPropertyName("maxOutputTokens")]
     public int? MaxOutputTokens { get; set; }

     /// <summary>
-    /// Overrides the maximum number of prompt/input tokens.
-    /// When set, takes precedence over the default limit resolved from the model's capability catalog entry.
+    /// Maximum number of tokens allowed in the prompt for a single LLM API request.
+    /// Used by the runtime to trigger conversation compaction before sending a request
+    /// when the prompt (system message, history, tool definitions, user message) exceeds this limit.
     /// </summary>
     [JsonPropertyName("maxPromptTokens")]
     public int? MaxPromptTokens { get; set; }

-    /// <summary>
-    /// Overrides the maximum context window size in tokens.
-    /// When set, takes precedence over the default limit resolved from the model's capability catalog entry.
-    /// </summary>
-    [JsonPropertyName("maxContextWindowTokens")]
-    public int? MaxContextWindowTokens { get; set; }
-
     /// <summary>
     /// Specifies the model ID used to look up default token limits from the capability catalog.
     /// When unset, the session's configured model ID (see <see cref="SessionConfig.Model"/>) is used.

go/types.go

Lines changed: 6 additions & 7 deletions
@@ -601,15 +601,14 @@ type ProviderConfig struct {
 	BearerToken string `json:"bearerToken,omitempty"`
 	// Azure contains Azure-specific options
 	Azure *AzureProviderOptions `json:"azure,omitempty"`
-	// MaxOutputTokens overrides the maximum number of output tokens the model can generate.
-	// When set, takes precedence over the default limit from the model's capability catalog entry.
+	// MaxOutputTokens is the maximum number of tokens the model can generate in a single response.
+	// Sent as max_tokens per LLM API request. When hit, the model stops generating and returns
+	// a truncated response.
 	MaxOutputTokens int `json:"maxOutputTokens,omitempty"`
-	// MaxPromptTokens overrides the maximum number of prompt/input tokens.
-	// When set, takes precedence over the default limit from the model's capability catalog entry.
+	// MaxPromptTokens is the maximum number of tokens allowed in the prompt for a single LLM API
+	// request. Used by the runtime to trigger conversation compaction before sending a request
+	// when the prompt (system message, history, tool definitions, user message) exceeds this limit.
 	MaxPromptTokens int `json:"maxPromptTokens,omitempty"`
-	// MaxContextWindowTokens overrides the maximum context window size in tokens.
-	// When set, takes precedence over the default limit from the model's capability catalog entry.
-	MaxContextWindowTokens int `json:"maxContextWindowTokens,omitempty"`
 	// ModelLimitsID specifies the model ID used to look up default token limits from the capability catalog.
 	// When unset, the session's configured model ID is used.
 	// Useful for fine-tuned models that share the same limits as a base model.

nodejs/src/types.ts

Lines changed: 6 additions & 10 deletions
@@ -1262,23 +1262,19 @@ export interface ProviderConfig {
   };

   /**
-   * Overrides the maximum number of output tokens the model can generate.
-   * When set, takes precedence over the default limit resolved from the model's capability catalog entry.
+   * Maximum number of tokens the model can generate in a single response.
+   * Sent as {@link https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens max_tokens} per LLM API request.
+   * When hit, the model stops generating and returns a truncated response.
   */
   maxOutputTokens?: number;

   /**
-   * Overrides the maximum number of prompt/input tokens.
-   * When set, takes precedence over the default limit resolved from the model's capability catalog entry.
+   * Maximum number of tokens allowed in the prompt for a single LLM API request.
+   * Used by the runtime to trigger conversation compaction before sending a request
+   * when the prompt (system message, history, tool definitions, user message) exceeds this limit.
   */
   maxPromptTokens?: number;

-  /**
-   * Overrides the maximum context window size in tokens.
-   * When set, takes precedence over the default limit resolved from the model's capability catalog entry.
-   */
-  maxContextWindowTokens?: number;
-
   /**
    * Specifies the model ID used to look up default token limits from the capability catalog.
    * When unset, the session's configured model ID is used.

python/copilot/client.py

Lines changed: 0 additions & 2 deletions
@@ -1966,8 +1966,6 @@ def _convert_provider_to_wire_format(
         wire_provider["maxOutputTokens"] = provider["max_output_tokens"]
     if "max_prompt_tokens" in provider:
         wire_provider["maxPromptTokens"] = provider["max_prompt_tokens"]
-    if "max_context_window_tokens" in provider:
-        wire_provider["maxContextWindowTokens"] = provider["max_context_window_tokens"]
     if "model_limits_id" in provider:
         wire_provider["modelLimitsId"] = provider["model_limits_id"]
     return wire_provider
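
For reference, a hedged sketch of what the conversion above produces after this change; the provider dict below is invented for illustration.

# Hypothetical input to _convert_provider_to_wire_format (field names per the diff):
provider = {"max_output_tokens": 4096, "max_prompt_tokens": 120_000}

# Expected wire-format output: snake_case keys become camelCase, and a stray
# "max_context_window_tokens" key would now be silently skipped.
expected = {"maxOutputTokens": 4096, "maxPromptTokens": 120_000}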

python/copilot/session.py

Lines changed: 7 additions & 7 deletions
@@ -507,15 +507,15 @@ class ProviderConfig(TypedDict, total=False):
     # Takes precedence over api_key when both are set.
     bearer_token: str
     azure: AzureProviderOptions  # Azure-specific options
-    # Overrides the maximum number of output tokens the model can generate.
-    # Takes precedence over the default limit from the model's capability catalog entry.
+    # Maximum number of tokens the model can generate in a single response.
+    # Sent as max_tokens per LLM API request. When hit, the model stops
+    # generating and returns a truncated response.
     max_output_tokens: int
-    # Overrides the maximum number of prompt/input tokens.
-    # Takes precedence over the default limit from the model's capability catalog entry.
+    # Maximum number of tokens allowed in the prompt for a single LLM API request.
+    # Used by the runtime to trigger conversation compaction before sending a
+    # request when the prompt (system message, history, tool definitions, user
+    # message) exceeds this limit.
     max_prompt_tokens: int
-    # Overrides the maximum context window size in tokens.
-    # Takes precedence over the default limit from the model's capability catalog entry.
-    max_context_window_tokens: int
     # Model ID used to look up default token limits from the capability catalog.
     # When unset, the session's configured model ID is used.
     # Useful for fine-tuned models that share the same limits as a base model.
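
A brief usage sketch for the two remaining fields. It assumes ProviderConfig is importable from copilot.session as defined in this file; the token values are arbitrary examples.

# Usage sketch: assumes ProviderConfig is exposed by copilot.session.
from copilot.session import ProviderConfig

provider: ProviderConfig = {
    "max_output_tokens": 4096,     # sent as max_tokens on each LLM request
    "max_prompt_tokens": 120_000,  # compaction triggers above this prompt size
}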
