Commit d0d283c
Remove maxContextWindowTokens and refine token limit docs
Remove maxContextWindowTokens from all SDKs: it is an internal runtime fallback that should not be exposed as public SDK API.

Refine doc comments for maxOutputTokens and maxPromptTokens to explain what happens when each limit is hit:

- maxOutputTokens: sent as max_tokens per LLM request; the model stops generating and returns a truncated response when hit.
- maxPromptTokens: used by the runtime to trigger conversation compaction before sending a request when the prompt exceeds this limit.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 7bafc5f commit d0d283c

5 files changed

Lines changed: 25 additions & 37 deletions
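
To make the runtime behavior described in the commit message concrete, here is a minimal Python sketch of how the two remaining limits act. count_tokens, compact_history, and build_request are hypothetical stand-ins invented for this sketch, not SDK or runtime API; the real compaction logic is internal to the runtime.

# Illustrative sketch only: these helpers are hypothetical stand-ins for
# runtime internals, not public SDK API.

def count_tokens(parts: list[str]) -> int:
    # Crude whitespace token count, good enough for the sketch.
    return sum(len(p.split()) for p in parts)

def compact_history(parts: list[str]) -> list[str]:
    # Stand-in for conversation compaction: drop the oldest history entry
    # while keeping the system message at index 0.
    return parts[:1] + parts[2:]

def build_request(parts: list[str], max_prompt_tokens: int, max_output_tokens: int) -> dict:
    # maxPromptTokens: compact the conversation until the prompt (system
    # message, history, tool definitions, user message) fits under the limit.
    while count_tokens(parts) > max_prompt_tokens and len(parts) > 2:
        parts = compact_history(parts)
    # maxOutputTokens: forwarded as max_tokens on the LLM API request; when
    # the model hits it, generation stops and the response comes back truncated.
    return {"messages": parts, "max_tokens": max_output_tokens}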


dotnet/src/Types.cs

Lines changed: 6 additions & 11 deletions
@@ -1117,26 +1117,21 @@ public class ProviderConfig
     public AzureOptions? Azure { get; set; }

     /// <summary>
-    /// Overrides the maximum number of output tokens the model can generate.
-    /// When set, takes precedence over the default limit resolved from the model's capability catalog entry.
+    /// Maximum number of tokens the model can generate in a single response.
+    /// Sent as <c>max_tokens</c> per LLM API request. When hit, the model stops
+    /// generating and returns a truncated response.
     /// </summary>
     [JsonPropertyName("maxOutputTokens")]
     public int? MaxOutputTokens { get; set; }

     /// <summary>
-    /// Overrides the maximum number of prompt/input tokens.
-    /// When set, takes precedence over the default limit resolved from the model's capability catalog entry.
+    /// Maximum number of tokens allowed in the prompt for a single LLM API request.
+    /// Used by the runtime to trigger conversation compaction before sending a request
+    /// when the prompt (system message, history, tool definitions, user message) exceeds this limit.
     /// </summary>
     [JsonPropertyName("maxPromptTokens")]
     public int? MaxPromptTokens { get; set; }

-    /// <summary>
-    /// Overrides the maximum context window size in tokens.
-    /// When set, takes precedence over the default limit resolved from the model's capability catalog entry.
-    /// </summary>
-    [JsonPropertyName("maxContextWindowTokens")]
-    public int? MaxContextWindowTokens { get; set; }
-
     /// <summary>
     /// Specifies the model ID used to look up default token limits from the capability catalog.
     /// When unset, the session's configured model ID (see <see cref="SessionConfig.Model"/>) is used.

go/types.go

Lines changed: 6 additions & 7 deletions
@@ -601,15 +601,14 @@ type ProviderConfig struct {
 	BearerToken string `json:"bearerToken,omitempty"`
 	// Azure contains Azure-specific options
 	Azure *AzureProviderOptions `json:"azure,omitempty"`
-	// MaxOutputTokens overrides the maximum number of output tokens the model can generate.
-	// When set, takes precedence over the default limit from the model's capability catalog entry.
+	// MaxOutputTokens is the maximum number of tokens the model can generate in a single response.
+	// Sent as max_tokens per LLM API request. When hit, the model stops generating and returns
+	// a truncated response.
 	MaxOutputTokens int `json:"maxOutputTokens,omitempty"`
-	// MaxPromptTokens overrides the maximum number of prompt/input tokens.
-	// When set, takes precedence over the default limit from the model's capability catalog entry.
+	// MaxPromptTokens is the maximum number of tokens allowed in the prompt for a single LLM API
+	// request. Used by the runtime to trigger conversation compaction before sending a request
+	// when the prompt (system message, history, tool definitions, user message) exceeds this limit.
 	MaxPromptTokens int `json:"maxPromptTokens,omitempty"`
-	// MaxContextWindowTokens overrides the maximum context window size in tokens.
-	// When set, takes precedence over the default limit from the model's capability catalog entry.
-	MaxContextWindowTokens int `json:"maxContextWindowTokens,omitempty"`
 	// ModelLimitsID specifies the model ID used to look up default token limits from the capability catalog.
 	// When unset, the session's configured model ID is used.
 	// Useful for fine-tuned models that share the same limits as a base model.

nodejs/src/types.ts

Lines changed: 6 additions & 10 deletions
@@ -1262,23 +1262,19 @@ export interface ProviderConfig {
   };

   /**
-   * Overrides the maximum number of output tokens the model can generate.
-   * When set, takes precedence over the default limit resolved from the model's capability catalog entry.
+   * Maximum number of tokens the model can generate in a single response.
+   * Sent as {@link https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens max_tokens} per LLM API request.
+   * When hit, the model stops generating and returns a truncated response.
   */
   maxOutputTokens?: number;

   /**
-   * Overrides the maximum number of prompt/input tokens.
-   * When set, takes precedence over the default limit resolved from the model's capability catalog entry.
+   * Maximum number of tokens allowed in the prompt for a single LLM API request.
+   * Used by the runtime to trigger conversation compaction before sending a request
+   * when the prompt (system message, history, tool definitions, user message) exceeds this limit.
   */
   maxPromptTokens?: number;

-  /**
-   * Overrides the maximum context window size in tokens.
-   * When set, takes precedence over the default limit resolved from the model's capability catalog entry.
-   */
-  maxContextWindowTokens?: number;
-
   /**
    * Specifies the model ID used to look up default token limits from the capability catalog.
    * When unset, the session's configured model ID is used.

python/copilot/client.py

Lines changed: 0 additions & 2 deletions
@@ -1966,8 +1966,6 @@ def _convert_provider_to_wire_format(
         wire_provider["maxOutputTokens"] = provider["max_output_tokens"]
     if "max_prompt_tokens" in provider:
         wire_provider["maxPromptTokens"] = provider["max_prompt_tokens"]
-    if "max_context_window_tokens" in provider:
-        wire_provider["maxContextWindowTokens"] = provider["max_context_window_tokens"]
     if "model_limits_id" in provider:
         wire_provider["modelLimitsId"] = provider["model_limits_id"]
     return wire_provider
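
For reference, a hedged sketch of what the conversion above produces after this change; the provider dict below is invented for illustration.

# Hypothetical input to _convert_provider_to_wire_format (field names per the diff):
provider = {"max_output_tokens": 4096, "max_prompt_tokens": 120_000}

# Expected wire-format output: snake_case keys become camelCase, and a stray
# "max_context_window_tokens" key would now be silently skipped.
expected = {"maxOutputTokens": 4096, "maxPromptTokens": 120_000}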

python/copilot/session.py

Lines changed: 7 additions & 7 deletions
@@ -507,15 +507,15 @@ class ProviderConfig(TypedDict, total=False):
     # Takes precedence over api_key when both are set.
     bearer_token: str
     azure: AzureProviderOptions  # Azure-specific options
-    # Overrides the maximum number of output tokens the model can generate.
-    # Takes precedence over the default limit from the model's capability catalog entry.
+    # Maximum number of tokens the model can generate in a single response.
+    # Sent as max_tokens per LLM API request. When hit, the model stops
+    # generating and returns a truncated response.
     max_output_tokens: int
-    # Overrides the maximum number of prompt/input tokens.
-    # Takes precedence over the default limit from the model's capability catalog entry.
+    # Maximum number of tokens allowed in the prompt for a single LLM API request.
+    # Used by the runtime to trigger conversation compaction before sending a
+    # request when the prompt (system message, history, tool definitions, user
+    # message) exceeds this limit.
     max_prompt_tokens: int
-    # Overrides the maximum context window size in tokens.
-    # Takes precedence over the default limit from the model's capability catalog entry.
-    max_context_window_tokens: int
     # Model ID used to look up default token limits from the capability catalog.
     # When unset, the session's configured model ID is used.
     # Useful for fine-tuned models that share the same limits as a base model.
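
A brief usage sketch for the two remaining fields. It assumes ProviderConfig is importable from copilot.session as defined in this file; the token values are arbitrary examples.

# Usage sketch: assumes ProviderConfig is exposed by copilot.session.
from copilot.session import ProviderConfig

provider: ProviderConfig = {
    "max_output_tokens": 4096,     # sent as max_tokens on each LLM request
    "max_prompt_tokens": 120_000,  # compaction triggers above this prompt size
}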
