feat(model): 实现基于上下文窗口的动态隐式输出预留逻辑

jqknono · jqknono · commit e832b5b30d15 · 2026-04-16T14:52:08.000+08:00
- 引入根据总上下文 20% 动态推导预留输出的逻辑，并将其收敛在 4096 到 30000 token 之间。
- 更新配置存储与 Provider 核心逻辑以支持该动态预留，并同步修订了多语言文档与配置元数据。
- 增加针对不同上下文规模的测试用例，并优化了 Commit 消息生成器的提示词指令。
diff --git a/DEV.md b/DEV.md
@@ -161,7 +161,8 @@ npm run test:pages
   - `anthropic`：请求 `baseUrl + /messages`
 - `coding-plans.vendors[].usageUrl` 为可选套餐 usage 接口；当前默认按 `Authorization: Bearer <API Key>` 轮询，并将识别到的小时额度、周额度或次数额度以百分比显示在状态栏。
 - `coding-plans.vendors[].models[].contextSize` 现在是描述模型上下文的首选字段。
-- `coding-plans.advanced.defaultReservedOutput` 的默认值为 `60000`，用于全局输出预算；发送请求时会自动按模型上限收敛。
+- 未显式设置 `maxOutputTokens` 时，运行时会按总上下文动态推导隐式输出预留：`min(30000, max(4096, floor(totalContextWindow * 0.2)))`；极小上下文窗口会再按总窗口安全收敛。
+- `coding-plans.advanced.defaultReservedOutput` 的默认值为 `60000`，用于请求侧输出预算覆盖；发送请求时会自动按模型上限收敛，不改变模型隐式默认输出预留的推导公式。
 - `coding-plans.vendors[].models[].maxInputTokens` / `maxOutputTokens` 已标记为 deprecated，保留兼容旧配置与特殊覆盖用途。两者仍允许配置为 `0`。其中 `maxInputTokens: 0` 的语义为”未设置”；`maxOutputTokens` 默认值就是 `0`，表示”未设置”；在 `openai-chat` / `openai-responses` 下不主动下发 `max_tokens` / `max_output_tokens`，但当上游协议端点强制要求 `max_tokens` 时需自动补发兼容值。`maxInputTokens` 仍仅用于本地元数据和预算，不直接传给 API。自动刷新/写回 `vendors` 配置时不再默认补入这两个字段；只有用户显式配置的现有模型项会被原样保留。
 - 新增采样参数：
   - `coding-plans.vendors[].defaultTemperature` / `defaultTopP`：供应商默认采样值
diff --git a/README.md b/README.md
@@ -159,10 +159,10 @@ code --install-extension techfetch-dev.coding-plans-for-copilot
 | `coding-plans.vendors[].models[].temperature` | `number` | 继承供应商 | 模型级 temperature 覆盖。 |
 | `coding-plans.vendors[].models[].topP` | `number` | 继承供应商 | 模型级 topP 覆盖。 |
 | `coding-plans.vendors[].models[].capabilities` | `object` | `{ tools: true, vision: false }` | 模型能力声明。 |
-| `coding-plans.vendors[].models[].contextSize` | `number` | 空 | 模型总上下文窗口。 |
+| `coding-plans.vendors[].models[].contextSize` | `number` | 空 | 模型总上下文窗口；未显式设置 `maxOutputTokens` 时，运行时会基于它动态推导隐式输出预留。 |
 | `coding-plans.vendors[].models[].maxInputTokens` | `number` | 空 | 已废弃，建议使用 `contextSize`。 |
-| `coding-plans.vendors[].models[].maxOutputTokens` | `number` | `0` | 已废弃，建议使用 `contextSize`。 |
-| `coding-plans.advanced.defaultReservedOutput` | `number` | `60000` | 全局默认输出 token 预算。 |
+| `coding-plans.vendors[].models[].maxOutputTokens` | `number` | `0` | 已废弃，建议使用 `contextSize`。`0` 表示未设置，此时运行时默认按总上下文的 `20%` 推导隐式输出预留，并收敛到 `4096-30000`。 |
+| `coding-plans.advanced.defaultReservedOutput` | `number` | `60000` | 请求侧默认输出 token 预算；仅作为发送请求时的预算覆盖值，最终仍会按模型输出上限收敛。 |
 | `coding-plans.commitMessage.showGenerateCommand` | `boolean` | `true` | 是否显示"生成 Commit 消息"命令。 |
 | `coding-plans.commitMessage.language` | `string` | `en` | 提交消息语言：`en` / `zh-cn`。 |
 | `coding-plans.commitMessage.useRecentCommitStyle` | `boolean` | `false` | 是否参考最近 20 条 commit 风格。 |
diff --git a/README_en.md b/README_en.md
@@ -159,10 +159,10 @@ To switch to OpenAI-compatible endpoints, modify the vendor's `baseUrl` and `def
 | `coding-plans.vendors[].models[].temperature` | `number` | Inherit from vendor | Model-level temperature override. |
 | `coding-plans.vendors[].models[].topP` | `number` | Inherit from vendor | Model-level topP override. |
 | `coding-plans.vendors[].models[].capabilities` | `object` | `{ tools: true, vision: false }` | Model capability declaration. |
-| `coding-plans.vendors[].models[].contextSize` | `number` | Empty | Model total context window. |
+| `coding-plans.vendors[].models[].contextSize` | `number` | Empty | Model total context window. When `maxOutputTokens` is unset, runtime derives the implicit reserved output budget from this total window. |
 | `coding-plans.vendors[].models[].maxInputTokens` | `number` | Empty | Deprecated,建议使用 `contextSize`. |
-| `coding-plans.vendors[].models[].maxOutputTokens` | `number` | `0` | Deprecated,建议使用 `contextSize`. |
-| `coding-plans.advanced.defaultReservedOutput` | `number` | `60000` | Global default output token budget. |
+| `coding-plans.vendors[].models[].maxOutputTokens` | `number` | `0` | Deprecated; prefer `contextSize`. `0` means unset; the runtime then derives an implicit reserved output budget as 20% of the total context window, clamped to 4096-30000. |
+| `coding-plans.advanced.defaultReservedOutput` | `number` | `60000` | Request-side default output token budget. It only overrides request budgeting and is still capped by the model output limit. |
 | `coding-plans.commitMessage.showGenerateCommand` | `boolean` | `true` | Whether to show "Generate Commit Message" command. |
 | `coding-plans.commitMessage.language` | `string` | `en` | Commit message language: `en` / `zh-cn`. |
 | `coding-plans.commitMessage.useRecentCommitStyle` | `boolean` | `false` | Whether to reference the style of the last 20 commits. |
@@ -242,4 +242,4 @@ MIT License
 2. Create a feature branch (`git checkout -b feature/AmazingFeature`)
 3. Commit changes (`git commit -m 'Add some AmazingFeature'`)
 4. Push to the branch (`git push origin feature/AmazingFeature`)
-5. Open a Pull Request
+5. Open a Pull Request
diff --git a/package.json b/package.json
@@ -2,7 +2,7 @@
     "name": "coding-plans-for-copilot",
     "displayName": "%displayName%",
     "description": "%description%",
-    "version": "0.6.28",
+    "version": "0.7.0",
     "publisher": "techfetch-dev",
     "repository": {
         "type": "git",
@@ -315,7 +315,7 @@
                                         },
                                         "contextSize": {
                                             "type": "number",
-                                            "description": "Preferred total context window size for the model. Language model context display uses this value directly when provided."
+                                            "description": "Preferred total context window size for the model. Language model context display uses this value directly when provided. When maxOutputTokens is left unset, runtime derives an implicit reserved output budget from this total window."
                                         },
                                         "maxInputTokens": {
                                             "type": "number",
@@ -325,7 +325,7 @@
                                         "maxOutputTokens": {
                                             "type": "number",
                                             "default": 0,
-                                            "description": "Deprecated: legacy maximum output tokens override. Prefer contextSize for model context. When contextSize is provided and this value exceeds it, it will be capped to contextSize. Defaults to 0, which treats it as unset. For openai-chat/openai-responses it suppresses proactively sending output-token limits, while protocol endpoints that require max_tokens will trigger an automatic compatible retry.",
+                                            "description": "Deprecated: legacy maximum output tokens override. Prefer contextSize for model context. When contextSize is provided and this value exceeds it, it will be capped to contextSize. Defaults to 0, which treats it as unset; runtime then derives an implicit reserved output budget from total context (20%, clamped to 4096-30000). For openai-chat/openai-responses it suppresses proactively sending output-token limits, while protocol endpoints that require max_tokens will trigger an automatic compatible retry.",
                                             "deprecationMessage": "Deprecated: prefer contextSize to describe model context. Keep maxOutputTokens only when you need a legacy output-cap override."
                                         },
                                         "apiStyle": {
diff --git a/src/config/configStore.ts b/src/config/configStore.ts
@@ -5,8 +5,8 @@ import {
   DEFAULT_MODEL_CAPABILITIES_TOOLS,
   DEFAULT_MODEL_CAPABILITIES_VISION,
   DEFAULT_CONTEXT_WINDOW_SIZE,
-  DEFAULT_RESERVED_OUTPUT_TOKENS,
-  VENDOR_API_KEY_PREFIX
+  VENDOR_API_KEY_PREFIX,
+  resolveImplicitReservedOutputTokens
 } from '../constants';
 
 export type VendorApiStyle = 'openai-chat' | 'openai-responses' | 'anthropic';
@@ -520,7 +520,7 @@ export class ConfigStore implements vscode.Disposable {
   ): { maxInputTokens: number; maxOutputTokens: number } {
     const hasExplicitTotalContextWindow = legacyContextWindow !== undefined;
     const fallbackTotal = Math.max(2, Math.floor(legacyContextWindow ?? DEFAULT_CONTEXT_WINDOW_SIZE));
-    const defaultReservedOutputTokens = Math.max(1, Math.min(DEFAULT_RESERVED_OUTPUT_TOKENS, fallbackTotal - 1));
+    const defaultReservedOutputTokens = resolveImplicitReservedOutputTokens(fallbackTotal);
     const normalizeTokenValue = (value: number | undefined): number | undefined => {
       if (value === undefined) {
         return undefined;
diff --git a/src/constants.ts b/src/constants.ts
@@ -79,9 +79,24 @@ export const DEFAULT_TOKEN_SIDE_LIMIT = 200000;
 export const DEFAULT_CONTEXT_WINDOW_SIZE = DEFAULT_TOKEN_SIDE_LIMIT;
 export const DEFAULT_REQUEST_MAX_TOKENS = DEFAULT_TOKEN_SIDE_LIMIT;
 export const DEFAULT_RESERVED_OUTPUT_TOKENS = 30000;
+export const MIN_DYNAMIC_RESERVED_OUTPUT_TOKENS = 4096;
+export const DEFAULT_RESERVED_OUTPUT_RATIO = 0.2;
 export const DEFAULT_MODEL_CAPABILITIES_TOOLS = true;
 export const DEFAULT_MODEL_CAPABILITIES_VISION = false;
 
+export function resolveImplicitReservedOutputTokens(totalContextWindow: number): number {
+  const normalizedTotalContextWindow = Math.max(2, Math.floor(totalContextWindow));
+  const desiredReservedOutputTokens = Math.min(
+    DEFAULT_RESERVED_OUTPUT_TOKENS,
+    Math.max(
+      MIN_DYNAMIC_RESERVED_OUTPUT_TOKENS,
+      Math.floor(normalizedTotalContextWindow * DEFAULT_RESERVED_OUTPUT_RATIO)
+    )
+  );
+
+  return Math.max(1, Math.min(desiredReservedOutputTokens, normalizedTotalContextWindow - 1));
+}
+
 export const LOG_LEVEL_PRIORITY = {
   DEBUG: 10,
   INFO: 20,
diff --git a/src/providers/baseProvider.ts b/src/providers/baseProvider.ts
@@ -4,7 +4,8 @@ import {
   DEFAULT_CONTEXT_WINDOW_SIZE,
   DEFAULT_RESERVED_OUTPUT_TOKENS,
   DEFAULT_TOKEN_SIDE_LIMIT,
-  MODEL_VERSION_LABEL
+  MODEL_VERSION_LABEL,
+  resolveImplicitReservedOutputTokens
 } from '../constants';
 import { logger } from '../logging/outputChannelLogger';
 
@@ -463,7 +464,7 @@ export abstract class BaseAIProvider implements vscode.Disposable {
   ): Pick<ResolvedModelRuntimeSettings, 'maxTokens' | 'maxInputTokens' | 'maxOutputTokens'> {
     const hasExplicitTotalContextWindow = totalContextWindow !== undefined;
     const fallbackTotal = Math.max(2, Math.floor(totalContextWindow ?? DEFAULT_CONTEXT_WINDOW_SIZE));
-    const defaultReservedOutputTokens = Math.max(1, Math.min(DEFAULT_RESERVED_OUTPUT_TOKENS, fallbackTotal - 1));
+    const defaultReservedOutputTokens = resolveImplicitReservedOutputTokens(fallbackTotal);
     const normalizeTokenValue = (value: number | undefined): number | undefined => {
       if (value === undefined) {
         return undefined;
diff --git a/src/test/runTest.ts b/src/test/runTest.ts