Skip to content

Commit 4175714

Browse files
jimgqyu and DeepSeek committed
feat: global max_tokens config with 32768 default
- Raise max_tokens default from 8192 to 32768 across all providers
- Wire max_tokens through full config pipeline: settings.json → Engine → Provider
- Priority: CODER_MAX_TOKENS env > model_entry.max_tokens > settings.max_tokens
- All models share a single global max_tokens value

Co-Authored-By: DeepSeek <noreply@deepseek.com>
1 parent e07c0c3 commit 4175714

8 files changed

Lines changed: 29 additions & 6 deletions

File tree

configs/default-settings.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
{
22
"theme": "dark",
3+
"max_tokens": 32768,
34
"default_model": "deepseek/deepseek-v4-pro",
45
"model_list": [
56
{

packages/cli/src/gateway/coder-client.ts

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ interface ModelEntry {
5050
base_url?: string // Provider endpoint URL
5151
auth_token_env?: string // API key / auth token
5252
proxy?: string // HTTP/HTTPS proxy URL for this provider (e.g. "http://127.0.0.1:7890")
53+
max_tokens?: number // Maximum output tokens for this model (default: 32768)
5354
provider?: string // e.g. "anthropic", "deepseek", "openai"
5455
price?: {
5556
input: number
@@ -80,6 +81,7 @@ interface ClaudeSettings {
8081
theme?: string
8182
model_list?: ModelEntry[]
8283
default_model?: string
84+
max_tokens?: number // Global max output tokens for all providers (default: 32768)
8385
display?: {
8486
tui_auto_resume_recent?: boolean
8587
}
@@ -99,6 +101,7 @@ function resolveModelConfig(settings: ClaudeSettings, fallbackModel: string): {
99101
baseUrl?: string
100102
apiKey?: string
101103
proxy?: string
104+
maxTokens?: number
102105
name: string
103106
provider: string
104107
} {
@@ -121,6 +124,7 @@ function resolveModelConfig(settings: ClaudeSettings, fallbackModel: string): {
121124
baseUrl: entry.base_url,
122125
apiKey: entry.auth_token_env,
123126
proxy: entry.proxy,
127+
maxTokens: entry.max_tokens,
124128
name: selectedModel,
125129
provider: entry.provider ?? inferProvider(selectedModel),
126130
}
@@ -227,7 +231,7 @@ export class CoderGatewayClient extends EventEmitter implements IGatewayClient {
227231
private thinkingBudget: number
228232

229233
// ── Model config ────────────────────────────────────────────────────
230-
private modelConfig: { model: string; baseUrl?: string; apiKey?: string; proxy?: string; name: string; provider: string } | null = null
234+
private modelConfig: { model: string; baseUrl?: string; apiKey?: string; proxy?: string; maxTokens?: number; name: string; provider: string } | null = null
231235

232236
// ── Session fork config ─────────────────────────────────────────────
233237
private forkSessionId?: string
@@ -249,7 +253,7 @@ export class CoderGatewayClient extends EventEmitter implements IGatewayClient {
249253
// CODER_MODEL env var — highest-priority model override.
250254
// Check before resolveModelConfig so the env var wins over settings.json.
251255
const coderModel = process.env.CODER_MODEL
252-
let resolved: { model: string; baseUrl?: string; apiKey?: string; proxy?: string; name: string; provider: string }
256+
let resolved: { model: string; baseUrl?: string; apiKey?: string; proxy?: string; maxTokens?: number; name: string; provider: string }
253257
if (coderModel) {
254258
// Helper: resolve from a model_list entry
255259
const resolveEntry = (entry: ModelEntry, preferredModel?: string) => {
@@ -261,6 +265,7 @@ export class CoderGatewayClient extends EventEmitter implements IGatewayClient {
261265
baseUrl: entry.base_url,
262266
apiKey: entry.auth_token_env,
263267
proxy: entry.proxy,
268+
maxTokens: entry.max_tokens,
264269
name: selectedModel,
265270
provider: entry.provider ?? inferProvider(selectedModel),
266271
}
@@ -838,6 +843,12 @@ export class CoderGatewayClient extends EventEmitter implements IGatewayClient {
838843
env.CODER_PROXY ??
839844
process.env.CODER_PROXY
840845

846+
// Resolve maxTokens: env var > per-model entry > global settings > undefined (uses provider default)
847+
const maxTokens =
848+
process.env.CODER_MAX_TOKENS
849+
? parseInt(process.env.CODER_MAX_TOKENS, 10)
850+
: (modelCfg?.maxTokens ?? settings.max_tokens)
851+
841852
// Check CODER_COORDINATOR_MODE env var (set by entry.tsx or manually)
842853
const coordinatorMode =
843854
this.coordinatorMode ||
@@ -858,6 +869,7 @@ export class CoderGatewayClient extends EventEmitter implements IGatewayClient {
858869
model: this.model,
859870
providerName: modelCfg?.provider,
860871
maxTurns: 100,
872+
maxTokens,
861873
sessionId: this.gatewaySessionId ?? undefined,
862874
sessionManager,
863875
coordinatorMode,

packages/cli/src/gateway/engine-factory.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,8 @@ export interface EngineFactoryOptions {
116116
thinkingMode?: boolean;
117117
/** Extended thinking budget in tokens (default: 1024) */
118118
thinkingBudget?: number;
119+
/** Maximum output tokens for the model (default: 32768) */
120+
maxTokens?: number;
119121
/** External SessionManager — when provided, the engine shares the same
120122
* instance as the gateway (session.create/list/resume RPCs). Without
121123
* this, each engine creates its own instance, leading to session state
@@ -350,6 +352,7 @@ export function createQueryEngine(
350352
mode: engineMode,
351353
thinkingConfig,
352354
hookManager,
355+
maxTokens: opts.maxTokens,
353356
});
354357

355358
// ── 7. Interrupt function ──────────────────────────────────────────

packages/core/src/provider-adapter.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ export function createCallModelFromProvider(
2727
provider: Provider,
2828
model: string,
2929
thinking?: ThinkingConfig,
30+
maxTokens?: number,
3031
): (params: CallModelParams) => AsyncGenerator<SharedStreamEvent | AssistantMessage> {
3132
return async function* callModel(
3233
params: CallModelParams,
@@ -70,8 +71,11 @@ export function createCallModelFromProvider(
7071
if (out) { enqueue(out); if (drain) { const d = drain; drain = null; d(); } }
7172
};
7273

73-
// Build ModelConfig with optional thinking configuration
74+
// Build ModelConfig with optional thinking configuration and maxTokens
7475
const modelConfig: ModelConfig = { model };
76+
if (maxTokens !== undefined) {
77+
modelConfig.maxTokens = maxTokens;
78+
}
7579
if (thinking && thinking.mode !== 'disabled') {
7680
modelConfig.thinking = thinking;
7781
}

packages/core/src/query-engine.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@ export interface QueryEngineConfig {
8181
mode?: 'default' | 'coordinator' | 'worker';
8282
/** Extended thinking configuration (passed to Provider via ModelConfig.thinking) */
8383
thinkingConfig?: ThinkingConfig;
84+
/** Maximum output tokens for the model (default: provider-specific fallback, 32768) */
85+
maxTokens?: number;
8486
/** Optional HookManager for lifecycle hook execution (UserPromptSubmit, etc.) */
8587
hookManager?: HookManager;
8688
}
@@ -391,6 +393,7 @@ export class QueryEngine {
391393
this.config.provider,
392394
this.config.providerModel,
393395
this.config.thinkingConfig,
396+
this.config.maxTokens,
394397
);
395398
}
396399
return mockCallModel;

packages/provider/src/anthropic.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ export class AnthropicProvider implements Provider {
161161
model: modelConfig.model,
162162
system: system || undefined,
163163
messages: apiMessages as MessageParam[],
164-
max_tokens: modelConfig.maxTokens ?? 8192,
164+
max_tokens: modelConfig.maxTokens ?? 32768,
165165
temperature: modelConfig.temperature,
166166
tools: anthropicTools as Anthropic.MessageCreateParams['tools'],
167167
thinking: this.buildThinkingConfig(modelConfig),

packages/provider/src/openai-compat.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ export class OpenAICompatProvider implements Provider {
241241
messages: openaiMessages,
242242
stream: true,
243243
stream_options: { include_usage: true },
244-
max_tokens: modelConfig.maxTokens ?? 8192,
244+
max_tokens: modelConfig.maxTokens ?? 32768,
245245
};
246246

247247
if (modelConfig.temperature !== undefined) {

packages/shared/src/types/config.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ export const DEFAULT_CODER_CONFIG: CoderConfig = {
220220
model: {
221221
provider: 'anthropic',
222222
model: 'claude-sonnet-4-6',
223-
maxTokens: 8192,
223+
maxTokens: 32768,
224224
thinking: {
225225
mode: 'adaptive',
226226
budgetTokens: 16000,

0 commit comments

Comments
 (0)