Skip to content

Commit 8fa4087

Browse files
committed
fix(api): align /v1/responses max_output_tokens cap to 128k for all models (refs #112)
The translator strips max_output_tokens before forwarding to Codex (which rejects the field), so the local cap is only a sanity bound. Capping non-gpt-5.5 models at 65536 caused legitimate SDK defaults in the 100k+ range to be rejected, with no upstream benefit; both CLIProxyAPI and sub2api apply no local ceiling at all on the Codex path. Collapse the per-model branch into a single 128000 cap and update the OpenAPI description to make it explicit that the effective ceiling is enforced upstream, not here. ResponsesMaxOutputTokensForModel keeps its signature for API stability.
1 parent 22600d8 commit 8fa4087

3 files changed

Lines changed: 29 additions & 21 deletions

File tree

api/openapi.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,11 @@ components:
342342
type: integer
343343
minimum: 1
344344
maximum: 128000
345-
description: Maximum output tokens. gpt-5.5 supports up to 128000; other models may be capped lower.
345+
description: |
346+
Client-side hint for maximum output tokens. The proxy strips this
347+
field before forwarding to the Codex upstream (which does not
348+
accept it), so the effective ceiling is enforced upstream, not here.
349+
The 128000 cap above is a sanity bound for obviously-absurd values.
346350
temperature:
347351
type: number
348352
minimum: 0

api/validation.go

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,14 @@ import (
1414
"github.com/tidwall/gjson"
1515
)
1616

17-
const (
18-
defaultResponsesMaxOutputTokens = 65536
19-
gpt55ResponsesMaxOutputTokens = 128000
20-
)
17+
// responsesMaxOutputTokensCap is the upper bound enforced by the local
18+
// validator on the /v1/responses max_output_tokens field. The translator
19+
// strips the field before forwarding to the Codex upstream (which does not
20+
// accept it), so this cap only guards against obviously-absurd client values
21+
// — it does not control the actual output length, which is decided upstream.
22+
// Aligned to 128000 to match the highest cap OpenAI currently advertises on
23+
// any Codex-served model, so SDKs that default to 100k+ pass through.
24+
const responsesMaxOutputTokensCap = 128000
2125

2226
// ValidationRule represents a validation rule function
2327
type ValidationRule func(value gjson.Result, path string) *ValidationError
@@ -424,16 +428,12 @@ func ChatCompletionValidationRules() map[string][]ValidationRule {
424428
}
425429
}
426430

427-
// ResponsesMaxOutputTokensForModel returns the downstream validation cap for
428-
// max_output_tokens. Most Codex models still use the legacy 64k output cap,
429-
// while gpt-5.5 clients may legitimately request up to 128k.
430-
func ResponsesMaxOutputTokensForModel(model string) int {
431-
switch strings.ToLower(strings.TrimSpace(model)) {
432-
case "gpt-5.5":
433-
return gpt55ResponsesMaxOutputTokens
434-
default:
435-
return defaultResponsesMaxOutputTokens
436-
}
431+
// ResponsesMaxOutputTokensForModel returns the local validation cap for
432+
// max_output_tokens. The cap is intentionally model-agnostic: the translator
433+
// drops the field before forwarding to Codex, so the real upstream ceiling is
434+
// enforced server-side. The model argument is kept for API compatibility.
435+
func ResponsesMaxOutputTokensForModel(_ string) int {
436+
return responsesMaxOutputTokensCap
437437
}
438438

439439
// ResponsesAPIValidationRules returns validation rules for responses API request
@@ -442,12 +442,11 @@ func ResponsesAPIValidationRules() map[string][]ValidationRule {
442442
return ResponsesAPIValidationRulesForModel("")
443443
}
444444

445-
func ResponsesAPIValidationRulesForModel(model string) map[string][]ValidationRule {
446-
maxOutputTokens := ResponsesMaxOutputTokensForModel(model)
445+
func ResponsesAPIValidationRulesForModel(_ string) map[string][]ValidationRule {
447446
return map[string][]ValidationRule{
448447
"model": {Required(), TypeString(), MaxLength(64)},
449448
// input validation is handled separately to support both string and array formats
450-
"max_output_tokens": {TypeNumber(), MinValue(1), MaxValue(float64(maxOutputTokens))},
449+
"max_output_tokens": {TypeNumber(), MinValue(1), MaxValue(float64(responsesMaxOutputTokensCap))},
451450
"temperature": {TypeNumber(), Range(0, 2)},
452451
"top_p": {TypeNumber(), Range(0, 1)},
453452
"stream": {TypeBoolean()},

api/validation_test.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ func TestValidateResponsesAPIRequestAllowsCompactionInputType(t *testing.T) {
7575
}
7676
}
7777

78-
func TestValidateResponsesAPIRequestUsesModelAwareMaxOutputTokens(t *testing.T) {
78+
func TestValidateResponsesAPIRequestMaxOutputTokensCap(t *testing.T) {
7979
tests := []struct {
8080
name string
8181
body []byte
@@ -92,8 +92,13 @@ func TestValidateResponsesAPIRequestUsesModelAwareMaxOutputTokens(t *testing.T)
9292
valid: false,
9393
},
9494
{
95-
name: "other models keep 64k output cap",
96-
body: []byte(`{"model":"gpt-5.4","input":"hello","max_output_tokens":65537}`),
95+
name: "other models also allow up to 128k (aligned cap, upstream decides actual ceiling)",
96+
body: []byte(`{"model":"gpt-5.4","input":"hello","max_output_tokens":100000}`),
97+
valid: true,
98+
},
99+
{
100+
name: "other models reject above 128k",
101+
body: []byte(`{"model":"gpt-5.4","input":"hello","max_output_tokens":128001}`),
97102
valid: false,
98103
},
99104
}

0 commit comments

Comments
 (0)