Skip to content

Commit 7893af4

Browse files
coadler and Mux authored
🤖 fix: cap GPT-5.5 OAuth context (#3333)
## Summary

Caps GPT-5.5's effective context window at 272K tokens when requests are routed through Codex OAuth, while preserving the public 1.05M API-key context window.

## Background

GPT-5.5's API metadata advertises a 1.05M context window, but the ChatGPT/Codex OAuth route has a lower practical routing-layer limit. Mux was using the API metadata everywhere, so OAuth-routed users could see token meters and compaction thresholds that allowed prompts far beyond the OAuth backend's cap.

## Implementation

- Adds a Codex OAuth context-window override for `gpt-5.5`.
- Applies the override from shared effective context-limit logic only when provider config indicates Codex OAuth is the active route.
- Keeps API-key-selected GPT-5.5 requests on the 1.05M public API limit.
- Routes token meter calculations through the same effective limit helper used by compaction.
- Uses the safe provider config view for backend compaction checks so env/file API-key source and OAuth presence metadata are visible.

## Validation

- `bun test src/common/utils/compaction/contextLimit.test.ts src/common/utils/tokens/tokenMeterUtils.test.ts`
- `bunx eslint src/common/constants/codexOAuth.ts src/common/utils/compaction/contextLimit.ts src/common/utils/compaction/contextLimit.test.ts src/common/utils/tokens/models-extra.ts src/common/utils/tokens/tokenMeterUtils.ts src/common/utils/tokens/tokenMeterUtils.test.ts src/node/services/agentSession.ts`
- `bunx prettier --check src/common/constants/codexOAuth.ts src/common/utils/compaction/contextLimit.ts src/common/utils/compaction/contextLimit.test.ts src/common/utils/tokens/models-extra.ts src/common/utils/tokens/tokenMeterUtils.ts src/common/utils/tokens/tokenMeterUtils.test.ts src/node/services/agentSession.ts`
- `git diff --check`
- `nix fmt -- flake.nix`
- `make static-check NIX=`

## Risks

Low-to-medium risk, limited to context-limit display and compaction decisions. The main behavioral risk is misclassifying the OpenAI auth route from provider config; tests cover OAuth-only, OAuth-default, explicit API-key, and env API-key cases.

## Pains

Local `make static-check` required environment repair: `node-pty`, `shfmt`, and `hadolint` were missing from the workspace. The repo's `fmt-nix-check` target also fails locally when its temporary copied flake is outside a git repo, so I separately ran `nix fmt -- flake.nix` and then ran `make static-check NIX=`.

---

_Generated with `mux` • Model: `openai:gpt-5.5` • Thinking: `xhigh` • Cost: `$0.00`_

<!-- mux-attribution: model=openai:gpt-5.5 thinking=xhigh costs=0.00 -->

Co-authored-by: Mux <noreply@coder.com>
1 parent 698d0b0 commit 7893af4

7 files changed

Lines changed: 250 additions & 26 deletions

File tree

src/common/constants/codexOAuth.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,17 @@ export const CODEX_OAUTH_REQUIRED_MODELS = new Set<string>([
113113
"gpt-5.3-codex-spark",
114114
]);
115115

116+
/**
 * Runtime context caps that differ when an otherwise public API model is routed through
 * ChatGPT/Codex OAuth. Keep these separate from model metadata so API-key requests can
 * still use the public OpenAI limits.
 *
 * Keys are provider model ids as produced by normalizeCodexOauthModelId; values are the
 * capped context window in tokens. Marked readonly so the table cannot be mutated at runtime.
 */
const CODEX_OAUTH_CONTEXT_WINDOW_OVERRIDES: Readonly<Record<string, number>> = {
  // User-reported routing limit: GPT-5.5's public API window is 1.05M, but the
  // ChatGPT/Codex OAuth backend rejects prompts near that size, so OAuth-routed
  // sessions must compact at ~270K.
  "gpt-5.5": 272_000,
};
126+
116127
function normalizeCodexOauthModelId(modelId: string): string {
117128
// Accept either provider:model or bare model ids and normalize to providerModelId.
118129
const colonIndex = modelId.indexOf(":");
@@ -130,3 +141,7 @@ export function isCodexOauthAllowedModelId(modelId: string): boolean {
130141
/**
 * Report whether a model is listed in CODEX_OAUTH_REQUIRED_MODELS.
 *
 * @param modelId - Model id; it is passed through normalizeCodexOauthModelId before the lookup.
 * @returns True when the normalized id is a member of the required-models set.
 */
export function isCodexOauthRequiredModelId(modelId: string): boolean {
  return CODEX_OAUTH_REQUIRED_MODELS.has(normalizeCodexOauthModelId(modelId));
}
144+
145+
/**
 * Look up the Codex OAuth context-window cap for a model, if one is defined.
 *
 * @param modelId - Model id; it is passed through normalizeCodexOauthModelId before the lookup.
 * @returns The capped context window in tokens, or null when the model has no OAuth-specific cap.
 */
export function getCodexOauthContextWindowOverride(modelId: string): number | null {
  const normalizedModelId = normalizeCodexOauthModelId(modelId);

  // The overrides table is a plain object literal, so an id such as "toString" or
  // "constructor" would otherwise resolve to an inherited Object.prototype member
  // (a function) that `?? null` does not filter out. Only own keys count as overrides.
  const hasOverride = Object.prototype.hasOwnProperty.call(
    CODEX_OAUTH_CONTEXT_WINDOW_OVERRIDES,
    normalizedModelId
  );
  if (!hasOverride) {
    return null;
  }

  return CODEX_OAUTH_CONTEXT_WINDOW_OVERRIDES[normalizedModelId] ?? null;
}

src/common/utils/compaction/contextLimit.test.ts

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,19 @@ import type { ProvidersConfigMap } from "@/common/orpc/types";
44
import { getModelStats } from "@/common/utils/tokens/modelStats";
55
import { getEffectiveContextLimit } from "./contextLimit";
66

7+
type ProviderConfigInfo = NonNullable<ProvidersConfigMap[string]>;
8+
9+
/**
 * Build a providers config map that contains only an `openai` entry.
 *
 * The defaults describe an enabled, configured provider with no API key set. `overrides`
 * is spread last, so each test only states the fields it wants to differ from those defaults.
 *
 * @param overrides - Fields to merge over the default `openai` entry.
 * @returns A providers config map with the merged `openai` entry.
 */
function providersWithOpenAI(overrides: Partial<ProviderConfigInfo>): ProvidersConfigMap {
  return {
    openai: {
      apiKeySet: false,
      isEnabled: true,
      isConfigured: true,
      ...overrides,
    },
  };
}
19+
720
describe("getEffectiveContextLimit", () => {
821
test("uses mapped model metadata for context limits", () => {
922
const config: ProvidersConfigMap = {
@@ -48,6 +61,89 @@ describe("getEffectiveContextLimit", () => {
4861
expect(toggledLimit).toBe(1_050_000);
4962
});
5063

64+
test("caps GPT-5.5 at the Codex OAuth context window when OAuth is the active auth route", () => {
65+
const oauthOnlyLimit = getEffectiveContextLimit(
66+
KNOWN_MODELS.GPT.id,
67+
false,
68+
providersWithOpenAI({ codexOauthSet: true })
69+
);
70+
expect(oauthOnlyLimit).toBe(272_000);
71+
72+
const defaultOauthLimit = getEffectiveContextLimit(
73+
KNOWN_MODELS.GPT.id,
74+
false,
75+
providersWithOpenAI({ apiKeySet: true, codexOauthSet: true })
76+
);
77+
expect(defaultOauthLimit).toBe(272_000);
78+
});
79+
80+
test("does not apply the GPT-5.5 OAuth cap to gateway-routed models", () => {
81+
const limit = getEffectiveContextLimit(
82+
"openrouter:openai/gpt-5.5",
83+
false,
84+
providersWithOpenAI({ codexOauthSet: true })
85+
);
86+
87+
expect(limit).toBe(1_050_000);
88+
});
89+
90+
test("keeps GPT-5.5's API context window when API key auth is selected", () => {
91+
const limit = getEffectiveContextLimit(
92+
KNOWN_MODELS.GPT.id,
93+
false,
94+
providersWithOpenAI({
95+
apiKeySet: true,
96+
codexOauthSet: true,
97+
codexOauthDefaultAuth: "apiKey",
98+
})
99+
);
100+
101+
expect(limit).toBe(1_050_000);
102+
});
103+
104+
test("does not treat unresolved API-key files as active API-key auth", () => {
105+
const limit = getEffectiveContextLimit(
106+
KNOWN_MODELS.GPT.id,
107+
false,
108+
providersWithOpenAI({
109+
apiKeyFile: "/missing/openai-key",
110+
codexOauthSet: true,
111+
codexOauthDefaultAuth: "apiKey",
112+
})
113+
);
114+
115+
expect(limit).toBe(272_000);
116+
});
117+
118+
test("uses GPT-5.5's API context window for resolved API-key files", () => {
119+
const limit = getEffectiveContextLimit(
120+
KNOWN_MODELS.GPT.id,
121+
false,
122+
providersWithOpenAI({
123+
apiKeyFile: "/readable/openai-key",
124+
apiKeySource: "file",
125+
codexOauthSet: true,
126+
codexOauthDefaultAuth: "apiKey",
127+
})
128+
);
129+
130+
expect(limit).toBe(1_050_000);
131+
});
132+
133+
test("detects env-sourced API keys when deciding GPT-5.5 Codex OAuth routing", () => {
134+
const limit = getEffectiveContextLimit(
135+
KNOWN_MODELS.GPT.id,
136+
false,
137+
providersWithOpenAI({
138+
apiKeySource: "env",
139+
codexOauthSet: true,
140+
codexOauthDefaultAuth: "apiKey",
141+
})
142+
);
143+
144+
expect(limit).toBe(1_050_000);
145+
});
146+
51147
test("uses Claude Sonnet 4.6's native 1M context without the beta toggle", () => {
52148
const baseLimit = getEffectiveContextLimit(KNOWN_MODELS.SONNET.id, false, null);
53149
const toggledLimit = getEffectiveContextLimit(KNOWN_MODELS.SONNET.id, true, null);

src/common/utils/compaction/contextLimit.ts

Lines changed: 107 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,14 @@
22
* Shared context limit utilities for compaction logic.
33
*
44
* Used by autoCompactionCheck and contextSwitchCheck to calculate
5-
* effective context limits accounting for 1M context toggle.
5+
* effective context limits accounting for auth-route caps and the 1M context toggle.
66
*/
77

8+
import {
9+
getCodexOauthContextWindowOverride,
10+
isCodexOauthAllowedModelId,
11+
isCodexOauthRequiredModelId,
12+
} from "@/common/constants/codexOAuth";
813
import type { ProvidersConfigMap } from "@/common/orpc/types";
914
import { supports1MContext } from "@/common/utils/ai/models";
1015
import {
@@ -13,8 +18,100 @@ import {
1318
} from "@/common/utils/providers/modelEntries";
1419
import { getModelStats } from "@/common/utils/tokens/modelStats";
1520

21+
/**
 * Narrow an arbitrary value to a plain key/value record.
 *
 * @param value - Any value, e.g. a provider config entry of unknown shape.
 * @returns The same reference typed as a record when `value` is a non-null, non-array
 *   object; otherwise null.
 */
function asRecord(value: unknown): Record<string, unknown> | null {
  if (typeof value !== "object" || value === null || Array.isArray(value)) {
    return null;
  }
  return value as Record<string, unknown>;
}
27+
28+
/**
 * Type guard for strings that contain at least one non-whitespace character.
 *
 * @param value - Any value.
 * @returns True only when `value` is a string whose trimmed length is greater than zero.
 */
function hasNonEmptyString(value: unknown): value is string {
  return typeof value === "string" && value.trim().length > 0;
}
31+
32+
/**
 * Extract the provider model id from a `provider:model` string when the provider is `openai`.
 *
 * The split happens at the first colon only, so any later colons stay in the returned id.
 *
 * @param model - Model string, e.g. "openai:gpt-5.5".
 * @returns The part after the first colon, or null when there is no colon, the provider or
 *   model part is empty, or the provider is anything other than "openai" (for example a
 *   gateway-routed id such as "openrouter:openai/gpt-5.5").
 */
function getOpenAIProviderModelId(model: string): string | null {
  const separatorIndex = model.indexOf(":");
  // Reject a missing separator (-1), an empty provider (0), and an empty model part.
  if (separatorIndex <= 0 || separatorIndex === model.length - 1) {
    return null;
  }

  const provider = model.slice(0, separatorIndex);
  if (provider !== "openai") {
    return null;
  }

  return model.slice(separatorIndex + 1);
}
45+
46+
/**
 * Report whether an OpenAI provider config entry indicates stored Codex OAuth tokens.
 *
 * Two shapes are recognized:
 * - a `codexOauthSet: true` flag, or
 * - a raw `codexOauth` object with `type: "oauth"`, non-empty `access` and `refresh`
 *   strings, and a finite numeric `expires`.
 *
 * @param config - The `openai` provider entry, of unknown shape (may be undefined).
 * @returns True when either shape is present; false for non-objects and anything else.
 */
function hasCodexOauthTokens(config: unknown): boolean {
  const record = asRecord(config);
  if (!record) {
    return false;
  }

  if (record.codexOauthSet === true) {
    return true;
  }

  // Backend compaction can receive raw providers.jsonc config in older tests/fallback paths.
  // Detect the stored token shape without importing node-only OAuth parsing into common code.
  const oauth = asRecord(record.codexOauth);
  return (
    oauth?.type === "oauth" &&
    hasNonEmptyString(oauth.access) &&
    hasNonEmptyString(oauth.refresh) &&
    typeof oauth.expires === "number" &&
    Number.isFinite(oauth.expires)
  );
}
67+
68+
/**
 * Report whether an OpenAI provider config entry indicates a usable API key.
 *
 * A key counts as present when `apiKeySource` is "config", "file", or "env", when
 * `apiKeySet` is true, or when `apiKey` is a non-empty string. An `apiKeyFile` path is
 * not inspected here, so a file path without a resolved `apiKeySource` does not count.
 *
 * @param config - The `openai` provider entry, of unknown shape (may be undefined).
 * @returns True when any of the indicators above is present; false otherwise.
 */
function hasOpenAIApiKey(config: unknown): boolean {
  const record = asRecord(config);
  if (!record) {
    return false;
  }

  const apiKeySource = record.apiKeySource;
  if (apiKeySource === "config" || apiKeySource === "file" || apiKeySource === "env") {
    return true;
  }

  return record.apiKeySet === true || hasNonEmptyString(record.apiKey);
}
81+
82+
/**
 * Resolve the Codex OAuth context cap that applies to a model, if any.
 *
 * Checks run in this order, returning null (no cap) at the first one that fails:
 * 1. The model is an `openai:`-prefixed id that passes isCodexOauthAllowedModelId.
 * 2. A context-window override exists for that model.
 * 3. The `openai` provider config shows Codex OAuth tokens.
 *
 * Once those hold, the cap applies when the model requires Codex OAuth, when no OpenAI
 * API key is detected, or when `codexOauthDefaultAuth` is anything other than "apiKey".
 *
 * @param model - Full model string, e.g. "openai:gpt-5.5".
 * @param providersConfig - Providers config map, or null when unavailable.
 * @returns The OAuth context cap in tokens, or null when the cap does not apply.
 */
function getCodexOauthContextLimit(
  model: string,
  providersConfig: ProvidersConfigMap | null
): number | null {
  const modelId = getOpenAIProviderModelId(model);
  if (!modelId || !isCodexOauthAllowedModelId(modelId)) {
    return null;
  }

  const oauthLimit = getCodexOauthContextWindowOverride(modelId);
  if (oauthLimit == null) {
    return null;
  }

  const openAIConfig = providersConfig?.openai;
  if (!hasCodexOauthTokens(openAIConfig)) {
    return null;
  }

  // Models that require Codex OAuth get the cap regardless of any API key.
  if (isCodexOauthRequiredModelId(modelId)) {
    return oauthLimit;
  }

  // With OAuth tokens and no API key detected, the cap applies.
  if (!hasOpenAIApiKey(openAIConfig)) {
    return oauthLimit;
  }

  // Both credentials exist: only an explicit "apiKey" default opts out of the cap.
  const record = asRecord(openAIConfig);
  return record?.codexOauthDefaultAuth === "apiKey" ? null : oauthLimit;
}
112+
16113
/**
17-
* Get effective context limit for a model, accounting for custom overrides and 1M toggle.
114+
* Get effective context limit for a model, accounting for custom overrides, auth-route caps, and 1M toggle.
18115
*
19116
* @param model - Model ID (e.g., "anthropic:claude-sonnet-4-5")
20117
* @param use1M - Whether 1M context is enabled in settings
@@ -32,6 +129,14 @@ export function getEffectiveContextLimit(
32129
const baseLimit = customOverride ?? stats?.max_input_tokens ?? null;
33130
if (!baseLimit) return null;
34131

132+
// ChatGPT/Codex OAuth can impose a smaller routing-layer cap than the public OpenAI
133+
// API metadata. Cap the effective window so auto-compaction and token meters compact
134+
// before OAuth requests reach provider-side validation failures.
135+
const codexOauthLimit = getCodexOauthContextLimit(model, providersConfig);
136+
if (codexOauthLimit != null) {
137+
return Math.min(baseLimit, codexOauthLimit);
138+
}
139+
35140
// Anthropic's optional 1M beta is a runtime capability, so it must be gated on the
36141
// runtime model, not the mapped metadata model. Native 1M models already expose their
37142
// larger window through model stats above.

src/common/utils/tokens/models-extra.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,8 @@ export const modelsExtra: Record<string, ModelData> = {
131131

132132
// GPT-5.5 - Released April 23, 2026
133133
// Public API support covers Responses, Chat Completions, and Batch with a native
134-
// 1.05M context window and 128K max output. When routed through Codex OAuth, the
135-
// effective per-request cap remains 400K because of a routing-layer constraint.
134+
// 1.05M context window and 128K max output. When routed through Codex OAuth, Mux
135+
// caps the effective window separately at 272K because the ChatGPT routing layer's limit is lower.
136136
// Base pricing: $5/M input, $30/M output, $0.50/M cached input.
137137
// Above 272K prompt tokens: $10/M input, $45/M output, $1/M cached input.
138138
"gpt-5.5": {

src/common/utils/tokens/tokenMeterUtils.test.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { describe, expect, test } from "bun:test";
2+
import { KNOWN_MODELS } from "@/common/constants/knownModels";
23
import type { ProvidersConfigMap } from "@/common/orpc/types";
34
import { calculateTokenMeterData, formatTokens } from "./tokenMeterUtils";
45

@@ -76,6 +77,20 @@ describe("calculateTokenMeterData", () => {
7677
expect(result.totalPercentage).toBeCloseTo(1.1);
7778
});
7879

80+
test("uses the Codex OAuth cap for GPT-5.5 token meter percentages", () => {
81+
const result = calculateTokenMeterData(SAMPLE_USAGE, KNOWN_MODELS.GPT.id, false, false, {
82+
openai: {
83+
apiKeySet: false,
84+
isEnabled: true,
85+
isConfigured: true,
86+
codexOauthSet: true,
87+
},
88+
});
89+
90+
expect(result.maxTokens).toBe(272_000);
91+
expect(result.totalPercentage).toBeCloseTo((11_000 / 272_000) * 100);
92+
});
93+
7994
test("uses Claude Sonnet 4.6's native 1M context even when the beta toggle is off", () => {
8095
const result = calculateTokenMeterData(SAMPLE_USAGE, "anthropic:claude-sonnet-4-6", false);
8196

src/common/utils/tokens/tokenMeterUtils.ts

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,6 @@
11
import type { ProvidersConfigMap } from "@/common/orpc/types";
2-
import {
3-
getModelContextWindowOverride,
4-
resolveModelForMetadata,
5-
} from "@/common/utils/providers/modelEntries";
2+
import { getEffectiveContextLimit } from "@/common/utils/compaction/contextLimit";
63
import type { ChatUsageDisplay } from "./usageAggregator";
7-
import { getModelStats } from "./modelStats";
8-
import { supports1MContext } from "../ai/models";
94

105
// NOTE: Provide theme-matching fallbacks so token meters render consistently
116
// even if a host environment doesn't define the CSS variables (e.g., an embedded UI).
@@ -69,13 +64,7 @@ export function calculateTokenMeterData(
6964
): TokenMeterData {
7065
if (!usage) return { segments: [], totalTokens: 0, totalPercentage: 0 };
7166

72-
const metadataModel = resolveModelForMetadata(model, providersConfig);
73-
const modelStats = getModelStats(metadataModel);
74-
const customContextWindow = getModelContextWindowOverride(model, providersConfig);
75-
const maxTokens =
76-
use1M && supports1MContext(model)
77-
? 1_000_000
78-
: (customContextWindow ?? modelStats?.max_input_tokens);
67+
const maxTokens = getEffectiveContextLimit(model, use1M, providersConfig) ?? undefined;
7968

8069
// Total tokens used in the request.
8170
// For Anthropic prompt caching, cacheCreate tokens are reported separately but still

src/node/services/agentSession.ts

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2881,22 +2881,26 @@ export class AgentSession {
28812881

28822882
private getProvidersConfigForCompaction(): ProvidersConfigMap | null {
28832883
try {
2884-
// Some unit tests provide a minimal Config mock without providers helpers.
2884+
// Prefer ProviderService's safe config view: it includes env/file API-key source
2885+
// metadata plus the Codex OAuth presence bit, which context-limit resolution needs
2886+
// to distinguish GPT-5.5 API-key requests from lower-cap OAuth-routed requests.
2887+
const maybeAIService = this.aiService as AIService & {
2888+
getProvidersConfig?: () => ProvidersConfigMap | null;
2889+
};
2890+
if (typeof maybeAIService.getProvidersConfig === "function") {
2891+
return maybeAIService.getProvidersConfig();
2892+
}
2893+
2894+
// Some unit tests provide minimal service mocks; fall back to raw config so custom
2895+
// provider model context overrides still work in those environments.
28852896
const maybeConfig = this.config as Config & {
28862897
loadProvidersConfig?: () => ProvidersConfigMap | null;
28872898
};
28882899
if (typeof maybeConfig.loadProvidersConfig !== "function") {
28892900
return null;
28902901
}
28912902

2892-
const providersConfig = maybeConfig.loadProvidersConfig();
2893-
if (!providersConfig) {
2894-
return null;
2895-
}
2896-
2897-
// Compaction limit resolution only reads provider model overrides (models[*].contextWindow*).
2898-
// Runtime config stores these in providers.jsonc, so the raw config shape is sufficient here.
2899-
return providersConfig as unknown as ProvidersConfigMap;
2903+
return maybeConfig.loadProvidersConfig() as unknown as ProvidersConfigMap | null;
29002904
} catch {
29012905
// Best-effort read: if config cannot be loaded, keep null and rely on
29022906
// built-in model limits. This matches prior behavior without crashing.

0 commit comments

Comments
 (0)