🤖 fix: cap GPT-5.5 OAuth context (#3333)

coadler · Mux · web-flow · commit 7893af434343 · 2026-05-19T16:34:55.000Z
Summary

Caps GPT-5.5's effective context window at 272K tokens when requests are
routed through Codex OAuth, while preserving the public 1.05M API-key
context window.

Background

GPT-5.5's API metadata advertises a 1.05M context window, but the
ChatGPT/Codex OAuth route has a lower practical routing-layer limit. Mux
was using the API metadata everywhere, so OAuth-routed users could see
token meters and compaction thresholds that allowed prompts far beyond
the OAuth backend's cap.

Implementation

- Adds a Codex OAuth context-window override for `gpt-5.5`.
- Applies the override from shared effective context-limit logic only
when provider config indicates Codex OAuth is the active route.
- Keeps API-key-selected GPT-5.5 requests on the 1.05M public API limit.
- Routes token meter calculations through the same effective limit
helper used by compaction.
- Uses the safe provider config view for backend compaction checks so
env/file API-key source and OAuth presence metadata are visible.

Validation

- `bun test src/common/utils/compaction/contextLimit.test.ts
src/common/utils/tokens/tokenMeterUtils.test.ts`
- `bunx eslint src/common/constants/codexOAuth.ts
src/common/utils/compaction/contextLimit.ts
src/common/utils/compaction/contextLimit.test.ts
src/common/utils/tokens/models-extra.ts
src/common/utils/tokens/tokenMeterUtils.ts
src/common/utils/tokens/tokenMeterUtils.test.ts
src/node/services/agentSession.ts`
- `bunx prettier --check src/common/constants/codexOAuth.ts
src/common/utils/compaction/contextLimit.ts
src/common/utils/compaction/contextLimit.test.ts
src/common/utils/tokens/models-extra.ts
src/common/utils/tokens/tokenMeterUtils.ts
src/common/utils/tokens/tokenMeterUtils.test.ts
src/node/services/agentSession.ts`
- `git diff --check`
- `nix fmt -- flake.nix`
- `make static-check NIX=`

Risks

Low-to-medium risk, limited to context-limit display and compaction
decisions. The main behavioral risk is misclassifying the OpenAI auth
route from provider config; tests cover OAuth-only, OAuth-default, explicit
API-key, and env API-key cases.

Pains

Local `make static-check` required environment repair: `node-pty`,
`shfmt`, and `hadolint` were missing from the workspace. The repo's
`fmt-nix-check` target also fails locally when its temporary copied
flake is outside a git repo, so I separately ran `nix fmt -- flake.nix`
and then ran `make static-check NIX=`.

---

_Generated with `mux` • Model: `openai:gpt-5.5` • Thinking: `xhigh` •
Cost: `$0.00`_

<!-- mux-attribution: model=openai:gpt-5.5 thinking=xhigh costs=0.00 -->

Co-authored-by: Mux <noreply@coder.com>
diff --git a/src/common/constants/codexOAuth.ts b/src/common/constants/codexOAuth.ts
@@ -113,6 +113,17 @@ export const CODEX_OAUTH_REQUIRED_MODELS = new Set<string>([
   "gpt-5.3-codex-spark",
 ]);
 
+/**
+ * Runtime context caps that differ when an otherwise public API model is routed through
+ * ChatGPT/Codex OAuth. Keep these separate from model metadata so API-key requests can
+ * still use the public OpenAI limits.
+ */
+const CODEX_OAUTH_CONTEXT_WINDOW_OVERRIDES: Record<string, number> = {
+  // User-reported routing limit: GPT-5.5's public API window is 1.05M, but the
+  // ChatGPT/Codex OAuth backend rejects prompts near that size, so Mux must compact at ~270K.
+  "gpt-5.5": 272_000,
+};
+
 function normalizeCodexOauthModelId(modelId: string): string {
   // Accept either provider:model or bare model ids and normalize to providerModelId.
   const colonIndex = modelId.indexOf(":");
@@ -130,3 +141,7 @@ export function isCodexOauthAllowedModelId(modelId: string): boolean {
 export function isCodexOauthRequiredModelId(modelId: string): boolean {
   return CODEX_OAUTH_REQUIRED_MODELS.has(normalizeCodexOauthModelId(modelId));
 }
+
+export function getCodexOauthContextWindowOverride(modelId: string): number | null {
+  return CODEX_OAUTH_CONTEXT_WINDOW_OVERRIDES[normalizeCodexOauthModelId(modelId)] ?? null;
+}
diff --git a/src/common/utils/compaction/contextLimit.test.ts b/src/common/utils/compaction/contextLimit.test.ts
@@ -4,6 +4,19 @@ import type { ProvidersConfigMap } from "@/common/orpc/types";
 import { getModelStats } from "@/common/utils/tokens/modelStats";
 import { getEffectiveContextLimit } from "./contextLimit";
 
+type ProviderConfigInfo = NonNullable<ProvidersConfigMap[string]>;
+
+function providersWithOpenAI(overrides: Partial<ProviderConfigInfo>): ProvidersConfigMap {
+  return {
+    openai: {
+      apiKeySet: false,
+      isEnabled: true,
+      isConfigured: true,
+      ...overrides,
+    },
+  };
+}
+
 describe("getEffectiveContextLimit", () => {
   test("uses mapped model metadata for context limits", () => {
     const config: ProvidersConfigMap = {
@@ -48,6 +61,89 @@ describe("getEffectiveContextLimit", () => {
     expect(toggledLimit).toBe(1_050_000);
   });
 
+  test("caps GPT-5.5 at the Codex OAuth context window when OAuth is the active auth route", () => {
+    const oauthOnlyLimit = getEffectiveContextLimit(
+      KNOWN_MODELS.GPT.id,
+      false,
+      providersWithOpenAI({ codexOauthSet: true })
+    );
+    expect(oauthOnlyLimit).toBe(272_000);
+
+    const defaultOauthLimit = getEffectiveContextLimit(
+      KNOWN_MODELS.GPT.id,
+      false,
+      providersWithOpenAI({ apiKeySet: true, codexOauthSet: true })
+    );
+    expect(defaultOauthLimit).toBe(272_000);
+  });
+
+  test("does not apply the GPT-5.5 OAuth cap to gateway-routed models", () => {
+    const limit = getEffectiveContextLimit(
+      "openrouter:openai/gpt-5.5",
+      false,
+      providersWithOpenAI({ codexOauthSet: true })
+    );
+
+    expect(limit).toBe(1_050_000);
+  });
+
+  test("keeps GPT-5.5's API context window when API key auth is selected", () => {
+    const limit = getEffectiveContextLimit(
+      KNOWN_MODELS.GPT.id,
+      false,
+      providersWithOpenAI({
+        apiKeySet: true,
+        codexOauthSet: true,
+        codexOauthDefaultAuth: "apiKey",
+      })
+    );
+
+    expect(limit).toBe(1_050_000);
+  });
+
+  test("does not treat unresolved API-key files as active API-key auth", () => {
+    const limit = getEffectiveContextLimit(
+      KNOWN_MODELS.GPT.id,
+      false,
+      providersWithOpenAI({
+        apiKeyFile: "/missing/openai-key",
+        codexOauthSet: true,
+        codexOauthDefaultAuth: "apiKey",
+      })
+    );
+
+    expect(limit).toBe(272_000);
+  });
+
+  test("uses GPT-5.5's API context window for resolved API-key files", () => {
+    const limit = getEffectiveContextLimit(
+      KNOWN_MODELS.GPT.id,
+      false,
+      providersWithOpenAI({
+        apiKeyFile: "/readable/openai-key",
+        apiKeySource: "file",
+        codexOauthSet: true,
+        codexOauthDefaultAuth: "apiKey",
+      })
+    );
+
+    expect(limit).toBe(1_050_000);
+  });
+
+  test("detects env-sourced API keys when deciding GPT-5.5 Codex OAuth routing", () => {
+    const limit = getEffectiveContextLimit(
+      KNOWN_MODELS.GPT.id,
+      false,
+      providersWithOpenAI({
+        apiKeySource: "env",
+        codexOauthSet: true,
+        codexOauthDefaultAuth: "apiKey",
+      })
+    );
+
+    expect(limit).toBe(1_050_000);
+  });
+
   test("uses Claude Sonnet 4.6's native 1M context without the beta toggle", () => {
     const baseLimit = getEffectiveContextLimit(KNOWN_MODELS.SONNET.id, false, null);
     const toggledLimit = getEffectiveContextLimit(KNOWN_MODELS.SONNET.id, true, null);
diff --git a/src/common/utils/compaction/contextLimit.ts b/src/common/utils/compaction/contextLimit.ts
@@ -2,9 +2,14 @@
  * Shared context limit utilities for compaction logic.
  *
  * Used by autoCompactionCheck and contextSwitchCheck to calculate
- * effective context limits accounting for 1M context toggle.
+ * effective context limits accounting for auth-route caps and the 1M context toggle.
  */
 
+import {
+  getCodexOauthContextWindowOverride,
+  isCodexOauthAllowedModelId,
+  isCodexOauthRequiredModelId,
+} from "@/common/constants/codexOAuth";
 import type { ProvidersConfigMap } from "@/common/orpc/types";
 import { supports1MContext } from "@/common/utils/ai/models";
 import {
@@ -13,8 +18,100 @@ import {
 } from "@/common/utils/providers/modelEntries";
 import { getModelStats } from "@/common/utils/tokens/modelStats";
 
+function asRecord(value: unknown): Record<string, unknown> | null {
+  if (typeof value !== "object" || value === null || Array.isArray(value)) {
+    return null;
+  }
+  return value as Record<string, unknown>;
+}
+
+function hasNonEmptyString(value: unknown): value is string {
+  return typeof value === "string" && value.trim().length > 0;
+}
+
+function getOpenAIProviderModelId(model: string): string | null {
+  const separatorIndex = model.indexOf(":");
+  if (separatorIndex <= 0 || separatorIndex === model.length - 1) {
+    return null;
+  }
+
+  const provider = model.slice(0, separatorIndex);
+  if (provider !== "openai") {
+    return null;
+  }
+
+  return model.slice(separatorIndex + 1);
+}
+
+function hasCodexOauthTokens(config: unknown): boolean {
+  const record = asRecord(config);
+  if (!record) {
+    return false;
+  }
+
+  if (record.codexOauthSet === true) {
+    return true;
+  }
+
+  // Backend compaction can receive raw providers.jsonc config in older tests/fallback paths.
+  // Detect the stored token shape without importing node-only OAuth parsing into common code.
+  const oauth = asRecord(record.codexOauth);
+  return (
+    oauth?.type === "oauth" &&
+    hasNonEmptyString(oauth.access) &&
+    hasNonEmptyString(oauth.refresh) &&
+    typeof oauth.expires === "number" &&
+    Number.isFinite(oauth.expires)
+  );
+}
+
+function hasOpenAIApiKey(config: unknown): boolean {
+  const record = asRecord(config);
+  if (!record) {
+    return false;
+  }
+
+  const apiKeySource = record.apiKeySource;
+  if (apiKeySource === "config" || apiKeySource === "file" || apiKeySource === "env") {
+    return true;
+  }
+
+  return record.apiKeySet === true || hasNonEmptyString(record.apiKey);
+}
+
+function getCodexOauthContextLimit(
+  model: string,
+  providersConfig: ProvidersConfigMap | null
+): number | null {
+  const modelId = getOpenAIProviderModelId(model);
+  if (!modelId || !isCodexOauthAllowedModelId(modelId)) {
+    return null;
+  }
+
+  const oauthLimit = getCodexOauthContextWindowOverride(modelId);
+  if (oauthLimit == null) {
+    return null;
+  }
+
+  const openAIConfig = providersConfig?.openai;
+  if (!hasCodexOauthTokens(openAIConfig)) {
+    return null;
+  }
+
+  if (isCodexOauthRequiredModelId(modelId)) {
+    return oauthLimit;
+  }
+
+  if (!hasOpenAIApiKey(openAIConfig)) {
+    return oauthLimit;
+  }
+
+  const record = asRecord(openAIConfig);
+  return record?.codexOauthDefaultAuth === "apiKey" ? null : oauthLimit;
+}
+
 /**
- * Get effective context limit for a model, accounting for custom overrides and 1M toggle.
+ * Get effective context limit for a model, accounting for custom overrides, auth-route caps, and 1M toggle.
  *
  * @param model - Model ID (e.g., "anthropic:claude-sonnet-4-5")
  * @param use1M - Whether 1M context is enabled in settings
@@ -32,6 +129,14 @@ export function getEffectiveContextLimit(
   const baseLimit = customOverride ?? stats?.max_input_tokens ?? null;
   if (!baseLimit) return null;
 
+  // ChatGPT/Codex OAuth can impose a smaller routing-layer cap than the public OpenAI
+  // API metadata. Cap the effective window so auto-compaction fires and token meters
+  // reflect the real limit before OAuth requests hit provider-side validation failures.
+  const codexOauthLimit = getCodexOauthContextLimit(model, providersConfig);
+  if (codexOauthLimit != null) {
+    return Math.min(baseLimit, codexOauthLimit);
+  }
+
   // Anthropic's optional 1M beta is a runtime capability, so it must be gated on the
   // runtime model, not the mapped metadata model. Native 1M models already expose their
   // larger window through model stats above.
diff --git a/src/common/utils/tokens/models-extra.ts b/src/common/utils/tokens/models-extra.ts
@@ -131,8 +131,8 @@ export const modelsExtra: Record<string, ModelData> = {
 
   // GPT-5.5 - Released April 23, 2026
   // Public API support covers Responses, Chat Completions, and Batch with a native
-  // 1.05M context window and 128K max output. When routed through Codex OAuth, the
-  // effective per-request cap remains 400K because of a routing-layer constraint.
+  // 1.05M context window and 128K max output. When routed through Codex OAuth, Mux
+  // caps the effective window separately at 272K because the ChatGPT routing-layer limit is lower.
   // Base pricing: $5/M input, $30/M output, $0.50/M cached input.
   // Above 272K prompt tokens: $10/M input, $45/M output, $1/M cached input.
   "gpt-5.5": {
diff --git a/src/common/utils/tokens/tokenMeterUtils.test.ts b/src/common/utils/tokens/tokenMeterUtils.test.ts
@@ -1,4 +1,5 @@
 import { describe, expect, test } from "bun:test";
+import { KNOWN_MODELS } from "@/common/constants/knownModels";
 import type { ProvidersConfigMap } from "@/common/orpc/types";
 import { calculateTokenMeterData, formatTokens } from "./tokenMeterUtils";
 
@@ -76,6 +77,20 @@ describe("calculateTokenMeterData", () => {
     expect(result.totalPercentage).toBeCloseTo(1.1);
   });
 
+  test("uses the Codex OAuth cap for GPT-5.5 token meter percentages", () => {
+    const result = calculateTokenMeterData(SAMPLE_USAGE, KNOWN_MODELS.GPT.id, false, false, {
+      openai: {
+        apiKeySet: false,
+        isEnabled: true,
+        isConfigured: true,
+        codexOauthSet: true,
+      },
+    });
+
+    expect(result.maxTokens).toBe(272_000);
+    expect(result.totalPercentage).toBeCloseTo((11_000 / 272_000) * 100);
+  });
+
   test("uses Claude Sonnet 4.6's native 1M context even when the beta toggle is off", () => {
     const result = calculateTokenMeterData(SAMPLE_USAGE, "anthropic:claude-sonnet-4-6", false);
 
diff --git a/src/common/utils/tokens/tokenMeterUtils.ts b/src/common/utils/tokens/tokenMeterUtils.ts
@@ -1,11 +1,6 @@
 import type { ProvidersConfigMap } from "@/common/orpc/types";
-import {
-  getModelContextWindowOverride,
-  resolveModelForMetadata,
-} from "@/common/utils/providers/modelEntries";
+import { getEffectiveContextLimit } from "@/common/utils/compaction/contextLimit";
 import type { ChatUsageDisplay } from "./usageAggregator";
-import { getModelStats } from "./modelStats";
-import { supports1MContext } from "../ai/models";
 
 // NOTE: Provide theme-matching fallbacks so token meters render consistently
 // even if a host environment doesn't define the CSS variables (e.g., an embedded UI).
@@ -69,13 +64,7 @@ export function calculateTokenMeterData(
 ): TokenMeterData {
   if (!usage) return { segments: [], totalTokens: 0, totalPercentage: 0 };
 
-  const metadataModel = resolveModelForMetadata(model, providersConfig);
-  const modelStats = getModelStats(metadataModel);
-  const customContextWindow = getModelContextWindowOverride(model, providersConfig);
-  const maxTokens =
-    use1M && supports1MContext(model)
-      ? 1_000_000
-      : (customContextWindow ?? modelStats?.max_input_tokens);
+  const maxTokens = getEffectiveContextLimit(model, use1M, providersConfig) ?? undefined;
 
   // Total tokens used in the request.
   // For Anthropic prompt caching, cacheCreate tokens are reported separately but still
diff --git a/src/node/services/agentSession.ts b/src/node/services/agentSession.ts
@@ -2881,22 +2881,26 @@ export class AgentSession {
 
   private getProvidersConfigForCompaction(): ProvidersConfigMap | null {
     try {
-      // Some unit tests provide a minimal Config mock without providers helpers.
+      // Prefer ProviderService's safe config view: it includes env/file API-key source
+      // metadata plus the Codex OAuth presence bit, which context-limit resolution needs
+      // to distinguish GPT-5.5 API-key requests from lower-cap OAuth-routed requests.
+      const maybeAIService = this.aiService as AIService & {
+        getProvidersConfig?: () => ProvidersConfigMap | null;
+      };
+      if (typeof maybeAIService.getProvidersConfig === "function") {
+        return maybeAIService.getProvidersConfig();
+      }
+
+      // Some unit tests provide minimal service mocks; fall back to raw config so custom
+      // provider model context overrides still work in those environments.
       const maybeConfig = this.config as Config & {
         loadProvidersConfig?: () => ProvidersConfigMap | null;
       };
       if (typeof maybeConfig.loadProvidersConfig !== "function") {
         return null;
       }
 
-      const providersConfig = maybeConfig.loadProvidersConfig();
-      if (!providersConfig) {
-        return null;
-      }
-
-      // Compaction limit resolution only reads provider model overrides (models[*].contextWindow*).
-      // Runtime config stores these in providers.jsonc, so the raw config shape is sufficient here.
-      return providersConfig as unknown as ProvidersConfigMap;
+      return maybeConfig.loadProvidersConfig() as unknown as ProvidersConfigMap | null;
     } catch {
       // Best-effort read: if config cannot be loaded, keep null and rely on
       // built-in model limits. This matches prior behavior without crashing.