From 2df70a27c21c8b6a82219148f541167a8d29aee0 Mon Sep 17 00:00:00 2001 From: Landon Cox Date: Thu, 18 Jun 2026 19:39:48 -0700 Subject: [PATCH 1/3] fix(api-proxy): 403 for terminal caps; fix Anthropic input credits Two related token-budget fixes: 1. Terminal hard caps (effective_tokens, max_runs, max_cache_misses, ai_credits) now reject with HTTP 403 instead of 429. LLM SDK clients treat 429 as a transient rate-limit and retry-storm against a cap that never recovers, exhausting the run budget until the step times out. 403 is non-retryable, so the agent stops cleanly. The per-IP rate limiter keeps returning 429 (with Retry-After) since it is recoverable. 2. AI-credit calculation is now provider-aware. Anthropic reports input_tokens as the NON-cached input only (cache_read/cache_creation are additive), whereas OpenAI reports it as the TOTAL with cache as a subset. The old code always subtracted cache from input, which for Anthropic over-subtracted the additive cache tokens and under-counted fresh input (often clamping it to zero). provider is now threaded through applyAiCreditsUsage -> calculateAiCredits. Provider string literals in the new code use centralized constants from the new provider-names module (named to avoid colliding with the providers/ adapter directory). 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- containers/api-proxy/Dockerfile | 2 +- .../api-proxy/guards/ai-credits-guard.js | 24 ++++-- .../api-proxy/guards/ai-credits-guard.test.js | 73 ++++++++++++++++--- .../api-proxy/guards/common-guard-checks.js | 15 +++- containers/api-proxy/provider-names.js | 23 ++++++ .../api-proxy/server.token-guards.test.js | 24 +++--- containers/api-proxy/server.websocket.test.js | 16 ++-- containers/api-proxy/token-budget-log.js | 2 +- docs/api-proxy-sidecar.md | 16 ++-- docs/awf-config-spec.md | 12 +-- docs/awf-config.schema.json | 8 +- src/awf-config-schema.json | 8 +- 12 files changed, 155 insertions(+), 68 deletions(-) create mode 100644 containers/api-proxy/provider-names.js diff --git a/containers/api-proxy/Dockerfile b/containers/api-proxy/Dockerfile index 5d77393ee..8be0a2e4c 100644 --- a/containers/api-proxy/Dockerfile +++ b/containers/api-proxy/Dockerfile @@ -29,7 +29,7 @@ COPY server.js logging.js metrics.js rate-limiter.js \ deprecated-header-tracker.js billing-headers.js upstream-response.js \ anthropic-cache.js otel.js otel-exporters.js otel-serialization.js \ token-budget-log.js blocked-request-diagnostics.js \ - provider-env-constants.js ./ + provider-env-constants.js provider-names.js ./ COPY guards/ ./guards/ COPY providers/ ./providers/ COPY transforms/ ./transforms/ diff --git a/containers/api-proxy/guards/ai-credits-guard.js b/containers/api-proxy/guards/ai-credits-guard.js index e1c3758e5..daf67760c 100644 --- a/containers/api-proxy/guards/ai-credits-guard.js +++ b/containers/api-proxy/guards/ai-credits-guard.js @@ -4,6 +4,7 @@ const { logRequest, sanitizeForLog } = require('../logging'); const pricingByModel = require('../ai-credits-pricing'); const { resolveCatalogModel } = require('../models-dev-catalog'); const { parsePositiveNumber } = require('./guard-utils'); +const { PROVIDER_ANTHROPIC } = require('../provider-names'); const TOKENS_PER_MILLION = 1_000_000; const 
DOLLARS_PER_CREDIT = 0.01; @@ -165,17 +166,24 @@ function checkUnknownModelRejection(model) { }; } -function calculateAiCredits(normalizedUsage, model, state = aiCreditsState) { +function calculateAiCredits(normalizedUsage, model, state = aiCreditsState, provider = undefined) { const pricing = resolveModelPricing(model, state); if (!pricing) return null; - // Both Anthropic and OpenAI report input_tokens as the TOTAL input including - // cache_read and cache_creation tokens. To avoid double-counting, subtract - // cached portions before applying the full input rate. - const totalInput = normalizedUsage.input_tokens || 0; + // input_tokens semantics differ by provider: + // - Anthropic reports input_tokens as the NON-cached input only; + // cache_read_input_tokens and cache_creation_input_tokens are reported + // separately and are ADDITIVE to input_tokens. Subtracting them here would + // over-subtract and undercount the genuinely-fresh input tokens. + // - OpenAI (and OpenAI-compatible providers) report prompt_tokens/input_tokens + // as the TOTAL input, with cached tokens being a SUBSET. Those must be + // subtracted before applying the full input rate to avoid double-counting. + const reportedInput = normalizedUsage.input_tokens || 0; const cacheReadTokens = normalizedUsage.cache_read_tokens || 0; const cacheWriteTokens = normalizedUsage.cache_write_tokens || 0; - const nonCachedInput = Math.max(0, totalInput - cacheReadTokens - cacheWriteTokens); + const nonCachedInput = provider === PROVIDER_ANTHROPIC + ? 
reportedInput + : Math.max(0, reportedInput - cacheReadTokens - cacheWriteTokens); const inputCredits = (nonCachedInput * pricing.input) / CREDIT_DENOMINATOR; const cachedInputCredits = (cacheReadTokens * pricing.cachedInput) / CREDIT_DENOMINATOR; @@ -194,10 +202,10 @@ function calculateAiCredits(normalizedUsage, model, state = aiCreditsState) { }; } -function applyAiCreditsUsage(normalizedUsage, model) { +function applyAiCreditsUsage(normalizedUsage, model, provider = undefined) { if (!normalizedUsage) return null; const safeModel = model || 'unknown'; - const calc = calculateAiCredits(normalizedUsage, safeModel); + const calc = calculateAiCredits(normalizedUsage, safeModel, aiCreditsState, provider); if (!calc) return null; if (!Object.hasOwn(aiCreditsState.byModel, safeModel)) { diff --git a/containers/api-proxy/guards/ai-credits-guard.test.js b/containers/api-proxy/guards/ai-credits-guard.test.js index 6dc486ddb..379742482 100644 --- a/containers/api-proxy/guards/ai-credits-guard.test.js +++ b/containers/api-proxy/guards/ai-credits-guard.test.js @@ -84,30 +84,79 @@ describe('ai-credits-guard', () => { expect(getAiCreditsReflectState().by_model['claude-sonnet-4-6-20260601'].total).toBeCloseTo(0.5175, 10); }); - it('does not double-count cached tokens (cache_read included in input_tokens)', () => { - // Simulates: 3M total input, 2.9M from cache, 0.1M new input - // This is how Anthropic reports: input_tokens is the total (includes cache hits) + it('does not double-count cached tokens when input_tokens is total-inclusive (OpenAI-style)', () => { + // OpenAI (Chat Completions and Responses API) reports prompt_tokens/input_tokens + // as the TOTAL input, with cached tokens being a subset. When no provider is + // passed, the calculation defaults to this total-inclusive interpretation. + // Simulates: 3M total input, 2.9M from cache, 0.1M new input. 
const usage = applyAiCreditsUsage({ input_tokens: 3_000_000, cache_read_tokens: 2_900_000, output_tokens: 50_000, - }, 'claude-sonnet-4-6'); + }, 'gpt-5.4'); // nonCached = 3M - 2.9M = 100K - // inputCredits = 100_000 × $3.00 / 10000 = 30 - // cachedInputCredits = 2_900_000 × $0.30 / 10000 = 87 + // inputCredits = 100_000 × $2.50 / 10000 = 25 + // cachedInputCredits = 2_900_000 × $0.25 / 10000 = 72.5 // outputCredits = 50_000 × $15.00 / 10000 = 75 - // total = 192 AIC - expect(usage.inputCreditsThisResponse).toBeCloseTo(30, 5); - expect(usage.cachedInputCreditsThisResponse).toBeCloseTo(87, 5); + // total = 172.5 AIC + expect(usage.inputCreditsThisResponse).toBeCloseTo(25, 5); + expect(usage.cachedInputCreditsThisResponse).toBeCloseTo(72.5, 5); expect(usage.outputCreditsThisResponse).toBeCloseTo(75, 5); - expect(usage.aiCreditsThisResponse).toBeCloseTo(192, 5); + expect(usage.aiCreditsThisResponse).toBeCloseTo(172.5, 5); - // BUG (before fix): would have been 30 + 87 + 75 + (2.9M × $3 / 10000) = 192 + 870 = 1062 - // i.e., cached tokens counted at full price AND cache rate + // BUG (before fix): would have been 25 + 72.5 + 75 + (2.9M × $2.50 / 10000) = 172.5 + 725 + // i.e., cached tokens counted at full price AND cache rate. expect(usage.aiCreditsThisResponse).toBeLessThan(250); }); + it('treats Anthropic input_tokens as non-cached (additive cache), not total-inclusive', () => { + // Anthropic reports input_tokens as the NON-cached input only; + // cache_read_input_tokens and cache_creation_input_tokens are reported + // separately and are ADDITIVE. The fresh input tokens must therefore be + // charged in full; cache totals must NOT be subtracted from them. 
+ const usage = applyAiCreditsUsage({ + input_tokens: 2000, + cache_read_tokens: 10_000, + output_tokens: 100, + }, 'claude-sonnet-4-6', 'anthropic'); + + // nonCached = 2000 (NOT 2000 - 10000 clamped to 0) + // inputCredits = 2000 × $3.00 / 10000 = 0.6 + // cachedInputCredits = 10_000 × $0.30 / 10000 = 0.3 + // outputCredits = 100 × $15.00 / 10000 = 0.15 + // total = 1.05 AIC + expect(usage.inputCreditsThisResponse).toBeCloseTo(0.6, 10); + expect(usage.cachedInputCreditsThisResponse).toBeCloseTo(0.3, 10); + expect(usage.outputCreditsThisResponse).toBeCloseTo(0.15, 10); + expect(usage.aiCreditsThisResponse).toBeCloseTo(1.05, 10); + + // BUG (before fix): nonCached = max(0, 2000 - 10000) = 0, undercounting the + // 2000 fresh input tokens → total would have been 0.45 instead of 1.05. + expect(usage.aiCreditsThisResponse).toBeGreaterThan(1.0); + }); + + it('charges Anthropic fresh input even when cache totals exceed input_tokens', () => { + // Reproduces the observed smoke-claude record: tiny fresh input alongside + // large cache read/write. Previously nonCached clamped to 0, dropping the + // fresh input charge entirely. 
+ const usage = applyAiCreditsUsage({ + input_tokens: 5, + cache_read_tokens: 38_673, + cache_write_tokens: 21_060, + output_tokens: 205, + }, 'claude-opus-4-7', 'anthropic'); + + // nonCached = 5 (Anthropic: additive, not subtracted) + // inputCredits = 5 × $5.00 / 10000 = 0.0025 + // cachedInput = 38_673 × $0.50 / 10000 = 1.93365 + // cacheWrite = 21_060 × $6.25 / 10000 = 13.1625 + // outputCredits = 205 × $25.00 / 10000 = 0.5125 + // total = 15.6111 + expect(usage.inputCreditsThisResponse).toBeCloseTo(0.0025, 10); + expect(usage.aiCreditsThisResponse).toBeCloseTo(15.6111, 4); + }); + it('warns and skips usage for unknown models', () => { const { lines } = collectLogOutput(); const usage = applyAiCreditsUsage({ input_tokens: 100 }, 'unknown-model'); diff --git a/containers/api-proxy/guards/common-guard-checks.js b/containers/api-proxy/guards/common-guard-checks.js index d3d92626d..b989ad6b1 100644 --- a/containers/api-proxy/guards/common-guard-checks.js +++ b/containers/api-proxy/guards/common-guard-checks.js @@ -65,7 +65,11 @@ function buildCommonGuardChecks(deps, model) { { block: getEffectiveTokenBlockState(), isBlocked: block => block && block.maxExceeded, - statusCode: 429, + // Terminal hard cap: returning 429 would make LLM SDK clients treat this + // as a transient rate-limit and retry-storm against a limit that never + // recovers, burning the budget until the step times out. 403 is + // non-retryable, so the agent stops cleanly. + statusCode: 403, eventName: 'effective_tokens_limit_exceeded', buildError: buildEffectiveTokenLimitError, buildLogFields: block => ({ @@ -76,7 +80,8 @@ function buildCommonGuardChecks(deps, model) { { block: getMaxRunsBlockState(), isBlocked: block => block && block.maxExceeded, - statusCode: 429, + // Terminal hard cap — non-retryable (see effective-tokens guard above). 
+ statusCode: 403, eventName: 'max_runs_exceeded', buildError: buildMaxRunsExceededError, buildLogFields: block => ({ @@ -87,7 +92,8 @@ function buildCommonGuardChecks(deps, model) { { block: getMaxCacheMissesBlockState(), isBlocked: block => block && block.maxExceeded, - statusCode: 429, + // Terminal hard cap — non-retryable (see effective-tokens guard above). + statusCode: 403, eventName: 'max_cache_misses_exceeded', buildError: buildMaxCacheMissesExceededError, buildLogFields: block => ({ @@ -109,7 +115,8 @@ function buildCommonGuardChecks(deps, model) { { block: getAiCreditsBlockState(), isBlocked: block => block && block.maxExceeded, - statusCode: 429, + // Terminal hard cap — non-retryable (see effective-tokens guard above). + statusCode: 403, eventName: 'ai_credits_limit_exceeded', buildError: buildAiCreditsLimitError, buildLogFields: block => ({ diff --git a/containers/api-proxy/provider-names.js b/containers/api-proxy/provider-names.js new file mode 100644 index 000000000..ddc3e967d --- /dev/null +++ b/containers/api-proxy/provider-names.js @@ -0,0 +1,23 @@ +'use strict'; + +/** + * Centralized provider name constants. + * + * Use these instead of bare string literals when comparing provider names so + * that provider checks are spelling-safe and easy to find/refactor. + * + * NB: this module is intentionally named `provider-names` rather than + * `providers` to avoid colliding with the `providers/` directory (the upstream + * adapter registry resolved via `require('./providers')`). 
+ */ +const PROVIDER_ANTHROPIC = 'anthropic'; +const PROVIDER_OPENAI = 'openai'; +const PROVIDER_COPILOT = 'copilot'; +const PROVIDER_GEMINI = 'gemini'; + +module.exports = { + PROVIDER_ANTHROPIC, + PROVIDER_OPENAI, + PROVIDER_COPILOT, + PROVIDER_GEMINI, +}; diff --git a/containers/api-proxy/server.token-guards.test.js b/containers/api-proxy/server.token-guards.test.js index 383487d12..cde83dc6f 100644 --- a/containers/api-proxy/server.token-guards.test.js +++ b/containers/api-proxy/server.token-guards.test.js @@ -1,6 +1,6 @@ /** - * Tests for proxyRequest guards: effective token limit (429) and - * max-runs limit (429). + * Tests for proxyRequest guards: effective token limit (403) and + * max-runs limit (403). * * Extracted from server.proxy.test.js. */ @@ -60,7 +60,7 @@ describe('proxyRequest effective token guard', () => { jest.restoreAllMocks(); }); - it('returns 429 with structured payload when effective token limit is reached', async () => { + it('returns 403 with structured payload when effective token limit is reached', async () => { const cycle = createMockUpstreamCycle(https); const req1 = makeReq(); @@ -81,7 +81,7 @@ describe('proxyRequest effective token guard', () => { await flushPromises(); expect(cycle.spy).toHaveBeenCalledTimes(1); - expect(res2.writeHead).toHaveBeenCalledWith(429, expect.objectContaining({ + expect(res2.writeHead).toHaveBeenCalledWith(403, expect.objectContaining({ 'Content-Type': 'application/json', })); const payload = JSON.parse(res2.end.mock.calls[0][0]); @@ -148,7 +148,7 @@ describe('proxyRequest max-runs guard', () => { jest.restoreAllMocks(); }); - it('returns 429 after max consecutive cache misses with non-zero input tokens', async () => { + it('returns 403 after max consecutive cache misses with non-zero input tokens', async () => { const cycle = createMockUpstreamCycle(https); const req1 = makeReq(); @@ -178,7 +178,7 @@ describe('proxyRequest max-runs guard', () => { await flushPromises(); 
expect(cycle.spy).toHaveBeenCalledTimes(2); - expect(res3.writeHead).toHaveBeenCalledWith(429, expect.objectContaining({ + expect(res3.writeHead).toHaveBeenCalledWith(403, expect.objectContaining({ 'Content-Type': 'application/json', })); const payload = JSON.parse(res3.end.mock.calls[0][0]); @@ -220,7 +220,7 @@ describe('proxyRequest max-runs guard', () => { await flushPromises(); expect(cycle.spy).toHaveBeenCalledTimes(3); - expect(res3.writeHead).not.toHaveBeenCalledWith(429, expect.anything()); + expect(res3.writeHead).not.toHaveBeenCalledWith(403, expect.anything()); }); }); @@ -230,7 +230,7 @@ describe('proxyRequest max-runs guard', () => { jest.restoreAllMocks(); }); - it('returns 429 with structured payload when max runs limit is exceeded', async () => { + it('returns 403 with structured payload when max runs limit is exceeded', async () => { const cycle = createMockUpstreamCycle(https); // First request completes successfully — consumes the single allowed run @@ -250,7 +250,7 @@ describe('proxyRequest max-runs guard', () => { await flushPromises(); expect(cycle.spy).toHaveBeenCalledTimes(1); - expect(res2.writeHead).toHaveBeenCalledWith(429, expect.objectContaining({ + expect(res2.writeHead).toHaveBeenCalledWith(403, expect.objectContaining({ 'Content-Type': 'application/json', })); const payload = JSON.parse(res2.end.mock.calls[0][0]); @@ -273,7 +273,7 @@ describe('proxyRequest max-runs guard', () => { await flushPromises(); expect(httpsRequestSpy).toHaveBeenCalledTimes(1); - expect(res.writeHead).not.toHaveBeenCalledWith(429, expect.anything()); + expect(res.writeHead).not.toHaveBeenCalledWith(403, expect.anything()); }); }); @@ -296,7 +296,7 @@ describe('proxyRequest max-ai-credits guard', () => { jest.restoreAllMocks(); }); - it('returns 429 with structured payload when ai credits limit is reached', async () => { + it('returns 403 with structured payload when ai credits limit is reached', async () => { const cycle = createMockUpstreamCycle(https); 
const req1 = makeReq(); @@ -317,7 +317,7 @@ describe('proxyRequest max-ai-credits guard', () => { await flushPromises(); expect(cycle.spy).toHaveBeenCalledTimes(1); - expect(res2.writeHead).toHaveBeenCalledWith(429, expect.objectContaining({ + expect(res2.writeHead).toHaveBeenCalledWith(403, expect.objectContaining({ 'Content-Type': 'application/json', })); const payload = JSON.parse(res2.end.mock.calls[0][0]); diff --git a/containers/api-proxy/server.websocket.test.js b/containers/api-proxy/server.websocket.test.js index a42c6e44f..7dc86b242 100644 --- a/containers/api-proxy/server.websocket.test.js +++ b/containers/api-proxy/server.websocket.test.js @@ -323,38 +323,38 @@ describe('proxyWebSocket security guards', () => { jest.restoreAllMocks(); }); - it('blocks with 429 when max-runs limit is exceeded', () => { + it('blocks with 403 when max-runs limit is exceeded', () => { process.env.AWF_MAX_RUNS = '1'; applyMaxRunsInvocation(); // consume the single allowed run const socket = makeMockSocket(); wsProxy(makeUpgradeReq(), socket, Buffer.alloc(0), 'api.openai.com', {}, 'openai'); - expect(socket.write).toHaveBeenCalledWith(expect.stringContaining('HTTP/1.1 429 Too Many Requests')); + expect(socket.write).toHaveBeenCalledWith(expect.stringContaining('HTTP/1.1 403 Forbidden')); expect(socket.write).toHaveBeenCalledWith(expect.stringContaining('"max_runs_exceeded"')); expect(socket.destroy).toHaveBeenCalled(); }); - it('blocks with 429 when effective-token limit is exceeded', () => { + it('blocks with 403 when effective-token limit is exceeded', () => { process.env.AWF_MAX_EFFECTIVE_TOKENS = '1'; applyEffectiveTokenUsage({ output_tokens: 5 }, 'gpt-4o'); // exceeds cap of 1 const socket = makeMockSocket(); wsProxy(makeUpgradeReq(), socket, Buffer.alloc(0), 'api.openai.com', {}, 'openai'); - expect(socket.write).toHaveBeenCalledWith(expect.stringContaining('HTTP/1.1 429 Too Many Requests')); + expect(socket.write).toHaveBeenCalledWith(expect.stringContaining('HTTP/1.1 
403 Forbidden')); expect(socket.write).toHaveBeenCalledWith(expect.stringContaining('"effective_tokens_limit_exceeded"')); expect(socket.destroy).toHaveBeenCalled(); }); - it('blocks with 429 when max-cache-misses limit is exceeded', () => { + it('blocks with 403 when max-cache-misses limit is exceeded', () => { process.env.AWF_MAX_CACHE_MISSES = '1'; applyMaxCacheMissesUsage({ input_tokens: 100, cache_read_tokens: 0 }); const socket = makeMockSocket(); wsProxy(makeUpgradeReq(), socket, Buffer.alloc(0), 'api.openai.com', {}, 'openai'); - expect(socket.write).toHaveBeenCalledWith(expect.stringContaining('HTTP/1.1 429 Too Many Requests')); + expect(socket.write).toHaveBeenCalledWith(expect.stringContaining('HTTP/1.1 403 Forbidden')); expect(socket.write).toHaveBeenCalledWith(expect.stringContaining('"max_cache_misses_exceeded"')); expect(socket.destroy).toHaveBeenCalled(); }); @@ -371,14 +371,14 @@ describe('proxyWebSocket security guards', () => { expect(socket.destroy).toHaveBeenCalled(); }); - it('blocks with 429 when ai-credits limit is exceeded', () => { + it('blocks with 403 when ai-credits limit is exceeded', () => { process.env.AWF_MAX_AI_CREDITS = '0.000001'; // tiny cap — any real usage will exceed it applyAiCreditsUsage({ input_tokens: 1_000_000, output_tokens: 1_000_000 }, 'gpt-4o'); const socket = makeMockSocket(); wsProxy(makeUpgradeReq(), socket, Buffer.alloc(0), 'api.openai.com', {}, 'openai'); - expect(socket.write).toHaveBeenCalledWith(expect.stringContaining('HTTP/1.1 429 Too Many Requests')); + expect(socket.write).toHaveBeenCalledWith(expect.stringContaining('HTTP/1.1 403 Forbidden')); expect(socket.write).toHaveBeenCalledWith(expect.stringContaining('"ai_credits_limit_exceeded"')); expect(socket.destroy).toHaveBeenCalled(); }); diff --git a/containers/api-proxy/token-budget-log.js b/containers/api-proxy/token-budget-log.js index 06e090893..74d9a1849 100644 --- a/containers/api-proxy/token-budget-log.js +++ 
b/containers/api-proxy/token-budget-log.js @@ -19,7 +19,7 @@ const { applyMaxCacheMissesUsage } = require('./guards/max-cache-misses-guard'); */ function computeTokenBudgetUsage({ logRequest, requestId, provider }, normalizedUsage, model) { const effectiveTokenUsage = applyEffectiveTokenUsage(normalizedUsage, model); - const aiCreditsUsage = applyAiCreditsUsage(normalizedUsage, model); + const aiCreditsUsage = applyAiCreditsUsage(normalizedUsage, model, provider); applyMaxCacheMissesUsage(normalizedUsage); if (aiCreditsUsage) { logRequest('info', 'token_budget_usage', { diff --git a/docs/api-proxy-sidecar.md b/docs/api-proxy-sidecar.md index 320fa2061..6e17770ba 100644 --- a/docs/api-proxy-sidecar.md +++ b/docs/api-proxy-sidecar.md @@ -877,7 +877,7 @@ After each successful upstream response, the proxy accumulates the effective tok - **Under budget**: Request is forwarded normally. - **Budget reached or exceeded**: Request is rejected immediately with: - - **HTTP `429 Too Many Requests`** + - **HTTP `403 Forbidden`** - **Error body**: ```json @@ -891,10 +891,10 @@ After each successful upstream response, the proxy accumulates the effective tok } ``` -WebSocket upgrade requests are also rejected with `429` when the budget is reached or exceeded. +WebSocket upgrade requests are also rejected with `403` when the budget is reached or exceeded. :::caution -Once the budget is reached or exceeded, **all subsequent requests in the run are rejected**. The budget is not recoverable — there is no way to "free up" tokens within a single run. +Once the budget is reached or exceeded, **all subsequent requests in the run are rejected**. The budget is not recoverable — there is no way to "free up" tokens within a single run. The rejection uses **HTTP `403`** (not `429`) precisely because the limit is terminal: a `429` would invite LLM SDK clients to retry with backoff against a cap that never recovers, burning the remaining run budget until the step times out. 
::: ### Threshold tracking and token steering @@ -971,10 +971,10 @@ The response includes: ### Detecting budget exhaustion -Agents and orchestrators should detect the `429` response and the `effective_tokens_limit_exceeded` error type. The error body is structured JSON and can be parsed programmatically: +Agents and orchestrators should detect the `403` response and the `effective_tokens_limit_exceeded` error type. The error body is structured JSON and can be parsed programmatically: ```javascript -if (response.status === 429) { +if (response.status === 403) { const body = await response.json(); if (body.error?.type === 'effective_tokens_limit_exceeded') { // Budget exhausted — stop making API calls @@ -1005,7 +1005,7 @@ Before forwarding each request to the upstream provider, the proxy checks the in - **Under limit**: Request is forwarded normally. - **Limit reached or exceeded**: Request is rejected immediately with: - - **HTTP `429 Too Many Requests`** + - **HTTP `403 Forbidden`** - **Error body**: ```json @@ -1019,7 +1019,7 @@ Before forwarding each request to the upstream provider, the proxy checks the in } ``` -WebSocket upgrade requests are also rejected with `429` when the limit is reached. +WebSocket upgrade requests are also rejected with `403` when the limit is reached. :::caution Once the limit is reached, **all subsequent requests in the run are rejected**. The counter is not recoverable within a single run. 
@@ -1045,7 +1045,7 @@ When `maxTurns` is not configured, `enabled` is `false` and `max_runs`/`remainin ### Detecting the limit ```javascript -if (response.status === 429) { +if (response.status === 403) { const body = await response.json(); if (body.error?.type === 'max_runs_exceeded') { console.log(`Run limit exceeded: ${body.error.invocation_count} / ${body.error.max_runs}`); diff --git a/docs/awf-config-spec.md b/docs/awf-config-spec.md index af67fbb53..bf39cf622 100644 --- a/docs/awf-config-spec.md +++ b/docs/awf-config-spec.md @@ -630,7 +630,7 @@ The API proxy MUST enforce the budget as follows: 3. **Rejection**: When the budget is reached or exceeded, the proxy MUST reject the request with: - - **HTTP status**: `429 Too Many Requests` + - **HTTP status**: `403 Forbidden` - **Content-Type**: `application/json` - **Response body**: ```json @@ -645,7 +645,7 @@ The API proxy MUST enforce the budget as follows: ``` 4. **WebSocket rejection**: For WebSocket upgrade requests, the proxy MUST - reject with `HTTP/1.1 429 Too Many Requests` and include the same JSON + reject with `HTTP/1.1 403 Forbidden` and include the same JSON error body before destroying the socket. 5. **Finality**: Once the budget is reached or exceeded, all subsequent requests in @@ -730,12 +730,12 @@ container. When configured, the proxy MUST enforce this budget in addition to any configured `maxEffectiveTokens` budget. Once cumulative AI credits reach or -exceed `maxAiCredits`, subsequent requests MUST be rejected with HTTP `429` +exceed `maxAiCredits`, subsequent requests MUST be rejected with HTTP `403` and error type `ai_credits_limit_exceeded`. Regardless of `maxAiCredits` configuration, AWF also enforces a non-overridable hard cap of **10,000 AI credits**. 
When cumulative AI credits reach this hard -cap, subsequent requests MUST be rejected with HTTP `429` and error type +cap, subsequent requests MUST be rejected with HTTP `403` and error type `ai_credits_limit_exceeded`, and the error/log payload MUST include `hard_cap: true`. @@ -837,7 +837,7 @@ The API proxy MUST enforce the max-runs limit as follows: 2. **Rejection**: When the limit is reached or exceeded, the proxy MUST reject the request with: - - **HTTP status**: `429 Too Many Requests` + - **HTTP status**: `403 Forbidden` - **Content-Type**: `application/json` - **Response body**: ```json @@ -852,7 +852,7 @@ The API proxy MUST enforce the max-runs limit as follows: ``` 3. **WebSocket rejection**: For WebSocket upgrade requests, the proxy MUST - reject with `HTTP/1.1 429 Too Many Requests` and include the same JSON + reject with `HTTP/1.1 403 Forbidden` and include the same JSON error body before destroying the socket. 4. **Finality**: Once the limit is reached, all subsequent requests in the diff --git a/docs/awf-config.schema.json b/docs/awf-config.schema.json index 5a05365ae..74a713cc4 100644 --- a/docs/awf-config.schema.json +++ b/docs/awf-config.schema.json @@ -70,12 +70,12 @@ "maxEffectiveTokens": { "type": "integer", "minimum": 1, - "description": "Maximum cumulative effective tokens allowed for a run. When reached, the API proxy rejects subsequent requests with HTTP 429 and error type 'effective_tokens_limit_exceeded'. Tokens are weighted: input ×1, cache-read ×0.1, output ×4, reasoning ×4. See spec §10." + "description": "Maximum cumulative effective tokens allowed for a run. When reached, the API proxy rejects subsequent requests with HTTP 403 and error type 'effective_tokens_limit_exceeded'. Tokens are weighted: input ×1, cache-read ×0.1, output ×4, reasoning ×4. See spec §10." }, "maxAiCredits": { "type": "number", "exclusiveMinimum": 0, - "description": "Maximum cumulative AI credits allowed for a run. 
When reached, the API proxy rejects subsequent requests with HTTP 429 and error type 'ai_credits_limit_exceeded'. AWF also enforces a non-overridable hard cap of 10,000 AI credits; values above 10,000 are effectively clamped." + "description": "Maximum cumulative AI credits allowed for a run. When reached, the API proxy rejects subsequent requests with HTTP 403 and error type 'ai_credits_limit_exceeded'. AWF also enforces a non-overridable hard cap of 10,000 AI credits; values above 10,000 are effectively clamped." }, "defaultAiCreditsPricing": { "type": "object", @@ -132,7 +132,7 @@ "maxTurns": { "type": "integer", "minimum": 1, - "description": "Maximum number of LLM invocations allowed for a run. When reached, the API proxy rejects subsequent requests with HTTP 429 and error type 'max_runs_exceeded'. See spec §11." + "description": "Maximum number of LLM invocations allowed for a run. When reached, the API proxy rejects subsequent requests with HTTP 403 and error type 'max_runs_exceeded'. See spec §11." }, "maxRuns": { "type": "integer", @@ -147,7 +147,7 @@ "maxCacheMisses": { "type": "integer", "minimum": 1, - "description": "Maximum number of consecutive cache misses allowed per run. A miss is counted only for successful responses with non-zero input_tokens and zero cache_read_tokens. Responses with cache_read_tokens > 0 reset the streak. When reached, the API proxy rejects subsequent requests with HTTP 429 and error type 'max_cache_misses_exceeded'." + "description": "Maximum number of consecutive cache misses allowed per run. A miss is counted only for successful responses with non-zero input_tokens and zero cache_read_tokens. Responses with cache_read_tokens > 0 reset the streak. When reached, the API proxy rejects subsequent requests with HTTP 403 and error type 'max_cache_misses_exceeded'." 
}, "requestedModel": { "type": "string", diff --git a/src/awf-config-schema.json b/src/awf-config-schema.json index 5a05365ae..74a713cc4 100644 --- a/src/awf-config-schema.json +++ b/src/awf-config-schema.json @@ -70,12 +70,12 @@ "maxEffectiveTokens": { "type": "integer", "minimum": 1, - "description": "Maximum cumulative effective tokens allowed for a run. When reached, the API proxy rejects subsequent requests with HTTP 429 and error type 'effective_tokens_limit_exceeded'. Tokens are weighted: input ×1, cache-read ×0.1, output ×4, reasoning ×4. See spec §10." + "description": "Maximum cumulative effective tokens allowed for a run. When reached, the API proxy rejects subsequent requests with HTTP 403 and error type 'effective_tokens_limit_exceeded'. Tokens are weighted: input ×1, cache-read ×0.1, output ×4, reasoning ×4. See spec §10." }, "maxAiCredits": { "type": "number", "exclusiveMinimum": 0, - "description": "Maximum cumulative AI credits allowed for a run. When reached, the API proxy rejects subsequent requests with HTTP 429 and error type 'ai_credits_limit_exceeded'. AWF also enforces a non-overridable hard cap of 10,000 AI credits; values above 10,000 are effectively clamped." + "description": "Maximum cumulative AI credits allowed for a run. When reached, the API proxy rejects subsequent requests with HTTP 403 and error type 'ai_credits_limit_exceeded'. AWF also enforces a non-overridable hard cap of 10,000 AI credits; values above 10,000 are effectively clamped." }, "defaultAiCreditsPricing": { "type": "object", @@ -132,7 +132,7 @@ "maxTurns": { "type": "integer", "minimum": 1, - "description": "Maximum number of LLM invocations allowed for a run. When reached, the API proxy rejects subsequent requests with HTTP 429 and error type 'max_runs_exceeded'. See spec §11." + "description": "Maximum number of LLM invocations allowed for a run. When reached, the API proxy rejects subsequent requests with HTTP 403 and error type 'max_runs_exceeded'. See spec §11." 
}, "maxRuns": { "type": "integer", @@ -147,7 +147,7 @@ "maxCacheMisses": { "type": "integer", "minimum": 1, - "description": "Maximum number of consecutive cache misses allowed per run. A miss is counted only for successful responses with non-zero input_tokens and zero cache_read_tokens. Responses with cache_read_tokens > 0 reset the streak. When reached, the API proxy rejects subsequent requests with HTTP 429 and error type 'max_cache_misses_exceeded'." + "description": "Maximum number of consecutive cache misses allowed per run. A miss is counted only for successful responses with non-zero input_tokens and zero cache_read_tokens. Responses with cache_read_tokens > 0 reset the streak. When reached, the API proxy rejects subsequent requests with HTTP 403 and error type 'max_cache_misses_exceeded'." }, "requestedModel": { "type": "string", From 8b0ee058f22d2a9ae3a1bdac84589a50d85ed002 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 19 Jun 2026 02:54:09 +0000 Subject: [PATCH 2/3] fix(api-proxy): handle copilot fresh input in ai credits --- containers/api-proxy/guards/ai-credits-guard.js | 6 +++--- .../api-proxy/guards/ai-credits-guard.test.js | 15 +++++++++++++++ containers/api-proxy/server.token-guards.test.js | 5 +++-- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/containers/api-proxy/guards/ai-credits-guard.js b/containers/api-proxy/guards/ai-credits-guard.js index daf67760c..10c28a293 100644 --- a/containers/api-proxy/guards/ai-credits-guard.js +++ b/containers/api-proxy/guards/ai-credits-guard.js @@ -4,7 +4,7 @@ const { logRequest, sanitizeForLog } = require('../logging'); const pricingByModel = require('../ai-credits-pricing'); const { resolveCatalogModel } = require('../models-dev-catalog'); const { parsePositiveNumber } = require('./guard-utils'); -const { PROVIDER_ANTHROPIC } = require('../provider-names'); +const { PROVIDER_ANTHROPIC, PROVIDER_COPILOT } = 
require('../provider-names'); const TOKENS_PER_MILLION = 1_000_000; const DOLLARS_PER_CREDIT = 0.01; @@ -171,7 +171,7 @@ function calculateAiCredits(normalizedUsage, model, state = aiCreditsState, prov if (!pricing) return null; // input_tokens semantics differ by provider: - // - Anthropic reports input_tokens as the NON-cached input only; + // - Anthropic and Copilot report input_tokens as the NON-cached input only; // cache_read_input_tokens and cache_creation_input_tokens are reported // separately and are ADDITIVE to input_tokens. Subtracting them here would // over-subtract and undercount the genuinely-fresh input tokens. @@ -181,7 +181,7 @@ function calculateAiCredits(normalizedUsage, model, state = aiCreditsState, prov const reportedInput = normalizedUsage.input_tokens || 0; const cacheReadTokens = normalizedUsage.cache_read_tokens || 0; const cacheWriteTokens = normalizedUsage.cache_write_tokens || 0; - const nonCachedInput = provider === PROVIDER_ANTHROPIC + const nonCachedInput = provider === PROVIDER_ANTHROPIC || provider === PROVIDER_COPILOT ? 
reportedInput : Math.max(0, reportedInput - cacheReadTokens - cacheWriteTokens); diff --git a/containers/api-proxy/guards/ai-credits-guard.test.js b/containers/api-proxy/guards/ai-credits-guard.test.js index 379742482..4b50bd1dd 100644 --- a/containers/api-proxy/guards/ai-credits-guard.test.js +++ b/containers/api-proxy/guards/ai-credits-guard.test.js @@ -157,6 +157,21 @@ describe('ai-credits-guard', () => { expect(usage.aiCreditsThisResponse).toBeCloseTo(15.6111, 4); }); + it('treats Copilot input_tokens as non-cached when provider is copilot', () => { + const usage = applyAiCreditsUsage({ + input_tokens: 100, + cache_read_tokens: 10_000, + output_tokens: 0, + }, 'gpt-5.4', 'copilot'); + + // inputCredits = 100 × $2.50 / 10000 = 0.025 + // cachedInputCredits = 10_000 × $0.25 / 10000 = 0.25 + // total = 0.275 + expect(usage.inputCreditsThisResponse).toBeCloseTo(0.025, 10); + expect(usage.cachedInputCreditsThisResponse).toBeCloseTo(0.25, 10); + expect(usage.aiCreditsThisResponse).toBeCloseTo(0.275, 10); + }); + it('warns and skips usage for unknown models', () => { const { lines } = collectLogOutput(); const usage = applyAiCreditsUsage({ input_tokens: 100 }, 'unknown-model'); diff --git a/containers/api-proxy/server.token-guards.test.js b/containers/api-proxy/server.token-guards.test.js index cde83dc6f..cc0209f2f 100644 --- a/containers/api-proxy/server.token-guards.test.js +++ b/containers/api-proxy/server.token-guards.test.js @@ -1,6 +1,7 @@ /** - * Tests for proxyRequest guards: effective token limit (403) and - * max-runs limit (403). + * Tests for proxyRequest token and permission guard behavior, including + * effective-token, max-runs, max-cache-misses, AI-credits, and + * permission-denied enforcement paths. * * Extracted from server.proxy.test.js. 
*/ From 3b46ba93e77a11473c5b6a2076f682465f20702e Mon Sep 17 00:00:00 2001 From: Landon Cox Date: Thu, 18 Jun 2026 20:12:24 -0700 Subject: [PATCH 3/3] test(smoke-claude): raise max-turns/maxRuns from 2 to 5 The maxRuns:2 cap was too tight for the smoke prompt: the agent routinely burns its 2 invocations on a planning turn plus a parallel capability-probe before emitting its safe output, then hits the cap and fails. Bump max-turns (which drives apiProxy.maxRuns) to 5 so the smoke test has headroom to complete. Recompiled the lock file and updated the workflow test assertions accordingly. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/smoke-claude.lock.yml | 8 ++++---- .github/workflows/smoke-claude.md | 2 +- scripts/ci/smoke-claude-workflow.test.ts | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/smoke-claude.lock.yml b/.github/workflows/smoke-claude.lock.yml index 497086848..77736fdfb 100644 --- a/.github/workflows/smoke-claude.lock.yml +++ b/.github/workflows/smoke-claude.lock.yml @@ -1,4 +1,4 @@ -# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"1931d05a82aa65b2b1d5af50c9dcde1453044c61ac1c0718031eb2eca5c6b046","body_hash":"61fdfb929477edfef0935407ef5e3016122fdda0a2bc1fb9e82755c7dbbeb886","compiler_version":"v0.79.6","agent_id":"claude","agent_model":"claude-haiku-4-5","engine_versions":{"claude":"2.1.168"}} +# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"56c81b9f5ed7b54bd55d1b6a753f29387b124ed89f38bff44c19925629b1468f","body_hash":"61fdfb929477edfef0935407ef5e3016122fdda0a2bc1fb9e82755c7dbbeb886","compiler_version":"v0.79.6","agent_id":"claude","agent_model":"claude-haiku-4-5","engine_versions":{"claude":"2.1.168"}} # gh-aw-manifest: 
{"version":1,"secrets":["ANTHROPIC_API_KEY","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"df4cb1c069e1874edd31b4311f1884172cec0e10","version":"v6.0.3"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9.0.0"},{"repo":"actions/setup-node","sha":"48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e","version":"v6.4.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"},{"repo":"github/gh-aw-actions/setup","sha":"5c2fe865bb4dc46e1450f6ee0d0541d759aea73a","version":"v0.79.6"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2","digest":"sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6","pinned_image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2@sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2","digest":"sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4","pinned_image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2@sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2","digest":"sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591","pinned_image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2@sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.3.1","digest":"sha256:287fad0236959f3b3d9936ea1ef8d5b4f135ef2a5f5789713495cbbef191e60c","pinned_image":"ghcr.io/github/gh-aw-mcpg:v0.3.1@sha256:287fad0236959f3b3d9936ea1ef8d5b4f135ef2a5f5789713495cbbef191e60c"}]} # ___ _ _ # / _ \ | | (_) @@ -804,7 +804,7 @@ jobs: touch /tmp/gh-aw/agent-step-summary.md (umask 177 && touch /tmp/gh-aw/agent-stdio.log) GH_AW_MAX_AI_CREDITS="${{ 
vars.GH_AW_DEFAULT_MAX_AI_CREDITS || '1000' }}" - printf '%s\n' "{\"\$schema\":\"https://github.com/github/gh-aw-firewall/releases/download/v0.27.2/awf-config.schema.json\",\"network\":{\"allowDomains\":[\"*.githubusercontent.com\",\"anthropic.com\",\"api.anthropic.com\",\"api.github.com\",\"api.snapcraft.io\",\"archive.ubuntu.com\",\"azure.archive.ubuntu.com\",\"cdn.playwright.dev\",\"codeload.github.com\",\"crl.geotrust.com\",\"crl.globalsign.com\",\"crl.identrust.com\",\"crl.sectigo.com\",\"crl.thawte.com\",\"crl.usertrust.com\",\"crl.verisign.com\",\"crl3.digicert.com\",\"crl4.digicert.com\",\"crls.ssl.com\",\"files.pythonhosted.org\",\"ghcr.io\",\"github-cloud.githubusercontent.com\",\"github-cloud.s3.amazonaws.com\",\"github.com\",\"host.docker.internal\",\"json-schema.org\",\"json.schemastore.org\",\"keyserver.ubuntu.com\",\"lfs.github.com\",\"objects.githubusercontent.com\",\"ocsp.digicert.com\",\"ocsp.geotrust.com\",\"ocsp.globalsign.com\",\"ocsp.identrust.com\",\"ocsp.sectigo.com\",\"ocsp.ssl.com\",\"ocsp.thawte.com\",\"ocsp.usertrust.com\",\"ocsp.verisign.com\",\"packagecloud.io\",\"packages.cloud.google.com\",\"packages.microsoft.com\",\"playwright.download.prss.microsoft.com\",\"ppa.launchpad.net\",\"pypi.org\",\"raw.githubusercontent.com\",\"registry.npmjs.org\",\"s.symcb.com\",\"s.symcd.com\",\"security.ubuntu.com\",\"sentry.io\",\"statsig.anthropic.com\",\"ts-crl.ws.symantec.com\",\"ts-ocsp.ws.symantec.com\",\"www.googleapis.com\"]},\"apiProxy\":{\"enabled\":true,\"enableTokenSteering\":true,\"maxRuns\":2,\"maxAiCredits\":${GH_AW_MAX_AI_CREDITS},\"models\":{\"agent\":[\"sonnet-6x\",\"gpt-5.4\",\"gpt-5.3\",\"gemini-pro\",\"any\"],\"antigravity\":[\"copilot/antigravity*\",\"google/antigravity*\",\"gemini/antigravity*\"],\"any\":[\"copilot/*\",\"anthropic/*\",\"openai/*\",\"google/*\",\"gemini/*\"],\"claude\":[\"agent\"],\"codex\":[\"agent\"],\"coding\":[\"copilot/gpt-5*codex*\",\"openai/gpt-5*codex*\",\"gpt-5-codex\"],\"computer-use\":[\"copilot/*compu
ter-use*\",\"google/*computer-use*\",\"gemini/*computer-use*\",\"openai/*computer-use*\"],\"copilot\":[\"agent\"],\"deep-research\":[\"copilot/deep-research*\",\"copilot/o3-deep-research*\",\"copilot/o4-mini-deep-research*\",\"google/deep-research*\",\"gemini/deep-research*\",\"openai/o3-deep-research*\",\"openai/o4-mini-deep-research*\"],\"gemini\":[\"agent\"],\"gemini-3-flash\":[\"copilot/gemini-3*flash*\",\"google/gemini-3*flash*\",\"gemini/gemini-3*flash*\"],\"gemini-3-pro\":[\"copilot/gemini-3*pro*\",\"google/gemini-3*pro*\",\"google/nano-banana*\",\"gemini/gemini-3*pro*\"],\"gemini-3.1-flash\":[\"copilot/gemini-3.1*flash*\",\"google/gemini-3.1*flash*\",\"gemini/gemini-3.1*flash*\"],\"gemini-3.1-pro\":[\"copilot/gemini-3.1*pro*\",\"google/gemini-3.1*pro*\",\"gemini/gemini-3.1*pro*\"],\"gemini-3.5-flash\":[\"copilot/gemini-3.5*flash*\",\"google/gemini-3.5*flash*\",\"gemini/gemini-3.5*flash*\"],\"gemini-flash\":[\"copilot/gemini-*flash*\",\"google/gemini-*flash*\",\"gemini/gemini-*flash*\"],\"gemini-flash-lite\":[\"copilot/gemini-*flash*lite*\",\"google/gemini-*flash*lite*\",\"gemini/gemini-*flash*lite*\"],\"gemini-pro\":[\"copilot/gemini-*pro*\",\"google/gemini-*pro*\",\"gemini/gemini-*pro*\"],\"gemma\":[\"copilot/gemma*\",\"google/gemma*\",\"gemini/gemma*\"],\"gpt-5\":[\"copilot/gpt-5*\",\"openai/gpt-5*\"],\"gpt-5-codex\":[\"copilot/gpt-5*codex*\",\"openai/gpt-5*codex*\"],\"gpt-5-mini\":[\"copilot/gpt-5*mini*\",\"openai/gpt-5*mini*\"],\"gpt-5-nano\":[\"copilot/gpt-5*nano*\",\"openai/gpt-5*nano*\"],\"gpt-5-pro\":[\"copilot/gpt-5*pro*\",\"openai/gpt-5*pro*\"],\"gpt-5.2\":[\"copilot/gpt-5.2*\",\"openai/gpt-5.2*\"],\"gpt-5.3\":[\"copilot/gpt-5.3*\",\"openai/gpt-5.3*\"],\"gpt-5.4\":[\"copilot/gpt-5.4*\",\"openai/gpt-5.4*\"],\"gpt-5.5\":[\"copilot/gpt-5.5*\",\"openai/gpt-5.5*\"],\"haiku\":[\"copilot/*haiku*\",\"anthropic/*haiku*\"],\"large\":[\"sonnet\",\"gpt-5-pro\",\"gpt-5\",\"gemini-pro\"],\"mai-code\":[\"copilot/MAI-Code*\",\"copilot/mai-code*\",\"openai/MAI-Code
*\"],\"mini\":[\"haiku\",\"gpt-5-mini\",\"gpt-5-nano\",\"gemini-flash-lite\"],\"nano-banana\":[\"copilot/nano-banana*\",\"google/nano-banana*\",\"gemini/nano-banana*\"],\"opus\":[\"copilot/*opus*\",\"anthropic/*opus*\"],\"opusplan\":[\"opus?effort=high\"],\"reasoning\":[\"copilot/o1*\",\"copilot/o3*\",\"copilot/o4*\",\"openai/o1*\",\"openai/o3*\",\"openai/o4*\"],\"robotics\":[\"copilot/*robotics*\",\"google/*robotics*\",\"gemini/*robotics*\"],\"small\":[\"mini\"],\"small-agent\":[\"haiku\",\"gpt-5-mini\",\"gemini-flash\"],\"sonnet\":[\"copilot/*sonnet*\",\"anthropic/*sonnet*\"],\"sonnet-6x\":[\"copilot/*sonnet-4.5*\",\"copilot/*sonnet-4.6*\",\"copilot/*sonnet-4-5-*\",\"anthropic/*sonnet-4-5-*\",\"copilot/*sonnet-4-6*\",\"anthropic/*sonnet-4-6*\"],\"summarization\":[\"haiku\",\"gpt-5-mini\",\"gemini-flash-lite\",\"mini\"],\"vision\":[\"copilot/gemini-*image*\",\"gemini/gemini-*image*\",\"copilot/gemini-*flash*\",\"gemini/gemini-*flash*\"]}},\"container\":{\"imageTag\":\"0.27.2,squid=sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591,agent=sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6,api-proxy=sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4,cli-proxy=sha256:02f3ec08f32dc26c5427920c6a2e2f3036238fce44802f2f11ef49ed8621b5d0\"}}" > "${RUNNER_TEMP}/gh-aw/awf-config.json" + printf '%s\n' 
"{\"\$schema\":\"https://github.com/github/gh-aw-firewall/releases/download/v0.27.2/awf-config.schema.json\",\"network\":{\"allowDomains\":[\"*.githubusercontent.com\",\"anthropic.com\",\"api.anthropic.com\",\"api.github.com\",\"api.snapcraft.io\",\"archive.ubuntu.com\",\"azure.archive.ubuntu.com\",\"cdn.playwright.dev\",\"codeload.github.com\",\"crl.geotrust.com\",\"crl.globalsign.com\",\"crl.identrust.com\",\"crl.sectigo.com\",\"crl.thawte.com\",\"crl.usertrust.com\",\"crl.verisign.com\",\"crl3.digicert.com\",\"crl4.digicert.com\",\"crls.ssl.com\",\"files.pythonhosted.org\",\"ghcr.io\",\"github-cloud.githubusercontent.com\",\"github-cloud.s3.amazonaws.com\",\"github.com\",\"host.docker.internal\",\"json-schema.org\",\"json.schemastore.org\",\"keyserver.ubuntu.com\",\"lfs.github.com\",\"objects.githubusercontent.com\",\"ocsp.digicert.com\",\"ocsp.geotrust.com\",\"ocsp.globalsign.com\",\"ocsp.identrust.com\",\"ocsp.sectigo.com\",\"ocsp.ssl.com\",\"ocsp.thawte.com\",\"ocsp.usertrust.com\",\"ocsp.verisign.com\",\"packagecloud.io\",\"packages.cloud.google.com\",\"packages.microsoft.com\",\"playwright.download.prss.microsoft.com\",\"ppa.launchpad.net\",\"pypi.org\",\"raw.githubusercontent.com\",\"registry.npmjs.org\",\"s.symcb.com\",\"s.symcd.com\",\"security.ubuntu.com\",\"sentry.io\",\"statsig.anthropic.com\",\"ts-crl.ws.symantec.com\",\"ts-ocsp.ws.symantec.com\",\"www.googleapis.com\"]},\"apiProxy\":{\"enabled\":true,\"enableTokenSteering\":true,\"maxRuns\":5,\"maxAiCredits\":${GH_AW_MAX_AI_CREDITS},\"models\":{\"agent\":[\"sonnet-6x\",\"gpt-5.4\",\"gpt-5.3\",\"gemini-pro\",\"any\"],\"antigravity\":[\"copilot/antigravity*\",\"google/antigravity*\",\"gemini/antigravity*\"],\"any\":[\"copilot/*\",\"anthropic/*\",\"openai/*\",\"google/*\",\"gemini/*\"],\"claude\":[\"agent\"],\"codex\":[\"agent\"],\"coding\":[\"copilot/gpt-5*codex*\",\"openai/gpt-5*codex*\",\"gpt-5-codex\"],\"computer-use\":[\"copilot/*computer-use*\",\"google/*computer-use*\",\"gemini/*computer-use*\",\
"openai/*computer-use*\"],\"copilot\":[\"agent\"],\"deep-research\":[\"copilot/deep-research*\",\"copilot/o3-deep-research*\",\"copilot/o4-mini-deep-research*\",\"google/deep-research*\",\"gemini/deep-research*\",\"openai/o3-deep-research*\",\"openai/o4-mini-deep-research*\"],\"gemini\":[\"agent\"],\"gemini-3-flash\":[\"copilot/gemini-3*flash*\",\"google/gemini-3*flash*\",\"gemini/gemini-3*flash*\"],\"gemini-3-pro\":[\"copilot/gemini-3*pro*\",\"google/gemini-3*pro*\",\"google/nano-banana*\",\"gemini/gemini-3*pro*\"],\"gemini-3.1-flash\":[\"copilot/gemini-3.1*flash*\",\"google/gemini-3.1*flash*\",\"gemini/gemini-3.1*flash*\"],\"gemini-3.1-pro\":[\"copilot/gemini-3.1*pro*\",\"google/gemini-3.1*pro*\",\"gemini/gemini-3.1*pro*\"],\"gemini-3.5-flash\":[\"copilot/gemini-3.5*flash*\",\"google/gemini-3.5*flash*\",\"gemini/gemini-3.5*flash*\"],\"gemini-flash\":[\"copilot/gemini-*flash*\",\"google/gemini-*flash*\",\"gemini/gemini-*flash*\"],\"gemini-flash-lite\":[\"copilot/gemini-*flash*lite*\",\"google/gemini-*flash*lite*\",\"gemini/gemini-*flash*lite*\"],\"gemini-pro\":[\"copilot/gemini-*pro*\",\"google/gemini-*pro*\",\"gemini/gemini-*pro*\"],\"gemma\":[\"copilot/gemma*\",\"google/gemma*\",\"gemini/gemma*\"],\"gpt-5\":[\"copilot/gpt-5*\",\"openai/gpt-5*\"],\"gpt-5-codex\":[\"copilot/gpt-5*codex*\",\"openai/gpt-5*codex*\"],\"gpt-5-mini\":[\"copilot/gpt-5*mini*\",\"openai/gpt-5*mini*\"],\"gpt-5-nano\":[\"copilot/gpt-5*nano*\",\"openai/gpt-5*nano*\"],\"gpt-5-pro\":[\"copilot/gpt-5*pro*\",\"openai/gpt-5*pro*\"],\"gpt-5.2\":[\"copilot/gpt-5.2*\",\"openai/gpt-5.2*\"],\"gpt-5.3\":[\"copilot/gpt-5.3*\",\"openai/gpt-5.3*\"],\"gpt-5.4\":[\"copilot/gpt-5.4*\",\"openai/gpt-5.4*\"],\"gpt-5.5\":[\"copilot/gpt-5.5*\",\"openai/gpt-5.5*\"],\"haiku\":[\"copilot/*haiku*\",\"anthropic/*haiku*\"],\"large\":[\"sonnet\",\"gpt-5-pro\",\"gpt-5\",\"gemini-pro\"],\"mai-code\":[\"copilot/MAI-Code*\",\"copilot/mai-code*\",\"openai/MAI-Code*\"],\"mini\":[\"haiku\",\"gpt-5-mini\",\"gpt-5-nano\",\"gemini-
flash-lite\"],\"nano-banana\":[\"copilot/nano-banana*\",\"google/nano-banana*\",\"gemini/nano-banana*\"],\"opus\":[\"copilot/*opus*\",\"anthropic/*opus*\"],\"opusplan\":[\"opus?effort=high\"],\"reasoning\":[\"copilot/o1*\",\"copilot/o3*\",\"copilot/o4*\",\"openai/o1*\",\"openai/o3*\",\"openai/o4*\"],\"robotics\":[\"copilot/*robotics*\",\"google/*robotics*\",\"gemini/*robotics*\"],\"small\":[\"mini\"],\"small-agent\":[\"haiku\",\"gpt-5-mini\",\"gemini-flash\"],\"sonnet\":[\"copilot/*sonnet*\",\"anthropic/*sonnet*\"],\"sonnet-6x\":[\"copilot/*sonnet-4.5*\",\"copilot/*sonnet-4.6*\",\"copilot/*sonnet-4-5-*\",\"anthropic/*sonnet-4-5-*\",\"copilot/*sonnet-4-6*\",\"anthropic/*sonnet-4-6*\"],\"summarization\":[\"haiku\",\"gpt-5-mini\",\"gemini-flash-lite\",\"mini\"],\"vision\":[\"copilot/gemini-*image*\",\"gemini/gemini-*image*\",\"copilot/gemini-*flash*\",\"gemini/gemini-*flash*\"]}},\"container\":{\"imageTag\":\"0.27.2,squid=sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591,agent=sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6,api-proxy=sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4,cli-proxy=sha256:02f3ec08f32dc26c5427920c6a2e2f3036238fce44802f2f11ef49ed8621b5d0\"}}" > "${RUNNER_TEMP}/gh-aw/awf-config.json" GH_AW_MODEL_MULTIPLIERS_PATH="/tmp/gh-aw/model_multipliers.json" node "${RUNNER_TEMP}/gh-aw/actions/merge_awf_model_multipliers.cjs" cp "${RUNNER_TEMP}/gh-aw/awf-config.json" /tmp/gh-aw/awf-config.json export GH_AW_MODELS_JSON_PATH="/tmp/gh-aw/models.json" @@ -823,7 +823,7 @@ jobs: fi # shellcheck disable=SC1003 sudo -E awf --config "${RUNNER_TEMP}/gh-aw/awf-config.json" --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" ${GH_AW_TOOL_CACHE_MOUNT:+--mount "$GH_AW_TOOL_CACHE_MOUNT"} ${GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS} --tty --env-all --exclude-env ANTHROPIC_API_KEY --exclude-env 
MCP_GATEWAY_API_KEY --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --session-state-dir /tmp/gh-aw/sandbox/agent/session-state --enable-host-access --allow-host-ports 80,443,8080 --build-local \ - -- /bin/bash -c 'set +o histexpand; export PATH="${RUNNER_TEMP}/gh-aw/mcp-cli/bin:$PATH" && GH_AW_TOOL_CACHE="${RUNNER_TOOL_CACHE:-/opt/hostedtoolcache}"; export PATH="$(find "$GH_AW_TOOL_CACHE" /opt/hostedtoolcache /home/runner/work/_tool -maxdepth 5 -type d -name bin 2>/dev/null | tr '\''\n'\'' '\'':'\'')$PATH"; [ -n "$GOROOT" ] && export PATH="$GOROOT/bin:$PATH" || true && GH_AW_NODE_EXEC="${GH_AW_NODE_BIN:-}"; if [ -z "$GH_AW_NODE_EXEC" ] || [ ! -x "$GH_AW_NODE_EXEC" ]; then GH_AW_NODE_EXEC="$(command -v node 2>/dev/null || true)"; fi; if [ -z "$GH_AW_NODE_EXEC" ]; then echo "node runtime missing on this runner — check runtimes.node in workflow YAML" >&2; exit 127; fi; GH_AW_NPM_GLOBAL_ROOT="$(npm root -g 2>/dev/null || true)"; if [ -n "$GH_AW_NPM_GLOBAL_ROOT" ]; then export NODE_PATH="${GH_AW_NPM_GLOBAL_ROOT}${NODE_PATH:+:${NODE_PATH}}"; fi; "$GH_AW_NODE_EXEC" ${RUNNER_TEMP}/gh-aw/actions/claude_harness.cjs claude --print --no-chrome --max-turns 2 --allowed-tools '\''Bash(bash),Bash(cat),Bash(date),Bash(echo),Bash(grep),Bash(head),Bash(ls),Bash(printf),Bash(pwd),Bash(safeoutputs:*),Bash(sort),Bash(tail),Bash(uniq),Bash(wc),Bash(yq),BashOutput,Edit,Edit(/tmp/*),Edit(/tmp/gh-aw/agent/*),ExitPlanMode,Glob,Grep,KillBash,LS,MultiEdit,MultiEdit(/tmp/*),MultiEdit(/tmp/gh-aw/agent/*),NotebookEdit,NotebookRead,Read,Read(/tmp/*),Read(/tmp/gh-aw/agent/*),Task,TodoWrite,Write,Write(/tmp/*),Write(/tmp/gh-aw/agent/*),mcp__safeoutputs'\'' --debug-file /tmp/gh-aw/agent-stdio.log --verbose --permission-mode acceptEdits --output-format stream-json --mcp-config "${RUNNER_TEMP}/gh-aw/mcp-config/mcp-servers.json" --prompt-file /tmp/gh-aw/aw-prompts/prompt.txt' 2>&1 | tee -a /tmp/gh-aw/agent-stdio.log + -- /bin/bash -c 'set 
+o histexpand; export PATH="${RUNNER_TEMP}/gh-aw/mcp-cli/bin:$PATH" && GH_AW_TOOL_CACHE="${RUNNER_TOOL_CACHE:-/opt/hostedtoolcache}"; export PATH="$(find "$GH_AW_TOOL_CACHE" /opt/hostedtoolcache /home/runner/work/_tool -maxdepth 5 -type d -name bin 2>/dev/null | tr '\''\n'\'' '\'':'\'')$PATH"; [ -n "$GOROOT" ] && export PATH="$GOROOT/bin:$PATH" || true && GH_AW_NODE_EXEC="${GH_AW_NODE_BIN:-}"; if [ -z "$GH_AW_NODE_EXEC" ] || [ ! -x "$GH_AW_NODE_EXEC" ]; then GH_AW_NODE_EXEC="$(command -v node 2>/dev/null || true)"; fi; if [ -z "$GH_AW_NODE_EXEC" ]; then echo "node runtime missing on this runner — check runtimes.node in workflow YAML" >&2; exit 127; fi; GH_AW_NPM_GLOBAL_ROOT="$(npm root -g 2>/dev/null || true)"; if [ -n "$GH_AW_NPM_GLOBAL_ROOT" ]; then export NODE_PATH="${GH_AW_NPM_GLOBAL_ROOT}${NODE_PATH:+:${NODE_PATH}}"; fi; "$GH_AW_NODE_EXEC" ${RUNNER_TEMP}/gh-aw/actions/claude_harness.cjs claude --print --no-chrome --max-turns 5 --allowed-tools '\''Bash(bash),Bash(cat),Bash(date),Bash(echo),Bash(grep),Bash(head),Bash(ls),Bash(printf),Bash(pwd),Bash(safeoutputs:*),Bash(sort),Bash(tail),Bash(uniq),Bash(wc),Bash(yq),BashOutput,Edit,Edit(/tmp/*),Edit(/tmp/gh-aw/agent/*),ExitPlanMode,Glob,Grep,KillBash,LS,MultiEdit,MultiEdit(/tmp/*),MultiEdit(/tmp/gh-aw/agent/*),NotebookEdit,NotebookRead,Read,Read(/tmp/*),Read(/tmp/gh-aw/agent/*),Task,TodoWrite,Write,Write(/tmp/*),Write(/tmp/gh-aw/agent/*),mcp__safeoutputs'\'' --debug-file /tmp/gh-aw/agent-stdio.log --verbose --permission-mode acceptEdits --output-format stream-json --mcp-config "${RUNNER_TEMP}/gh-aw/mcp-config/mcp-servers.json" --prompt-file /tmp/gh-aw/aw-prompts/prompt.txt' 2>&1 | tee -a /tmp/gh-aw/agent-stdio.log env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} ANTHROPIC_MODEL: claude-haiku-4-5 @@ -833,7 +833,7 @@ jobs: DISABLE_BUG_COMMAND: 1 DISABLE_ERROR_REPORTING: 1 DISABLE_TELEMETRY: 1 - GH_AW_MAX_TURNS: 2 + GH_AW_MAX_TURNS: 5 GH_AW_MCP_CONFIG: ${{ runner.temp }}/gh-aw/mcp-config/mcp-servers.json 
GH_AW_PHASE: agent GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt diff --git a/.github/workflows/smoke-claude.md b/.github/workflows/smoke-claude.md index 5b1b1c538..fd7dd4cf4 100644 --- a/.github/workflows/smoke-claude.md +++ b/.github/workflows/smoke-claude.md @@ -15,7 +15,7 @@ permissions: pull-requests: read name: Smoke Claude -max-turns: 2 +max-turns: 5 engine: id: claude model: claude-haiku-4-5 diff --git a/scripts/ci/smoke-claude-workflow.test.ts b/scripts/ci/smoke-claude-workflow.test.ts index 235ec431a..98d9861c2 100644 --- a/scripts/ci/smoke-claude-workflow.test.ts +++ b/scripts/ci/smoke-claude-workflow.test.ts @@ -9,7 +9,7 @@ describe('smoke claude workflow optimization config', () => { it('uses pre-computed result step and minimal turn budget in source workflow', () => { const source = fs.readFileSync(smokeClaudeSourcePath, 'utf-8'); - expect(source).toContain('max-turns: 2'); + expect(source).toContain('max-turns: 5'); expect(source).toContain('Check GitHub.com reachability'); expect(source).toContain('/tmp/gh-aw/agent/smoke-context.txt'); expect(source).toContain('curl -fsSL --max-time 15 https://github.com'); @@ -35,10 +35,10 @@ describe('smoke claude workflow optimization config', () => { expect(source).not.toContain('safeoutputs add_labels . < /tmp/gh-aw/agent/labels.json'); }); - it('compiles the workflow without playwright tools and with max-turns 2', () => { + it('compiles the workflow without playwright tools and with max-turns 5', () => { const lock = fs.readFileSync(smokeClaudeLockPath, 'utf-8'); - expect(lock).toContain('--max-turns 2'); + expect(lock).toContain('--max-turns 5'); expect(lock).toContain('Check GitHub.com reachability'); expect(lock).toContain('playwright_check=✅ PASS'); expect(lock).toContain('Compute final smoke result');