diff --git a/.github/workflows/smoke-claude.lock.yml b/.github/workflows/smoke-claude.lock.yml index a4490b81..84c3b258 100644 --- a/.github/workflows/smoke-claude.lock.yml +++ b/.github/workflows/smoke-claude.lock.yml @@ -1,4 +1,4 @@ -# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"a08c320bc1b492fd4827ebbdc1fff37ca664404dc3f30ca87b38733896486989","body_hash":"6e05820005e43b82d8112bc60ced8e13336596ae671ecac69e6c5ac691485b71","compiler_version":"v0.79.8","agent_id":"claude","agent_model":"claude-haiku-4-5","engine_versions":{"claude":"2.1.168"}} +# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"1931d05a82aa65b2b1d5af50c9dcde1453044c61ac1c0718031eb2eca5c6b046","body_hash":"6e05820005e43b82d8112bc60ced8e13336596ae671ecac69e6c5ac691485b71","compiler_version":"v0.79.8","agent_id":"claude","agent_model":"claude-haiku-4-5","engine_versions":{"claude":"2.1.168"}} # gh-aw-manifest: {"version":1,"secrets":["ANTHROPIC_API_KEY","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"df4cb1c069e1874edd31b4311f1884172cec0e10","version":"v6.0.3"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9.0.0"},{"repo":"actions/setup-node","sha":"48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e","version":"v6.4.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"},{"repo":"github/gh-aw-actions/setup","sha":"c0338fef4749d08c21f8f975fb0e37efa17dda47","version":"v0.79.8"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2","digest":"sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6","pinned_image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2@sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2","digest":"sh
a256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4","pinned_image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2@sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2","digest":"sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591","pinned_image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2@sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.3.1","digest":"sha256:287fad0236959f3b3d9936ea1ef8d5b4f135ef2a5f5789713495cbbef191e60c","pinned_image":"ghcr.io/github/gh-aw-mcpg:v0.3.1@sha256:287fad0236959f3b3d9936ea1ef8d5b4f135ef2a5f5789713495cbbef191e60c"}]} # This file was automatically generated by gh-aw (v0.79.8). DO NOT EDIT. To debug this workflow, load the skill at https://github.com/github/gh-aw/blob/main/debug.md # @@ -995,6 +995,7 @@ jobs: - activation - agent - safe_outputs + - verify_token_usage if: > always() && (needs.agent.result != 'skipped' || needs.activation.outputs.lockdown_check_failed == 'true' || needs.activation.outputs.stale_lock_file_failed == 'true' || needs.activation.outputs.daily_ai_credits_exceeded == 'true') @@ -1289,3 +1290,33 @@ jobs: /tmp/gh-aw/temporary-id-map.json if-no-files-found: ignore + verify_token_usage: + needs: agent + if: always() && needs.agent.result != 'skipped' && needs.agent.result != 'cancelled' + runs-on: ubuntu-latest + permissions: + contents: read + + steps: + - name: Configure GH_HOST for enterprise compatibility + id: ghes-host-config + shell: bash + # zizmor: ignore[github-env] - GITHUB_SERVER_URL is set by GitHub Actions, not user input. + run: | + # Derive GH_HOST from GITHUB_SERVER_URL so the gh CLI targets the correct + # GitHub instance (GHES/GHEC). On github.com this is a harmless no-op. 
+ GH_HOST="${GITHUB_SERVER_URL#https://}" + GH_HOST="${GH_HOST#http://}" + echo "GH_HOST=${GH_HOST}" >> "$GITHUB_ENV" + - name: Checkout repository + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + - name: Download agent artifact + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: agent + path: /tmp/gh-aw-agent + - name: Token-usage sanity check + run: node scripts/ci/check-token-usage.js --artifact-root /tmp/gh-aw-agent --engine claude + diff --git a/.github/workflows/smoke-claude.md b/.github/workflows/smoke-claude.md index 7bb75e23..43f3eb94 100644 --- a/.github/workflows/smoke-claude.md +++ b/.github/workflows/smoke-claude.md @@ -23,6 +23,25 @@ sandbox: mcp: version: v0.3.1 strict: false +jobs: + verify_token_usage: + needs: agent + if: always() && needs.agent.result != 'skipped' && needs.agent.result != 'cancelled' + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - name: Checkout repository + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + - name: Download agent artifact + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: agent + path: /tmp/gh-aw-agent + - name: Token-usage sanity check + run: node scripts/ci/check-token-usage.js --artifact-root /tmp/gh-aw-agent --engine claude tools: bash: - bash diff --git a/.github/workflows/smoke-codex.lock.yml b/.github/workflows/smoke-codex.lock.yml index 902b00f0..def5e2fc 100644 --- a/.github/workflows/smoke-codex.lock.yml +++ b/.github/workflows/smoke-codex.lock.yml @@ -1,4 +1,4 @@ -# gh-aw-metadata: 
{"schema_version":"v4","frontmatter_hash":"c0467bdd469d554b0261f696ece56b95cd24d5eb263b2767d976f5db7af45a50","body_hash":"988c8ab731a331e33e5751aa46982c3ee5adc6dbefb2b18236d88854467c3d2b","compiler_version":"v0.79.6","agent_id":"codex","agent_model":"gpt-5.4","engine_versions":{"codex":"0.137.0"}} +# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"9971a0badaf621a6194316298030053110c15cc5683e20fc23b1c245fbb42bd8","body_hash":"988c8ab731a331e33e5751aa46982c3ee5adc6dbefb2b18236d88854467c3d2b","compiler_version":"v0.79.6","agent_id":"codex","agent_model":"gpt-5.4","engine_versions":{"codex":"0.137.0"}} # gh-aw-manifest: {"version":1,"secrets":["CODEX_API_KEY","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN","OPENAI_API_KEY"],"actions":[{"repo":"actions/cache","sha":"27d5ce7f107fe9357f9df03efb73ab90386fccae","version":"v5.0.5"},{"repo":"actions/checkout","sha":"df4cb1c069e1874edd31b4311f1884172cec0e10","version":"v6.0.3"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9.0.0"},{"repo":"actions/setup-node","sha":"48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e","version":"v6.4.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"},{"repo":"github/gh-aw-actions/setup","sha":"5c2fe865bb4dc46e1450f6ee0d0541d759aea73a","version":"v0.79.6"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2","digest":"sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6","pinned_image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2@sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2","digest":"sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4","pinned_image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2@sha256:ee39841d980878ebbb87592903b06
d31a1af500c71525c9616f7e8e2a27041a4"},{"image":"ghcr.io/github/gh-aw-firewall/cli-proxy:0.27.2","digest":"sha256:02f3ec08f32dc26c5427920c6a2e2f3036238fce44802f2f11ef49ed8621b5d0","pinned_image":"ghcr.io/github/gh-aw-firewall/cli-proxy:0.27.2@sha256:02f3ec08f32dc26c5427920c6a2e2f3036238fce44802f2f11ef49ed8621b5d0"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2","digest":"sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591","pinned_image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2@sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591"},{"image":"ghcr.io/github/gh-aw-mcpg:latest","digest":"sha256:c10331ad17668ef89f38f5e356678788a40b0cd5fef96e8f92e1d9c1de47cbaa","pinned_image":"ghcr.io/github/gh-aw-mcpg:latest@sha256:c10331ad17668ef89f38f5e356678788a40b0cd5fef96e8f92e1d9c1de47cbaa"},{"image":"ghcr.io/github/github-mcp-server:v1.1.2","digest":"sha256:30197479d8036c7811892bc07e06f9a05c9ef3cdd79bc59f256d50647f95788c","pinned_image":"ghcr.io/github/github-mcp-server:v1.1.2@sha256:30197479d8036c7811892bc07e06f9a05c9ef3cdd79bc59f256d50647f95788c"},{"image":"mcr.microsoft.com/playwright/mcp","digest":"sha256:7b82f29c6ef83480a97f612d53ac3fd5f30a32df3fea1e06923d4204d3532bb2","pinned_image":"mcr.microsoft.com/playwright/mcp@sha256:7b82f29c6ef83480a97f612d53ac3fd5f30a32df3fea1e06923d4204d3532bb2"}]} # ___ _ _ # / _ \ | | (_) @@ -1283,6 +1283,7 @@ jobs: - activation - agent - safe_outputs + - verify_token_usage if: > always() && (needs.agent.result != 'skipped' || needs.activation.outputs.lockdown_check_failed == 'true' || needs.activation.outputs.stale_lock_file_failed == 'true' || needs.activation.outputs.daily_effective_workflow_exceeded == 'true') @@ -1580,3 +1581,33 @@ jobs: /tmp/gh-aw/temporary-id-map.json if-no-files-found: ignore + verify_token_usage: + needs: agent + if: always() && needs.agent.result != 'skipped' && needs.agent.result != 'cancelled' + runs-on: ubuntu-latest + permissions: + contents: read + + steps: + - name: 
Configure GH_HOST for enterprise compatibility + id: ghes-host-config + shell: bash + # zizmor: ignore[github-env] - GITHUB_SERVER_URL is set by GitHub Actions, not user input. + run: | + # Derive GH_HOST from GITHUB_SERVER_URL so the gh CLI targets the correct + # GitHub instance (GHES/GHEC). On github.com this is a harmless no-op. + GH_HOST="${GITHUB_SERVER_URL#https://}" + GH_HOST="${GH_HOST#http://}" + echo "GH_HOST=${GH_HOST}" >> "$GITHUB_ENV" + - name: Checkout repository + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + - name: Download agent artifact + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: agent + path: /tmp/gh-aw-agent + - name: Token-usage sanity check + run: node scripts/ci/check-token-usage.js --artifact-root /tmp/gh-aw-agent --engine codex + diff --git a/.github/workflows/smoke-codex.md b/.github/workflows/smoke-codex.md index 85a2e32e..2a3d7966 100644 --- a/.github/workflows/smoke-codex.md +++ b/.github/workflows/smoke-codex.md @@ -22,6 +22,25 @@ sandbox: mcp: version: latest strict: false +jobs: + verify_token_usage: + needs: agent + if: always() && needs.agent.result != 'skipped' && needs.agent.result != 'cancelled' + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - name: Checkout repository + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + - name: Download agent artifact + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: agent + path: /tmp/gh-aw-agent + - name: Token-usage sanity check + run: node scripts/ci/check-token-usage.js --artifact-root /tmp/gh-aw-agent --engine codex imports: - shared/gh.md - shared/reporting.md diff --git a/.github/workflows/smoke-copilot.lock.yml b/.github/workflows/smoke-copilot.lock.yml index c174f40c..825e757d 100644 --- a/.github/workflows/smoke-copilot.lock.yml +++ 
b/.github/workflows/smoke-copilot.lock.yml @@ -1,4 +1,4 @@ -# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"faa79a615d43a6ef6fc6323ee7fa0da8d4ee0330fa26ff718fda20030e74488d","body_hash":"d02de9958e5f3cbf119d4d3b7bd2a3b84afec98bad520e813e0ce2c465973fea","compiler_version":"v0.79.6","agent_id":"copilot","engine_versions":{"copilot":"1.0.60"}} +# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"d1fb458bdf80ba419bd2b97870372b0db05bfd15a9ee08d6b5f7978e788c7e12","body_hash":"d02de9958e5f3cbf119d4d3b7bd2a3b84afec98bad520e813e0ce2c465973fea","compiler_version":"v0.79.6","agent_id":"copilot","engine_versions":{"copilot":"1.0.60"}} # gh-aw-manifest: {"version":1,"secrets":["GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"df4cb1c069e1874edd31b4311f1884172cec0e10","version":"v6.0.3"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9.0.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"},{"repo":"github/gh-aw-actions/setup","sha":"5c2fe865bb4dc46e1450f6ee0d0541d759aea73a","version":"v0.79.6"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2","digest":"sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6","pinned_image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2@sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2","digest":"sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4","pinned_image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2@sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2","digest":"sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591","pinned_image
":"ghcr.io/github/gh-aw-firewall/squid:0.27.2@sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.3.1","digest":"sha256:287fad0236959f3b3d9936ea1ef8d5b4f135ef2a5f5789713495cbbef191e60c","pinned_image":"ghcr.io/github/gh-aw-mcpg:v0.3.1@sha256:287fad0236959f3b3d9936ea1ef8d5b4f135ef2a5f5789713495cbbef191e60c"},{"image":"ghcr.io/github/github-mcp-server:v1.1.2","digest":"sha256:30197479d8036c7811892bc07e06f9a05c9ef3cdd79bc59f256d50647f95788c","pinned_image":"ghcr.io/github/github-mcp-server:v1.1.2@sha256:30197479d8036c7811892bc07e06f9a05c9ef3cdd79bc59f256d50647f95788c"}]} # ___ _ _ # / _ \ | | (_) @@ -1078,6 +1078,7 @@ jobs: - activation - agent - safe_outputs + - verify_token_usage if: > always() && (needs.agent.result != 'skipped' || needs.activation.outputs.lockdown_check_failed == 'true' || needs.activation.outputs.stale_lock_file_failed == 'true' || needs.activation.outputs.daily_effective_workflow_exceeded == 'true') @@ -1372,3 +1373,33 @@ jobs: /tmp/gh-aw/temporary-id-map.json if-no-files-found: ignore + verify_token_usage: + needs: agent + if: always() && needs.agent.result != 'skipped' && needs.agent.result != 'cancelled' + runs-on: ubuntu-latest + permissions: + contents: read + + steps: + - name: Configure GH_HOST for enterprise compatibility + id: ghes-host-config + shell: bash + # zizmor: ignore[github-env] - GITHUB_SERVER_URL is set by GitHub Actions, not user input. + run: | + # Derive GH_HOST from GITHUB_SERVER_URL so the gh CLI targets the correct + # GitHub instance (GHES/GHEC). On github.com this is a harmless no-op. 
+ GH_HOST="${GITHUB_SERVER_URL#https://}" + GH_HOST="${GH_HOST#http://}" + echo "GH_HOST=${GH_HOST}" >> "$GITHUB_ENV" + - name: Checkout repository + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + - name: Download agent artifact + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: agent + path: /tmp/gh-aw-agent + - name: Token-usage sanity check + run: node scripts/ci/check-token-usage.js --artifact-root /tmp/gh-aw-agent --engine copilot + diff --git a/.github/workflows/smoke-copilot.md b/.github/workflows/smoke-copilot.md index e01e4648..c1a8a95d 100644 --- a/.github/workflows/smoke-copilot.md +++ b/.github/workflows/smoke-copilot.md @@ -45,6 +45,25 @@ sandbox: mcp: version: v0.3.1 strict: false +jobs: + verify_token_usage: + needs: agent + if: always() && needs.agent.result != 'skipped' && needs.agent.result != 'cancelled' + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - name: Checkout repository + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + - name: Download agent artifact + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: agent + path: /tmp/gh-aw-agent + - name: Token-usage sanity check + run: node scripts/ci/check-token-usage.js --artifact-root /tmp/gh-aw-agent --engine copilot steps: - name: Pre-compute smoke test data id: smoke-data diff --git a/scripts/ci/check-token-usage.js b/scripts/ci/check-token-usage.js new file mode 100644 index 00000000..e11c17e2 --- /dev/null +++ b/scripts/ci/check-token-usage.js @@ -0,0 +1,344 @@ +#!/usr/bin/env node +/** + * Token-usage sanity checker for the smoke workflows. + * + * Runs after the agent job, against the downloaded `agent` artifact, and fails + * the workflow when the api-proxy token accounting looks wrong. 
/** Names of the token counters that are summed per record and compared against the aggregate. */
const TOKEN_FIELDS = ['input_tokens', 'output_tokens', 'cache_read_tokens', 'cache_write_tokens'];

/**
 * Parse JSONL text into an array of parsed values.
 *
 * Each line is trimmed (which also strips a trailing `\r` from CRLF files);
 * blank lines and lines that are not valid JSON are skipped rather than
 * reported, so a truncated final write does not abort the whole check.
 *
 * @param {string|Buffer|null|undefined} text - Raw JSONL content. `null` /
 *   `undefined` are treated as empty input; any other value is coerced with
 *   `String()` so a Buffer from `fs.readFileSync(path)` is accepted as well.
 * @returns {Array<*>} One entry per successfully parsed line, in file order.
 */
function parseJsonl(text) {
  if (text == null) return [];

  const records = [];
  for (const rawLine of String(text).split('\n')) {
    const line = rawLine.trim();
    if (!line) continue;
    try {
      records.push(JSON.parse(line));
    } catch {
      // Tolerate partial / non-JSON lines (e.g. truncated final write).
    }
  }
  return records;
}
/**
 * Sum the per-response token-usage records into a single aggregate.
 *
 * Entries that are not plain objects (null, primitives, arrays) and entries
 * whose `event` is set to something other than 'token_usage' are ignored.
 *
 * @param {Array<object>} records - Parsed token-usage records. A non-array
 *   value is treated as "no records".
 * @returns {{input_tokens: number, output_tokens: number, cache_read_tokens: number,
 *   cache_write_tokens: number, count: number, firstInputTokens: (number|null),
 *   lastAiCreditsTotal: (number|null)}} Per-field sums, the number of records
 *   counted, the `input_tokens` of the first counted record that has a numeric
 *   one, and the last finite `ai_credits_total` seen.
 */
function sumTokenUsage(records) {
  const totals = {
    input_tokens: 0,
    output_tokens: 0,
    cache_read_tokens: 0,
    cache_write_tokens: 0,
    count: 0,
    firstInputTokens: null,
    lastAiCreditsTotal: null,
  };

  for (const record of Array.isArray(records) ? records : []) {
    // `typeof [] === 'object'`, so arrays must be excluded explicitly: a stray
    // JSON array line is not a usage record and must not inflate `count`.
    if (record == null || typeof record !== 'object' || Array.isArray(record)) continue;
    // Only count actual usage records (defensive against mixed log streams).
    if (record.event && record.event !== 'token_usage') continue;

    totals.count += 1;
    for (const field of TOKEN_FIELDS) {
      const value = record[field];
      if (typeof value === 'number' && Number.isFinite(value)) {
        totals[field] += value;
      }
    }
    if (totals.firstInputTokens === null && typeof record.input_tokens === 'number') {
      totals.firstInputTokens = record.input_tokens;
    }
    if (typeof record.ai_credits_total === 'number' && Number.isFinite(record.ai_credits_total)) {
      totals.lastAiCreditsTotal = record.ai_credits_total;
    }
  }

  return totals;
}

/**
 * True when two AI-credit figures agree within rounding noise.
 *
 * The tolerance is the larger of 0.01 and 0.5% of `|b|`.
 *
 * @param {*} a - First figure.
 * @param {*} b - Second figure; the relative tolerance is derived from it.
 * @returns {boolean} `false` when either argument is not a number.
 */
function aiCreditsMatch(a, b) {
  if (typeof a !== 'number' || typeof b !== 'number') return false;
  const tolerance = Math.max(0.01, Math.abs(b) * 0.005);
  return Math.abs(a - b) <= tolerance;
}

/**
 * Evaluate both checks (internal consistency and the cache-read red flag).
 * Pure function: takes already-parsed inputs so it is trivially unit-testable.
 *
 * @param {object} params
 * @param {Array<object>} params.records - Parsed per-response usage records.
 * @param {object|null} params.aggregate - Parsed agent_usage summary, or null.
 * @param {number} [params.minRequests=2] - Record count from which a total
 *   `cache_read_tokens` of 0 is a failure instead of a warning. A value that
 *   does not coerce to a finite number falls back to 2.
 * @returns {{failures: string[], warnings: string[], summary: object}}
 *   `summary` is the result of `sumTokenUsage(records)`.
 */
function evaluateTokenUsage({ records, aggregate, minRequests = 2 }) {
  const failures = [];
  const warnings = [];
  const totals = sumTokenUsage(records);

  // A NaN threshold would make `count >= threshold` always false and silently
  // turn the hard failure of check 2 into a warning, so normalise it here.
  const requested = Number(minRequests);
  const threshold = Number.isFinite(requested) ? requested : 2;

  if (totals.count === 0) {
    failures.push(
      'No token-usage records found. The agent produced no model requests, ' +
        'or the api-proxy failed to record usage.',
    );
    return { failures, warnings, summary: totals };
  }

  // ── Check 1: internal consistency (per-response sum === aggregate) ──
  // An array is an object too, but it cannot be a usage summary.
  if (!aggregate || typeof aggregate !== 'object' || Array.isArray(aggregate)) {
    failures.push(
      'Aggregated agent_usage summary is missing or unreadable, so per-response ' +
        'totals cannot be verified.',
    );
  } else {
    for (const field of TOKEN_FIELDS) {
      const summed = totals[field];
      const reported = typeof aggregate[field] === 'number' ? aggregate[field] : undefined;
      if (reported === undefined) {
        failures.push(`agent_usage is missing "${field}" — cannot verify consistency.`);
        continue;
      }
      if (summed !== reported) {
        failures.push(
          `Inconsistent ${field}: token-usage.jsonl sums to ${summed} across ` +
            `${totals.count} responses, but agent_usage reports ${reported} ` +
            `(delta ${summed - reported}).`,
        );
      }
    }

    // ai_credits and ambient_context are derived figures: surface drift as a
    // warning rather than failing the build on float-rounding differences.
    if (typeof aggregate.ai_credits === 'number' && totals.lastAiCreditsTotal !== null) {
      if (!aiCreditsMatch(totals.lastAiCreditsTotal, aggregate.ai_credits)) {
        warnings.push(
          `ai_credits drift: last ai_credits_total is ${totals.lastAiCreditsTotal}, ` +
            `agent_usage reports ${aggregate.ai_credits}.`,
        );
      }
    }
    if (
      typeof aggregate.ambient_context === 'number' &&
      totals.firstInputTokens !== null &&
      aggregate.ambient_context !== totals.firstInputTokens
    ) {
      warnings.push(
        `ambient_context (${aggregate.ambient_context}) does not match the first ` +
          `response input_tokens (${totals.firstInputTokens}).`,
      );
    }
  }

  // ── Check 2: cache-read red flag ──
  if (totals.cache_read_tokens === 0) {
    if (totals.count >= threshold) {
      failures.push(
        `cache_read_tokens is 0 across ${totals.count} responses. A multi-request ` +
          'agentic run should report prompt-cache reads; zero almost always means ' +
          'the api-proxy dropped cached tokens (cf. issue #5203 / PR #5262).',
      );
    } else {
      warnings.push(
        `cache_read_tokens is 0, but only ${totals.count} response(s) were recorded ` +
          `(< ${threshold}); too short to assert prompt caching.`,
      );
    }
  }

  return { failures, warnings, summary: totals };
}

/**
 * Return the first path in `candidates` that exists on disk as a regular
 * file, else null. Falsy candidates and candidates whose lookup throws are
 * skipped.
 *
 * @param {Array<string|null|undefined>} candidates - Paths to probe, in priority order.
 * @returns {string|null}
 */
function firstExisting(candidates) {
  for (const candidate of candidates) {
    try {
      if (candidate && fs.existsSync(candidate) && fs.statSync(candidate).isFile()) {
        return candidate;
      }
    } catch {
      // ignore and keep looking
    }
  }
  return null;
}

/**
 * Find a file named `name` under `root`, descending at most `maxDepth`
 * directory levels below `root`. Directories that cannot be read are skipped.
 *
 * The walk uses an explicit stack, so when several files share the name the
 * one returned depends on directory listing order.
 *
 * @param {string} root - Directory to start from.
 * @param {string} name - Exact file name to look for.
 * @param {number} [maxDepth=6] - Maximum directory depth below `root`.
 * @returns {string|null} Path of the first match encountered, or null.
 */
function findFileRecursive(root, name, maxDepth = 6) {
  const stack = [{ dir: root, depth: 0 }];
  while (stack.length > 0) {
    const { dir, depth } = stack.pop();
    let entries;
    try {
      entries = fs.readdirSync(dir, { withFileTypes: true });
    } catch {
      continue;
    }
    for (const entry of entries) {
      const full = path.join(dir, entry.name);
      if (entry.isFile() && entry.name === name) return full;
      if (entry.isDirectory() && depth < maxDepth) {
        stack.push({ dir: full, depth: depth + 1 });
      }
    }
  }
  return null;
}
*/ +function locateUsageFiles(root, overrides = {}) { + const tokenUsage = + overrides.tokenUsage || + firstExisting([ + path.join(root, 'sandbox/firewall/logs/api-proxy-logs/token-usage.jsonl'), + path.join(root, 'sandbox/firewall/audit/api-proxy-logs/token-usage.jsonl'), + path.join(root, 'sandbox/firewall-audit-logs/api-proxy-logs/token-usage.jsonl'), + path.join(root, 'usage/agent/token_usage.jsonl'), + ]) || + findFileRecursive(root, 'token-usage.jsonl'); + + const agentUsage = + overrides.agentUsage || + firstExisting([ + path.join(root, 'agent_usage.json'), + path.join(root, 'agent_usage.jsonl'), + path.join(root, 'usage/agent_usage.json'), + path.join(root, 'usage/agent_usage.jsonl'), + ]) || + findFileRecursive(root, 'agent_usage.json') || + findFileRecursive(root, 'agent_usage.jsonl'); + + return { tokenUsage, agentUsage }; +} + +function parseArgs(argv) { + const options = { artifactRoot: '/tmp/gh-aw', engine: 'unknown', minRequests: 2 }; + for (let i = 0; i < argv.length; i += 1) { + const arg = argv[i]; + const next = () => argv[(i += 1)]; + switch (arg) { + case '--artifact-root': + options.artifactRoot = next(); + break; + case '--engine': + options.engine = next(); + break; + case '--token-usage': + options.tokenUsage = next(); + break; + case '--agent-usage': + options.agentUsage = next(); + break; + case '--min-requests': + options.minRequests = parseInt(next(), 10) || 2; + break; + default: + break; + } + } + return options; +} + +function main(argv) { + const options = parseArgs(argv); + const { tokenUsage, agentUsage } = locateUsageFiles(options.artifactRoot, { + tokenUsage: options.tokenUsage, + agentUsage: options.agentUsage, + }); + + console.log(`Token-usage sanity check (engine: ${options.engine})`); + console.log(` artifact root: ${options.artifactRoot}`); + console.log(` token-usage.jsonl: ${tokenUsage || '(not found)'}`); + console.log(` agent_usage.json: ${agentUsage || '(not found)'}`); + + if (!tokenUsage) { + console.error( + 
'::error::Could not locate token-usage.jsonl in the agent artifact. ' + + 'The api-proxy did not record token usage.', + ); + return 1; + } + + const records = parseJsonl(fs.readFileSync(tokenUsage, 'utf8')); + let aggregate = null; + if (agentUsage) { + const text = fs.readFileSync(agentUsage, 'utf8').trim(); + // agent_usage may be a pretty-printed JSON object, a single-line JSON + // object, or a JSONL file. Try JSON.parse() first so that multi-line + // pretty-printed files are handled correctly, then fall back to JSONL. + try { + aggregate = JSON.parse(text); + } catch { + const parsed = parseJsonl(text); + aggregate = parsed.length > 0 ? parsed[parsed.length - 1] : null; + } + } + + const { failures, warnings, summary } = evaluateTokenUsage({ + records, + aggregate, + minRequests: options.minRequests, + }); + + console.log( + ` totals: responses=${summary.count} input=${summary.input_tokens} ` + + `output=${summary.output_tokens} cache_read=${summary.cache_read_tokens} ` + + `cache_write=${summary.cache_write_tokens}`, + ); + + for (const warning of warnings) { + console.log(`::warning::${warning}`); + } + for (const failure of failures) { + console.error(`::error::${failure}`); + } + + if (failures.length > 0) { + console.error(`Token-usage sanity check FAILED with ${failures.length} error(s).`); + return 1; + } + console.log('Token-usage sanity check passed.'); + return 0; +} + +if (require.main === module) { + process.exit(main(process.argv.slice(2))); +} + +module.exports = { + parseJsonl, + sumTokenUsage, + aiCreditsMatch, + evaluateTokenUsage, + firstExisting, + findFileRecursive, + locateUsageFiles, + parseArgs, + main, +}; diff --git a/scripts/ci/check-token-usage.test.ts b/scripts/ci/check-token-usage.test.ts new file mode 100644 index 00000000..e0077195 --- /dev/null +++ b/scripts/ci/check-token-usage.test.ts @@ -0,0 +1,262 @@ +import * as path from 'path'; +import * as fs from 'fs'; +import * as os from 'os'; + +// The checker is intentionally 
// zero-dependency CommonJS so the CI job can run it
// with bare `node`; require it directly here for unit testing.
// eslint-disable-next-line @typescript-eslint/no-var-requires
const checker = require('./check-token-usage.js');

const {
  parseJsonl,
  sumTokenUsage,
  aiCreditsMatch,
  evaluateTokenUsage,
  findFileRecursive,
  locateUsageFiles,
  parseArgs,
  main,
} = checker;

/**
 * Build a per-response token-usage record with sensible defaults.
 *
 * Every numeric field starts at 0 and `event` is 'token_usage'; any key in
 * `overrides` replaces the corresponding default (later spread wins).
 *
 * @param overrides - Fields to set on top of the zeroed defaults.
 * @returns A plain record object suitable for the checker functions under test.
 */
function record(overrides: Record<string, unknown> = {}) {
  return {
    event: 'token_usage',
    input_tokens: 0,
    output_tokens: 0,
    cache_read_tokens: 0,
    cache_write_tokens: 0,
    ai_credits_total: 0,
    ...overrides,
  };
}

describe('parseJsonl', () => {
  it('parses well-formed lines and skips blanks / malformed lines', () => {
    // Mix of valid JSON, an empty line, a whitespace-only line and garbage.
    const text = '{"a":1}\n\n \nnot json\n{"b":2}\n';
    expect(parseJsonl(text)).toEqual([{ a: 1 }, { b: 2 }]);
  });

  it('returns an empty array for empty input', () => {
    expect(parseJsonl('')).toEqual([]);
  });
});

describe('sumTokenUsage', () => {
  it('sums token fields and captures first input / last credits', () => {
    const totals = sumTokenUsage([
      record({ input_tokens: 100, output_tokens: 10, cache_read_tokens: 0, ai_credits_total: 1.5 }),
      record({ input_tokens: 200, output_tokens: 20, cache_read_tokens: 150, ai_credits_total: 3.0 }),
    ]);
    expect(totals.input_tokens).toBe(300);
    expect(totals.output_tokens).toBe(30);
    expect(totals.cache_read_tokens).toBe(150);
    expect(totals.count).toBe(2);
    expect(totals.firstInputTokens).toBe(100);
    expect(totals.lastAiCreditsTotal).toBe(3.0);
  });

  it('ignores non-usage records in a mixed stream', () => {
    const totals = sumTokenUsage([
      record({ input_tokens: 100 }),
      // Different `event` value: must not contribute to the totals or the count.
      { event: 'something_else', input_tokens: 9999 },
    ]);
    expect(totals.input_tokens).toBe(100);
    expect(totals.count).toBe(1);
  });
});

describe('aiCreditsMatch', () => {
  it('accepts values within rounding tolerance', () => {
    expect(aiCreditsMatch(28.632, 28.632)).toBe(true);
    // Values differ by 0.003.
    expect(aiCreditsMatch(417.082, 417.085)).toBe(true);
  });

  it('rejects clearly different values', () => {
    expect(aiCreditsMatch(28.632, 30.0)).toBe(false);
  });
});

describe('evaluateTokenUsage — internal consistency', () => {
  it('passes when per-response sums equal the aggregate and cache_read > 0', () => {
    const records = [
      record({ input_tokens: 13663, output_tokens: 378, cache_read_tokens: 0, ai_credits_total: 1.2 }),
      record({ input_tokens: 16601, output_tokens: 124, cache_read_tokens: 10752, ai_credits_total: 4.3 }),
    ];
    const aggregate = {
      input_tokens: 30264, // 13663 + 16601
      output_tokens: 502, // 378 + 124
      cache_read_tokens: 10752,
      cache_write_tokens: 0,
      ambient_context: 13663, // equals the first record's input_tokens
      ai_credits: 4.3, // equals the last record's ai_credits_total
    };
    const { failures, warnings } = evaluateTokenUsage({ records, aggregate });
    expect(failures).toEqual([]);
    expect(warnings).toEqual([]);
  });

  it('fails when the aggregate disagrees with the per-response sum', () => {
    const records = [record({ input_tokens: 100, output_tokens: 10, cache_read_tokens: 50 })];
    const aggregate = {
      input_tokens: 999, // wrong
      output_tokens: 10,
      cache_read_tokens: 50,
      cache_write_tokens: 0,
    };
    const { failures } = evaluateTokenUsage({ records, aggregate, minRequests: 1 });
    expect(failures.some((f: string) => f.includes('Inconsistent input_tokens'))).toBe(true);
  });

  it('fails when the aggregate is missing entirely', () => {
    const records = [record({ input_tokens: 100, cache_read_tokens: 50 })];
    const { failures } = evaluateTokenUsage({ records, aggregate: null, minRequests: 1 });
    expect(failures.some((f: string) => f.includes('Aggregated agent_usage'))).toBe(true);
  });

  it('warns (does not fail) on ai_credits / ambient_context drift', () => {
    const records = [
      record({ input_tokens: 100, output_tokens: 10, cache_read_tokens: 50, ai_credits_total: 2.0 }),
      record({ input_tokens: 100, output_tokens: 10, cache_read_tokens: 50, ai_credits_total: 5.0 }),
    ];
    const aggregate = {
      input_tokens: 200,
      output_tokens: 20,
      cache_read_tokens: 100,
      cache_write_tokens: 0,
      ambient_context: 999, // mismatch -> warning
      ai_credits: 42.0, // mismatch -> warning
    };
    const { failures, warnings } = evaluateTokenUsage({ records, aggregate });
    expect(failures).toEqual([]);
    expect(warnings.some((w: string) => w.includes('ai_credits drift'))).toBe(true);
    expect(warnings.some((w: string) => w.includes('ambient_context'))).toBe(true);
  });
});

describe('evaluateTokenUsage — cache-read red flag', () => {
  it('hard-fails when cache_read is 0 across multiple responses (the bug)', () => {
    // Mirrors gh-aw codex run 27784259295/27784201719: consistent totals, zero cache reads.
    const records = [
      record({ input_tokens: 13663, output_tokens: 378 }),
      record({ input_tokens: 26000, output_tokens: 200 }),
    ];
    const aggregate = {
      input_tokens: 39663, // 13663 + 26000
      output_tokens: 578, // 378 + 200
      cache_read_tokens: 0,
      cache_write_tokens: 0,
    };
    const { failures } = evaluateTokenUsage({ records, aggregate });
    expect(failures.some((f: string) => f.includes('cache_read_tokens is 0'))).toBe(true);
  });

  it('only warns about cache_read==0 when below the min-requests threshold', () => {
    // A single record with minRequests: 2 keeps the run under the threshold.
    const records = [record({ input_tokens: 100, output_tokens: 10, cache_read_tokens: 0 })];
    const aggregate = {
      input_tokens: 100,
      output_tokens: 10,
      cache_read_tokens: 0,
      cache_write_tokens: 0,
    };
    const { failures, warnings } = evaluateTokenUsage({ records, aggregate, minRequests: 2 });
    expect(failures).toEqual([]);
    expect(warnings.some((w: string) => w.includes('too short to assert'))).toBe(true);
  });

  it('fails when there are no usage records at all', () => {
    const { failures } = evaluateTokenUsage({ records: [], aggregate: null });
    expect(failures.some((f: string) => f.includes('No token-usage records'))).toBe(true);
  });
});

describe('locateUsageFiles', () => {
  it('honors explicit overrides without touching the filesystem', () => {
    const { tokenUsage, agentUsage } = locateUsageFiles('/nonexistent', {
      tokenUsage: '/x/token-usage.jsonl',
      agentUsage: '/x/agent_usage.json',
    });
    expect(tokenUsage).toBe('/x/token-usage.jsonl');
    expect(agentUsage).toBe('/x/agent_usage.json');
  });

  it('yields nulls instead of throwing when the artifact tree is missing', () => {
    // The codex artifact downloaded during development is not present in CI, so
    // path handling is only exercised via the overrides case above; here we
    // simply confirm a missing tree yields nulls rather than throwing.
    const { tokenUsage, agentUsage } = locateUsageFiles(path.join('/tmp', 'definitely-missing-xyz'));
    expect(tokenUsage).toBeNull();
    expect(agentUsage).toBeNull();
  });
});

describe('parseArgs', () => {
  it('parses flags with sensible defaults', () => {
    const opts = parseArgs(['--artifact-root', '/tmp/x', '--engine', 'copilot', '--min-requests', '5']);
    expect(opts.artifactRoot).toBe('/tmp/x');
    expect(opts.engine).toBe('copilot');
    // The flag value arrives as the string '5' and is expected back as a number.
    expect(opts.minRequests).toBe(5);
  });

  it('defaults min-requests to 2 and engine to unknown', () => {
    const opts = parseArgs([]);
    expect(opts.minRequests).toBe(2);
    expect(opts.engine).toBe('unknown');
    expect(opts.artifactRoot).toBe('/tmp/gh-aw');
  });
});

describe('findFileRecursive', () => {
  it('finds agent_usage.jsonl nested under a subdirectory', () => {
    const root = fs.mkdtempSync(path.join(os.tmpdir(), 'ck-test-'));
    try {
      const sub = path.join(root, 'deep', 'subdir');
      fs.mkdirSync(sub, { recursive: true });
      const target = path.join(sub, 'agent_usage.jsonl');
      fs.writeFileSync(target, '{"input_tokens":1}\n');
      expect(findFileRecursive(root, 'agent_usage.jsonl')).toBe(target);
    } finally {
      // Always remove the temp tree, even when the assertion above fails.
      fs.rmSync(root, { recursive: true, force: true });
    }
  });

  it('returns null when the file is absent', () => {
    expect(findFileRecursive('/nonexistent-xyz', 'agent_usage.jsonl')).toBeNull();
  });
});

describe('main — pretty-printed agent_usage.json', () => {
  it('parses a multi-line pretty-printed JSON aggregate without error', () => {
    const root = fs.mkdtempSync(path.join(os.tmpdir(), 'ck-test-'));
    try {
      const logsDir = path.join(root, 'sandbox', 'firewall', 'audit', 'api-proxy-logs');
      fs.mkdirSync(logsDir, { recursive: true });

      // Write one matching token-usage record.
      fs.writeFileSync(
        path.join(logsDir, 'token-usage.jsonl'),
        JSON.stringify({
          event: 'token_usage',
          input_tokens: 100,
          output_tokens: 10,
          cache_read_tokens: 50,
          cache_write_tokens: 5,
        }) + '\n',
      );

      // Write the aggregate as pretty-printed JSON (multi-line).
      const aggregate = {
        input_tokens: 100,
        output_tokens: 10,
        cache_read_tokens: 50,
        cache_write_tokens: 5,
      };
      fs.writeFileSync(path.join(root, 'agent_usage.json'), JSON.stringify(aggregate, null, 2));

      // --min-requests 1 because only a single usage record was written above.
      const exitCode = main(['--artifact-root', root, '--engine', 'test', '--min-requests', '1']);
      expect(exitCode).toBe(0);
    } finally {
      // Always remove the temp tree, even when an assertion above fails.
      fs.rmSync(root, { recursive: true, force: true });
    }
  });
});