diff --git a/.github/workflows/smoke-claude.lock.yml b/.github/workflows/smoke-claude.lock.yml
index a4490b81..84c3b258 100644
--- a/.github/workflows/smoke-claude.lock.yml
+++ b/.github/workflows/smoke-claude.lock.yml
@@ -1,4 +1,4 @@
-# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"a08c320bc1b492fd4827ebbdc1fff37ca664404dc3f30ca87b38733896486989","body_hash":"6e05820005e43b82d8112bc60ced8e13336596ae671ecac69e6c5ac691485b71","compiler_version":"v0.79.8","agent_id":"claude","agent_model":"claude-haiku-4-5","engine_versions":{"claude":"2.1.168"}}
+# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"1931d05a82aa65b2b1d5af50c9dcde1453044c61ac1c0718031eb2eca5c6b046","body_hash":"6e05820005e43b82d8112bc60ced8e13336596ae671ecac69e6c5ac691485b71","compiler_version":"v0.79.8","agent_id":"claude","agent_model":"claude-haiku-4-5","engine_versions":{"claude":"2.1.168"}}
# gh-aw-manifest: {"version":1,"secrets":["ANTHROPIC_API_KEY","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"df4cb1c069e1874edd31b4311f1884172cec0e10","version":"v6.0.3"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9.0.0"},{"repo":"actions/setup-node","sha":"48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e","version":"v6.4.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"},{"repo":"github/gh-aw-actions/setup","sha":"c0338fef4749d08c21f8f975fb0e37efa17dda47","version":"v0.79.8"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2","digest":"sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6","pinned_image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2@sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2","digest":"sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4","pinned_image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2@sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2","digest":"sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591","pinned_image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2@sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.3.1","digest":"sha256:287fad0236959f3b3d9936ea1ef8d5b4f135ef2a5f5789713495cbbef191e60c","pinned_image":"ghcr.io/github/gh-aw-mcpg:v0.3.1@sha256:287fad0236959f3b3d9936ea1ef8d5b4f135ef2a5f5789713495cbbef191e60c"}]}
# This file was automatically generated by gh-aw (v0.79.8). DO NOT EDIT. To debug this workflow, load the skill at https://github.com/github/gh-aw/blob/main/debug.md
#
@@ -995,6 +995,7 @@ jobs:
- activation
- agent
- safe_outputs
+ - verify_token_usage
if: >
always() && (needs.agent.result != 'skipped' || needs.activation.outputs.lockdown_check_failed == 'true' ||
needs.activation.outputs.stale_lock_file_failed == 'true' || needs.activation.outputs.daily_ai_credits_exceeded == 'true')
@@ -1289,3 +1290,33 @@ jobs:
/tmp/gh-aw/temporary-id-map.json
if-no-files-found: ignore
+ verify_token_usage:
+ needs: agent
+ if: always() && needs.agent.result != 'skipped' && needs.agent.result != 'cancelled'
+ runs-on: ubuntu-latest
+ permissions:
+ contents: read
+
+ steps:
+ - name: Configure GH_HOST for enterprise compatibility
+ id: ghes-host-config
+ shell: bash
+ # zizmor: ignore[github-env] - GITHUB_SERVER_URL is set by GitHub Actions, not user input.
+ run: |
+ # Derive GH_HOST from GITHUB_SERVER_URL so the gh CLI targets the correct
+ # GitHub instance (GHES/GHEC). On github.com this is a harmless no-op.
+ GH_HOST="${GITHUB_SERVER_URL#https://}"
+ GH_HOST="${GH_HOST#http://}"
+ echo "GH_HOST=${GH_HOST}" >> "$GITHUB_ENV"
+ - name: Checkout repository
+ uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+ with:
+ persist-credentials: false
+ - name: Download agent artifact
+ uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
+ with:
+ name: agent
+ path: /tmp/gh-aw-agent
+ - name: Token-usage sanity check
+ run: node scripts/ci/check-token-usage.js --artifact-root /tmp/gh-aw-agent --engine claude
+
diff --git a/.github/workflows/smoke-claude.md b/.github/workflows/smoke-claude.md
index 7bb75e23..43f3eb94 100644
--- a/.github/workflows/smoke-claude.md
+++ b/.github/workflows/smoke-claude.md
@@ -23,6 +23,25 @@ sandbox:
mcp:
version: v0.3.1
strict: false
+jobs:
+ verify_token_usage:
+ needs: agent
+ if: always() && needs.agent.result != 'skipped' && needs.agent.result != 'cancelled'
+ runs-on: ubuntu-latest
+ permissions:
+ contents: read
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+ with:
+ persist-credentials: false
+ - name: Download agent artifact
+ uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
+ with:
+ name: agent
+ path: /tmp/gh-aw-agent
+ - name: Token-usage sanity check
+ run: node scripts/ci/check-token-usage.js --artifact-root /tmp/gh-aw-agent --engine claude
tools:
bash:
- bash
diff --git a/.github/workflows/smoke-codex.lock.yml b/.github/workflows/smoke-codex.lock.yml
index 902b00f0..def5e2fc 100644
--- a/.github/workflows/smoke-codex.lock.yml
+++ b/.github/workflows/smoke-codex.lock.yml
@@ -1,4 +1,4 @@
-# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"c0467bdd469d554b0261f696ece56b95cd24d5eb263b2767d976f5db7af45a50","body_hash":"988c8ab731a331e33e5751aa46982c3ee5adc6dbefb2b18236d88854467c3d2b","compiler_version":"v0.79.6","agent_id":"codex","agent_model":"gpt-5.4","engine_versions":{"codex":"0.137.0"}}
+# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"9971a0badaf621a6194316298030053110c15cc5683e20fc23b1c245fbb42bd8","body_hash":"988c8ab731a331e33e5751aa46982c3ee5adc6dbefb2b18236d88854467c3d2b","compiler_version":"v0.79.6","agent_id":"codex","agent_model":"gpt-5.4","engine_versions":{"codex":"0.137.0"}}
# gh-aw-manifest: {"version":1,"secrets":["CODEX_API_KEY","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN","OPENAI_API_KEY"],"actions":[{"repo":"actions/cache","sha":"27d5ce7f107fe9357f9df03efb73ab90386fccae","version":"v5.0.5"},{"repo":"actions/checkout","sha":"df4cb1c069e1874edd31b4311f1884172cec0e10","version":"v6.0.3"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9.0.0"},{"repo":"actions/setup-node","sha":"48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e","version":"v6.4.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"},{"repo":"github/gh-aw-actions/setup","sha":"5c2fe865bb4dc46e1450f6ee0d0541d759aea73a","version":"v0.79.6"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2","digest":"sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6","pinned_image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2@sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2","digest":"sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4","pinned_image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2@sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4"},{"image":"ghcr.io/github/gh-aw-firewall/cli-proxy:0.27.2","digest":"sha256:02f3ec08f32dc26c5427920c6a2e2f3036238fce44802f2f11ef49ed8621b5d0","pinned_image":"ghcr.io/github/gh-aw-firewall/cli-proxy:0.27.2@sha256:02f3ec08f32dc26c5427920c6a2e2f3036238fce44802f2f11ef49ed8621b5d0"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2","digest":"sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591","pinned_image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2@sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591"},{"image":"ghcr.io/github/gh-aw-
mcpg:latest","digest":"sha256:c10331ad17668ef89f38f5e356678788a40b0cd5fef96e8f92e1d9c1de47cbaa","pinned_image":"ghcr.io/github/gh-aw-mcpg:latest@sha256:c10331ad17668ef89f38f5e356678788a40b0cd5fef96e8f92e1d9c1de47cbaa"},{"image":"ghcr.io/github/github-mcp-server:v1.1.2","digest":"sha256:30197479d8036c7811892bc07e06f9a05c9ef3cdd79bc59f256d50647f95788c","pinned_image":"ghcr.io/github/github-mcp-server:v1.1.2@sha256:30197479d8036c7811892bc07e06f9a05c9ef3cdd79bc59f256d50647f95788c"},{"image":"mcr.microsoft.com/playwright/mcp","digest":"sha256:7b82f29c6ef83480a97f612d53ac3fd5f30a32df3fea1e06923d4204d3532bb2","pinned_image":"mcr.microsoft.com/playwright/mcp@sha256:7b82f29c6ef83480a97f612d53ac3fd5f30a32df3fea1e06923d4204d3532bb2"}]}
# ___ _ _
# / _ \ | | (_)
@@ -1283,6 +1283,7 @@ jobs:
- activation
- agent
- safe_outputs
+ - verify_token_usage
if: >
always() && (needs.agent.result != 'skipped' || needs.activation.outputs.lockdown_check_failed == 'true' ||
needs.activation.outputs.stale_lock_file_failed == 'true' || needs.activation.outputs.daily_effective_workflow_exceeded == 'true')
@@ -1580,3 +1581,33 @@ jobs:
/tmp/gh-aw/temporary-id-map.json
if-no-files-found: ignore
+ verify_token_usage:
+ needs: agent
+ if: always() && needs.agent.result != 'skipped' && needs.agent.result != 'cancelled'
+ runs-on: ubuntu-latest
+ permissions:
+ contents: read
+
+ steps:
+ - name: Configure GH_HOST for enterprise compatibility
+ id: ghes-host-config
+ shell: bash
+ # zizmor: ignore[github-env] - GITHUB_SERVER_URL is set by GitHub Actions, not user input.
+ run: |
+ # Derive GH_HOST from GITHUB_SERVER_URL so the gh CLI targets the correct
+ # GitHub instance (GHES/GHEC). On github.com this is a harmless no-op.
+ GH_HOST="${GITHUB_SERVER_URL#https://}"
+ GH_HOST="${GH_HOST#http://}"
+ echo "GH_HOST=${GH_HOST}" >> "$GITHUB_ENV"
+ - name: Checkout repository
+ uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+ with:
+ persist-credentials: false
+ - name: Download agent artifact
+ uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
+ with:
+ name: agent
+ path: /tmp/gh-aw-agent
+ - name: Token-usage sanity check
+ run: node scripts/ci/check-token-usage.js --artifact-root /tmp/gh-aw-agent --engine codex
+
diff --git a/.github/workflows/smoke-codex.md b/.github/workflows/smoke-codex.md
index 85a2e32e..2a3d7966 100644
--- a/.github/workflows/smoke-codex.md
+++ b/.github/workflows/smoke-codex.md
@@ -22,6 +22,25 @@ sandbox:
mcp:
version: latest
strict: false
+jobs:
+ verify_token_usage:
+ needs: agent
+ if: always() && needs.agent.result != 'skipped' && needs.agent.result != 'cancelled'
+ runs-on: ubuntu-latest
+ permissions:
+ contents: read
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+ with:
+ persist-credentials: false
+ - name: Download agent artifact
+ uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
+ with:
+ name: agent
+ path: /tmp/gh-aw-agent
+ - name: Token-usage sanity check
+ run: node scripts/ci/check-token-usage.js --artifact-root /tmp/gh-aw-agent --engine codex
imports:
- shared/gh.md
- shared/reporting.md
diff --git a/.github/workflows/smoke-copilot.lock.yml b/.github/workflows/smoke-copilot.lock.yml
index c174f40c..825e757d 100644
--- a/.github/workflows/smoke-copilot.lock.yml
+++ b/.github/workflows/smoke-copilot.lock.yml
@@ -1,4 +1,4 @@
-# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"faa79a615d43a6ef6fc6323ee7fa0da8d4ee0330fa26ff718fda20030e74488d","body_hash":"d02de9958e5f3cbf119d4d3b7bd2a3b84afec98bad520e813e0ce2c465973fea","compiler_version":"v0.79.6","agent_id":"copilot","engine_versions":{"copilot":"1.0.60"}}
+# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"d1fb458bdf80ba419bd2b97870372b0db05bfd15a9ee08d6b5f7978e788c7e12","body_hash":"d02de9958e5f3cbf119d4d3b7bd2a3b84afec98bad520e813e0ce2c465973fea","compiler_version":"v0.79.6","agent_id":"copilot","engine_versions":{"copilot":"1.0.60"}}
# gh-aw-manifest: {"version":1,"secrets":["GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"df4cb1c069e1874edd31b4311f1884172cec0e10","version":"v6.0.3"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9.0.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"},{"repo":"github/gh-aw-actions/setup","sha":"5c2fe865bb4dc46e1450f6ee0d0541d759aea73a","version":"v0.79.6"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2","digest":"sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6","pinned_image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2@sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2","digest":"sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4","pinned_image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2@sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2","digest":"sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591","pinned_image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2@sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.3.1","digest":"sha256:287fad0236959f3b3d9936ea1ef8d5b4f135ef2a5f5789713495cbbef191e60c","pinned_image":"ghcr.io/github/gh-aw-mcpg:v0.3.1@sha256:287fad0236959f3b3d9936ea1ef8d5b4f135ef2a5f5789713495cbbef191e60c"},{"image":"ghcr.io/github/github-mcp-server:v1.1.2","digest":"sha256:30197479d8036c7811892bc07e06f9a05c9ef3cdd79bc59f256d50647f95788c","pinned_image":"ghcr.io/github/github-mcp-server:v1.1.2@sha256:30197479d8036c7811892bc07e06f9a05c9ef3cdd79bc59f256d50647f95788c"}]}
# ___ _ _
# / _ \ | | (_)
@@ -1078,6 +1078,7 @@ jobs:
- activation
- agent
- safe_outputs
+ - verify_token_usage
if: >
always() && (needs.agent.result != 'skipped' || needs.activation.outputs.lockdown_check_failed == 'true' ||
needs.activation.outputs.stale_lock_file_failed == 'true' || needs.activation.outputs.daily_effective_workflow_exceeded == 'true')
@@ -1372,3 +1373,33 @@ jobs:
/tmp/gh-aw/temporary-id-map.json
if-no-files-found: ignore
+ verify_token_usage:
+ needs: agent
+ if: always() && needs.agent.result != 'skipped' && needs.agent.result != 'cancelled'
+ runs-on: ubuntu-latest
+ permissions:
+ contents: read
+
+ steps:
+ - name: Configure GH_HOST for enterprise compatibility
+ id: ghes-host-config
+ shell: bash
+ # zizmor: ignore[github-env] - GITHUB_SERVER_URL is set by GitHub Actions, not user input.
+ run: |
+ # Derive GH_HOST from GITHUB_SERVER_URL so the gh CLI targets the correct
+ # GitHub instance (GHES/GHEC). On github.com this is a harmless no-op.
+ GH_HOST="${GITHUB_SERVER_URL#https://}"
+ GH_HOST="${GH_HOST#http://}"
+ echo "GH_HOST=${GH_HOST}" >> "$GITHUB_ENV"
+ - name: Checkout repository
+ uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+ with:
+ persist-credentials: false
+ - name: Download agent artifact
+ uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
+ with:
+ name: agent
+ path: /tmp/gh-aw-agent
+ - name: Token-usage sanity check
+ run: node scripts/ci/check-token-usage.js --artifact-root /tmp/gh-aw-agent --engine copilot
+
diff --git a/.github/workflows/smoke-copilot.md b/.github/workflows/smoke-copilot.md
index e01e4648..c1a8a95d 100644
--- a/.github/workflows/smoke-copilot.md
+++ b/.github/workflows/smoke-copilot.md
@@ -45,6 +45,25 @@ sandbox:
mcp:
version: v0.3.1
strict: false
+jobs:
+ verify_token_usage:
+ needs: agent
+ if: always() && needs.agent.result != 'skipped' && needs.agent.result != 'cancelled'
+ runs-on: ubuntu-latest
+ permissions:
+ contents: read
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+ with:
+ persist-credentials: false
+ - name: Download agent artifact
+ uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
+ with:
+ name: agent
+ path: /tmp/gh-aw-agent
+ - name: Token-usage sanity check
+ run: node scripts/ci/check-token-usage.js --artifact-root /tmp/gh-aw-agent --engine copilot
steps:
- name: Pre-compute smoke test data
id: smoke-data
diff --git a/scripts/ci/check-token-usage.js b/scripts/ci/check-token-usage.js
new file mode 100644
index 00000000..e11c17e2
--- /dev/null
+++ b/scripts/ci/check-token-usage.js
@@ -0,0 +1,344 @@
+#!/usr/bin/env node
+/**
+ * Token-usage sanity checker for the smoke workflows.
+ *
+ * Runs after the agent job, against the downloaded `agent` artifact, and fails
+ * the workflow when the api-proxy token accounting looks wrong. Two independent
+ * checks are performed, both engine-independent:
+ *
+ * 1. Internal consistency — the per-response records in
+ * `token-usage.jsonl` (written by the api-proxy) must sum exactly to the
+ * aggregated `agent_usage.json` summary that gh-aw derives from them. Any
+ * drift means a record was dropped, double-counted, or mis-aggregated.
+ *
+ * 2. Cache-read red flag — a real multi-request agentic run re-sends a
+ * growing context every turn, so the provider reports prompt-cache reads.
+ * A total `cache_read_tokens` of 0 across multiple requests indicates the
+ * api-proxy silently dropped cached tokens (the class of bug fixed in
+ * PR #5262 / issue #5203), so it is treated as a hard failure.
+ *
+ * The checker is intentionally zero-dependency CommonJS so the CI job only
+ * needs `node` plus the downloaded artifact — no `npm ci` / `tsx`.
+ *
+ * Usage:
+ * node scripts/ci/check-token-usage.js --artifact-root /tmp/gh-aw --engine copilot
+ *
+ * Flags:
+ * --artifact-root Root of the downloaded agent artifact (default: /tmp/gh-aw)
+ * --engine Engine id, for diagnostics only (copilot|claude|codex)
+ * --token-usage Explicit path to the per-response token-usage.jsonl
+ * --agent-usage Explicit path to the aggregated agent_usage.json
+ * --min-requests Minimum record count before cache_read==0 is fatal (default: 2)
+ */
+
+'use strict';
+
+const fs = require('fs');
+const path = require('path');
+
+// Token counters that are summed per response and compared field-by-field
+// against the aggregated agent_usage summary.
+const TOKEN_FIELDS = ['input_tokens', 'output_tokens', 'cache_read_tokens', 'cache_write_tokens'];
+
+/**
+ * Decode JSONL text into an array of parsed values, one per non-blank line.
+ * Lines that are empty after trimming, or that are not valid JSON, are dropped.
+ */
+function parseJsonl(text) {
+  return text
+    .split('\n')
+    .map((rawLine) => rawLine.trim())
+    .filter((line) => line.length > 0)
+    .reduce((parsed, line) => {
+      try {
+        parsed.push(JSON.parse(line));
+      } catch {
+        // Best-effort: a partial / non-JSON line (e.g. truncated final write) is skipped.
+      }
+      return parsed;
+    }, []);
+}
+
+/**
+ * Fold the per-response token-usage records into one aggregate.
+ *
+ * Returns the four TOKEN_FIELDS sums plus:
+ *   - count:              number of records that were counted
+ *   - firstInputTokens:   input_tokens of the first counted record that has a numeric value
+ *   - lastAiCreditsTotal: ai_credits_total of the last counted record that has a finite value
+ */
+function sumTokenUsage(records) {
+  const isFiniteNumber = (value) => typeof value === 'number' && Number.isFinite(value);
+
+  const totals = {
+    input_tokens: 0,
+    output_tokens: 0,
+    cache_read_tokens: 0,
+    cache_write_tokens: 0,
+    count: 0,
+    firstInputTokens: null,
+    lastAiCreditsTotal: null,
+  };
+
+  for (const entry of records) {
+    // Skip non-objects, and anything tagged with a different event type
+    // (defensive against mixed log streams). Untagged objects are counted.
+    const isUsageRecord =
+      entry != null &&
+      typeof entry === 'object' &&
+      (!entry.event || entry.event === 'token_usage');
+    if (!isUsageRecord) continue;
+
+    totals.count += 1;
+
+    for (const field of TOKEN_FIELDS) {
+      const amount = entry[field];
+      if (isFiniteNumber(amount)) {
+        totals[field] += amount;
+      }
+    }
+
+    const needsFirstInput = totals.firstInputTokens === null;
+    if (needsFirstInput && typeof entry.input_tokens === 'number') {
+      totals.firstInputTokens = entry.input_tokens;
+    }
+
+    // Overwritten on every qualifying record, so the last one wins.
+    if (isFiniteNumber(entry.ai_credits_total)) {
+      totals.lastAiCreditsTotal = entry.ai_credits_total;
+    }
+  }
+
+  return totals;
+}
+
+/**
+ * Decide whether two AI-credit figures agree within rounding noise.
+ * The allowed drift is 0.5% of |b|, but never less than 0.01.
+ * Returns false when either argument is not a number.
+ */
+function aiCreditsMatch(a, b) {
+  const bothNumeric = typeof a === 'number' && typeof b === 'number';
+  if (!bothNumeric) {
+    return false;
+  }
+  const allowedDrift = Math.max(0.01, Math.abs(b) * 0.005);
+  const actualDrift = Math.abs(a - b);
+  return actualDrift <= allowedDrift;
+}
+
+/**
+ * Check 1 — internal consistency: every TOKEN_FIELDS sum in `totals` must equal
+ * the value reported by `aggregate`. Mismatches go to `failures`; drift in the
+ * derived ai_credits / ambient_context figures goes to `warnings` only.
+ */
+function collectConsistencyFindings(totals, aggregate, failures, warnings) {
+  if (!aggregate || typeof aggregate !== 'object') {
+    failures.push(
+      'Aggregated agent_usage summary is missing or unreadable, so per-response ' +
+        'totals cannot be verified.',
+    );
+    return;
+  }
+
+  for (const field of TOKEN_FIELDS) {
+    const reported = aggregate[field];
+    if (typeof reported !== 'number') {
+      failures.push(`agent_usage is missing "${field}" — cannot verify consistency.`);
+      continue;
+    }
+    const summed = totals[field];
+    if (summed !== reported) {
+      failures.push(
+        `Inconsistent ${field}: token-usage.jsonl sums to ${summed} across ` +
+          `${totals.count} responses, but agent_usage reports ${reported} ` +
+          `(delta ${summed - reported}).`,
+      );
+    }
+  }
+
+  // Derived figures: report drift as a warning so float rounding never fails the build.
+  const creditsComparable =
+    typeof aggregate.ai_credits === 'number' && totals.lastAiCreditsTotal !== null;
+  if (creditsComparable && !aiCreditsMatch(totals.lastAiCreditsTotal, aggregate.ai_credits)) {
+    warnings.push(
+      `ai_credits drift: last ai_credits_total is ${totals.lastAiCreditsTotal}, ` +
+        `agent_usage reports ${aggregate.ai_credits}.`,
+    );
+  }
+
+  const ambientMismatch =
+    typeof aggregate.ambient_context === 'number' &&
+    totals.firstInputTokens !== null &&
+    aggregate.ambient_context !== totals.firstInputTokens;
+  if (ambientMismatch) {
+    warnings.push(
+      `ambient_context (${aggregate.ambient_context}) does not match the first ` +
+        `response input_tokens (${totals.firstInputTokens}).`,
+    );
+  }
+}
+
+/**
+ * Check 2 — cache-read red flag: a zero cache_read_tokens total is a failure
+ * once at least `minRequests` responses were recorded, otherwise a warning.
+ */
+function collectCacheReadFindings(totals, minRequests, failures, warnings) {
+  if (totals.cache_read_tokens !== 0) {
+    return;
+  }
+  if (totals.count >= minRequests) {
+    failures.push(
+      `cache_read_tokens is 0 across ${totals.count} responses. A multi-request ` +
+        'agentic run should report prompt-cache reads; zero almost always means ' +
+        'the api-proxy dropped cached tokens (cf. issue #5203 / PR #5262).',
+    );
+  } else {
+    warnings.push(
+      `cache_read_tokens is 0, but only ${totals.count} response(s) were recorded ` +
+        `(< ${minRequests}); too short to assert prompt caching.`,
+    );
+  }
+}
+
+/**
+ * Run both checks over already-parsed inputs.
+ *
+ * Takes the parsed per-response `records`, the parsed `aggregate` summary
+ * (may be null), and `minRequests` (default 2). Returns
+ * { failures: string[], warnings: string[], summary } where `summary` is the
+ * result of sumTokenUsage(records). Performs no I/O, so it is easy to unit-test.
+ */
+function evaluateTokenUsage({ records, aggregate, minRequests = 2 }) {
+  const failures = [];
+  const warnings = [];
+  const totals = sumTokenUsage(records);
+  const outcome = { failures, warnings, summary: totals };
+
+  // Nothing recorded at all: report that alone and skip both checks.
+  if (totals.count === 0) {
+    failures.push(
+      'No token-usage records found. The agent produced no model requests, ' +
+        'or the api-proxy failed to record usage.',
+    );
+    return outcome;
+  }
+
+  collectConsistencyFindings(totals, aggregate, failures, warnings);
+  collectCacheReadFindings(totals, minRequests, failures, warnings);
+
+  return outcome;
+}
+
+/**
+ * Pick the first entry of `candidates` that names an existing regular file.
+ * Falsy entries, directories, and paths that cannot be stat'ed are passed over.
+ * Returns null when nothing qualifies.
+ */
+function firstExisting(candidates) {
+  const isRegularFile = (candidate) => {
+    try {
+      return Boolean(candidate) && fs.existsSync(candidate) && fs.statSync(candidate).isFile();
+    } catch {
+      // A path we cannot inspect is treated the same as a missing one.
+      return false;
+    }
+  };
+
+  for (const candidate of candidates) {
+    if (isRegularFile(candidate)) {
+      return candidate;
+    }
+  }
+  return null;
+}
+
+/**
+ * Search the directory tree under `root` for a file called `name` and return
+ * the path of the first one encountered, or null when there is none.
+ * Sub-directories are only entered while their parent's depth is below
+ * `maxDepth` (root is depth 0); unreadable directories are skipped.
+ */
+function findFileRecursive(root, name, maxDepth = 6) {
+  // Explicit LIFO work list instead of recursion.
+  const pending = [{ dir: root, depth: 0 }];
+
+  while (pending.length > 0) {
+    const { dir, depth } = pending.pop();
+
+    let entries;
+    try {
+      entries = fs.readdirSync(dir, { withFileTypes: true });
+    } catch {
+      continue;
+    }
+
+    // A match in this directory ends the search immediately.
+    const match = entries.find((entry) => entry.isFile() && entry.name === name);
+    if (match) {
+      return path.join(dir, match.name);
+    }
+
+    const mayDescend = depth < maxDepth;
+    if (!mayDescend) continue;
+
+    for (const entry of entries) {
+      if (entry.isDirectory()) {
+        pending.push({ dir: path.join(dir, entry.name), depth: depth + 1 });
+      }
+    }
+  }
+
+  return null;
+}
+
+/**
+ * Locate the per-response token-usage log and the aggregated agent_usage summary
+ * inside the artifact directory `root`.
+ *
+ * Resolution order for each file:
+ *   1. the explicit path in `overrides` (tokenUsage / agentUsage), used as given;
+ *   2. the first known location under `root` that is an existing regular file;
+ *   3. a bounded recursive search of `root` by file name.
+ *
+ * The recursive search covers both spellings of each file. The known-location
+ * list already accepts `token_usage.jsonl` (underscore), so the fallback looks
+ * for it as well as for `token-usage.jsonl`.
+ *
+ * Returns { tokenUsage, agentUsage }; each is a path, or null when not found.
+ */
+function locateUsageFiles(root, overrides = {}) {
+  const underRoot = (relative) => path.join(root, relative);
+
+  const tokenUsage =
+    overrides.tokenUsage ||
+    firstExisting(
+      [
+        'sandbox/firewall/logs/api-proxy-logs/token-usage.jsonl',
+        'sandbox/firewall/audit/api-proxy-logs/token-usage.jsonl',
+        'sandbox/firewall-audit-logs/api-proxy-logs/token-usage.jsonl',
+        'usage/agent/token_usage.jsonl',
+      ].map(underRoot),
+    ) ||
+    findFileRecursive(root, 'token-usage.jsonl') ||
+    findFileRecursive(root, 'token_usage.jsonl');
+
+  const agentUsage =
+    overrides.agentUsage ||
+    firstExisting(
+      [
+        'agent_usage.json',
+        'agent_usage.jsonl',
+        'usage/agent_usage.json',
+        'usage/agent_usage.jsonl',
+      ].map(underRoot),
+    ) ||
+    findFileRecursive(root, 'agent_usage.json') ||
+    findFileRecursive(root, 'agent_usage.jsonl');
+
+  return { tokenUsage, agentUsage };
+}
+
+/**
+ * Parse the checker's command-line flags.
+ *
+ * Recognised flags (each consumes the following argument as its value):
+ *   --artifact-root, --engine, --token-usage, --agent-usage, --min-requests
+ * Unrecognised arguments are ignored.
+ *
+ * A flag that appears last with no value after it leaves the existing setting
+ * untouched, so a trailing `--artifact-root` can no longer replace the default
+ * with `undefined` (which would make the later path.join() throw).
+ *
+ * --min-requests is parsed as a base-10 integer; anything that does not parse,
+ * and 0, fall back to 2.
+ *
+ * Returns { artifactRoot, engine, minRequests } plus tokenUsage / agentUsage
+ * when those flags were supplied.
+ */
+function parseArgs(argv) {
+  const options = { artifactRoot: '/tmp/gh-aw', engine: 'unknown', minRequests: 2 };
+
+  // Flags whose value is stored verbatim, mapped to the option they set.
+  const stringFlags = {
+    '--artifact-root': 'artifactRoot',
+    '--engine': 'engine',
+    '--token-usage': 'tokenUsage',
+    '--agent-usage': 'agentUsage',
+  };
+  const takesValue = (flag) =>
+    flag === '--min-requests' || Object.prototype.hasOwnProperty.call(stringFlags, flag);
+
+  for (let i = 0; i < argv.length; i += 1) {
+    const flag = argv[i];
+    if (!takesValue(flag)) continue;
+
+    i += 1; // the value is consumed together with its flag
+    const value = argv[i];
+    if (value === undefined) continue; // flag without a value: keep the current setting
+
+    if (flag === '--min-requests') {
+      options.minRequests = parseInt(value, 10) || 2;
+    } else {
+      options[stringFlags[flag]] = value;
+    }
+  }
+
+  return options;
+}
+
+/**
+ * CLI entry point: locate the usage files, run both checks, and print the outcome.
+ *
+ * @param argv Command-line arguments without the node / script prefix.
+ * @returns 0 when no failures were reported, 1 otherwise (including when
+ * token-usage.jsonl cannot be located).
+ *
+ * NOTE: the readFileSync calls are not guarded, so an unreadable file (e.g. a
+ * bad --token-usage / --agent-usage path) propagates as an exception.
+ */
+function main(argv) {
+ const options = parseArgs(argv);
+ const { tokenUsage, agentUsage } = locateUsageFiles(options.artifactRoot, {
+ tokenUsage: options.tokenUsage,
+ agentUsage: options.agentUsage,
+ });
+
+ // Echo the resolved inputs first so a failing run shows which files were used.
+ console.log(`Token-usage sanity check (engine: ${options.engine})`);
+ console.log(` artifact root: ${options.artifactRoot}`);
+ console.log(` token-usage.jsonl: ${tokenUsage || '(not found)'}`);
+ console.log(` agent_usage.json: ${agentUsage || '(not found)'}`);
+
+ // Without the per-response log there is nothing to check: fail right away.
+ if (!tokenUsage) {
+ console.error(
+ '::error::Could not locate token-usage.jsonl in the agent artifact. ' +
+ 'The api-proxy did not record token usage.',
+ );
+ return 1;
+ }
+
+ const records = parseJsonl(fs.readFileSync(tokenUsage, 'utf8'));
+ // A missing agent_usage file leaves `aggregate` null; evaluateTokenUsage
+ // reports that as a failure.
+ let aggregate = null;
+ if (agentUsage) {
+ const text = fs.readFileSync(agentUsage, 'utf8').trim();
+ // agent_usage may be a pretty-printed JSON object, a single-line JSON
+ // object, or a JSONL file. Try JSON.parse() first so that multi-line
+ // pretty-printed files are handled correctly, then fall back to JSONL.
+ try {
+ aggregate = JSON.parse(text);
+ } catch {
+ // JSONL fallback: the last parseable line is taken as the summary.
+ const parsed = parseJsonl(text);
+ aggregate = parsed.length > 0 ? parsed[parsed.length - 1] : null;
+ }
+ }
+
+ const { failures, warnings, summary } = evaluateTokenUsage({
+ records,
+ aggregate,
+ minRequests: options.minRequests,
+ });
+
+ console.log(
+ ` totals: responses=${summary.count} input=${summary.input_tokens} ` +
+ `output=${summary.output_tokens} cache_read=${summary.cache_read_tokens} ` +
+ `cache_write=${summary.cache_write_tokens}`,
+ );
+
+ // The ::warning:: / ::error:: prefixes follow the GitHub Actions workflow-command
+ // format, so each finding is surfaced as an annotation on the run.
+ for (const warning of warnings) {
+ console.log(`::warning::${warning}`);
+ }
+ for (const failure of failures) {
+ console.error(`::error::${failure}`);
+ }
+
+ // Warnings never affect the exit code; only failures do.
+ if (failures.length > 0) {
+ console.error(`Token-usage sanity check FAILED with ${failures.length} error(s).`);
+ return 1;
+ }
+ console.log('Token-usage sanity check passed.');
+ return 0;
+}
+
+// Run as a CLI only when this file is executed directly; when it is require()d
+// (as the unit tests do) nothing runs and the process is not exited.
+if (require.main === module) {
+ process.exit(main(process.argv.slice(2)));
+}
+
+// Export every helper so each one can be exercised individually.
+module.exports = {
+ parseJsonl,
+ sumTokenUsage,
+ aiCreditsMatch,
+ evaluateTokenUsage,
+ firstExisting,
+ findFileRecursive,
+ locateUsageFiles,
+ parseArgs,
+ main,
+};
diff --git a/scripts/ci/check-token-usage.test.ts b/scripts/ci/check-token-usage.test.ts
new file mode 100644
index 00000000..e0077195
--- /dev/null
+++ b/scripts/ci/check-token-usage.test.ts
@@ -0,0 +1,262 @@
+import * as path from 'path';
+import * as fs from 'fs';
+import * as os from 'os';
+
+// The checker is intentionally zero-dependency CommonJS so the CI job can run it
+// with bare `node`; require it directly here for unit testing.
+// eslint-disable-next-line @typescript-eslint/no-var-requires
+const checker = require('./check-token-usage.js');
+
+const {
+ parseJsonl,
+ sumTokenUsage,
+ aiCreditsMatch,
+ evaluateTokenUsage,
+ findFileRecursive,
+ locateUsageFiles,
+ parseArgs,
+ main,
+} = checker;
+
+/** Build a per-response token-usage record; `overrides` replaces any of the zeroed defaults. */
+function record(overrides: Record<string, unknown> = {}) {
+  return {
+    event: 'token_usage',
+    input_tokens: 0,
+    output_tokens: 0,
+    cache_read_tokens: 0,
+    cache_write_tokens: 0,
+    ai_credits_total: 0,
+    ...overrides,
+  };
+}
+
+describe('parseJsonl', () => {
+  it('parses well-formed lines and skips blanks / malformed lines', () => {
+    const parsed = parseJsonl('{"a":1}\n\n \nnot json\n{"b":2}\n');
+    expect(parsed).toEqual([{ a: 1 }, { b: 2 }]); // blank, whitespace-only and non-JSON lines are dropped
+  });
+
+  it('returns an empty array for empty input', () => {
+    expect(parseJsonl('')).toEqual([]); // nothing to parse, nothing returned
+  });
+});
+
+describe('sumTokenUsage', () => {
+  it('sums token fields and captures first input / last credits', () => {
+    const first = record({ input_tokens: 100, output_tokens: 10, cache_read_tokens: 0, ai_credits_total: 1.5 });
+    const second = record({ input_tokens: 200, output_tokens: 20, cache_read_tokens: 150, ai_credits_total: 3.0 });
+    const summed = sumTokenUsage([first, second]);
+    // Token fields are totalled; first input and last credits are taken from the ends.
+    expect(summed.input_tokens).toBe(300);
+    expect(summed.output_tokens).toBe(30);
+    expect(summed.cache_read_tokens).toBe(150);
+    expect(summed.count).toBe(2);
+    expect(summed.firstInputTokens).toBe(100);
+    expect(summed.lastAiCreditsTotal).toBe(3.0);
+  });
+
+  it('ignores non-usage records in a mixed stream', () => {
+    const usage = record({ input_tokens: 100 });
+    const unrelated = { event: 'something_else', input_tokens: 9999 };
+    const summed = sumTokenUsage([usage, unrelated]);
+    // Only the token_usage record is expected to contribute.
+    expect(summed.input_tokens).toBe(100);
+    expect(summed.count).toBe(1);
+  });
+});
+
+describe('aiCreditsMatch', () => {
+  it('accepts values within rounding tolerance', () => {
+    const closePairs = [[28.632, 28.632], [417.082, 417.085]];
+    closePairs.forEach(([left, right]) => expect(aiCreditsMatch(left, right)).toBe(true));
+  });
+
+  it('rejects clearly different values', () => {
+    expect(aiCreditsMatch(28.632, 30.0)).toBe(false); // a gap of more than one credit
+  });
+});
+
+describe('evaluateTokenUsage — internal consistency', () => {
+ it('passes when per-response sums equal the aggregate and cache_read > 0', () => {
+ const records = [
+ record({ input_tokens: 13663, output_tokens: 378, cache_read_tokens: 0, ai_credits_total: 1.2 }),
+ record({ input_tokens: 16601, output_tokens: 124, cache_read_tokens: 10752, ai_credits_total: 4.3 }),
+ ];
+ const aggregate = {
+ input_tokens: 30264, // 13663 + 16601
+ output_tokens: 502, // 378 + 124
+ cache_read_tokens: 10752, // 0 + 10752
+ cache_write_tokens: 0,
+ ambient_context: 13663, // equals the first record's input_tokens
+ ai_credits: 4.3, // equals the last record's ai_credits_total
+ };
+ const { failures, warnings } = evaluateTokenUsage({ records, aggregate });
+ expect(failures).toEqual([]);
+ expect(warnings).toEqual([]);
+ });
+
+ it('fails when the aggregate disagrees with the per-response sum', () => {
+ const records = [record({ input_tokens: 100, output_tokens: 10, cache_read_tokens: 50 })];
+ const aggregate = {
+ input_tokens: 999, // deliberately wrong: the only record has input_tokens 100
+ output_tokens: 10,
+ cache_read_tokens: 50,
+ cache_write_tokens: 0,
+ };
+ const { failures } = evaluateTokenUsage({ records, aggregate, minRequests: 1 });
+ expect(failures.some((f: string) => f.includes('Inconsistent input_tokens'))).toBe(true);
+ });
+
+ it('fails when the aggregate is missing entirely', () => {
+ const records = [record({ input_tokens: 100, cache_read_tokens: 50 })];
+ const { failures } = evaluateTokenUsage({ records, aggregate: null, minRequests: 1 });
+ expect(failures.some((f: string) => f.includes('Aggregated agent_usage'))).toBe(true);
+ });
+
+ it('warns (does not fail) on ai_credits / ambient_context drift', () => {
+ const records = [
+ record({ input_tokens: 100, output_tokens: 10, cache_read_tokens: 50, ai_credits_total: 2.0 }),
+ record({ input_tokens: 100, output_tokens: 10, cache_read_tokens: 50, ai_credits_total: 5.0 }),
+ ];
+ const aggregate = {
+ input_tokens: 200, // 100 + 100, so the token totals themselves are consistent
+ output_tokens: 20,
+ cache_read_tokens: 100,
+ cache_write_tokens: 0,
+ ambient_context: 999, // first record's input_tokens is 100 -> expected as a warning
+ ai_credits: 42.0, // last ai_credits_total is 5.0 -> expected as a warning
+ };
+ const { failures, warnings } = evaluateTokenUsage({ records, aggregate });
+ expect(failures).toEqual([]);
+ expect(warnings.some((w: string) => w.includes('ai_credits drift'))).toBe(true);
+ expect(warnings.some((w: string) => w.includes('ambient_context'))).toBe(true);
+ });
+});
+
+describe('evaluateTokenUsage — cache-read red flag', () => {
+ it('hard-fails when cache_read is 0 across multiple responses (the bug)', () => {
+ // Mirrors gh-aw codex run 27784259295/27784201719: consistent totals, zero cache reads.
+ const records = [
+ record({ input_tokens: 13663, output_tokens: 378 }), // cache_read_tokens stays at record()'s default of 0
+ record({ input_tokens: 26000, output_tokens: 200 }),
+ ];
+ const aggregate = {
+ input_tokens: 39663, // 13663 + 26000
+ output_tokens: 578, // 378 + 200
+ cache_read_tokens: 0,
+ cache_write_tokens: 0,
+ };
+ const { failures } = evaluateTokenUsage({ records, aggregate }); // minRequests not passed; presumably a default applies
+ expect(failures.some((f: string) => f.includes('cache_read_tokens is 0'))).toBe(true);
+ });
+
+ it('only warns about cache_read==0 when below the min-requests threshold', () => {
+ const records = [record({ input_tokens: 100, output_tokens: 10, cache_read_tokens: 0 })]; // one response, below minRequests: 2
+ const aggregate = {
+ input_tokens: 100,
+ output_tokens: 10,
+ cache_read_tokens: 0,
+ cache_write_tokens: 0,
+ };
+ const { failures, warnings } = evaluateTokenUsage({ records, aggregate, minRequests: 2 });
+ expect(failures).toEqual([]);
+ expect(warnings.some((w: string) => w.includes('too short to assert'))).toBe(true);
+ });
+
+ it('fails when there are no usage records at all', () => {
+ const { failures } = evaluateTokenUsage({ records: [], aggregate: null });
+ expect(failures.some((f: string) => f.includes('No token-usage records'))).toBe(true);
+ });
+});
+
+describe('locateUsageFiles', () => {
+  it('honors explicit overrides without touching the filesystem', () => {
+    const { tokenUsage, agentUsage } = locateUsageFiles('/nonexistent', {
+      tokenUsage: '/x/token-usage.jsonl',
+      agentUsage: '/x/agent_usage.json',
+    });
+    expect(tokenUsage).toBe('/x/token-usage.jsonl');
+    expect(agentUsage).toBe('/x/agent_usage.json');
+  });
+
+  it('returns nulls (does not throw) when the artifact root does not exist', () => {
+    // No artifact fixture is checked in, so this test does not exercise
+    // candidate-path resolution; it only confirms that a missing root yields
+    // nulls for both files rather than throwing.
+    const { tokenUsage, agentUsage } = locateUsageFiles(path.join(os.tmpdir(), 'definitely-missing-xyz'));
+    expect(tokenUsage).toBeNull();
+    expect(agentUsage).toBeNull();
+  });
+});
+
+describe('parseArgs', () => {
+  it('parses flags with sensible defaults', () => {
+    const argv = ['--artifact-root', '/tmp/x', '--engine', 'copilot', '--min-requests', '5'];
+    const { artifactRoot, engine, minRequests } = parseArgs(argv);
+    expect([artifactRoot, engine]).toEqual(['/tmp/x', 'copilot']);
+    expect(minRequests).toBe(5); // numeric, not the string '5'
+  });
+
+  it('defaults min-requests to 2 and engine to unknown', () => {
+    const { artifactRoot, engine, minRequests } = parseArgs([]);
+    expect(minRequests).toBe(2);
+    expect(engine).toBe('unknown');
+    expect(artifactRoot).toBe('/tmp/gh-aw');
+  });
+});
+
+describe('findFileRecursive', () => {
+  it('finds agent_usage.jsonl nested under a subdirectory', () => {
+    const tmpRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'ck-test-'));
+    try {
+      const nestedDir = path.join(tmpRoot, 'deep', 'subdir');
+      const expectedPath = path.join(nestedDir, 'agent_usage.jsonl');
+      fs.mkdirSync(nestedDir, { recursive: true });
+      fs.writeFileSync(expectedPath, '{"input_tokens":1}\n');
+      expect(findFileRecursive(tmpRoot, 'agent_usage.jsonl')).toBe(expectedPath);
+    } finally {
+      fs.rmSync(tmpRoot, { recursive: true, force: true }); // always remove the scratch tree
+    }
+  });
+
+  it('returns null when the file is absent', () => {
+    expect(findFileRecursive('/nonexistent-xyz', 'agent_usage.jsonl')).toBeNull();
+  });
+});
+
+describe('main — pretty-printed agent_usage.json', () => {
+ it('parses a multi-line pretty-printed JSON aggregate without error', () => {
+ const root = fs.mkdtempSync(path.join(os.tmpdir(), 'ck-test-'));
+ try {
+ const logsDir = path.join(root, 'sandbox', 'firewall', 'audit', 'api-proxy-logs'); // NOTE(review): presumably a path locateUsageFiles probes — confirm
+ fs.mkdirSync(logsDir, { recursive: true });
+
+ // Write one token-usage record whose totals equal the aggregate written below.
+ fs.writeFileSync(
+ path.join(logsDir, 'token-usage.jsonl'),
+ JSON.stringify({
+ event: 'token_usage',
+ input_tokens: 100,
+ output_tokens: 10,
+ cache_read_tokens: 50,
+ cache_write_tokens: 5,
+ }) + '\n',
+ );
+
+ // Write the aggregate as pretty-printed JSON (2-space indent, so it spans several lines).
+ const aggregate = {
+ input_tokens: 100,
+ output_tokens: 10,
+ cache_read_tokens: 50,
+ cache_write_tokens: 5,
+ };
+ fs.writeFileSync(path.join(root, 'agent_usage.json'), JSON.stringify(aggregate, null, 2));
+
+ const exitCode = main(['--artifact-root', root, '--engine', 'test', '--min-requests', '1']);
+ expect(exitCode).toBe(0); // main() returns 0 only when no failures were recorded
+ } finally {
+ fs.rmSync(root, { recursive: true, force: true });
+ }
+ });
+});