From 6cf652ca69e005891bfefae6343698030046183f Mon Sep 17 00:00:00 2001 From: Landon Cox Date: Thu, 18 Jun 2026 14:13:05 -0700 Subject: [PATCH 1/2] ci(smoke): add token-usage sanity checks to smoke workflows Add a verify_token_usage job to smoke-copilot, smoke-claude, and smoke-codex that runs after the agent job on the downloaded agent artifact and fails the workflow when token accounting looks wrong. The checker (scripts/ci/check-token-usage.js) enforces two invariants: - Internal consistency: the sum of per-response records in token-usage.jsonl must exactly equal the aggregated agent_usage.json (input/output/cache_read/cache_write). This is engine-independent. - cache_read_tokens must not be 0 across multiple responses, which is the symptom of the cached-token normalization bug. ai_credits/ambient_context drift is reported as warnings only. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/smoke-claude.lock.yml | 72 +++-- .github/workflows/smoke-claude.md | 19 ++ .github/workflows/smoke-codex.lock.yml | 33 ++- .github/workflows/smoke-codex.md | 19 ++ .github/workflows/smoke-copilot.lock.yml | 33 ++- .github/workflows/smoke-copilot.md | 19 ++ scripts/ci/check-token-usage.js | 337 +++++++++++++++++++++++ scripts/ci/check-token-usage.test.ts | 203 ++++++++++++++ 8 files changed, 713 insertions(+), 22 deletions(-) create mode 100644 scripts/ci/check-token-usage.js create mode 100644 scripts/ci/check-token-usage.test.ts diff --git a/.github/workflows/smoke-claude.lock.yml b/.github/workflows/smoke-claude.lock.yml index a4490b81..3a49b308 100644 --- a/.github/workflows/smoke-claude.lock.yml +++ b/.github/workflows/smoke-claude.lock.yml @@ -1,7 +1,5 @@ -# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"a08c320bc1b492fd4827ebbdc1fff37ca664404dc3f30ca87b38733896486989","body_hash":"6e05820005e43b82d8112bc60ced8e13336596ae671ecac69e6c5ac691485b71","compiler_version":"v0.79.8","agent_id":"claude","agent_model":"claude-haiku-4-5","engine_versions":{"claude":"2.1.168"}} -# gh-aw-manifest: {"version":1,"secrets":["ANTHROPIC_API_KEY","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"df4cb1c069e1874edd31b4311f1884172cec0e10","version":"v6.0.3"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9.0.0"},{"repo":"actions/setup-node","sha":"48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e","version":"v6.4.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"},{"repo":"github/gh-aw-actions/setup","sha":"c0338fef4749d08c21f8f975fb0e37efa17dda47","version":"v0.79.8"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2","digest":"sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6","pinned_image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2@sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2","digest":"sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4","pinned_image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2@sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2","digest":"sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591","pinned_image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2@sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.3.1","digest":"sha256:287fad0236959f3b3d9936ea1ef8d5b4f135ef2a5f5789713495cbbef191e60c","pinned_image":"ghcr.io/github/gh-aw-mcpg:v0.3.1@sha256:287fad0236959f3b3d9936ea1ef8d5b4f135ef2a5f5789713495cbbef191e60c"}]} -# This file was automatically generated by gh-aw (v0.79.8). DO NOT EDIT. To debug this workflow, load the skill at https://github.com/github/gh-aw/blob/main/debug.md -# +# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"1931d05a82aa65b2b1d5af50c9dcde1453044c61ac1c0718031eb2eca5c6b046","body_hash":"6e05820005e43b82d8112bc60ced8e13336596ae671ecac69e6c5ac691485b71","compiler_version":"v0.79.6","agent_id":"claude","agent_model":"claude-haiku-4-5","engine_versions":{"claude":"2.1.168"}} +# gh-aw-manifest: {"version":1,"secrets":["ANTHROPIC_API_KEY","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"df4cb1c069e1874edd31b4311f1884172cec0e10","version":"v6.0.3"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9.0.0"},{"repo":"actions/setup-node","sha":"48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e","version":"v6.4.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"},{"repo":"github/gh-aw-actions/setup","sha":"5c2fe865bb4dc46e1450f6ee0d0541d759aea73a","version":"v0.79.6"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2","digest":"sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6","pinned_image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2@sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2","digest":"sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4","pinned_image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2@sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2","digest":"sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591","pinned_image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2@sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.3.1","digest":"sha256:287fad0236959f3b3d9936ea1ef8d5b4f135ef2a5f5789713495cbbef191e60c","pinned_image":"ghcr.io/github/gh-aw-mcpg:v0.3.1@sha256:287fad0236959f3b3d9936ea1ef8d5b4f135ef2a5f5789713495cbbef191e60c"}]} # ___ _ _ # / _ \ | | (_) # | |_| | __ _ ___ _ __ | |_ _ ___ @@ -16,6 +14,7 @@ # \ /\ / (_) | | | | ( | | | | (_) \ V V /\__ \ # \/ \/ \___/|_| |_|\_\|_| |_|\___/ \_/\_/ |___/ # +# This file was automatically generated by gh-aw (v0.79.6). DO NOT EDIT. # # To update this file, edit the corresponding .md file and run: # gh aw compile @@ -37,7 +36,7 @@ # - actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 # - actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 # - actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 -# - github/gh-aw-actions/setup@c0338fef4749d08c21f8f975fb0e37efa17dda47 # v0.79.8 +# - github/gh-aw-actions/setup@5c2fe865bb4dc46e1450f6ee0d0541d759aea73a # v0.79.6 # # Container images used: # - ghcr.io/github/gh-aw-firewall/agent:0.27.2@sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6 @@ -90,9 +89,9 @@ jobs: comment_id: ${{ steps.add-comment.outputs.comment-id }} comment_repo: ${{ steps.add-comment.outputs.comment-repo }} comment_url: ${{ steps.add-comment.outputs.comment-url }} - daily_ai_credits_exceeded: ${{ steps.daily-effective-workflow-guardrail.outputs.daily_ai_credits_exceeded == 'true' }} - daily_ai_credits_threshold: ${{ steps.daily-effective-workflow-guardrail.outputs.daily_ai_credits_threshold || '' }} - daily_ai_credits_total_effective_tokens: ${{ steps.daily-effective-workflow-guardrail.outputs.daily_ai_credits_total_effective_tokens || '' }} + daily_effective_workflow_exceeded: ${{ steps.daily-effective-workflow-guardrail.outputs.daily_effective_workflow_exceeded == 'true' }} + daily_effective_workflow_threshold: ${{ steps.daily-effective-workflow-guardrail.outputs.daily_effective_workflow_threshold || '' }} + daily_effective_workflow_total_effective_tokens: ${{ steps.daily-effective-workflow-guardrail.outputs.daily_effective_workflow_total_effective_tokens || '' }} engine_id: ${{ steps.generate_aw_info.outputs.engine_id }} label_command: ${{ steps.get_trigger_label.outputs.label_name }} lockdown_check_failed: ${{ steps.generate_aw_info.outputs.lockdown_check_failed == 'true' }} @@ -105,7 +104,7 @@ jobs: steps: - name: Setup Scripts id: setup - uses: github/gh-aw-actions/setup@c0338fef4749d08c21f8f975fb0e37efa17dda47 # v0.79.8 + uses: github/gh-aw-actions/setup@5c2fe865bb4dc46e1450f6ee0d0541d759aea73a # v0.79.6 with: destination: ${{ runner.temp }}/gh-aw/actions job-name: ${{ github.job }} @@ -124,7 +123,7 @@ jobs: GH_AW_INFO_MODEL: "claude-haiku-4-5" GH_AW_INFO_VERSION: "2.1.168" GH_AW_INFO_AGENT_VERSION: "2.1.168" - GH_AW_INFO_CLI_VERSION: "v0.79.8" + GH_AW_INFO_CLI_VERSION: "v0.79.6" GH_AW_INFO_WORKFLOW_NAME: "Smoke Claude" GH_AW_INFO_EXPERIMENTAL: "false" GH_AW_INFO_SUPPORTS_TOOLS_ALLOWLIST: "true" @@ -205,7 +204,7 @@ jobs: - name: Check compile-agentic version uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 env: - GH_AW_COMPILED_VERSION: "v0.79.8" + GH_AW_COMPILED_VERSION: "v0.79.6" with: script: | const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); @@ -297,6 +296,7 @@ jobs: include-hidden-files: true path: | /tmp/gh-aw/aw_info.json + /tmp/gh-aw/model_multipliers.json /tmp/gh-aw/models.json /tmp/gh-aw/aw-prompts/prompt.txt /tmp/gh-aw/aw-prompts/prompt-template.txt @@ -310,7 +310,7 @@ jobs: agent: needs: activation - if: needs.activation.outputs.daily_ai_credits_exceeded != 'true' + if: needs.activation.outputs.daily_effective_workflow_exceeded != 'true' runs-on: ubuntu-latest permissions: contents: read @@ -344,7 +344,7 @@ jobs: steps: - name: Setup Scripts id: setup - uses: github/gh-aw-actions/setup@c0338fef4749d08c21f8f975fb0e37efa17dda47 # v0.79.8 + uses: github/gh-aw-actions/setup@5c2fe865bb4dc46e1450f6ee0d0541d759aea73a # v0.79.6 with: destination: ${{ runner.temp }}/gh-aw/actions job-name: ${{ github.job }} @@ -790,6 +790,7 @@ jobs: (umask 177 && touch /tmp/gh-aw/agent-stdio.log) GH_AW_MAX_AI_CREDITS="${{ vars.GH_AW_DEFAULT_MAX_AI_CREDITS || '1000' }}" printf '%s\n' "{\"\$schema\":\"https://github.com/github/gh-aw-firewall/releases/download/v0.27.2/awf-config.schema.json\",\"network\":{\"allowDomains\":[\"*.githubusercontent.com\",\"anthropic.com\",\"api.anthropic.com\",\"api.github.com\",\"api.snapcraft.io\",\"archive.ubuntu.com\",\"azure.archive.ubuntu.com\",\"cdn.playwright.dev\",\"codeload.github.com\",\"crl.geotrust.com\",\"crl.globalsign.com\",\"crl.identrust.com\",\"crl.sectigo.com\",\"crl.thawte.com\",\"crl.usertrust.com\",\"crl.verisign.com\",\"crl3.digicert.com\",\"crl4.digicert.com\",\"crls.ssl.com\",\"files.pythonhosted.org\",\"ghcr.io\",\"github-cloud.githubusercontent.com\",\"github-cloud.s3.amazonaws.com\",\"github.com\",\"host.docker.internal\",\"json-schema.org\",\"json.schemastore.org\",\"keyserver.ubuntu.com\",\"lfs.github.com\",\"objects.githubusercontent.com\",\"ocsp.digicert.com\",\"ocsp.geotrust.com\",\"ocsp.globalsign.com\",\"ocsp.identrust.com\",\"ocsp.sectigo.com\",\"ocsp.ssl.com\",\"ocsp.thawte.com\",\"ocsp.usertrust.com\",\"ocsp.verisign.com\",\"packagecloud.io\",\"packages.cloud.google.com\",\"packages.microsoft.com\",\"playwright.download.prss.microsoft.com\",\"ppa.launchpad.net\",\"pypi.org\",\"raw.githubusercontent.com\",\"registry.npmjs.org\",\"s.symcb.com\",\"s.symcd.com\",\"security.ubuntu.com\",\"sentry.io\",\"statsig.anthropic.com\",\"ts-crl.ws.symantec.com\",\"ts-ocsp.ws.symantec.com\",\"www.googleapis.com\"]},\"apiProxy\":{\"enabled\":true,\"enableTokenSteering\":true,\"maxRuns\":2,\"maxAiCredits\":${GH_AW_MAX_AI_CREDITS},\"models\":{\"agent\":[\"sonnet-6x\",\"gpt-5.4\",\"gpt-5.3\",\"gemini-pro\",\"any\"],\"antigravity\":[\"copilot/antigravity*\",\"google/antigravity*\",\"gemini/antigravity*\"],\"any\":[\"copilot/*\",\"anthropic/*\",\"openai/*\",\"google/*\",\"gemini/*\"],\"claude\":[\"agent\"],\"codex\":[\"agent\"],\"coding\":[\"copilot/gpt-5*codex*\",\"openai/gpt-5*codex*\",\"gpt-5-codex\"],\"computer-use\":[\"copilot/*computer-use*\",\"google/*computer-use*\",\"gemini/*computer-use*\",\"openai/*computer-use*\"],\"copilot\":[\"agent\"],\"deep-research\":[\"copilot/deep-research*\",\"copilot/o3-deep-research*\",\"copilot/o4-mini-deep-research*\",\"google/deep-research*\",\"gemini/deep-research*\",\"openai/o3-deep-research*\",\"openai/o4-mini-deep-research*\"],\"gemini\":[\"agent\"],\"gemini-3-flash\":[\"copilot/gemini-3*flash*\",\"google/gemini-3*flash*\",\"gemini/gemini-3*flash*\"],\"gemini-3-pro\":[\"copilot/gemini-3*pro*\",\"google/gemini-3*pro*\",\"google/nano-banana*\",\"gemini/gemini-3*pro*\"],\"gemini-3.1-flash\":[\"copilot/gemini-3.1*flash*\",\"google/gemini-3.1*flash*\",\"gemini/gemini-3.1*flash*\"],\"gemini-3.1-pro\":[\"copilot/gemini-3.1*pro*\",\"google/gemini-3.1*pro*\",\"gemini/gemini-3.1*pro*\"],\"gemini-3.5-flash\":[\"copilot/gemini-3.5*flash*\",\"google/gemini-3.5*flash*\",\"gemini/gemini-3.5*flash*\"],\"gemini-flash\":[\"copilot/gemini-*flash*\",\"google/gemini-*flash*\",\"gemini/gemini-*flash*\"],\"gemini-flash-lite\":[\"copilot/gemini-*flash*lite*\",\"google/gemini-*flash*lite*\",\"gemini/gemini-*flash*lite*\"],\"gemini-pro\":[\"copilot/gemini-*pro*\",\"google/gemini-*pro*\",\"gemini/gemini-*pro*\"],\"gemma\":[\"copilot/gemma*\",\"google/gemma*\",\"gemini/gemma*\"],\"gpt-5\":[\"copilot/gpt-5*\",\"openai/gpt-5*\"],\"gpt-5-codex\":[\"copilot/gpt-5*codex*\",\"openai/gpt-5*codex*\"],\"gpt-5-mini\":[\"copilot/gpt-5*mini*\",\"openai/gpt-5*mini*\"],\"gpt-5-nano\":[\"copilot/gpt-5*nano*\",\"openai/gpt-5*nano*\"],\"gpt-5-pro\":[\"copilot/gpt-5*pro*\",\"openai/gpt-5*pro*\"],\"gpt-5.2\":[\"copilot/gpt-5.2*\",\"openai/gpt-5.2*\"],\"gpt-5.3\":[\"copilot/gpt-5.3*\",\"openai/gpt-5.3*\"],\"gpt-5.4\":[\"copilot/gpt-5.4*\",\"openai/gpt-5.4*\"],\"gpt-5.5\":[\"copilot/gpt-5.5*\",\"openai/gpt-5.5*\"],\"haiku\":[\"copilot/*haiku*\",\"anthropic/*haiku*\"],\"large\":[\"sonnet\",\"gpt-5-pro\",\"gpt-5\",\"gemini-pro\"],\"mai-code\":[\"copilot/MAI-Code*\",\"copilot/mai-code*\",\"openai/MAI-Code*\"],\"mini\":[\"haiku\",\"gpt-5-mini\",\"gpt-5-nano\",\"gemini-flash-lite\"],\"nano-banana\":[\"copilot/nano-banana*\",\"google/nano-banana*\",\"gemini/nano-banana*\"],\"opus\":[\"copilot/*opus*\",\"anthropic/*opus*\"],\"opusplan\":[\"opus?effort=high\"],\"reasoning\":[\"copilot/o1*\",\"copilot/o3*\",\"copilot/o4*\",\"openai/o1*\",\"openai/o3*\",\"openai/o4*\"],\"robotics\":[\"copilot/*robotics*\",\"google/*robotics*\",\"gemini/*robotics*\"],\"small\":[\"mini\"],\"small-agent\":[\"haiku\",\"gpt-5-mini\",\"gemini-flash\"],\"sonnet\":[\"copilot/*sonnet*\",\"anthropic/*sonnet*\"],\"sonnet-6x\":[\"copilot/*sonnet-4.5*\",\"copilot/*sonnet-4.6*\",\"copilot/*sonnet-4-5-*\",\"anthropic/*sonnet-4-5-*\",\"copilot/*sonnet-4-6*\",\"anthropic/*sonnet-4-6*\"],\"summarization\":[\"haiku\",\"gpt-5-mini\",\"gemini-flash-lite\",\"mini\"],\"vision\":[\"copilot/gemini-*image*\",\"gemini/gemini-*image*\",\"copilot/gemini-*flash*\",\"gemini/gemini-*flash*\"]}},\"container\":{\"imageTag\":\"0.27.2,squid=sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591,agent=sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6,api-proxy=sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4,cli-proxy=sha256:02f3ec08f32dc26c5427920c6a2e2f3036238fce44802f2f11ef49ed8621b5d0\"}}" > "${RUNNER_TEMP}/gh-aw/awf-config.json" + GH_AW_MODEL_MULTIPLIERS_PATH="/tmp/gh-aw/model_multipliers.json" node "${RUNNER_TEMP}/gh-aw/actions/merge_awf_model_multipliers.cjs" cp "${RUNNER_TEMP}/gh-aw/awf-config.json" /tmp/gh-aw/awf-config.json export GH_AW_MODELS_JSON_PATH="/tmp/gh-aw/models.json" GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS="" @@ -822,7 +823,7 @@ jobs: GH_AW_PHASE: agent GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt GH_AW_SAFE_OUTPUTS: ${{ steps.set-runtime-paths.outputs.GH_AW_SAFE_OUTPUTS }} - GH_AW_VERSION: v0.79.8 + GH_AW_VERSION: v0.79.6 GITHUB_AW: true GITHUB_STEP_SUMMARY: /tmp/gh-aw/agent-step-summary.md GITHUB_WORKSPACE: ${{ github.workspace }} @@ -995,9 +996,10 @@ jobs: - activation - agent - safe_outputs + - verify_token_usage if: > always() && (needs.agent.result != 'skipped' || needs.activation.outputs.lockdown_check_failed == 'true' || - needs.activation.outputs.stale_lock_file_failed == 'true' || needs.activation.outputs.daily_ai_credits_exceeded == 'true') + needs.activation.outputs.stale_lock_file_failed == 'true' || needs.activation.outputs.daily_effective_workflow_exceeded == 'true') runs-on: ubuntu-slim permissions: contents: read @@ -1016,7 +1018,7 @@ jobs: steps: - name: Setup Scripts id: setup - uses: github/gh-aw-actions/setup@c0338fef4749d08c21f8f975fb0e37efa17dda47 # v0.79.8 + uses: github/gh-aw-actions/setup@5c2fe865bb4dc46e1450f6ee0d0541d759aea73a # v0.79.6 with: destination: ${{ runner.temp }}/gh-aw/actions job-name: ${{ github.job }} @@ -1154,9 +1156,9 @@ jobs: GH_AW_ENGINE_API_HOSTS: "api.anthropic.com" GH_AW_LOCKDOWN_CHECK_FAILED: ${{ needs.activation.outputs.lockdown_check_failed }} GH_AW_STALE_LOCK_FILE_FAILED: ${{ needs.activation.outputs.stale_lock_file_failed }} - GH_AW_DAILY_AI_CREDITS_EXCEEDED: ${{ needs.activation.outputs.daily_ai_credits_exceeded }} - GH_AW_DAILY_AI_CREDITS_TOTAL_EFFECTIVE_TOKENS: ${{ needs.activation.outputs.daily_ai_credits_total_effective_tokens }} - GH_AW_DAILY_AI_CREDITS_THRESHOLD: ${{ needs.activation.outputs.daily_ai_credits_threshold }} + GH_AW_DAILY_EFFECTIVE_WORKFLOW_EXCEEDED: ${{ needs.activation.outputs.daily_effective_workflow_exceeded }} + GH_AW_DAILY_EFFECTIVE_WORKFLOW_TOTAL_EFFECTIVE_TOKENS: ${{ needs.activation.outputs.daily_effective_workflow_total_effective_tokens }} + GH_AW_DAILY_EFFECTIVE_WORKFLOW_THRESHOLD: ${{ needs.activation.outputs.daily_effective_workflow_threshold }} GH_AW_SAFE_OUTPUT_MESSAGES: "{\"runSuccess\":\"✅ [{workflow_name}]({run_url}) passed\",\"runFailure\":\"❌ [{workflow_name}]({run_url}) {status}\"}" GH_AW_GROUP_REPORTS: "false" GH_AW_FAILURE_REPORT_AS_ISSUE: "true" @@ -1226,7 +1228,7 @@ jobs: steps: - name: Setup Scripts id: setup - uses: github/gh-aw-actions/setup@c0338fef4749d08c21f8f975fb0e37efa17dda47 # v0.79.8 + uses: github/gh-aw-actions/setup@5c2fe865bb4dc46e1450f6ee0d0541d759aea73a # v0.79.6 with: destination: ${{ runner.temp }}/gh-aw/actions job-name: ${{ github.job }} @@ -1289,3 +1291,33 @@ jobs: /tmp/gh-aw/temporary-id-map.json if-no-files-found: ignore + verify_token_usage: + needs: agent + if: always() && needs.agent.result != 'skipped' && needs.agent.result != 'cancelled' + runs-on: ubuntu-latest + permissions: + contents: read + + steps: + - name: Configure GH_HOST for enterprise compatibility + id: ghes-host-config + shell: bash + # zizmor: ignore[github-env] - GITHUB_SERVER_URL is set by GitHub Actions, not user input. + run: | + # Derive GH_HOST from GITHUB_SERVER_URL so the gh CLI targets the correct + # GitHub instance (GHES/GHEC). On github.com this is a harmless no-op. + GH_HOST="${GITHUB_SERVER_URL#https://}" + GH_HOST="${GH_HOST#http://}" + echo "GH_HOST=${GH_HOST}" >> "$GITHUB_ENV" + - name: Checkout repository + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + - name: Download agent artifact + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: agent + path: /tmp/gh-aw-agent + - name: Token-usage sanity check + run: node scripts/ci/check-token-usage.js --artifact-root /tmp/gh-aw-agent --engine claude + diff --git a/.github/workflows/smoke-claude.md b/.github/workflows/smoke-claude.md index 7bb75e23..43f3eb94 100644 --- a/.github/workflows/smoke-claude.md +++ b/.github/workflows/smoke-claude.md @@ -23,6 +23,25 @@ sandbox: mcp: version: v0.3.1 strict: false +jobs: + verify_token_usage: + needs: agent + if: always() && needs.agent.result != 'skipped' && needs.agent.result != 'cancelled' + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - name: Checkout repository + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + - name: Download agent artifact + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: agent + path: /tmp/gh-aw-agent + - name: Token-usage sanity check + run: node scripts/ci/check-token-usage.js --artifact-root /tmp/gh-aw-agent --engine claude tools: bash: - bash diff --git a/.github/workflows/smoke-codex.lock.yml b/.github/workflows/smoke-codex.lock.yml index 902b00f0..def5e2fc 100644 --- a/.github/workflows/smoke-codex.lock.yml +++ b/.github/workflows/smoke-codex.lock.yml @@ -1,4 +1,4 @@ -# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"c0467bdd469d554b0261f696ece56b95cd24d5eb263b2767d976f5db7af45a50","body_hash":"988c8ab731a331e33e5751aa46982c3ee5adc6dbefb2b18236d88854467c3d2b","compiler_version":"v0.79.6","agent_id":"codex","agent_model":"gpt-5.4","engine_versions":{"codex":"0.137.0"}} +# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"9971a0badaf621a6194316298030053110c15cc5683e20fc23b1c245fbb42bd8","body_hash":"988c8ab731a331e33e5751aa46982c3ee5adc6dbefb2b18236d88854467c3d2b","compiler_version":"v0.79.6","agent_id":"codex","agent_model":"gpt-5.4","engine_versions":{"codex":"0.137.0"}} # gh-aw-manifest: {"version":1,"secrets":["CODEX_API_KEY","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN","OPENAI_API_KEY"],"actions":[{"repo":"actions/cache","sha":"27d5ce7f107fe9357f9df03efb73ab90386fccae","version":"v5.0.5"},{"repo":"actions/checkout","sha":"df4cb1c069e1874edd31b4311f1884172cec0e10","version":"v6.0.3"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9.0.0"},{"repo":"actions/setup-node","sha":"48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e","version":"v6.4.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"},{"repo":"github/gh-aw-actions/setup","sha":"5c2fe865bb4dc46e1450f6ee0d0541d759aea73a","version":"v0.79.6"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2","digest":"sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6","pinned_image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2@sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2","digest":"sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4","pinned_image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2@sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4"},{"image":"ghcr.io/github/gh-aw-firewall/cli-proxy:0.27.2","digest":"sha256:02f3ec08f32dc26c5427920c6a2e2f3036238fce44802f2f11ef49ed8621b5d0","pinned_image":"ghcr.io/github/gh-aw-firewall/cli-proxy:0.27.2@sha256:02f3ec08f32dc26c5427920c6a2e2f3036238fce44802f2f11ef49ed8621b5d0"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2","digest":"sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591","pinned_image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2@sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591"},{"image":"ghcr.io/github/gh-aw-mcpg:latest","digest":"sha256:c10331ad17668ef89f38f5e356678788a40b0cd5fef96e8f92e1d9c1de47cbaa","pinned_image":"ghcr.io/github/gh-aw-mcpg:latest@sha256:c10331ad17668ef89f38f5e356678788a40b0cd5fef96e8f92e1d9c1de47cbaa"},{"image":"ghcr.io/github/github-mcp-server:v1.1.2","digest":"sha256:30197479d8036c7811892bc07e06f9a05c9ef3cdd79bc59f256d50647f95788c","pinned_image":"ghcr.io/github/github-mcp-server:v1.1.2@sha256:30197479d8036c7811892bc07e06f9a05c9ef3cdd79bc59f256d50647f95788c"},{"image":"mcr.microsoft.com/playwright/mcp","digest":"sha256:7b82f29c6ef83480a97f612d53ac3fd5f30a32df3fea1e06923d4204d3532bb2","pinned_image":"mcr.microsoft.com/playwright/mcp@sha256:7b82f29c6ef83480a97f612d53ac3fd5f30a32df3fea1e06923d4204d3532bb2"}]} # ___ _ _ # / _ \ | | (_) @@ -1283,6 +1283,7 @@ jobs: - activation - agent - safe_outputs + - verify_token_usage if: > always() && (needs.agent.result != 'skipped' || needs.activation.outputs.lockdown_check_failed == 'true' || needs.activation.outputs.stale_lock_file_failed == 'true' || needs.activation.outputs.daily_effective_workflow_exceeded == 'true') @@ -1580,3 +1581,33 @@ jobs: /tmp/gh-aw/temporary-id-map.json if-no-files-found: ignore + verify_token_usage: + needs: agent + if: always() && needs.agent.result != 'skipped' && needs.agent.result != 'cancelled' + runs-on: ubuntu-latest + permissions: + contents: read + + steps: + - name: Configure GH_HOST for enterprise compatibility + id: ghes-host-config + shell: bash + # zizmor: ignore[github-env] - GITHUB_SERVER_URL is set by GitHub Actions, not user input. + run: | + # Derive GH_HOST from GITHUB_SERVER_URL so the gh CLI targets the correct + # GitHub instance (GHES/GHEC). On github.com this is a harmless no-op. + GH_HOST="${GITHUB_SERVER_URL#https://}" + GH_HOST="${GH_HOST#http://}" + echo "GH_HOST=${GH_HOST}" >> "$GITHUB_ENV" + - name: Checkout repository + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + - name: Download agent artifact + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: agent + path: /tmp/gh-aw-agent + - name: Token-usage sanity check + run: node scripts/ci/check-token-usage.js --artifact-root /tmp/gh-aw-agent --engine codex + diff --git a/.github/workflows/smoke-codex.md b/.github/workflows/smoke-codex.md index 85a2e32e..2a3d7966 100644 --- a/.github/workflows/smoke-codex.md +++ b/.github/workflows/smoke-codex.md @@ -22,6 +22,25 @@ sandbox: mcp: version: latest strict: false +jobs: + verify_token_usage: + needs: agent + if: always() && needs.agent.result != 'skipped' && needs.agent.result != 'cancelled' + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - name: Checkout repository + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + - name: Download agent artifact + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: agent + path: /tmp/gh-aw-agent + - name: Token-usage sanity check + run: node scripts/ci/check-token-usage.js --artifact-root /tmp/gh-aw-agent --engine codex imports: - shared/gh.md - shared/reporting.md diff --git a/.github/workflows/smoke-copilot.lock.yml b/.github/workflows/smoke-copilot.lock.yml index c174f40c..825e757d 100644 --- a/.github/workflows/smoke-copilot.lock.yml +++ b/.github/workflows/smoke-copilot.lock.yml @@ -1,4 +1,4 @@ -# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"faa79a615d43a6ef6fc6323ee7fa0da8d4ee0330fa26ff718fda20030e74488d","body_hash":"d02de9958e5f3cbf119d4d3b7bd2a3b84afec98bad520e813e0ce2c465973fea","compiler_version":"v0.79.6","agent_id":"copilot","engine_versions":{"copilot":"1.0.60"}} +# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"d1fb458bdf80ba419bd2b97870372b0db05bfd15a9ee08d6b5f7978e788c7e12","body_hash":"d02de9958e5f3cbf119d4d3b7bd2a3b84afec98bad520e813e0ce2c465973fea","compiler_version":"v0.79.6","agent_id":"copilot","engine_versions":{"copilot":"1.0.60"}} # gh-aw-manifest: {"version":1,"secrets":["GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"df4cb1c069e1874edd31b4311f1884172cec0e10","version":"v6.0.3"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9.0.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"},{"repo":"github/gh-aw-actions/setup","sha":"5c2fe865bb4dc46e1450f6ee0d0541d759aea73a","version":"v0.79.6"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2","digest":"sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6","pinned_image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2@sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2","digest":"sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4","pinned_image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2@sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2","digest":"sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591","pinned_image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2@sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.3.1","digest":"sha256:287fad0236959f3b3d9936ea1ef8d5b4f135ef2a5f5789713495cbbef191e60c","pinned_image":"ghcr.io/github/gh-aw-mcpg:v0.3.1@sha256:287fad0236959f3b3d9936ea1ef8d5b4f135ef2a5f5789713495cbbef191e60c"},{"image":"ghcr.io/github/github-mcp-server:v1.1.2","digest":"sha256:30197479d8036c7811892bc07e06f9a05c9ef3cdd79bc59f256d50647f95788c","pinned_image":"ghcr.io/github/github-mcp-server:v1.1.2@sha256:30197479d8036c7811892bc07e06f9a05c9ef3cdd79bc59f256d50647f95788c"}]} # ___ _ _ # / _ \ | | (_) @@ -1078,6 +1078,7 @@ jobs: - activation - agent - safe_outputs + - verify_token_usage if: > always() && (needs.agent.result != 'skipped' || needs.activation.outputs.lockdown_check_failed == 'true' || needs.activation.outputs.stale_lock_file_failed == 'true' || needs.activation.outputs.daily_effective_workflow_exceeded == 'true') @@ -1372,3 +1373,33 @@ jobs: /tmp/gh-aw/temporary-id-map.json if-no-files-found: ignore + verify_token_usage: + needs: agent + if: always() && needs.agent.result != 'skipped' && needs.agent.result != 'cancelled' + runs-on: ubuntu-latest + permissions: + contents: read + + steps: + - name: Configure GH_HOST for enterprise compatibility + id: ghes-host-config + shell: bash + # zizmor: ignore[github-env] - GITHUB_SERVER_URL is set by GitHub Actions, not user input. + run: | + # Derive GH_HOST from GITHUB_SERVER_URL so the gh CLI targets the correct + # GitHub instance (GHES/GHEC). On github.com this is a harmless no-op. + GH_HOST="${GITHUB_SERVER_URL#https://}" + GH_HOST="${GH_HOST#http://}" + echo "GH_HOST=${GH_HOST}" >> "$GITHUB_ENV" + - name: Checkout repository + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + - name: Download agent artifact + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: agent + path: /tmp/gh-aw-agent + - name: Token-usage sanity check + run: node scripts/ci/check-token-usage.js --artifact-root /tmp/gh-aw-agent --engine copilot + diff --git a/.github/workflows/smoke-copilot.md b/.github/workflows/smoke-copilot.md index e01e4648..c1a8a95d 100644 --- a/.github/workflows/smoke-copilot.md +++ b/.github/workflows/smoke-copilot.md @@ -45,6 +45,25 @@ sandbox: mcp: version: v0.3.1 strict: false +jobs: + verify_token_usage: + needs: agent + if: always() && needs.agent.result != 'skipped' && needs.agent.result != 'cancelled' + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - name: Checkout repository + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + - name: Download agent artifact + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: agent + path: /tmp/gh-aw-agent + - name: Token-usage sanity check + run: node scripts/ci/check-token-usage.js --artifact-root /tmp/gh-aw-agent --engine copilot steps: - name: Pre-compute smoke test data id: smoke-data diff --git a/scripts/ci/check-token-usage.js b/scripts/ci/check-token-usage.js new file mode 100644 index 00000000..d9175e04 --- /dev/null +++ b/scripts/ci/check-token-usage.js @@ -0,0 +1,337 @@ +#!/usr/bin/env node +/** + * Token-usage sanity checker for the smoke workflows. + * + * Runs after the agent job, against the downloaded `agent` artifact, and fails + * the workflow when the api-proxy token accounting looks wrong. Two independent + * checks are performed, both engine-independent: + * + * 1. Internal consistency — the per-response records in + * `token-usage.jsonl` (written by the api-proxy) must sum exactly to the + * aggregated `agent_usage.json` summary that gh-aw derives from them. Any + * drift means a record was dropped, double-counted, or mis-aggregated. + * + * 2. Cache-read red flag — a real multi-request agentic run re-sends a + * growing context every turn, so the provider reports prompt-cache reads. + * A total `cache_read_tokens` of 0 across multiple requests indicates the + * api-proxy silently dropped cached tokens (the class of bug fixed in + * PR #5262 / issue #5203), so it is treated as a hard failure. + * + * The checker is intentionally zero-dependency CommonJS so the CI job only + * needs `node` plus the downloaded artifact — no `npm ci` / `tsx`. + * + * Usage: + * node scripts/ci/check-token-usage.js --artifact-root /tmp/gh-aw --engine copilot + * + * Flags: + * --artifact-root Root of the downloaded agent artifact (default: /tmp/gh-aw) + * --engine Engine id, for diagnostics only (copilot|claude|codex) + * --token-usage Explicit path to the per-response token-usage.jsonl + * --agent-usage Explicit path to the aggregated agent_usage.json + * --min-requests Minimum record count before cache_read==0 is fatal (default: 2) + */ + +'use strict'; + +const fs = require('fs'); +const path = require('path'); + +const TOKEN_FIELDS = ['input_tokens', 'output_tokens', 'cache_read_tokens', 'cache_write_tokens']; + +/** Parse JSONL text into an array of objects, skipping blank / malformed lines. */ +function parseJsonl(text) { + const records = []; + for (const rawLine of text.split('\n')) { + const line = rawLine.trim(); + if (!line) continue; + try { + records.push(JSON.parse(line)); + } catch { + // Tolerate partial / non-JSON lines (e.g. truncated final write). + } + } + return records; +} + +/** Sum the per-response token-usage records into a single aggregate. */ +function sumTokenUsage(records) { + const totals = { + input_tokens: 0, + output_tokens: 0, + cache_read_tokens: 0, + cache_write_tokens: 0, + count: 0, + firstInputTokens: null, + lastAiCreditsTotal: null, + }; + + for (const record of records) { + if (record == null || typeof record !== 'object') continue; + // Only count actual usage records (defensive against mixed log streams). + if (record.event && record.event !== 'token_usage') continue; + totals.count += 1; + for (const field of TOKEN_FIELDS) { + const value = record[field]; + if (typeof value === 'number' && Number.isFinite(value)) { + totals[field] += value; + } + } + if (totals.firstInputTokens === null && typeof record.input_tokens === 'number') { + totals.firstInputTokens = record.input_tokens; + } + if (typeof record.ai_credits_total === 'number' && Number.isFinite(record.ai_credits_total)) { + totals.lastAiCreditsTotal = record.ai_credits_total; + } + } + + return totals; +} + +/** True when two AI-credit figures agree within rounding noise. */ +function aiCreditsMatch(a, b) { + if (typeof a !== 'number' || typeof b !== 'number') return false; + const tolerance = Math.max(0.01, Math.abs(b) * 0.005); + return Math.abs(a - b) <= tolerance; +} + +/** + * Evaluate both checks. Returns { failures: string[], warnings: string[], summary }. + * Pure function: takes already-parsed inputs so it is trivially unit-testable. + */ +function evaluateTokenUsage({ records, aggregate, minRequests = 2 }) { + const failures = []; + const warnings = []; + const totals = sumTokenUsage(records); + + if (totals.count === 0) { + failures.push( + 'No token-usage records found. The agent produced no model requests, ' + + 'or the api-proxy failed to record usage.', + ); + return { failures, warnings, summary: totals }; + } + + // ── Check 1: internal consistency (per-response sum === aggregate) ── + if (!aggregate || typeof aggregate !== 'object') { + failures.push( + 'Aggregated agent_usage summary is missing or unreadable, so per-response ' + + 'totals cannot be verified.', + ); + } else { + for (const field of TOKEN_FIELDS) { + const summed = totals[field]; + const reported = typeof aggregate[field] === 'number' ? aggregate[field] : undefined; + if (reported === undefined) { + failures.push(`agent_usage is missing "${field}" — cannot verify consistency.`); + continue; + } + if (summed !== reported) { + failures.push( + `Inconsistent ${field}: token-usage.jsonl sums to ${summed} across ` + + `${totals.count} responses, but agent_usage reports ${reported} ` + + `(delta ${summed - reported}).`, + ); + } + } + + // ai_credits and ambient_context are derived figures: surface drift as a + // warning rather than failing the build on float-rounding differences. + if (typeof aggregate.ai_credits === 'number' && totals.lastAiCreditsTotal !== null) { + if (!aiCreditsMatch(totals.lastAiCreditsTotal, aggregate.ai_credits)) { + warnings.push( + `ai_credits drift: last ai_credits_total is ${totals.lastAiCreditsTotal}, ` + + `agent_usage reports ${aggregate.ai_credits}.`, + ); + } + } + if ( + typeof aggregate.ambient_context === 'number' && + totals.firstInputTokens !== null && + aggregate.ambient_context !== totals.firstInputTokens + ) { + warnings.push( + `ambient_context (${aggregate.ambient_context}) does not match the first ` + + `response input_tokens (${totals.firstInputTokens}).`, + ); + } + } + + // ── Check 2: cache-read red flag ── + if (totals.cache_read_tokens === 0) { + if (totals.count >= minRequests) { + failures.push( + `cache_read_tokens is 0 across ${totals.count} responses. A multi-request ` + + 'agentic run should report prompt-cache reads; zero almost always means ' + + 'the api-proxy dropped cached tokens (cf. issue #5203 / PR #5262).', + ); + } else { + warnings.push( + `cache_read_tokens is 0, but only ${totals.count} response(s) were recorded ` + + `(< ${minRequests}); too short to assert prompt caching.`, + ); + } + } + + return { failures, warnings, summary: totals }; +} + +/** Return the first path in `candidates` that exists on disk, else null. */ +function firstExisting(candidates) { + for (const candidate of candidates) { + try { + if (candidate && fs.existsSync(candidate) && fs.statSync(candidate).isFile()) { + return candidate; + } + } catch { + // ignore and keep looking + } + } + return null; +} + +/** Recursively find the first file named `name` under `root` (bounded depth). */ +function findFileRecursive(root, name, maxDepth = 6) { + const stack = [{ dir: root, depth: 0 }]; + while (stack.length > 0) { + const { dir, depth } = stack.pop(); + let entries; + try { + entries = fs.readdirSync(dir, { withFileTypes: true }); + } catch { + continue; + } + for (const entry of entries) { + const full = path.join(dir, entry.name); + if (entry.isFile() && entry.name === name) return full; + if (entry.isDirectory() && depth < maxDepth) { + stack.push({ dir: full, depth: depth + 1 }); + } + } + } + return null; +} + +/** Locate the per-response token-usage.jsonl and aggregated agent_usage.json. */ +function locateUsageFiles(root, overrides = {}) { + const tokenUsage = + overrides.tokenUsage || + firstExisting([ + path.join(root, 'sandbox/firewall/logs/api-proxy-logs/token-usage.jsonl'), + path.join(root, 'sandbox/firewall/audit/api-proxy-logs/token-usage.jsonl'), + path.join(root, 'sandbox/firewall-audit-logs/api-proxy-logs/token-usage.jsonl'), + path.join(root, 'usage/agent/token_usage.jsonl'), + ]) || + findFileRecursive(root, 'token-usage.jsonl'); + + const agentUsage = + overrides.agentUsage || + firstExisting([ + path.join(root, 'agent_usage.json'), + path.join(root, 'agent_usage.jsonl'), + path.join(root, 'usage/agent_usage.json'), + path.join(root, 'usage/agent_usage.jsonl'), + ]) || + findFileRecursive(root, 'agent_usage.json'); + + return { tokenUsage, agentUsage }; +} + +function parseArgs(argv) { + const options = { artifactRoot: '/tmp/gh-aw', engine: 'unknown', minRequests: 2 }; + for (let i = 0; i < argv.length; i += 1) { + const arg = argv[i]; + const next = () => argv[(i += 1)]; + switch (arg) { + case '--artifact-root': + options.artifactRoot = next(); + break; + case '--engine': + options.engine = next(); + break; + case '--token-usage': + options.tokenUsage = next(); + break; + case '--agent-usage': + options.agentUsage = next(); + break; + case '--min-requests': + options.minRequests = parseInt(next(), 10) || 2; + break; + default: + break; + } + } + return options; +} + +function main(argv) { + const options = parseArgs(argv); + const { tokenUsage, agentUsage } = locateUsageFiles(options.artifactRoot, { + tokenUsage: options.tokenUsage, + agentUsage: options.agentUsage, + }); + + console.log(`Token-usage sanity check (engine: ${options.engine})`); + console.log(` artifact root: ${options.artifactRoot}`); + console.log(` token-usage.jsonl: ${tokenUsage || '(not found)'}`); + console.log(` agent_usage.json: ${agentUsage || '(not found)'}`); + + if (!tokenUsage) { + console.error( + '::error::Could not locate token-usage.jsonl in the agent artifact. ' + + 'The api-proxy did not record token usage.', + ); + return 1; + } + + const records = parseJsonl(fs.readFileSync(tokenUsage, 'utf8')); + let aggregate = null; + if (agentUsage) { + const text = fs.readFileSync(agentUsage, 'utf8').trim(); + // agent_usage may be a single JSON object or a one-line JSONL file. + const parsed = parseJsonl(text); + aggregate = parsed.length > 0 ? parsed[parsed.length - 1] : null; + } + + const { failures, warnings, summary } = evaluateTokenUsage({ + records, + aggregate, + minRequests: options.minRequests, + }); + + console.log( + ` totals: responses=${summary.count} input=${summary.input_tokens} ` + + `output=${summary.output_tokens} cache_read=${summary.cache_read_tokens} ` + + `cache_write=${summary.cache_write_tokens}`, + ); + + for (const warning of warnings) { + console.log(`::warning::${warning}`); + } + for (const failure of failures) { + console.error(`::error::${failure}`); + } + + if (failures.length > 0) { + console.error(`Token-usage sanity check FAILED with ${failures.length} error(s).`); + return 1; + } + console.log('Token-usage sanity check passed.'); + return 0; +} + +if (require.main === module) { + process.exit(main(process.argv.slice(2))); +} + +module.exports = { + parseJsonl, + sumTokenUsage, + aiCreditsMatch, + evaluateTokenUsage, + firstExisting, + findFileRecursive, + locateUsageFiles, + parseArgs, + main, +}; diff --git a/scripts/ci/check-token-usage.test.ts b/scripts/ci/check-token-usage.test.ts new file mode 100644 index 00000000..9359df07 --- /dev/null +++ b/scripts/ci/check-token-usage.test.ts @@ -0,0 +1,203 @@ +import * as path from 'path'; + +// The checker is intentionally zero-dependency CommonJS so the CI job can run it +// with bare `node`; require it directly here for unit testing. +// eslint-disable-next-line @typescript-eslint/no-var-requires +const checker = require('./check-token-usage.js'); + +const { + parseJsonl, + sumTokenUsage, + aiCreditsMatch, + evaluateTokenUsage, + locateUsageFiles, + parseArgs, +} = checker; + +/** Build a per-response token-usage record with sensible defaults. */ +function record(overrides: Record = {}) { + return { + event: 'token_usage', + input_tokens: 0, + output_tokens: 0, + cache_read_tokens: 0, + cache_write_tokens: 0, + ai_credits_total: 0, + ...overrides, + }; +} + +describe('parseJsonl', () => { + it('parses well-formed lines and skips blanks / malformed lines', () => { + const text = '{"a":1}\n\n \nnot json\n{"b":2}\n'; + expect(parseJsonl(text)).toEqual([{ a: 1 }, { b: 2 }]); + }); + + it('returns an empty array for empty input', () => { + expect(parseJsonl('')).toEqual([]); + }); +}); + +describe('sumTokenUsage', () => { + it('sums token fields and captures first input / last credits', () => { + const totals = sumTokenUsage([ + record({ input_tokens: 100, output_tokens: 10, cache_read_tokens: 0, ai_credits_total: 1.5 }), + record({ input_tokens: 200, output_tokens: 20, cache_read_tokens: 150, ai_credits_total: 3.0 }), + ]); + expect(totals.input_tokens).toBe(300); + expect(totals.output_tokens).toBe(30); + expect(totals.cache_read_tokens).toBe(150); + expect(totals.count).toBe(2); + expect(totals.firstInputTokens).toBe(100); + expect(totals.lastAiCreditsTotal).toBe(3.0); + }); + + it('ignores non-usage records in a mixed stream', () => { + const totals = sumTokenUsage([ + record({ input_tokens: 100 }), + { event: 'something_else', input_tokens: 9999 }, + ]); + expect(totals.input_tokens).toBe(100); + expect(totals.count).toBe(1); + }); +}); + +describe('aiCreditsMatch', () => { + it('accepts values within rounding tolerance', () => { + expect(aiCreditsMatch(28.632, 28.632)).toBe(true); + expect(aiCreditsMatch(417.082, 417.085)).toBe(true); + }); + + it('rejects clearly different values', () => { + expect(aiCreditsMatch(28.632, 30.0)).toBe(false); + }); +}); + +describe('evaluateTokenUsage — internal consistency', () => { + it('passes when per-response sums equal the aggregate and cache_read > 0', () => { + const records = [ + record({ input_tokens: 13663, output_tokens: 378, cache_read_tokens: 0, ai_credits_total: 1.2 }), + record({ input_tokens: 16601, output_tokens: 124, cache_read_tokens: 10752, ai_credits_total: 4.3 }), + ]; + const aggregate = { + input_tokens: 30264, + output_tokens: 502, + cache_read_tokens: 10752, + cache_write_tokens: 0, + ambient_context: 13663, + ai_credits: 4.3, + }; + const { failures, warnings } = evaluateTokenUsage({ records, aggregate }); + expect(failures).toEqual([]); + expect(warnings).toEqual([]); + }); + + it('fails when the aggregate disagrees with the per-response sum', () => { + const records = [record({ input_tokens: 100, output_tokens: 10, cache_read_tokens: 50 })]; + const aggregate = { + input_tokens: 999, // wrong + output_tokens: 10, + cache_read_tokens: 50, + cache_write_tokens: 0, + }; + const { failures } = evaluateTokenUsage({ records, aggregate, minRequests: 1 }); + expect(failures.some((f: string) => f.includes('Inconsistent input_tokens'))).toBe(true); + }); + + it('fails when the aggregate is missing entirely', () => { + const records = [record({ input_tokens: 100, cache_read_tokens: 50 })]; + const { failures } = evaluateTokenUsage({ records, aggregate: null, minRequests: 1 }); + expect(failures.some((f: string) => f.includes('Aggregated agent_usage'))).toBe(true); + }); + + it('warns (does not fail) on ai_credits / ambient_context drift', () => { + const records = [ + record({ input_tokens: 100, output_tokens: 10, cache_read_tokens: 50, ai_credits_total: 2.0 }), + record({ input_tokens: 100, output_tokens: 10, cache_read_tokens: 50, ai_credits_total: 5.0 }), + ]; + const aggregate = { + input_tokens: 200, + output_tokens: 20, + cache_read_tokens: 100, + cache_write_tokens: 0, + ambient_context: 999, // mismatch -> warning + ai_credits: 42.0, // mismatch -> warning + }; + const { failures, warnings } = evaluateTokenUsage({ records, aggregate }); + expect(failures).toEqual([]); + expect(warnings.some((w: string) => w.includes('ai_credits drift'))).toBe(true); + expect(warnings.some((w: string) => w.includes('ambient_context'))).toBe(true); + }); +}); + +describe('evaluateTokenUsage — cache-read red flag', () => { + it('hard-fails when cache_read is 0 across multiple responses (the bug)', () => { + // Mirrors gh-aw codex run 27784259295/27784201719: consistent totals, zero cache reads. + const records = [ + record({ input_tokens: 13663, output_tokens: 378 }), + record({ input_tokens: 26000, output_tokens: 200 }), + ]; + const aggregate = { + input_tokens: 39663, + output_tokens: 578, + cache_read_tokens: 0, + cache_write_tokens: 0, + }; + const { failures } = evaluateTokenUsage({ records, aggregate }); + expect(failures.some((f: string) => f.includes('cache_read_tokens is 0'))).toBe(true); + }); + + it('only warns about cache_read==0 when below the min-requests threshold', () => { + const records = [record({ input_tokens: 100, output_tokens: 10, cache_read_tokens: 0 })]; + const aggregate = { + input_tokens: 100, + output_tokens: 10, + cache_read_tokens: 0, + cache_write_tokens: 0, + }; + const { failures, warnings } = evaluateTokenUsage({ records, aggregate, minRequests: 2 }); + expect(failures).toEqual([]); + expect(warnings.some((w: string) => w.includes('too short to assert'))).toBe(true); + }); + + it('fails when there are no usage records at all', () => { + const { failures } = evaluateTokenUsage({ records: [], aggregate: null }); + expect(failures.some((f: string) => f.includes('No token-usage records'))).toBe(true); + }); +}); + +describe('locateUsageFiles', () => { + it('honors explicit overrides without touching the filesystem', () => { + const { tokenUsage, agentUsage } = locateUsageFiles('/nonexistent', { + tokenUsage: '/x/token-usage.jsonl', + agentUsage: '/x/agent_usage.json', + }); + expect(tokenUsage).toBe('/x/token-usage.jsonl'); + expect(agentUsage).toBe('/x/agent_usage.json'); + }); + + it('resolves the canonical api-proxy path inside a real fixture tree', () => { + // The codex artifact downloaded during development is not present in CI, so + // this only asserts the candidate-path logic via overrides above; here we + // simply confirm a missing tree yields nulls rather than throwing. + const { tokenUsage, agentUsage } = locateUsageFiles(path.join('/tmp', 'definitely-missing-xyz')); + expect(tokenUsage).toBeNull(); + expect(agentUsage).toBeNull(); + }); +}); + +describe('parseArgs', () => { + it('parses flags with sensible defaults', () => { + const opts = parseArgs(['--artifact-root', '/tmp/x', '--engine', 'copilot', '--min-requests', '5']); + expect(opts.artifactRoot).toBe('/tmp/x'); + expect(opts.engine).toBe('copilot'); + expect(opts.minRequests).toBe(5); + }); + + it('defaults min-requests to 2 and engine to unknown', () => { + const opts = parseArgs([]); + expect(opts.minRequests).toBe(2); + expect(opts.engine).toBe('unknown'); + expect(opts.artifactRoot).toBe('/tmp/gh-aw'); + }); +}); From e6e291c9c3110d5e52ca0814549dc87a5f924255 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 18 Jun 2026 21:56:46 +0000 Subject: [PATCH 2/2] fix(ci): address check-token-usage.js review feedback - Parse agent_usage with JSON.parse() first, fallback to JSONL - Recursive fallback also searches for agent_usage.jsonl - Restore smoke-claude.lock.yml to v0.79.8 versions and add only the verify_token_usage job - Add 3 unit tests (20 total) --- .github/workflows/smoke-claude.lock.yml | 41 +++++++++-------- scripts/ci/check-token-usage.js | 15 +++++-- scripts/ci/check-token-usage.test.ts | 59 +++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 25 deletions(-) diff --git a/.github/workflows/smoke-claude.lock.yml b/.github/workflows/smoke-claude.lock.yml index 3a49b308..84c3b258 100644 --- a/.github/workflows/smoke-claude.lock.yml +++ b/.github/workflows/smoke-claude.lock.yml @@ -1,5 +1,7 @@ -# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"1931d05a82aa65b2b1d5af50c9dcde1453044c61ac1c0718031eb2eca5c6b046","body_hash":"6e05820005e43b82d8112bc60ced8e13336596ae671ecac69e6c5ac691485b71","compiler_version":"v0.79.6","agent_id":"claude","agent_model":"claude-haiku-4-5","engine_versions":{"claude":"2.1.168"}} -# gh-aw-manifest: {"version":1,"secrets":["ANTHROPIC_API_KEY","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"df4cb1c069e1874edd31b4311f1884172cec0e10","version":"v6.0.3"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9.0.0"},{"repo":"actions/setup-node","sha":"48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e","version":"v6.4.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"},{"repo":"github/gh-aw-actions/setup","sha":"5c2fe865bb4dc46e1450f6ee0d0541d759aea73a","version":"v0.79.6"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2","digest":"sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6","pinned_image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2@sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2","digest":"sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4","pinned_image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2@sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2","digest":"sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591","pinned_image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2@sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.3.1","digest":"sha256:287fad0236959f3b3d9936ea1ef8d5b4f135ef2a5f5789713495cbbef191e60c","pinned_image":"ghcr.io/github/gh-aw-mcpg:v0.3.1@sha256:287fad0236959f3b3d9936ea1ef8d5b4f135ef2a5f5789713495cbbef191e60c"}]} +# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"1931d05a82aa65b2b1d5af50c9dcde1453044c61ac1c0718031eb2eca5c6b046","body_hash":"6e05820005e43b82d8112bc60ced8e13336596ae671ecac69e6c5ac691485b71","compiler_version":"v0.79.8","agent_id":"claude","agent_model":"claude-haiku-4-5","engine_versions":{"claude":"2.1.168"}} +# gh-aw-manifest: {"version":1,"secrets":["ANTHROPIC_API_KEY","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"df4cb1c069e1874edd31b4311f1884172cec0e10","version":"v6.0.3"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9.0.0"},{"repo":"actions/setup-node","sha":"48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e","version":"v6.4.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"},{"repo":"github/gh-aw-actions/setup","sha":"c0338fef4749d08c21f8f975fb0e37efa17dda47","version":"v0.79.8"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2","digest":"sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6","pinned_image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2@sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2","digest":"sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4","pinned_image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2@sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2","digest":"sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591","pinned_image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2@sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.3.1","digest":"sha256:287fad0236959f3b3d9936ea1ef8d5b4f135ef2a5f5789713495cbbef191e60c","pinned_image":"ghcr.io/github/gh-aw-mcpg:v0.3.1@sha256:287fad0236959f3b3d9936ea1ef8d5b4f135ef2a5f5789713495cbbef191e60c"}]} +# This file was automatically generated by gh-aw (v0.79.8). DO NOT EDIT. To debug this workflow, load the skill at https://github.com/github/gh-aw/blob/main/debug.md +# # ___ _ _ # / _ \ | | (_) # | |_| | __ _ ___ _ __ | |_ _ ___ @@ -14,7 +16,6 @@ # \ /\ / (_) | | | | ( | | | | (_) \ V V /\__ \ # \/ \/ \___/|_| |_|\_\|_| |_|\___/ \_/\_/ |___/ # -# This file was automatically generated by gh-aw (v0.79.6). DO NOT EDIT. # # To update this file, edit the corresponding .md file and run: # gh aw compile @@ -36,7 +37,7 @@ # - actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 # - actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 # - actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 -# - github/gh-aw-actions/setup@5c2fe865bb4dc46e1450f6ee0d0541d759aea73a # v0.79.6 +# - github/gh-aw-actions/setup@c0338fef4749d08c21f8f975fb0e37efa17dda47 # v0.79.8 # # Container images used: # - ghcr.io/github/gh-aw-firewall/agent:0.27.2@sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6 @@ -89,9 +90,9 @@ jobs: comment_id: ${{ steps.add-comment.outputs.comment-id }} comment_repo: ${{ steps.add-comment.outputs.comment-repo }} comment_url: ${{ steps.add-comment.outputs.comment-url }} - daily_effective_workflow_exceeded: ${{ steps.daily-effective-workflow-guardrail.outputs.daily_effective_workflow_exceeded == 'true' }} - daily_effective_workflow_threshold: ${{ steps.daily-effective-workflow-guardrail.outputs.daily_effective_workflow_threshold || '' }} - daily_effective_workflow_total_effective_tokens: ${{ steps.daily-effective-workflow-guardrail.outputs.daily_effective_workflow_total_effective_tokens || '' }} + daily_ai_credits_exceeded: ${{ steps.daily-effective-workflow-guardrail.outputs.daily_ai_credits_exceeded == 'true' }} + daily_ai_credits_threshold: ${{ steps.daily-effective-workflow-guardrail.outputs.daily_ai_credits_threshold || '' }} + daily_ai_credits_total_effective_tokens: ${{ steps.daily-effective-workflow-guardrail.outputs.daily_ai_credits_total_effective_tokens || '' }} engine_id: ${{ steps.generate_aw_info.outputs.engine_id }} label_command: ${{ steps.get_trigger_label.outputs.label_name }} lockdown_check_failed: ${{ steps.generate_aw_info.outputs.lockdown_check_failed == 'true' }} @@ -104,7 +105,7 @@ jobs: steps: - name: Setup Scripts id: setup - uses: github/gh-aw-actions/setup@5c2fe865bb4dc46e1450f6ee0d0541d759aea73a # v0.79.6 + uses: github/gh-aw-actions/setup@c0338fef4749d08c21f8f975fb0e37efa17dda47 # v0.79.8 with: destination: ${{ runner.temp }}/gh-aw/actions job-name: ${{ github.job }} @@ -123,7 +124,7 @@ jobs: GH_AW_INFO_MODEL: "claude-haiku-4-5" GH_AW_INFO_VERSION: "2.1.168" GH_AW_INFO_AGENT_VERSION: "2.1.168" - GH_AW_INFO_CLI_VERSION: "v0.79.6" + GH_AW_INFO_CLI_VERSION: "v0.79.8" GH_AW_INFO_WORKFLOW_NAME: "Smoke Claude" GH_AW_INFO_EXPERIMENTAL: "false" GH_AW_INFO_SUPPORTS_TOOLS_ALLOWLIST: "true" @@ -204,7 +205,7 @@ jobs: - name: Check compile-agentic version uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 env: - GH_AW_COMPILED_VERSION: "v0.79.6" + GH_AW_COMPILED_VERSION: "v0.79.8" with: script: | const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); @@ -296,7 +297,6 @@ jobs: include-hidden-files: true path: | /tmp/gh-aw/aw_info.json - /tmp/gh-aw/model_multipliers.json /tmp/gh-aw/models.json /tmp/gh-aw/aw-prompts/prompt.txt /tmp/gh-aw/aw-prompts/prompt-template.txt @@ -310,7 +310,7 @@ jobs: agent: needs: activation - if: needs.activation.outputs.daily_effective_workflow_exceeded != 'true' + if: needs.activation.outputs.daily_ai_credits_exceeded != 'true' runs-on: ubuntu-latest permissions: contents: read @@ -344,7 +344,7 @@ jobs: steps: - name: Setup Scripts id: setup - uses: github/gh-aw-actions/setup@5c2fe865bb4dc46e1450f6ee0d0541d759aea73a # v0.79.6 + uses: github/gh-aw-actions/setup@c0338fef4749d08c21f8f975fb0e37efa17dda47 # v0.79.8 with: destination: ${{ runner.temp }}/gh-aw/actions job-name: ${{ github.job }} @@ -790,7 +790,6 @@ jobs: (umask 177 && touch /tmp/gh-aw/agent-stdio.log) GH_AW_MAX_AI_CREDITS="${{ vars.GH_AW_DEFAULT_MAX_AI_CREDITS || '1000' }}" printf '%s\n' "{\"\$schema\":\"https://github.com/github/gh-aw-firewall/releases/download/v0.27.2/awf-config.schema.json\",\"network\":{\"allowDomains\":[\"*.githubusercontent.com\",\"anthropic.com\",\"api.anthropic.com\",\"api.github.com\",\"api.snapcraft.io\",\"archive.ubuntu.com\",\"azure.archive.ubuntu.com\",\"cdn.playwright.dev\",\"codeload.github.com\",\"crl.geotrust.com\",\"crl.globalsign.com\",\"crl.identrust.com\",\"crl.sectigo.com\",\"crl.thawte.com\",\"crl.usertrust.com\",\"crl.verisign.com\",\"crl3.digicert.com\",\"crl4.digicert.com\",\"crls.ssl.com\",\"files.pythonhosted.org\",\"ghcr.io\",\"github-cloud.githubusercontent.com\",\"github-cloud.s3.amazonaws.com\",\"github.com\",\"host.docker.internal\",\"json-schema.org\",\"json.schemastore.org\",\"keyserver.ubuntu.com\",\"lfs.github.com\",\"objects.githubusercontent.com\",\"ocsp.digicert.com\",\"ocsp.geotrust.com\",\"ocsp.globalsign.com\",\"ocsp.identrust.com\",\"ocsp.sectigo.com\",\"ocsp.ssl.com\",\"ocsp.thawte.com\",\"ocsp.usertrust.com\",\"ocsp.verisign.com\",\"packagecloud.io\",\"packages.cloud.google.com\",\"packages.microsoft.com\",\"playwright.download.prss.microsoft.com\",\"ppa.launchpad.net\",\"pypi.org\",\"raw.githubusercontent.com\",\"registry.npmjs.org\",\"s.symcb.com\",\"s.symcd.com\",\"security.ubuntu.com\",\"sentry.io\",\"statsig.anthropic.com\",\"ts-crl.ws.symantec.com\",\"ts-ocsp.ws.symantec.com\",\"www.googleapis.com\"]},\"apiProxy\":{\"enabled\":true,\"enableTokenSteering\":true,\"maxRuns\":2,\"maxAiCredits\":${GH_AW_MAX_AI_CREDITS},\"models\":{\"agent\":[\"sonnet-6x\",\"gpt-5.4\",\"gpt-5.3\",\"gemini-pro\",\"any\"],\"antigravity\":[\"copilot/antigravity*\",\"google/antigravity*\",\"gemini/antigravity*\"],\"any\":[\"copilot/*\",\"anthropic/*\",\"openai/*\",\"google/*\",\"gemini/*\"],\"claude\":[\"agent\"],\"codex\":[\"agent\"],\"coding\":[\"copilot/gpt-5*codex*\",\"openai/gpt-5*codex*\",\"gpt-5-codex\"],\"computer-use\":[\"copilot/*computer-use*\",\"google/*computer-use*\",\"gemini/*computer-use*\",\"openai/*computer-use*\"],\"copilot\":[\"agent\"],\"deep-research\":[\"copilot/deep-research*\",\"copilot/o3-deep-research*\",\"copilot/o4-mini-deep-research*\",\"google/deep-research*\",\"gemini/deep-research*\",\"openai/o3-deep-research*\",\"openai/o4-mini-deep-research*\"],\"gemini\":[\"agent\"],\"gemini-3-flash\":[\"copilot/gemini-3*flash*\",\"google/gemini-3*flash*\",\"gemini/gemini-3*flash*\"],\"gemini-3-pro\":[\"copilot/gemini-3*pro*\",\"google/gemini-3*pro*\",\"google/nano-banana*\",\"gemini/gemini-3*pro*\"],\"gemini-3.1-flash\":[\"copilot/gemini-3.1*flash*\",\"google/gemini-3.1*flash*\",\"gemini/gemini-3.1*flash*\"],\"gemini-3.1-pro\":[\"copilot/gemini-3.1*pro*\",\"google/gemini-3.1*pro*\",\"gemini/gemini-3.1*pro*\"],\"gemini-3.5-flash\":[\"copilot/gemini-3.5*flash*\",\"google/gemini-3.5*flash*\",\"gemini/gemini-3.5*flash*\"],\"gemini-flash\":[\"copilot/gemini-*flash*\",\"google/gemini-*flash*\",\"gemini/gemini-*flash*\"],\"gemini-flash-lite\":[\"copilot/gemini-*flash*lite*\",\"google/gemini-*flash*lite*\",\"gemini/gemini-*flash*lite*\"],\"gemini-pro\":[\"copilot/gemini-*pro*\",\"google/gemini-*pro*\",\"gemini/gemini-*pro*\"],\"gemma\":[\"copilot/gemma*\",\"google/gemma*\",\"gemini/gemma*\"],\"gpt-5\":[\"copilot/gpt-5*\",\"openai/gpt-5*\"],\"gpt-5-codex\":[\"copilot/gpt-5*codex*\",\"openai/gpt-5*codex*\"],\"gpt-5-mini\":[\"copilot/gpt-5*mini*\",\"openai/gpt-5*mini*\"],\"gpt-5-nano\":[\"copilot/gpt-5*nano*\",\"openai/gpt-5*nano*\"],\"gpt-5-pro\":[\"copilot/gpt-5*pro*\",\"openai/gpt-5*pro*\"],\"gpt-5.2\":[\"copilot/gpt-5.2*\",\"openai/gpt-5.2*\"],\"gpt-5.3\":[\"copilot/gpt-5.3*\",\"openai/gpt-5.3*\"],\"gpt-5.4\":[\"copilot/gpt-5.4*\",\"openai/gpt-5.4*\"],\"gpt-5.5\":[\"copilot/gpt-5.5*\",\"openai/gpt-5.5*\"],\"haiku\":[\"copilot/*haiku*\",\"anthropic/*haiku*\"],\"large\":[\"sonnet\",\"gpt-5-pro\",\"gpt-5\",\"gemini-pro\"],\"mai-code\":[\"copilot/MAI-Code*\",\"copilot/mai-code*\",\"openai/MAI-Code*\"],\"mini\":[\"haiku\",\"gpt-5-mini\",\"gpt-5-nano\",\"gemini-flash-lite\"],\"nano-banana\":[\"copilot/nano-banana*\",\"google/nano-banana*\",\"gemini/nano-banana*\"],\"opus\":[\"copilot/*opus*\",\"anthropic/*opus*\"],\"opusplan\":[\"opus?effort=high\"],\"reasoning\":[\"copilot/o1*\",\"copilot/o3*\",\"copilot/o4*\",\"openai/o1*\",\"openai/o3*\",\"openai/o4*\"],\"robotics\":[\"copilot/*robotics*\",\"google/*robotics*\",\"gemini/*robotics*\"],\"small\":[\"mini\"],\"small-agent\":[\"haiku\",\"gpt-5-mini\",\"gemini-flash\"],\"sonnet\":[\"copilot/*sonnet*\",\"anthropic/*sonnet*\"],\"sonnet-6x\":[\"copilot/*sonnet-4.5*\",\"copilot/*sonnet-4.6*\",\"copilot/*sonnet-4-5-*\",\"anthropic/*sonnet-4-5-*\",\"copilot/*sonnet-4-6*\",\"anthropic/*sonnet-4-6*\"],\"summarization\":[\"haiku\",\"gpt-5-mini\",\"gemini-flash-lite\",\"mini\"],\"vision\":[\"copilot/gemini-*image*\",\"gemini/gemini-*image*\",\"copilot/gemini-*flash*\",\"gemini/gemini-*flash*\"]}},\"container\":{\"imageTag\":\"0.27.2,squid=sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591,agent=sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6,api-proxy=sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4,cli-proxy=sha256:02f3ec08f32dc26c5427920c6a2e2f3036238fce44802f2f11ef49ed8621b5d0\"}}" > "${RUNNER_TEMP}/gh-aw/awf-config.json" - GH_AW_MODEL_MULTIPLIERS_PATH="/tmp/gh-aw/model_multipliers.json" node "${RUNNER_TEMP}/gh-aw/actions/merge_awf_model_multipliers.cjs" cp "${RUNNER_TEMP}/gh-aw/awf-config.json" /tmp/gh-aw/awf-config.json export GH_AW_MODELS_JSON_PATH="/tmp/gh-aw/models.json" GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS="" @@ -823,7 +822,7 @@ jobs: GH_AW_PHASE: agent GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt GH_AW_SAFE_OUTPUTS: ${{ steps.set-runtime-paths.outputs.GH_AW_SAFE_OUTPUTS }} - GH_AW_VERSION: v0.79.6 + GH_AW_VERSION: v0.79.8 GITHUB_AW: true GITHUB_STEP_SUMMARY: /tmp/gh-aw/agent-step-summary.md GITHUB_WORKSPACE: ${{ github.workspace }} @@ -999,7 +998,7 @@ jobs: - verify_token_usage if: > always() && (needs.agent.result != 'skipped' || needs.activation.outputs.lockdown_check_failed == 'true' || - needs.activation.outputs.stale_lock_file_failed == 'true' || needs.activation.outputs.daily_effective_workflow_exceeded == 'true') + needs.activation.outputs.stale_lock_file_failed == 'true' || needs.activation.outputs.daily_ai_credits_exceeded == 'true') runs-on: ubuntu-slim permissions: contents: read @@ -1018,7 +1017,7 @@ jobs: steps: - name: Setup Scripts id: setup - uses: github/gh-aw-actions/setup@5c2fe865bb4dc46e1450f6ee0d0541d759aea73a # v0.79.6 + uses: github/gh-aw-actions/setup@c0338fef4749d08c21f8f975fb0e37efa17dda47 # v0.79.8 with: destination: ${{ runner.temp }}/gh-aw/actions job-name: ${{ github.job }} @@ -1156,9 +1155,9 @@ jobs: GH_AW_ENGINE_API_HOSTS: "api.anthropic.com" GH_AW_LOCKDOWN_CHECK_FAILED: ${{ needs.activation.outputs.lockdown_check_failed }} GH_AW_STALE_LOCK_FILE_FAILED: ${{ needs.activation.outputs.stale_lock_file_failed }} - GH_AW_DAILY_EFFECTIVE_WORKFLOW_EXCEEDED: ${{ needs.activation.outputs.daily_effective_workflow_exceeded }} - GH_AW_DAILY_EFFECTIVE_WORKFLOW_TOTAL_EFFECTIVE_TOKENS: ${{ needs.activation.outputs.daily_effective_workflow_total_effective_tokens }} - GH_AW_DAILY_EFFECTIVE_WORKFLOW_THRESHOLD: ${{ needs.activation.outputs.daily_effective_workflow_threshold }} + GH_AW_DAILY_AI_CREDITS_EXCEEDED: ${{ needs.activation.outputs.daily_ai_credits_exceeded }} + GH_AW_DAILY_AI_CREDITS_TOTAL_EFFECTIVE_TOKENS: ${{ needs.activation.outputs.daily_ai_credits_total_effective_tokens }} + GH_AW_DAILY_AI_CREDITS_THRESHOLD: ${{ needs.activation.outputs.daily_ai_credits_threshold }} GH_AW_SAFE_OUTPUT_MESSAGES: "{\"runSuccess\":\"✅ [{workflow_name}]({run_url}) passed\",\"runFailure\":\"❌ [{workflow_name}]({run_url}) {status}\"}" GH_AW_GROUP_REPORTS: "false" GH_AW_FAILURE_REPORT_AS_ISSUE: "true" @@ -1228,7 +1227,7 @@ jobs: steps: - name: Setup Scripts id: setup - uses: github/gh-aw-actions/setup@5c2fe865bb4dc46e1450f6ee0d0541d759aea73a # v0.79.6 + uses: github/gh-aw-actions/setup@c0338fef4749d08c21f8f975fb0e37efa17dda47 # v0.79.8 with: destination: ${{ runner.temp }}/gh-aw/actions job-name: ${{ github.job }} diff --git a/scripts/ci/check-token-usage.js b/scripts/ci/check-token-usage.js index d9175e04..e11c17e2 100644 --- a/scripts/ci/check-token-usage.js +++ b/scripts/ci/check-token-usage.js @@ -231,7 +231,8 @@ function locateUsageFiles(root, overrides = {}) { path.join(root, 'usage/agent_usage.json'), path.join(root, 'usage/agent_usage.jsonl'), ]) || - findFileRecursive(root, 'agent_usage.json'); + findFileRecursive(root, 'agent_usage.json') || + findFileRecursive(root, 'agent_usage.jsonl'); return { tokenUsage, agentUsage }; } @@ -288,9 +289,15 @@ function main(argv) { let aggregate = null; if (agentUsage) { const text = fs.readFileSync(agentUsage, 'utf8').trim(); - // agent_usage may be a single JSON object or a one-line JSONL file. - const parsed = parseJsonl(text); - aggregate = parsed.length > 0 ? parsed[parsed.length - 1] : null; + // agent_usage may be a pretty-printed JSON object, a single-line JSON + // object, or a JSONL file. Try JSON.parse() first so that multi-line + // pretty-printed files are handled correctly, then fall back to JSONL. + try { + aggregate = JSON.parse(text); + } catch { + const parsed = parseJsonl(text); + aggregate = parsed.length > 0 ? parsed[parsed.length - 1] : null; + } } const { failures, warnings, summary } = evaluateTokenUsage({ diff --git a/scripts/ci/check-token-usage.test.ts b/scripts/ci/check-token-usage.test.ts index 9359df07..e0077195 100644 --- a/scripts/ci/check-token-usage.test.ts +++ b/scripts/ci/check-token-usage.test.ts @@ -1,4 +1,6 @@ import * as path from 'path'; +import * as fs from 'fs'; +import * as os from 'os'; // The checker is intentionally zero-dependency CommonJS so the CI job can run it // with bare `node`; require it directly here for unit testing. @@ -10,8 +12,10 @@ const { sumTokenUsage, aiCreditsMatch, evaluateTokenUsage, + findFileRecursive, locateUsageFiles, parseArgs, + main, } = checker; /** Build a per-response token-usage record with sensible defaults. */ @@ -201,3 +205,58 @@ describe('parseArgs', () => { expect(opts.artifactRoot).toBe('/tmp/gh-aw'); }); }); + +describe('findFileRecursive', () => { + it('finds agent_usage.jsonl nested under a subdirectory', () => { + const root = fs.mkdtempSync(path.join(os.tmpdir(), 'ck-test-')); + try { + const sub = path.join(root, 'deep', 'subdir'); + fs.mkdirSync(sub, { recursive: true }); + const target = path.join(sub, 'agent_usage.jsonl'); + fs.writeFileSync(target, '{"input_tokens":1}\n'); + expect(findFileRecursive(root, 'agent_usage.jsonl')).toBe(target); + } finally { + fs.rmSync(root, { recursive: true, force: true }); + } + }); + + it('returns null when the file is absent', () => { + expect(findFileRecursive('/nonexistent-xyz', 'agent_usage.jsonl')).toBeNull(); + }); +}); + +describe('main — pretty-printed agent_usage.json', () => { + it('parses a multi-line pretty-printed JSON aggregate without error', () => { + const root = fs.mkdtempSync(path.join(os.tmpdir(), 'ck-test-')); + try { + const logsDir = path.join(root, 'sandbox', 'firewall', 'audit', 'api-proxy-logs'); + fs.mkdirSync(logsDir, { recursive: true }); + + // Write one matching token-usage record. + fs.writeFileSync( + path.join(logsDir, 'token-usage.jsonl'), + JSON.stringify({ + event: 'token_usage', + input_tokens: 100, + output_tokens: 10, + cache_read_tokens: 50, + cache_write_tokens: 5, + }) + '\n', + ); + + // Write the aggregate as pretty-printed JSON (multi-line). + const aggregate = { + input_tokens: 100, + output_tokens: 10, + cache_read_tokens: 50, + cache_write_tokens: 5, + }; + fs.writeFileSync(path.join(root, 'agent_usage.json'), JSON.stringify(aggregate, null, 2)); + + const exitCode = main(['--artifact-root', root, '--engine', 'test', '--min-requests', '1']); + expect(exitCode).toBe(0); + } finally { + fs.rmSync(root, { recursive: true, force: true }); + } + }); +});