Cap /reflect effective-token totals at configured maxEffectiveTokens (#3208)

Copilot · web-flow · commit ff0e5ef8fcef · 2026-05-15T07:49:53.000-07:00
* Initial plan

* fix: cap reflected effective token usage at configured max

* test: cover effective token reflect cap boundaries

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
diff --git a/containers/api-proxy/guards/effective-token-guard.js b/containers/api-proxy/guards/effective-token-guard.js
@@ -152,12 +152,13 @@ function getEffectiveTokenReflectState() {
       thresholds_crossed: [],
     };
   }
+  const reflectedTotal = Math.min(state.totalEffectiveTokens, config.max);
   return {
     enabled: true,
     max_effective_tokens: config.max,
-    total_effective_tokens: state.totalEffectiveTokens,
-    remaining_effective_tokens: Math.max(0, config.max - state.totalEffectiveTokens),
-    percent_used: Math.round((state.totalEffectiveTokens / config.max) * 10000) / 100,
+    total_effective_tokens: reflectedTotal,
+    remaining_effective_tokens: Math.max(0, config.max - reflectedTotal),
+    percent_used: Math.round((reflectedTotal / config.max) * 10000) / 100,
     thresholds_crossed: [...state.emittedThresholds].sort((a, b) => a - b),
   };
 }
diff --git a/containers/api-proxy/guards/effective-token-guard.test.js b/containers/api-proxy/guards/effective-token-guard.test.js
@@ -0,0 +1,62 @@
+const {
+  applyEffectiveTokenUsage,
+  getEffectiveTokenBlockState,
+  getEffectiveTokenReflectState,
+  resetEffectiveTokenGuardForTests,
+} = require('./effective-token-guard');
+
+describe('effective-token-guard reflect state', () => { // checks how the reflect state reports usage below, at, and above the budget
+  beforeEach(() => {
+    delete process.env.AWF_EFFECTIVE_TOKEN_MODEL_MULTIPLIERS; // presumably removes any per-model weighting override — verify against the guard
+    resetEffectiveTokenGuardForTests();
+  });
+
+  afterEach(() => {
+    delete process.env.AWF_MAX_EFFECTIVE_TOKENS; // every test sets its own budget; remove it so it does not outlive the test
+    delete process.env.AWF_EFFECTIVE_TOKEN_MODEL_MULTIPLIERS;
+    resetEffectiveTokenGuardForTests();
+  });
+
+  it('caps reflected total at max after the running total exceeds the budget', () => {
+    process.env.AWF_MAX_EFFECTIVE_TOKENS = '100'; // budget asserted below as max_effective_tokens: 100
+
+    // output_tokens are weighted at 4x by default, so 30 output tokens become 30 * 4 = 120 effective tokens (over the budget of 100).
+    applyEffectiveTokenUsage({ output_tokens: 30 }, 'gpt-4o');
+
+    const blockState = getEffectiveTokenBlockState();
+    const reflectState = getEffectiveTokenReflectState();
+
+    expect(blockState.totalEffectiveTokens).toBe(120); // block state still exposes the raw, uncapped total
+    expect(blockState.maxExceeded).toBe(true);
+    expect(reflectState.total_effective_tokens).toBe(100); // reflect state is clamped to the budget
+    expect(reflectState.remaining_effective_tokens).toBe(0);
+    expect(reflectState.percent_used).toBe(100);
+    expect(reflectState.max_effective_tokens).toBe(100);
+  });
+
+  it('does not cap reflected usage while total remains below max', () => {
+    process.env.AWF_MAX_EFFECTIVE_TOKENS = '100';
+
+    applyEffectiveTokenUsage({ output_tokens: 20 }, 'gpt-4o'); // 20 * 4 = 80 effective tokens, below the budget
+
+    expect(getEffectiveTokenReflectState()).toMatchObject({
+      max_effective_tokens: 100,
+      total_effective_tokens: 80,
+      remaining_effective_tokens: 20,
+      percent_used: 80,
+    });
+  });
+
+  it('reports 100% usage when total lands exactly on max', () => {
+    process.env.AWF_MAX_EFFECTIVE_TOKENS = '100';
+
+    applyEffectiveTokenUsage({ output_tokens: 25 }, 'gpt-4o'); // 25 * 4 = 100 effective tokens, exactly the budget
+
+    expect(getEffectiveTokenReflectState()).toMatchObject({
+      max_effective_tokens: 100,
+      total_effective_tokens: 100,
+      remaining_effective_tokens: 0,
+      percent_used: 100,
+    });
+  });
+});