Skip to content

Commit ff0e5ef

Browse files
authored
Cap /reflect effective-token totals at configured maxEffectiveTokens (#3208)
* Initial plan
* fix: cap reflected effective token usage at configured max
* test: cover effective token reflect cap boundaries

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
1 parent 416e6f2 commit ff0e5ef

2 files changed

Lines changed: 66 additions & 3 deletions

File tree

containers/api-proxy/guards/effective-token-guard.js

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -152,12 +152,13 @@ function getEffectiveTokenReflectState() {
152152
thresholds_crossed: [],
153153
};
154154
}
155+
const reflectedTotal = Math.min(state.totalEffectiveTokens, config.max);
155156
return {
156157
enabled: true,
157158
max_effective_tokens: config.max,
158-
total_effective_tokens: state.totalEffectiveTokens,
159-
remaining_effective_tokens: Math.max(0, config.max - state.totalEffectiveTokens),
160-
percent_used: Math.round((state.totalEffectiveTokens / config.max) * 10000) / 100,
159+
total_effective_tokens: reflectedTotal,
160+
remaining_effective_tokens: Math.max(0, config.max - reflectedTotal),
161+
percent_used: Math.round((reflectedTotal / config.max) * 10000) / 100,
161162
thresholds_crossed: [...state.emittedThresholds].sort((a, b) => a - b),
162163
};
163164
}
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
const {
2+
applyEffectiveTokenUsage,
3+
getEffectiveTokenBlockState,
4+
getEffectiveTokenReflectState,
5+
resetEffectiveTokenGuardForTests,
6+
} = require('./effective-token-guard');
7+
8+
// Test suite for getEffectiveTokenReflectState(): pins the values it reports
// when the running effective-token total is above, below, and exactly at the
// maximum configured through AWF_MAX_EFFECTIVE_TOKENS.
describe('effective-token-guard reflect state', () => {
9+
// Before each test: drop any custom model multipliers and reset the guard
// (presumably clears accumulated usage — confirm against the guard module).
beforeEach(() => {
10+
delete process.env.AWF_EFFECTIVE_TOKEN_MODEL_MULTIPLIERS;
11+
resetEffectiveTokenGuardForTests();
12+
});
13+
14+
// After each test: remove both environment variables the tests may have
// touched and reset the guard again so nothing leaks into other tests.
afterEach(() => {
15+
delete process.env.AWF_MAX_EFFECTIVE_TOKENS;
16+
delete process.env.AWF_EFFECTIVE_TOKEN_MODEL_MULTIPLIERS;
17+
resetEffectiveTokenGuardForTests();
18+
});
19+
20+
// Over-budget case: the block state still exposes the raw total (120) and
// flags maxExceeded, while the reflect state is clamped to the max (100).
it('caps reflected total at max after the running total exceeds the budget', () => {
21+
process.env.AWF_MAX_EFFECTIVE_TOKENS = '100';
22+
23+
// output_tokens are weighted at 4x by default (30 * 4 = 120 effective tokens).
24+
applyEffectiveTokenUsage({ output_tokens: 30 }, 'gpt-4o');
25+
26+
const blockState = getEffectiveTokenBlockState();
27+
const reflectState = getEffectiveTokenReflectState();
28+
29+
expect(blockState.totalEffectiveTokens).toBe(120);
30+
expect(blockState.maxExceeded).toBe(true);
31+
expect(reflectState.total_effective_tokens).toBe(100);
32+
expect(reflectState.remaining_effective_tokens).toBe(0);
33+
expect(reflectState.percent_used).toBe(100);
34+
expect(reflectState.max_effective_tokens).toBe(100);
35+
});
36+
37+
// Under-budget case: 20 output tokens are expected to count as 80 effective
// tokens, so the reflect state reports the uncapped total, 20 remaining, 80%.
it('does not cap reflected usage while total remains below max', () => {
38+
process.env.AWF_MAX_EFFECTIVE_TOKENS = '100';
39+
40+
applyEffectiveTokenUsage({ output_tokens: 20 }, 'gpt-4o');
41+
42+
expect(getEffectiveTokenReflectState()).toMatchObject({
43+
max_effective_tokens: 100,
44+
total_effective_tokens: 80,
45+
remaining_effective_tokens: 20,
46+
percent_used: 80,
47+
});
48+
});
49+
50+
// Boundary case: 25 output tokens are expected to land exactly on the max
// (100), giving 0 remaining and 100% used.
it('reports 100% usage when total lands exactly on max', () => {
51+
process.env.AWF_MAX_EFFECTIVE_TOKENS = '100';
52+
53+
applyEffectiveTokenUsage({ output_tokens: 25 }, 'gpt-4o');
54+
55+
expect(getEffectiveTokenReflectState()).toMatchObject({
56+
max_effective_tokens: 100,
57+
total_effective_tokens: 100,
58+
remaining_effective_tokens: 0,
59+
percent_used: 100,
60+
});
61+
});
62+
});

0 commit comments

Comments
 (0)