Skip to content

Commit fa7e87a

Browse files
authored
Add max-cache-misses guardrail for API proxy token budget enforcement (#5202)
* Initial plan
* feat: add max-cache-misses guardrail
* Fix help text: use === instead of = for cache_read_tokens comparison

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
1 parent 3d11860 commit fa7e87a

27 files changed

Lines changed: 476 additions & 2 deletions

containers/api-proxy/guards/common-guard-checks.js

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
* @param {Function} deps.buildEffectiveTokenLimitError
2424
* @param {Function} deps.getMaxRunsBlockState
2525
* @param {Function} deps.buildMaxRunsExceededError
26+
* @param {Function} deps.getMaxCacheMissesBlockState
27+
* @param {Function} deps.buildMaxCacheMissesExceededError
2628
* @param {Function} deps.getPermissionDeniedBlockState
2729
* @param {Function} deps.buildPermissionDeniedLimitError
2830
* @param {Function} deps.getAiCreditsBlockState
@@ -44,6 +46,8 @@ function buildCommonGuardChecks(deps, model) {
4446
buildEffectiveTokenLimitError,
4547
getMaxRunsBlockState,
4648
buildMaxRunsExceededError,
49+
getMaxCacheMissesBlockState,
50+
buildMaxCacheMissesExceededError,
4751
getPermissionDeniedBlockState,
4852
buildPermissionDeniedLimitError,
4953
getAiCreditsBlockState,
@@ -80,6 +84,17 @@ function buildCommonGuardChecks(deps, model) {
8084
max_runs: block.maxRuns,
8185
}),
8286
},
87+
{
88+
block: getMaxCacheMissesBlockState(),
89+
isBlocked: block => block && block.maxExceeded,
90+
statusCode: 429,
91+
eventName: 'max_cache_misses_exceeded',
92+
buildError: buildMaxCacheMissesExceededError,
93+
buildLogFields: block => ({
94+
consecutive_cache_misses: block.consecutiveCacheMisses,
95+
max_cache_misses: block.maxCacheMisses,
96+
}),
97+
},
8398
{
8499
block: getPermissionDeniedBlockState(),
85100
isBlocked: block => block && block.maxExceeded,
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
'use strict';
2+
3+
const { parsePositiveInteger } = require('./guard-utils');
4+
5+
/**
 * Creates a fresh streak-tracking state object for the max-cache-misses guard.
 *
 * @param {string|null} [configKey=null] - Identifier of the configuration this state was created for.
 * @returns {{configKey: (string|null), consecutiveCacheMisses: number}} New state with the miss counter at zero.
 */
function createMaxCacheMissesState(configKey = null) {
6+
return {
7+
configKey,
8+
consecutiveCacheMisses: 0,
9+
};
10+
}
11+
12+
// Module-level streak state. It is reassigned (not mutated in place) when the
// configured limit changes and when the test reset helper runs.
let guardState = createMaxCacheMissesState();
13+
// Memoizes the parsed limit alongside the raw env string it was parsed from,
// so the value is only re-parsed when the raw string changes.
const configCache = { rawMax: undefined, parsed: null };
14+
15+
/**
 * Reads the configured consecutive-cache-miss limit from the
 * AWF_MAX_CACHE_MISSES environment variable.
 *
 * The parsed value is cached against the raw env string; it is re-parsed only
 * when the raw string differs from the one seen on the previous call. On the
 * very first call with the variable unset, the initial cached value (null) is
 * returned without calling the parser.
 *
 * @returns {*} The result of parsePositiveInteger for the current raw value.
 *   NOTE(review): parsePositiveInteger is defined in ./guard-utils and not
 *   visible here; callers treat a falsy result as "guard disabled" —
 *   presumably it yields a positive integer or null. Confirm against guard-utils.
 */
function getMaxCacheMissesConfig() {
16+
const rawMax = process.env.AWF_MAX_CACHE_MISSES;
17+
if (configCache.rawMax === rawMax) return configCache.parsed;
18+
configCache.rawMax = rawMax;
19+
configCache.parsed = parsePositiveInteger(rawMax);
20+
return configCache.parsed;
21+
}
22+
23+
/**
 * Returns the module-level streak state for the given limit, or null when the
 * guard is disabled.
 *
 * Side effect: if the stored state was created for a different limit (its
 * configKey does not match), the module-level state is replaced with a fresh
 * one, which resets the consecutive-miss counter to zero.
 *
 * @param {*} maxCacheMisses - Configured limit; any falsy value means the guard is disabled.
 * @returns {{configKey: (string|null), consecutiveCacheMisses: number}|null} Current state, or null when disabled.
 */
function getMaxCacheMissesState(maxCacheMisses) {
24+
if (!maxCacheMisses) return null;
25+
const configKey = String(maxCacheMisses);
26+
if (guardState.configKey !== configKey) {
27+
guardState = createMaxCacheMissesState(configKey);
28+
}
29+
return guardState;
30+
}
31+
32+
/**
 * Updates the consecutive-cache-miss streak from one usage record.
 *
 * Does nothing when the guard is disabled or when no usage object is given.
 * Records whose input_tokens is missing, zero or negative leave the streak
 * untouched. Otherwise a record with cache_read_tokens > 0 resets the streak
 * to zero, and a record with no cache reads increments it by one. The counter
 * is not capped at the configured limit.
 *
 * @param {{input_tokens?: number, cache_read_tokens?: number}|null|undefined} normalizedUsage
 *   Usage record; only the input_tokens and cache_read_tokens fields are read.
 * @returns {void}
 */
function applyMaxCacheMissesUsage(normalizedUsage) {
33+
const maxCacheMisses = getMaxCacheMissesConfig();
34+
const state = getMaxCacheMissesState(maxCacheMisses);
35+
if (!state || !normalizedUsage) return;
36+
37+
const inputTokens = normalizedUsage.input_tokens || 0;
38+
const cacheReadTokens = normalizedUsage.cache_read_tokens || 0;
39+
40+
// Only runs with non-zero input tokens are considered for cache-miss streaks.
41+
if (inputTokens <= 0) return;
42+
43+
// Any cache-read tokens count as a hit and end the current streak.
if (cacheReadTokens > 0) {
44+
state.consecutiveCacheMisses = 0;
45+
return;
46+
}
47+
48+
// Input tokens with no cache reads: one more consecutive miss.
state.consecutiveCacheMisses += 1;
49+
}
50+
51+
/**
 * Returns a snapshot used to decide whether requests should be blocked.
 *
 * @returns {{maxCacheMisses: number, consecutiveCacheMisses: number, maxExceeded: boolean}|null}
 *   null when the guard is disabled; otherwise the limit, the current streak,
 *   and maxExceeded, which is true as soon as the streak reaches the limit
 *   (the comparison is >=, not >).
 */
function getMaxCacheMissesBlockState() {
52+
const maxCacheMisses = getMaxCacheMissesConfig();
53+
const state = getMaxCacheMissesState(maxCacheMisses);
54+
if (!state) return null;
55+
return {
56+
maxCacheMisses,
57+
consecutiveCacheMisses: state.consecutiveCacheMisses,
58+
maxExceeded: state.consecutiveCacheMisses >= maxCacheMisses,
59+
};
60+
}
61+
62+
/**
 * Returns a snake_case summary of the guard for reporting.
 *
 * Unlike getMaxCacheMissesBlockState, this always returns an object: when the
 * guard is disabled it reports enabled=false with null limit/remaining and a
 * zero streak.
 *
 * @returns {{enabled: boolean, max_cache_misses: (number|null), consecutive_cache_misses: number, remaining_cache_misses: (number|null)}}
 *   remaining_cache_misses is clamped at zero once the streak reaches or passes the limit.
 */
function getMaxCacheMissesReflectState() {
63+
const maxCacheMisses = getMaxCacheMissesConfig();
64+
const state = getMaxCacheMissesState(maxCacheMisses);
65+
if (!state) {
66+
return {
67+
enabled: false,
68+
max_cache_misses: null,
69+
consecutive_cache_misses: 0,
70+
remaining_cache_misses: null,
71+
};
72+
}
73+
return {
74+
enabled: true,
75+
max_cache_misses: maxCacheMisses,
76+
consecutive_cache_misses: state.consecutiveCacheMisses,
77+
remaining_cache_misses: Math.max(0, maxCacheMisses - state.consecutiveCacheMisses),
78+
};
79+
}
80+
81+
/**
 * Test helper: restores the module to its initial state by replacing the
 * streak state with a fresh one and clearing the cached configuration, so the
 * next config read re-evaluates AWF_MAX_CACHE_MISSES.
 *
 * @returns {void}
 */
function resetMaxCacheMissesGuardForTests() {
82+
guardState = createMaxCacheMissesState();
83+
configCache.rawMax = undefined;
84+
configCache.parsed = null;
85+
}
86+
87+
/**
 * Builds the structured error payload for a request rejected by the
 * max-cache-misses guard.
 *
 * @param {{consecutiveCacheMisses: number, maxCacheMisses: number}} state
 *   Block-state snapshot; only these two fields are read. Must be non-null.
 * @returns {{error: {type: string, message: string, consecutive_cache_misses: number, max_cache_misses: number}}}
 *   Error body whose message embeds the streak and limit as "<streak> / <limit>".
 */
function buildMaxCacheMissesExceededError(state) {
88+
return {
89+
error: {
90+
type: 'max_cache_misses_exceeded',
91+
message: `Maximum consecutive cache misses exceeded (${state.consecutiveCacheMisses} / ${state.maxCacheMisses}).`,
92+
consecutive_cache_misses: state.consecutiveCacheMisses,
93+
max_cache_misses: state.maxCacheMisses,
94+
},
95+
};
96+
}
97+
98+
// Public API of the guard module: the usage updater, the block/reflect state
// readers, the test-only reset helper, and the error payload builder.
module.exports = {
99+
applyMaxCacheMissesUsage,
100+
getMaxCacheMissesBlockState,
101+
getMaxCacheMissesReflectState,
102+
resetMaxCacheMissesGuardForTests,
103+
buildMaxCacheMissesExceededError,
104+
};
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
'use strict';
2+
3+
const {
4+
applyMaxCacheMissesUsage,
5+
getMaxCacheMissesBlockState,
6+
getMaxCacheMissesReflectState,
7+
resetMaxCacheMissesGuardForTests,
8+
buildMaxCacheMissesExceededError,
9+
} = require('./max-cache-misses-guard');
10+
11+
// Unit tests for the max-cache-misses guard. Each test starts and ends with
// AWF_MAX_CACHE_MISSES unset and the module state reset, so tests do not leak
// streak or config-cache state into each other.
describe('max-cache-misses-guard', () => {
12+
beforeEach(() => {
13+
delete process.env.AWF_MAX_CACHE_MISSES;
14+
resetMaxCacheMissesGuardForTests();
15+
});
16+
17+
afterEach(() => {
18+
delete process.env.AWF_MAX_CACHE_MISSES;
19+
resetMaxCacheMissesGuardForTests();
20+
});
21+
22+
// With no limit configured, usage is ignored and the guard reports itself as disabled.
it('is disabled when AWF_MAX_CACHE_MISSES is not configured', () => {
23+
applyMaxCacheMissesUsage({ input_tokens: 100, cache_read_tokens: 0 });
24+
expect(getMaxCacheMissesBlockState()).toBeNull();
25+
expect(getMaxCacheMissesReflectState()).toEqual({
26+
enabled: false,
27+
max_cache_misses: null,
28+
consecutive_cache_misses: 0,
29+
remaining_cache_misses: null,
30+
});
31+
});
32+
33+
// The middle record has zero input tokens, so it neither extends nor resets the streak.
it('tracks consecutive cache misses only for non-zero input runs', () => {
34+
process.env.AWF_MAX_CACHE_MISSES = '3';
35+
resetMaxCacheMissesGuardForTests();
36+
37+
applyMaxCacheMissesUsage({ input_tokens: 100, cache_read_tokens: 0 });
38+
applyMaxCacheMissesUsage({ input_tokens: 0, cache_read_tokens: 0 });
39+
applyMaxCacheMissesUsage({ input_tokens: 200, cache_read_tokens: 0 });
40+
41+
expect(getMaxCacheMissesBlockState()).toEqual({
42+
maxCacheMisses: 3,
43+
consecutiveCacheMisses: 2,
44+
maxExceeded: false,
45+
});
46+
});
47+
48+
// A single cache hit after a miss brings the streak back to zero.
it('resets streak when cache_read_tokens is non-zero', () => {
49+
process.env.AWF_MAX_CACHE_MISSES = '3';
50+
resetMaxCacheMissesGuardForTests();
51+
52+
applyMaxCacheMissesUsage({ input_tokens: 100, cache_read_tokens: 0 });
53+
applyMaxCacheMissesUsage({ input_tokens: 100, cache_read_tokens: 25 });
54+
55+
expect(getMaxCacheMissesBlockState()).toEqual({
56+
maxCacheMisses: 3,
57+
consecutiveCacheMisses: 0,
58+
maxExceeded: false,
59+
});
60+
});
61+
62+
// Blocking starts when the streak equals the limit, not only when it goes beyond it.
it('blocks once streak reaches the configured max', () => {
63+
process.env.AWF_MAX_CACHE_MISSES = '2';
64+
resetMaxCacheMissesGuardForTests();
65+
66+
applyMaxCacheMissesUsage({ input_tokens: 50, cache_read_tokens: 0 });
67+
applyMaxCacheMissesUsage({ input_tokens: 60, cache_read_tokens: 0 });
68+
69+
expect(getMaxCacheMissesBlockState()).toEqual({
70+
maxCacheMisses: 2,
71+
consecutiveCacheMisses: 2,
72+
maxExceeded: true,
73+
});
74+
expect(getMaxCacheMissesReflectState()).toEqual({
75+
enabled: true,
76+
max_cache_misses: 2,
77+
consecutive_cache_misses: 2,
78+
remaining_cache_misses: 0,
79+
});
80+
});
81+
82+
// The error builder is exercised directly with a hand-built state; no env or usage is involved.
it('builds structured guard error payload', () => {
83+
const error = buildMaxCacheMissesExceededError({
84+
maxCacheMisses: 3,
85+
consecutiveCacheMisses: 3,
86+
});
87+
expect(error).toEqual({
88+
error: {
89+
type: 'max_cache_misses_exceeded',
90+
message: expect.stringContaining('3 / 3'),
91+
consecutive_cache_misses: 3,
92+
max_cache_misses: 3,
93+
},
94+
});
95+
});
96+
});

containers/api-proxy/management.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ const metrics = require('./metrics');
2929
* @property {() => Record<string, { enabled: boolean, strategy: string, suppressed: boolean, suppression_reason?: string }>} getEffectiveModelFallback - Returns provider-effective fallback summary
3030
* @property {() => object} getAiCreditsUsage - Returns AI credits usage summary
3131
* @property {() => object} getMaxRunsUsage - Returns max-runs usage summary
32+
* @property {() => object} getMaxCacheMissesUsage - Returns max-cache-misses usage summary
3233
* @property {() => object} getPermissionDeniedUsage - Returns permission-denied usage summary
3334
*/
3435

@@ -53,6 +54,7 @@ function createManagementHandlers(deps) {
5354
getEffectiveModelFallback,
5455
getAiCreditsUsage,
5556
getMaxRunsUsage,
57+
getMaxCacheMissesUsage,
5658
getPermissionDeniedUsage,
5759
} = deps;
5860

@@ -105,6 +107,7 @@ function createManagementHandlers(deps) {
105107
model_fallback_effective: getEffectiveModelFallback(),
106108
ai_credits: getAiCreditsUsage(),
107109
runs: getMaxRunsUsage(),
110+
cache_misses: getMaxCacheMissesUsage(),
108111
permission_denied: getPermissionDeniedUsage(),
109112
};
110113
}

containers/api-proxy/proxy-request.js

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,12 @@ const {
3939
resetMaxRunsGuardForTests,
4040
buildMaxRunsExceededError,
4141
} = require('./guards/max-runs-guard');
42+
const {
43+
getMaxCacheMissesBlockState,
44+
getMaxCacheMissesReflectState,
45+
resetMaxCacheMissesGuardForTests,
46+
buildMaxCacheMissesExceededError,
47+
} = require('./guards/max-cache-misses-guard');
4248
const {
4349
applyPermissionDenied,
4450
getPermissionDeniedBlockState,
@@ -216,6 +222,8 @@ const proxyWebSocket = createProxyWebSocket({
216222
buildEffectiveTokenLimitError,
217223
getMaxRunsBlockState,
218224
buildMaxRunsExceededError,
225+
getMaxCacheMissesBlockState,
226+
buildMaxCacheMissesExceededError,
219227
getPermissionDeniedBlockState,
220228
buildPermissionDeniedLimitError,
221229
getAiCreditsBlockState,
@@ -408,6 +416,8 @@ function enforceGuards({ body, provider, req, res, requestId, startTime, span, i
408416
buildEffectiveTokenLimitError,
409417
getMaxRunsBlockState,
410418
buildMaxRunsExceededError,
419+
getMaxCacheMissesBlockState,
420+
buildMaxCacheMissesExceededError,
411421
getPermissionDeniedBlockState,
412422
buildPermissionDeniedLimitError,
413423
getAiCreditsBlockState,
@@ -537,10 +547,12 @@ module.exports = {
537547
getEffectiveTokenReflectState,
538548
getAiCreditsReflectState,
539549
getMaxRunsReflectState,
550+
getMaxCacheMissesReflectState,
540551
getPermissionDeniedReflectState,
541552
resetEffectiveTokenGuardForTests,
542553
resetAiCreditsGuardForTests,
543554
resetMaxRunsGuardForTests,
555+
resetMaxCacheMissesGuardForTests,
544556
resetPermissionDeniedGuardForTests,
545557
resetMaxModelMultiplierGuardForTests,
546558
resetTimeoutSteeringForTests,

containers/api-proxy/server.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ const {
5050
getEffectiveTokenReflectState,
5151
getAiCreditsReflectState,
5252
getMaxRunsReflectState,
53+
getMaxCacheMissesReflectState,
5354
getPermissionDeniedReflectState,
5455
} = require('./proxy-request');
5556

@@ -128,6 +129,7 @@ const { healthResponse, reflectEndpoints, handleManagementEndpoint } = createMan
128129
getEffectiveTokenUsage: () => getEffectiveTokenReflectState(),
129130
getAiCreditsUsage: () => getAiCreditsReflectState(),
130131
getMaxRunsUsage: () => getMaxRunsReflectState(),
132+
getMaxCacheMissesUsage: () => getMaxCacheMissesReflectState(),
131133
getPermissionDeniedUsage: () => getPermissionDeniedReflectState(),
132134
});
133135

containers/api-proxy/server.network.test.js

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,16 @@ describe('reflectEndpoints', () => {
415415
});
416416
});
417417

418+
// Checks that the reflect payload exposes a cache_misses section and that it
// has the "guard disabled" shape.
// NOTE(review): presumably AWF_MAX_CACHE_MISSES is unset in this suite — confirm against the test setup.
it('should include cache_misses in reflect output', () => {
419+
const result = reflectEndpoints();
420+
expect(result.cache_misses).toEqual({
421+
enabled: false,
422+
max_cache_misses: null,
423+
consecutive_cache_misses: 0,
424+
remaining_cache_misses: null,
425+
});
426+
});
427+
418428
it('should expose Copilot fallback suppression in reflect output for BYOK non-githubcopilot targets', () => {
419429
const prevTarget = process.env.COPILOT_API_TARGET;
420430
const prevProviderType = process.env.COPILOT_PROVIDER_TYPE;

0 commit comments

Comments
 (0)