Skip to content

Commit cac6a06

Browse files
authored
Add option to remove cache breakpoints to custom llm (#3660)
I'll use this to test the implicit caching of Qwen3.7
1 parent bdac5bd commit cac6a06

7 files changed

Lines changed: 159 additions & 7 deletions

File tree

apps/web/src/lib/ai-gateway/experiments/build-direct-provider.ts

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import {
22
addCacheBreakpoints,
33
injectReasoningIntoContent,
4+
removeCacheBreakpoints,
45
} from '@/lib/ai-gateway/providers/openrouter/request-helpers';
56
import type { CustomLlmProvider } from '@kilocode/db';
67
import type { GatewayChatApiKind, Provider } from '@/lib/ai-gateway/providers/types';
@@ -65,9 +66,12 @@ export type DirectProviderInput = ResolvedExperimentUpstream & {
6566
* contacted. The route layer is responsible for not applying provider
6667
* pinning or kilo-exclusive model rewrites on top of this provider.
6768
*/
68-
export function buildDirectProvider(upstream: DirectProviderInput): Provider {
69+
export function buildDirectProvider(
70+
id: 'custom' | 'experiment',
71+
upstream: DirectProviderInput
72+
): Provider {
6973
return {
70-
id: 'custom',
74+
id,
7175
apiUrl: upstream.base_url,
7276
apiKey: upstream.api_key,
7377
supportedChatApis: inferSupportedChatApis(upstream.opencode_settings?.ai_sdk_provider),
@@ -83,6 +87,9 @@ export function buildDirectProvider(upstream: DirectProviderInput): Provider {
8387
Object.assign(context.extraHeaders, upstream.extra_headers);
8488
}
8589
context.request.body.model = upstream.internal_id;
90+
if (upstream.remove_cache_breakpoints) {
91+
removeCacheBreakpoints(context.request);
92+
}
8693
if (upstream.add_cache_breakpoints) {
8794
addCacheBreakpoints(context.request);
8895
}

apps/web/src/lib/ai-gateway/experiments/upstream-schema.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ import { CustomLlmExtraBodySchema, OpenCodeSettingsSchema } from '@kilocode/db/s
2020
export const ExperimentUpstreamSchema = z
2121
.object({
2222
internal_id: z.string().min(1),
23-
base_url: z.string().url(),
23+
base_url: z.url(),
2424
opencode_settings: z
2525
.object({
2626
ai_sdk_provider: OpenCodeSettingsSchema.shape.ai_sdk_provider,
@@ -30,6 +30,7 @@ export const ExperimentUpstreamSchema = z
3030
extra_body: CustomLlmExtraBodySchema.optional(),
3131
remove_from_body: z.array(z.string()).optional(),
3232
add_cache_breakpoints: z.boolean().optional(),
33+
remove_cache_breakpoints: z.boolean().optional(),
3334
inject_reasoning_into_content: z.boolean().optional(),
3435
})
3536
.strict();

apps/web/src/lib/ai-gateway/providers/get-provider.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ async function checkCustomLlm(
110110
}
111111
return {
112112
kind: 'provider',
113-
provider: buildDirectProvider({
113+
provider: buildDirectProvider('custom', {
114114
internal_id: customLlm.internal_id,
115115
base_url: customLlm.base_url,
116116
api_key: customLlm.api_key,
@@ -121,6 +121,7 @@ async function checkCustomLlm(
121121
extra_headers: customLlm.extra_headers,
122122
remove_from_body: customLlm.remove_from_body,
123123
add_cache_breakpoints: customLlm.add_cache_breakpoints,
124+
remove_cache_breakpoints: customLlm.remove_cache_breakpoints,
124125
inject_reasoning_into_content: customLlm.inject_reasoning_into_content,
125126
}),
126127
userByok: null,
@@ -209,7 +210,7 @@ export async function getProvider(input: GetProviderInput): Promise<GetProviderR
209210
if (selection?.status === 'active') {
210211
return {
211212
kind: 'provider',
212-
provider: buildDirectProvider(selection.upstream),
213+
provider: buildDirectProvider('experiment', selection.upstream),
213214
userByok: null,
214215
bypassAccessCheck: false,
215216
experiment: {

apps/web/src/lib/ai-gateway/providers/openrouter/request-helpers.ts

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,24 @@ function containsCacheControl(value: unknown): boolean {
9797
return Object.values(value).some(containsCacheControl);
9898
}
9999

100+
/**
 * Removes every own `cache_control` property found anywhere inside `value`, mutating it in place.
 *
 * Arrays are searched element by element; values accepted by `isObjectRecord` have their own
 * `cache_control` key deleted and their remaining property values searched. Anything else is
 * left untouched.
 */
function deleteCacheControl(value: unknown): void {
  // Explicit work stack instead of recursion; visiting order does not affect the result.
  const pending: unknown[] = [value];
  while (pending.length > 0) {
    const current = pending.pop();
    if (Array.isArray(current)) {
      for (const element of current) {
        pending.push(element);
      }
      continue;
    }
    if (!isObjectRecord(current)) {
      continue;
    }
    if (Object.hasOwn(current, 'cache_control')) {
      delete current.cache_control;
    }
    // The deleted marker is already gone here, so only the remaining properties are searched.
    for (const nested of Object.values(current)) {
      pending.push(nested);
    }
  }
}
117+
100118
export function addCacheBreakpoints(request: GatewayRequest) {
101119
if (
102120
request.kind === 'chat_completions' &&
@@ -153,6 +171,20 @@ export function addCacheBreakpoints(request: GatewayRequest) {
153171
}
154172
}
155173

174+
/**
 * Strips explicit prompt-cache breakpoints (`cache_control` markers) from a gateway request,
 * mutating the request body in place.
 *
 * - `chat_completions`: markers are removed from `body.messages` when it is an array.
 * - `responses`: markers are removed from `body.input` when it is an array.
 * - `messages`: the top-level `body.cache_control` is removed, along with markers nested in
 *   `body.system`, `body.tools` and `body.messages`. The Messages API accepts breakpoints on
 *   system blocks and tool definitions as well as on message content, so all three locations
 *   have to be cleaned for the request to be breakpoint-free.
 *
 * Any other request kind is left untouched.
 */
export function removeCacheBreakpoints(request: GatewayRequest) {
  switch (request.kind) {
    case 'chat_completions':
      if (Array.isArray(request.body.messages)) {
        console.debug('[removeCacheBreakpoints] removing cache breakpoints from chat completions');
        deleteCacheControl(request.body.messages);
      }
      break;
    case 'responses':
      if (Array.isArray(request.body.input)) {
        console.debug('[removeCacheBreakpoints] removing cache breakpoints from responses request');
        deleteCacheControl(request.body.input);
      }
      break;
    case 'messages':
      console.debug('[removeCacheBreakpoints] removing cache breakpoints from messages request');
      delete request.body.cache_control;
      // deleteCacheControl ignores strings and undefined, so a plain-string system prompt or
      // a request without tools is safe to pass through unchanged.
      deleteCacheControl(request.body.system);
      deleteCacheControl(request.body.tools);
      deleteCacheControl(request.body.messages);
      break;
  }
}
187+
156188
export function fixResponsesRequest(request: GatewayResponsesRequest) {
157189
if (!Array.isArray(request.input)) {
158190
return;

apps/web/src/lib/ai-gateway/providers/types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ export type ProviderId =
1313
| 'morph'
1414
| 'vercel'
1515
| 'custom'
16+
| 'experiment'
1617
| 'dev-tools';
1718

1819
export type BYOKResult = {

apps/web/src/tests/openrouter-request-helpers.test.ts

Lines changed: 110 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
import { describe, expect, test } from '@jest/globals';
2-
import { addCacheBreakpoints } from '@/lib/ai-gateway/providers/openrouter/request-helpers';
2+
import {
3+
addCacheBreakpoints,
4+
removeCacheBreakpoints,
5+
} from '@/lib/ai-gateway/providers/openrouter/request-helpers';
36
import type { GatewayRequest } from '@/lib/ai-gateway/providers/openrouter/types';
47
import type OpenAI from 'openai';
58

@@ -251,3 +254,109 @@ describe('addCacheBreakpoints', () => {
251254
expect(request.body.cache_control).toBeUndefined();
252255
});
253256
});
257+
258+
describe('removeCacheBreakpoints', () => {
259+
test('removes all cache breakpoints added to a chat completions request', () => {
260+
const request: GatewayRequest = {
261+
kind: 'chat_completions',
262+
body: {
263+
model: 'test-model',
264+
messages: [
265+
{ role: 'system', content: 'You are helpful.' },
266+
{ role: 'user', content: 'First prompt' },
267+
{ role: 'assistant', content: 'First response' },
268+
{
269+
role: 'user',
270+
content: [
271+
{ type: 'text', text: 'Latest prompt' },
272+
{ type: 'text', text: 'Latest detail' },
273+
],
274+
},
275+
],
276+
},
277+
};
278+
279+
addCacheBreakpoints(request);
280+
expect(containsCacheControlDeep(request.body.messages)).toBe(true);
281+
282+
removeCacheBreakpoints(request);
283+
284+
expect(containsCacheControlDeep(request.body.messages)).toBe(false);
285+
});
286+
287+
test('removes all cache breakpoints added to a responses request', () => {
288+
const request: GatewayRequest = {
289+
kind: 'responses',
290+
body: {
291+
model: 'test-model',
292+
input: [
293+
{ type: 'message', role: 'system', content: 'You are helpful.' },
294+
{
295+
type: 'message',
296+
role: 'user',
297+
content: [{ type: 'input_text', text: 'First prompt' }],
298+
},
299+
{
300+
type: 'function_call_output',
301+
call_id: 'call_123',
302+
output: [
303+
{ type: 'input_text', text: 'Tool output' },
304+
{ type: 'input_text', text: 'Tool detail' },
305+
],
306+
},
307+
],
308+
},
309+
};
310+
311+
addCacheBreakpoints(request);
312+
if (request.kind !== 'responses' || !Array.isArray(request.body.input)) return;
313+
expect(containsCacheControlDeep(request.body.input)).toBe(true);
314+
315+
removeCacheBreakpoints(request);
316+
317+
expect(containsCacheControlDeep(request.body.input)).toBe(false);
318+
});
319+
320+
test('removes top-level and nested cache_control from a messages request', () => {
321+
const request: GatewayRequest = {
322+
kind: 'messages',
323+
body: {
324+
model: 'anthropic/claude-sonnet-4-5',
325+
max_tokens: 1024,
326+
cache_control: { type: 'ephemeral' },
327+
messages: [
328+
{
329+
role: 'user',
330+
content: [
331+
{
332+
type: 'text',
333+
text: 'First prompt',
334+
cache_control: { type: 'ephemeral' },
335+
},
336+
],
337+
},
338+
{ role: 'assistant', content: 'First response' },
339+
{ role: 'user', content: 'Latest prompt' },
340+
],
341+
},
342+
};
343+
344+
removeCacheBreakpoints(request);
345+
346+
expect(request.body.cache_control).toBeUndefined();
347+
expect(containsCacheControlDeep(request.body.messages)).toBe(false);
348+
});
349+
});
350+
351+
/**
 * Test helper: reports whether `value`, or anything nested inside it, is an object that has
 * an own `cache_control` property. Primitives and `null` never match; arrays are searched
 * element by element.
 */
function containsCacheControlDeep(value: unknown): boolean {
  if (value === null || typeof value !== 'object') {
    return false;
  }
  if (Array.isArray(value)) {
    return value.some(element => containsCacheControlDeep(element));
  }
  return (
    Object.hasOwn(value, 'cache_control') ||
    Object.values(value).some(nested => containsCacheControlDeep(nested))
  );
}

packages/db/src/schema-types.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1205,11 +1205,12 @@ export const CustomLlmDefinitionSchema = z.object({
12051205
display_name: z.string(),
12061206
context_length: z.number(),
12071207
max_completion_tokens: z.number(),
1208-
base_url: z.string(),
1208+
base_url: z.url(),
12091209
api_key: z.string(),
12101210
organization_ids: z.array(z.string()),
12111211
supports_image_input: z.boolean().optional(),
12121212
add_cache_breakpoints: z.boolean().optional(),
1213+
remove_cache_breakpoints: z.boolean().optional(),
12131214
inject_reasoning_into_content: z.boolean().optional(),
12141215
extra_headers: CustomLlmExtraHeadersSchema.optional(),
12151216
extra_body: CustomLlmExtraBodySchema.optional(),

0 commit comments

Comments
 (0)