Skip to content

Commit aaac2c6

Browse files
authored
Merge pull request #165 from zicochaos/feat/forward-cache-routing-headers
2 parents 4abdd08 + 543b95c commit aaac2c6

3 files changed

Lines changed: 36 additions & 2 deletions

File tree

packages/backend/src/routes/inference/responses.ts

Lines changed: 18 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,8 @@ export async function registerResponsesRoute(
3131
* This enables true stateless multi-turn conversations where the client only sends the
3232
* new input and the previous_response_id, without needing to re-send all history.
3333
*/
34-
fastify.post('/v1/responses', async (request: FastifyRequest, reply: FastifyReply) => {
34+
// Handler for Responses API requests (shared between /v1/responses and /v1/codex/responses)
35+
const responsesHandler = async (request: FastifyRequest, reply: FastifyReply) => {
3536
const requestId = crypto.randomUUID();
3637
const startTime = Date.now();
3738
let usageRecord: Partial<UsageRecord> = {
@@ -127,6 +128,17 @@ export async function registerResponsesRoute(
127128
unifiedRequest.incomingApiType = 'responses';
128129
unifiedRequest.originalBody = body;
129130
unifiedRequest.requestId = requestId;
131+
132+
// Forward cache routing headers for prompt caching support.
133+
// These headers enable server-side cache routing at the upstream provider.
134+
const sessionId = request.headers['session_id'] as string | undefined;
135+
const clientRequestId = request.headers['x-client-request-id'] as string | undefined;
136+
if (sessionId || clientRequestId || body.prompt_cache_key) {
137+
unifiedRequest.cacheRoutingHeaders = {
138+
session_id: sessionId || body.prompt_cache_key,
139+
'x-client-request-id': clientRequestId || body.prompt_cache_key,
140+
};
141+
}
130142
unifiedRequest = attachKeyAccessPolicy(request, unifiedRequest);
131143
const xAppHeader = Array.isArray(request.headers['x-app'])
132144
? request.headers['x-app'][0]
@@ -241,7 +253,11 @@ export async function registerResponsesRoute(
241253
},
242254
});
243255
}
244-
});
256+
};
257+
258+
fastify.post('/v1/responses', responsesHandler);
259+
// Codex CLI sends requests to /v1/codex/responses — alias to the same handler
260+
fastify.post('/v1/codex/responses', responsesHandler);
245261

246262
/**
247263
* GET /v1/responses/:response_id

packages/backend/src/services/dispatcher.ts

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -969,6 +969,20 @@ export class Dispatcher {
969969
if (route.config.headers) {
970970
Object.assign(headers, route.config.headers);
971971
}
972+
973+
// Forward cache routing headers for Responses API prompt caching.
974+
// These headers enable server-side cache routing at the upstream provider
975+
// (e.g. theclawbay, OpenAI). Without them, each request may land on a
976+
// different backend server, causing cache misses.
977+
if (request.cacheRoutingHeaders) {
978+
if (request.cacheRoutingHeaders.session_id) {
979+
headers['session_id'] = request.cacheRoutingHeaders.session_id;
980+
}
981+
if (request.cacheRoutingHeaders['x-client-request-id']) {
982+
headers['x-client-request-id'] = request.cacheRoutingHeaders['x-client-request-id'];
983+
}
984+
}
985+
972986
return headers;
973987
}
974988

packages/backend/src/types/unified.ts

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -119,6 +119,10 @@ export interface UnifiedChatRequest {
119119
type: 'text' | 'json_object' | 'json_schema';
120120
json_schema?: any;
121121
};
122+
cacheRoutingHeaders?: {
123+
session_id?: string;
124+
'x-client-request-id'?: string;
125+
};
122126
incomingApiType?: string;
123127
originalBody?: any;
124128
metadata?: {

0 commit comments

Comments
 (0)