Skip to content

Commit 371d531

Browse files
committed
feat: enhance responses API context management with server-side compaction
1 parent eee4f6e commit 371d531

1 file changed

Lines changed: 3 additions & 4 deletions

File tree

src/routes/responses/handler.ts

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -122,10 +122,9 @@ export const handleResponses = async (c: Context) => {
122122
)
123123
}
124124

125-
applyResponsesApiContextManagement(
126-
payload,
127-
selectedModel?.capabilities.limits.max_prompt_tokens,
128-
)
125+
// Use a threshold smaller than the client compaction threshold so that server-side compaction is used, which maintains the cache hit rate
126+
const maxPromptTokens = selectedModel?.capabilities.limits.max_prompt_tokens
127+
applyResponsesApiContextManagement(payload, maxPromptTokens, 0.8)
129128

130129
debugJson(logger, "Translated Responses payload:", payload)
131130

0 commit comments

Comments
 (0)