We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent eee4f6e, commit 371d531 (Copy full SHA for 371d531)
1 file changed
src/routes/responses/handler.ts
@@ -122,10 +122,9 @@ export const handleResponses = async (c: Context) => {
122
)
123
}
124
125
- applyResponsesApiContextManagement(
126
- payload,
127
- selectedModel?.capabilities.limits.max_prompt_tokens,
128
- )
+ // Use a threshold smaller than the client's compaction threshold so that server-side compaction runs first, which maintains the cache hit rate
+ const maxPromptTokens = selectedModel?.capabilities.limits.max_prompt_tokens
+ applyResponsesApiContextManagement(payload, maxPromptTokens, 0.8)
129
130
debugJson(logger, "Translated Responses payload:", payload)
131
0 commit comments