Skip to content

Commit c9686a2

Browse files
committed
feat: add context management and compaction features to responses API
1 parent 47764aa commit c9686a2

7 files changed

Lines changed: 305 additions & 20 deletions

File tree

src/lib/config.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ export interface AppConfig {
99
}
1010
extraPrompts?: Record<string, string>
1111
smallModel?: string
12+
responsesApiContextManagementModels?: Array<string>
1213
modelReasoningEfforts?: Record<
1314
string,
1415
"none" | "minimal" | "low" | "medium" | "high" | "xhigh"
@@ -54,6 +55,7 @@ const defaultConfig: AppConfig = {
5455
"gpt-5.4": gpt5CommentaryPrompt,
5556
},
5657
smallModel: "gpt-5-mini",
58+
responsesApiContextManagementModels: ["gpt-5.4", "gpt-5.3-codex"],
5759
modelReasoningEfforts: {
5860
"gpt-5-mini": "low",
5961
"gpt-5.3-codex": "xhigh",
@@ -179,6 +181,19 @@ export function getSmallModel(): string {
179181
return config.smallModel ?? "gpt-5-mini"
180182
}
181183

184+
/**
 * Returns the `responsesApiContextManagementModels` list.
 *
 * Resolution order: the value from the loaded config, then the value from
 * `defaultConfig`, then an empty array.
 *
 * @returns The resolved list of model ids (possibly empty).
 */
export function getResponsesApiContextManagementModels(): Array<string> {
  const { responsesApiContextManagementModels: configured } = getConfig()

  // Only null/undefined fall through to the defaults; an explicitly
  // configured empty array is returned as-is.
  if (configured != null) {
    return configured
  }

  return defaultConfig.responsesApiContextManagementModels ?? []
}
192+
193+
/**
 * Reports whether `model` appears in the list returned by
 * `getResponsesApiContextManagementModels()`.
 *
 * @param model - Model id to look up (exact string match).
 * @returns `true` when the id is in the list, otherwise `false`.
 */
export function isResponsesApiContextManagementModel(model: string): boolean {
  const listedModels = getResponsesApiContextManagementModels()
  return listedModels.some((candidate) => candidate === model)
}
196+
182197
export function getReasoningEffortForModel(
183198
model: string,
184199
): "none" | "minimal" | "low" | "medium" | "high" | "xhigh" {

src/routes/messages/handler.ts

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,11 @@ import {
2222
translateAnthropicMessagesToResponsesPayload,
2323
translateResponsesResultToAnthropic,
2424
} from "~/routes/messages/responses-translation"
25-
import { getResponsesRequestOptions } from "~/routes/responses/utils"
25+
import {
26+
applyResponsesApiContextManagement,
27+
compactInputByLatestCompaction,
28+
getResponsesRequestOptions,
29+
} from "~/routes/responses/utils"
2630
import {
2731
createChatCompletions,
2832
type ChatCompletionChunk,
@@ -108,7 +112,10 @@ export async function handleCompletion(c: Context) {
108112
}
109113

110114
if (shouldUseResponsesApi(selectedModel)) {
111-
return await handleWithResponsesApi(c, anthropicPayload, initiatorOverride)
115+
return await handleWithResponsesApi(c, anthropicPayload, {
116+
initiatorOverride,
117+
selectedModel,
118+
})
112119
}
113120

114121
return await handleWithChatCompletions(c, anthropicPayload, initiatorOverride)
@@ -181,10 +188,23 @@ const handleWithChatCompletions = async (
181188
const handleWithResponsesApi = async (
182189
c: Context,
183190
anthropicPayload: AnthropicMessagesPayload,
184-
initiatorOverride?: "agent" | "user",
191+
options?: {
192+
initiatorOverride?: "agent" | "user"
193+
selectedModel?: Model
194+
},
185195
) => {
196+
const { initiatorOverride, selectedModel } = options ?? {}
197+
186198
const responsesPayload =
187199
translateAnthropicMessagesToResponsesPayload(anthropicPayload)
200+
201+
applyResponsesApiContextManagement(
202+
responsesPayload,
203+
selectedModel?.capabilities.limits.max_prompt_tokens,
204+
)
205+
206+
compactInputByLatestCompaction(responsesPayload)
207+
188208
logger.debug(
189209
"Translated Responses payload:",
190210
JSON.stringify(responsesPayload),

src/routes/messages/responses-stream-translation.ts

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import {
1919
import { type AnthropicStreamEventData } from "./anthropic-types"
2020
import {
2121
THINKING_TEXT,
22+
encodeCompactionCarrierSignature,
2223
translateResponsesResultToAnthropic,
2324
} from "./responses-translation"
2425

@@ -193,11 +194,45 @@ const handleOutputItemDone = (
193194
const events = new Array<AnthropicStreamEventData>()
194195
const item = rawEvent.item
195196
const itemType = item.type
197+
const outputIndex = rawEvent.output_index
198+
199+
if (itemType === "compaction") {
200+
if (!item.id || !item.encrypted_content) {
201+
return events
202+
}
203+
204+
const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events)
205+
206+
if (!state.blockHasDelta.has(blockIndex)) {
207+
events.push({
208+
type: "content_block_delta",
209+
index: blockIndex,
210+
delta: {
211+
type: "thinking_delta",
212+
thinking: THINKING_TEXT,
213+
},
214+
})
215+
}
216+
217+
events.push({
218+
type: "content_block_delta",
219+
index: blockIndex,
220+
delta: {
221+
type: "signature_delta",
222+
signature: encodeCompactionCarrierSignature({
223+
id: item.id,
224+
encrypted_content: item.encrypted_content,
225+
}),
226+
},
227+
})
228+
state.blockHasDelta.add(blockIndex)
229+
return events
230+
}
231+
196232
if (itemType !== "reasoning") {
197233
return events
198234
}
199235

200-
const outputIndex = rawEvent.output_index
201236
const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events)
202237
const signature = (item.encrypted_content ?? "") + "@" + item.id
203238
if (signature) {

src/routes/messages/responses-translation.ts

Lines changed: 117 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import {
66
} from "~/lib/config"
77
import {
88
type ResponsesPayload,
9+
type ResponseInputCompaction,
910
type ResponseInputContent,
1011
type ResponseInputImage,
1112
type ResponseInputItem,
@@ -14,6 +15,7 @@ import {
1415
type ResponseInputText,
1516
type ResponsesResult,
1617
type ResponseOutputContentBlock,
18+
type ResponseOutputCompaction,
1719
type ResponseOutputFunctionCall,
1820
type ResponseOutputItem,
1921
type ResponseOutputReasoning,
@@ -44,6 +46,8 @@ import {
4446
} from "./anthropic-types"
4547

4648
const MESSAGE_TYPE = "message"
49+
const COMPACTION_SIGNATURE_PREFIX = "cm1#"
50+
const COMPACTION_SIGNATURE_SEPARATOR = "@"
4751

4852
export const THINKING_TEXT = "Thinking..."
4953

@@ -89,6 +93,44 @@ export const translateAnthropicMessagesToResponsesPayload = (
8993
return responsesPayload
9094
}
9195

96+
/**
 * The pair of compaction-item fields that is packed into, and recovered
 * from, a single signature string by the encode/decode helpers.
 */
type CompactionCarrier = {
  /** Opaque encrypted payload of the compaction item. */
  encrypted_content: string
  /** Identifier of the compaction item. */
  id: string
}
100+
101+
/**
 * Packs a compaction item into one signature string laid out as
 * `<prefix><encrypted_content><separator><id>`, using the module-level
 * `COMPACTION_SIGNATURE_PREFIX` and `COMPACTION_SIGNATURE_SEPARATOR`.
 *
 * @param compaction - The id and encrypted payload to pack.
 * @returns The packed signature string.
 */
export const encodeCompactionCarrierSignature = (
  compaction: CompactionCarrier,
): string => {
  const { encrypted_content: payload, id } = compaction

  return (
    COMPACTION_SIGNATURE_PREFIX
    + payload
    + COMPACTION_SIGNATURE_SEPARATOR
    + id
  )
}
106+
107+
/**
 * Recovers a compaction item from a signature string produced by
 * `encodeCompactionCarrierSignature`.
 *
 * The string is expected to look like
 * `<prefix><encrypted_content><separator><id>`. The split happens at the
 * LAST separator, matching `parseReasoningSignature`, so an encrypted payload
 * that itself contains the separator character still round-trips.
 *
 * @param signature - Signature string taken from a thinking block.
 * @returns The decoded `{ id, encrypted_content }`, or `undefined` when the
 *   prefix is missing, no separator is present, or either half is empty.
 */
export const decodeCompactionCarrierSignature = (
  signature: string,
): CompactionCarrier | undefined => {
  if (!signature.startsWith(COMPACTION_SIGNATURE_PREFIX)) {
    return undefined
  }

  const raw = signature.slice(COMPACTION_SIGNATURE_PREFIX.length)
  const separatorIndex = raw.lastIndexOf(COMPACTION_SIGNATURE_SEPARATOR)
  const idStart = separatorIndex + COMPACTION_SIGNATURE_SEPARATOR.length

  // `<= 0` covers both "separator not found" (-1) and "empty payload" (0);
  // `idStart >= raw.length` covers "empty id".
  if (separatorIndex <= 0 || idStart >= raw.length) {
    return undefined
  }

  return {
    id: raw.slice(idStart),
    encrypted_content: raw.slice(0, separatorIndex),
  }
}
133+
92134
const translateMessage = (
93135
message: AnthropicMessage,
94136
model: string,
@@ -165,17 +207,25 @@ const translateAssistantMessage = (
165207
continue
166208
}
167209

168-
if (
169-
block.type === "thinking"
170-
&& block.signature
171-
&& block.signature.includes("@")
172-
) {
173-
flushPendingContent(pendingContent, items, {
174-
role: "assistant",
175-
phase: assistantPhase,
176-
})
177-
items.push(createReasoningContent(block))
178-
continue
210+
if (block.type === "thinking" && block.signature) {
211+
const compactionContent = createCompactionContent(block)
212+
if (compactionContent) {
213+
flushPendingContent(pendingContent, items, {
214+
role: "assistant",
215+
phase: assistantPhase,
216+
})
217+
items.push(compactionContent)
218+
continue
219+
}
220+
221+
if (block.signature.includes("@")) {
222+
flushPendingContent(pendingContent, items, {
223+
role: "assistant",
224+
phase: assistantPhase,
225+
})
226+
items.push(createReasoningContent(block))
227+
continue
228+
}
179229
}
180230

181231
const converted = translateAssistantContentBlock(block)
@@ -302,15 +352,43 @@ const createReasoningContent = (
302352
// Align with vscode-copilot-chat's extractThinkingData: the reasoning id must be included, otherwise the prompt cache is occasionally missed (usage then reports 0 cached input tokens).
303353
// https://github.com/microsoft/vscode-copilot-chat/blob/main/src/platform/endpoint/node/responsesApi.ts#L162
304354
// When used from the Codex CLI the reasoning id is empty, so occasional cache misses can still occur there.
305-
const array = block.signature.split("@")
306-
const signature = array[0]
307-
const id = array[1]
355+
const { encryptedContent, id } = parseReasoningSignature(block.signature)
308356
const thinking = block.thinking === THINKING_TEXT ? "" : block.thinking
309357
return {
310358
id,
311359
type: "reasoning",
312360
summary: thinking ? [{ type: "summary_text", text: thinking }] : [],
313-
encrypted_content: signature,
361+
encrypted_content: encryptedContent,
362+
}
363+
}
364+
365+
/**
 * Builds a Responses API `compaction` input item from a thinking block whose
 * signature carries an encoded compaction.
 *
 * @param block - Thinking block whose `signature` is decoded.
 * @returns The compaction input item, or `undefined` when the signature does
 *   not decode via `decodeCompactionCarrierSignature`.
 */
const createCompactionContent = (
  block: AnthropicThinkingBlock,
): ResponseInputCompaction | undefined => {
  const decoded = decodeCompactionCarrierSignature(block.signature)

  return decoded
    ? {
        id: decoded.id,
        type: "compaction",
        encrypted_content: decoded.encrypted_content,
      }
    : undefined
}
379+
380+
/**
 * Splits a reasoning signature of the form `<encrypted_content>@<id>` into
 * its two parts.
 *
 * The split is made at the LAST "@" so that a payload containing "@" is kept
 * intact. Either half may be empty: "@id" yields an empty payload and "enc@"
 * yields an empty id. The separator itself never leaks into either field.
 *
 * @param signature - Signature string taken from a thinking block.
 * @returns The encrypted payload and the reasoning id. When the signature has
 *   no "@" at all, the whole string is the payload and the id is "".
 */
const parseReasoningSignature = (
  signature: string,
): { encryptedContent: string; id: string } => {
  const splitIndex = signature.lastIndexOf("@")

  if (splitIndex === -1) {
    return { encryptedContent: signature, id: "" }
  }

  return {
    encryptedContent: signature.slice(0, splitIndex),
    id: signature.slice(splitIndex + 1),
  }
}
316394

@@ -456,6 +534,13 @@ const mapOutputToAnthropicContent = (
456534
}
457535
break
458536
}
537+
case "compaction": {
538+
const compactionBlock = createCompactionThinkingBlock(item)
539+
if (compactionBlock) {
540+
contentBlocks.push(compactionBlock)
541+
}
542+
break
543+
}
459544
default: {
460545
// Future compatibility for unrecognized output item types.
461546
const combinedText = combineMessageTextContent(
@@ -549,6 +634,23 @@ const createToolUseContentBlock = (
549634
}
550635
}
551636

637+
/**
 * Wraps a Responses API `compaction` output item in an Anthropic `thinking`
 * block. The block text is the `THINKING_TEXT` placeholder and the item's id
 * and encrypted payload are packed into `signature` via
 * `encodeCompactionCarrierSignature`.
 *
 * @param item - Compaction output item.
 * @returns The thinking block, or `null` when the item has no id or no
 *   encrypted content.
 */
const createCompactionThinkingBlock = (
  item: ResponseOutputCompaction,
): AnthropicAssistantContentBlock | null => {
  const { id, encrypted_content } = item

  if (id && encrypted_content) {
    return {
      type: "thinking",
      thinking: THINKING_TEXT,
      signature: encodeCompactionCarrierSignature({ id, encrypted_content }),
    }
  }

  return null
}
653+
552654
const parseFunctionCallArguments = (
553655
rawArguments: string,
554656
): Record<string, unknown> => {

src/routes/responses/handler.ts

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,11 @@ import {
1414
} from "~/services/copilot/create-responses"
1515

1616
import { createStreamIdTracker, fixStreamIds } from "./stream-id-sync"
17-
import { getResponsesRequestOptions } from "./utils"
17+
import {
18+
applyResponsesApiContextManagement,
19+
compactInputByLatestCompaction,
20+
getResponsesRequestOptions,
21+
} from "./utils"
1822

1923
const logger = createHandlerLogger("responses-handler")
2024

@@ -31,6 +35,8 @@ export const handleResponses = async (c: Context) => {
3135
// Remove web_search tool as it's not supported by GitHub Copilot
3236
removeWebSearchTool(payload)
3337

38+
compactInputByLatestCompaction(payload)
39+
3440
const selectedModel = state.models?.data.find(
3541
(model) => model.id === payload.model,
3642
)
@@ -50,6 +56,13 @@ export const handleResponses = async (c: Context) => {
5056
)
5157
}
5258

59+
applyResponsesApiContextManagement(
60+
payload,
61+
selectedModel?.capabilities.limits.max_prompt_tokens,
62+
)
63+
64+
logger.debug("Translated Responses payload:", JSON.stringify(payload))
65+
5366
const { vision, initiator } = getResponsesRequestOptions(payload)
5467

5568
if (state.manualApprove) {

0 commit comments

Comments
 (0)