Skip to content
This repository was archived by the owner on May 20, 2026. It is now read-only.

Commit 4d53076

Browse files
committed
removing stale compaction item
1 parent fd4349d commit 4d53076

2 files changed

Lines changed: 323 additions & 21 deletions

File tree

src/platform/endpoint/node/responsesApi.ts

Lines changed: 116 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,15 @@ import { getStatefulMarkerAndIndex } from '../common/statefulMarkerContainer';
3030
import { rawPartAsThinkingData } from '../common/thinkingDataContainer';
3131

3232
/**
 * Computes the prompt-token threshold at which Responses API context
 * compaction should be requested for `endpoint`.
 *
 * @param configService Source of the experiment-backed enablement setting.
 * @param expService Experimentation service used to resolve that setting.
 * @param endpoint Endpoint whose model family and prompt window are consulted.
 * @returns `undefined` when the setting is off or the endpoint's family is
 * listed in `modelsWithoutResponsesContextManagement`; otherwise 90% of
 * `endpoint.modelMaxPromptTokens` (rounded down), or 50000 when the endpoint
 * does not report a positive prompt window.
 */
export function getResponsesApiCompactionThreshold(configService: IConfigurationService, expService: IExperimentationService, endpoint: IChatEndpoint): number | undefined {
	const enabledByConfig = configService.getExperimentBasedConfig(ConfigKey.ResponsesApiContextManagementEnabled, expService);
	if (!enabledByConfig || modelsWithoutResponsesContextManagement.has(endpoint.family)) {
		return undefined;
	}

	// The threshold is derived from the model's own prompt window instead of a
	// fixed number, so the same code path works for small and large windows.
	const promptWindowShare = 0.9;
	const fallbackThreshold = 50000;
	return endpoint.modelMaxPromptTokens > 0
		? Math.floor(endpoint.modelMaxPromptTokens * promptWindowShare)
		: fallbackThreshold;
}
4243

4344
export function createResponsesRequestBody(accessor: ServicesAccessor, options: ICreateEndpointBodyOptions, model: string, endpoint: IChatEndpoint): IEndpointBody {
@@ -49,7 +50,7 @@ export function createResponsesRequestBody(accessor: ServicesAccessor, options:
4950

5051
const body: IEndpointBody = {
5152
model,
52-
...rawMessagesToResponseAPI(model, options.messages, !!options.ignoreStatefulMarker, compactThreshold !== undefined),
53+
...rawMessagesToResponseAPI(model, options.messages, !!options.ignoreStatefulMarker),
5354
stream: true,
5455
tools: options.requestOptions?.tools?.map((tool): OpenAI.Responses.FunctionTool & OpenAiResponsesFunctionTool => ({
5556
...tool.function,
@@ -124,19 +125,32 @@ interface ResponseOutputItemWithPhase {
124125
phase?: string;
125126
}
126127

127-
function rawMessagesToResponseAPI(modelId: string, messages: readonly Raw.ChatMessage[], ignoreStatefulMarker: boolean, compactionEnabled: boolean): { input: OpenAI.Responses.ResponseInputItem[]; previous_response_id?: string } {
128+
/**
 * A compaction item picked out of a response's `output` array, paired with
 * the position it was found at.
 */
interface LatestCompactionOutput {
	/** The compaction item itself. */
	readonly item: OpenAIContextManagementResponse;
	/** Index of `item` within the array that was searched. */
	readonly outputIndex: number;
}
132+
133+
function rawMessagesToResponseAPI(modelId: string, messages: readonly Raw.ChatMessage[], ignoreStatefulMarker: boolean): { input: OpenAI.Responses.ResponseInputItem[]; previous_response_id?: string } {
128134
const latestCompactionMessageIndex = getLatestCompactionMessageIndex(messages);
135+
const latestCompactionMessage = latestCompactionMessageIndex !== undefined ? createCompactionRoundTripMessage(messages[latestCompactionMessageIndex]) : undefined;
129136
const statefulMarkerAndIndex = !ignoreStatefulMarker && getStatefulMarkerAndIndex(modelId, messages);
130137

131138
let previousResponseId: string | undefined;
132139
if (statefulMarkerAndIndex) {
133140
previousResponseId = statefulMarkerAndIndex.statefulMarker;
134-
if (!compactionEnabled) {
135-
messages = messages.slice(statefulMarkerAndIndex.index + 1);
136-
}
137-
}
138141

139-
if (latestCompactionMessageIndex !== undefined) {
142+
// Requests that resume from previous_response_id send only post-marker history,
143+
// but they still need the latest compaction item even when that item predates
144+
// the marker. This keeps both websocket and non-websocket traffic aligned.
145+
messages = messages.slice(statefulMarkerAndIndex.index + 1);
146+
if (latestCompactionMessageIndex !== undefined) {
147+
if (latestCompactionMessageIndex > statefulMarkerAndIndex.index) {
148+
messages = messages.slice(latestCompactionMessageIndex - (statefulMarkerAndIndex.index + 1));
149+
} else if (latestCompactionMessage) {
150+
messages = [latestCompactionMessage, ...messages];
151+
}
152+
}
153+
} else if (latestCompactionMessageIndex !== undefined) {
140154
messages = messages.slice(latestCompactionMessageIndex);
141155
}
142156

@@ -200,6 +214,22 @@ function rawMessagesToResponseAPI(modelId: string, messages: readonly Raw.ChatMe
200214
return { input, previous_response_id: previousResponseId };
201215
}
202216

217+
/**
 * Builds an assistant message that carries only the compaction parts of
 * `message`.
 *
 * @param message The message to extract compaction parts from.
 * @returns A new assistant message containing just the opaque parts that
 * `rawPartAsCompactionData` recognises, or `undefined` when `message` is not
 * an assistant message or has no such parts.
 */
function createCompactionRoundTripMessage(message: Raw.ChatMessage): Raw.ChatMessage | undefined {
	if (message.role === Raw.ChatRole.Assistant) {
		const compactionParts = message.content.filter(part => {
			if (part.type !== Raw.ChatCompletionContentPartKind.Opaque) {
				return false;
			}
			return !!rawPartAsCompactionData(part);
		});

		if (compactionParts.length > 0) {
			return { role: Raw.ChatRole.Assistant, content: compactionParts };
		}
	}

	return undefined;
}
232+
203233
function getLatestCompactionMessageIndex(messages: readonly Raw.ChatMessage[]): number | undefined {
204234
for (let idx = messages.length - 1; idx >= 0; idx--) {
205235
const message = messages[idx];
@@ -452,6 +482,39 @@ function responseFunctionOutputToRawContents(output: string | OpenAI.Responses.R
452482
return coalesce(output.map(responseContentToRawContent));
453483
}
454484

485+
/**
 * Tells whether a response output item is a context-management compaction
 * item, by comparing its `type` against `openAIContextManagementCompactionType`.
 *
 * @param item The output item to classify.
 * @returns `true` when the item's type string equals the compaction type.
 */
function isCompactionOutputItem(item: OpenAI.Responses.ResponseOutputItem): boolean {
	// `toString()` keeps the comparison string-to-string, as the original does.
	const itemType: string = item.type.toString();
	return itemType === openAIContextManagementCompactionType;
}
488+
489+
/**
 * Finds the compaction item with the highest index in `output`.
 *
 * @param output Response output items to search.
 * @param preferredOutputIndex Kept for call-site compatibility; it has no
 * effect on the result. The scan below already returns the highest-index
 * compaction item, so a preferred index that points at a compaction item and
 * is not lower than that index can only be that very same index.
 * @returns The last compaction item and its index, or `undefined` when
 * `output` contains no compaction item.
 */
function getLatestCompactionOutput(output: OpenAI.Responses.ResponseOutputItem[], preferredOutputIndex: number | undefined): LatestCompactionOutput | undefined {
	// Walk backwards so the first hit is the most recent compaction item.
	for (let idx = output.length - 1; idx >= 0; idx--) {
		const item = output[idx];
		if (isCompactionOutputItem(item)) {
			return { item: item as unknown as OpenAIContextManagementResponse, outputIndex: idx };
		}
	}

	return undefined;
}
508+
509+
/**
 * Drops every compaction item from `output` except the one selected by
 * `getLatestCompactionOutput`.
 *
 * @param output Response output items to normalise.
 * @param preferredOutputIndex Forwarded unchanged to `getLatestCompactionOutput`.
 * @returns `output` itself when it holds no compaction item; otherwise a new
 * array with all non-compaction items plus the single selected compaction
 * item, in their original order.
 */
function keepLatestCompactionOutput(output: OpenAI.Responses.ResponseOutputItem[], preferredOutputIndex: number | undefined): OpenAI.Responses.ResponseOutputItem[] {
	const survivor = getLatestCompactionOutput(output, preferredOutputIndex);
	if (survivor === undefined) {
		return output;
	}

	const keptIndex = survivor.outputIndex;
	return output.filter((entry, position) => position === keptIndex || !isCompactionOutputItem(entry));
}
517+
455518
export async function processResponseFromChatEndpoint(instantiationService: IInstantiationService, telemetryService: ITelemetryService, logService: ILogService, response: Response, expectedNumChoices: number, finishCallback: FinishedCallback, telemetryData: TelemetryData, compactionThreshold?: number): Promise<AsyncIterableObject<ChatCompletion>> {
456519
return new AsyncIterableObject<ChatCompletion>(async feed => {
457520
const requestId = response.headers.get('X-Request-ID') ?? generateUuid();
@@ -499,6 +562,8 @@ export class OpenAIResponsesProcessor {
499562
private textAccumulator: string = '';
500563
private hasReceivedReasoningSummary = false;
501564
private sawCompactionMessage = false;
565+
private latestCompactionOutputIndex: number | undefined;
566+
private latestCompactionItem: OpenAIContextManagementResponse | undefined;
502567
/** Maps output_index to { name, callId, arguments } for streaming tool call updates */
503568
private readonly toolCallInfo = new Map<number, { name: string; callId: string; arguments: string }>();
504569

@@ -560,6 +625,12 @@ export class OpenAIResponsesProcessor {
560625
case 'response.output_item.done':
561626
if (chunk.item.type.toString() === openAIContextManagementCompactionType) {
562627
const compactionItem = chunk.item as unknown as OpenAIContextManagementResponse;
628+
if (this.latestCompactionOutputIndex !== undefined && chunk.output_index < this.latestCompactionOutputIndex) {
629+
return;
630+
}
631+
632+
this.latestCompactionOutputIndex = chunk.output_index;
633+
this.latestCompactionItem = compactionItem;
563634
this.sawCompactionMessage = true;
564635
return onProgress({
565636
text: '',
@@ -617,33 +688,58 @@ export class OpenAIResponsesProcessor {
617688
id: chunk.item_id
618689
}
619690
});
620-
case 'response.completed':
691+
case 'response.completed': {
692+
const normalizedOutput = keepLatestCompactionOutput(chunk.response.output, this.latestCompactionOutputIndex);
693+
const latestCompactionOutput = getLatestCompactionOutput(normalizedOutput, this.latestCompactionOutputIndex);
694+
const latestCompactionItem = latestCompactionOutput?.item;
695+
const previousCompactionItem = this.latestCompactionItem;
696+
if (latestCompactionItem) {
697+
this.sawCompactionMessage = true;
698+
this.latestCompactionOutputIndex = latestCompactionOutput.outputIndex;
699+
}
700+
701+
const shouldEmitResolvedCompaction = latestCompactionItem && (
702+
!previousCompactionItem ||
703+
previousCompactionItem.id !== latestCompactionItem.id ||
704+
previousCompactionItem.encrypted_content !== latestCompactionItem.encrypted_content
705+
);
706+
if (latestCompactionItem) {
707+
this.latestCompactionItem = latestCompactionItem;
708+
}
621709
if (this.compactionThreshold !== undefined && this.sawCompactionMessage) {
710+
const promptTokens = chunk.response.usage?.input_tokens ?? 0;
711+
const totalTokens = chunk.response.usage?.total_tokens ?? 0;
622712
sendResponsesApiCompactionTelemetry(this.telemetryService, {
623713
outcome: 'compaction_returned',
624714
headerRequestId: this.requestId,
625715
gitHubRequestId: this.ghRequestId,
626716
model: chunk.response.model,
627717
}, {
628718
compactThreshold: this.compactionThreshold,
629-
promptTokens: chunk.response.usage?.input_tokens ?? 0,
630-
totalTokens: chunk.response.usage?.total_tokens ?? 0,
719+
promptTokens,
720+
totalTokens,
631721
});
632722
this.logService.debug(`[responsesAPI_compaction] Compaction enabled. headerRequestId=${this.requestId}`);
633723
} else if (this.compactionThreshold !== undefined && (chunk.response.usage?.input_tokens ?? 0) >= this.compactionThreshold) {
724+
const promptTokens = chunk.response.usage?.input_tokens ?? 0;
725+
const totalTokens = chunk.response.usage?.total_tokens ?? 0;
634726
sendResponsesApiCompactionTelemetry(this.telemetryService, {
635727
outcome: 'threshold_met_no_compaction',
636728
headerRequestId: this.requestId,
637729
gitHubRequestId: this.ghRequestId,
638730
model: chunk.response.model,
639731
}, {
640732
compactThreshold: this.compactionThreshold,
641-
promptTokens: chunk.response.usage?.input_tokens ?? 0,
642-
totalTokens: chunk.response.usage?.total_tokens ?? 0,
733+
promptTokens,
734+
totalTokens,
643735
});
644-
this.logService.debug(`[responsesAPI_compaction] Compaction enabled but context not compacted after threshold was met. headerRequestId=${this.requestId}`);
736+
this.logService.debug(`[responsesAPI_compaction] Compaction enabled but context not compacted after threshold was met. headerRequestId=${this.requestId}, gitHubRequestId=${this.ghRequestId}, promptTokens=${promptTokens}, totalTokens=${totalTokens}`);
645737
}
646-
onProgress({ text: '', statefulMarker: chunk.response.id });
738+
onProgress({
739+
text: '',
740+
statefulMarker: chunk.response.id,
741+
contextManagement: shouldEmitResolvedCompaction ? latestCompactionItem : undefined,
742+
});
647743
return {
648744
blockFinished: true,
649745
choiceIndex: 0,
@@ -667,7 +763,7 @@ export class OpenAIResponsesProcessor {
667763
finishReason: FinishedCompletionReason.Stop,
668764
message: {
669765
role: Raw.ChatRole.Assistant,
670-
content: chunk.response.output.map((item): Raw.ChatCompletionContentPart | undefined => {
766+
content: normalizedOutput.map((item): Raw.ChatCompletionContentPart | undefined => {
671767
if (item.type === 'message') {
672768
return { type: Raw.ChatCompletionContentPartKind.Text, text: item.content.map(c => c.type === 'output_text' ? c.text : c.refusal).join('') };
673769
} else if (item.type === 'image_generation_call' && item.result) {
@@ -676,6 +772,7 @@ export class OpenAIResponsesProcessor {
676772
}).filter(isDefined),
677773
}
678774
};
775+
}
679776
}
680777
}
681778
}

0 commit comments

Comments
 (0)