@@ -30,14 +30,15 @@ import { getStatefulMarkerAndIndex } from '../common/statefulMarkerContainer';
3030import { rawPartAsThinkingData } from '../common/thinkingDataContainer' ;
3131
/**
 * Resolves the prompt-token count at which Responses API context compaction
 * should kick in for the given endpoint.
 *
 * @param configService Used to read the experiment-based context management setting.
 * @param expService Experimentation service passed to the config lookup.
 * @param endpoint Endpoint whose model family and prompt budget are inspected.
 * @returns The compaction threshold in tokens, or `undefined` when context
 * management is disabled by configuration or unsupported for the model family.
 */
export function getResponsesApiCompactionThreshold(configService: IConfigurationService, expService: IExperimentationService, endpoint: IChatEndpoint): number | undefined {
	// Both conditions must hold: the experiment-backed setting is on AND the
	// model family is not on the deny list.
	const contextManagementEnabled = configService.getExperimentBasedConfig(ConfigKey.ResponsesApiContextManagementEnabled, expService) && !modelsWithoutResponsesContextManagement.has(endpoint.family);
	if (!contextManagementEnabled) {
		return undefined;
	}

	// Compact once the prompt reaches 90% of the model's prompt budget; when the
	// endpoint does not report a positive budget, fall back to a fixed 50000.
	// (A hard-coded 10000 debugging override must not ship: it ignores the
	// endpoint's real limits.)
	return endpoint.modelMaxPromptTokens > 0
		? Math.floor(endpoint.modelMaxPromptTokens * 0.9)
		: 50000;
}
4243
4344export function createResponsesRequestBody ( accessor : ServicesAccessor , options : ICreateEndpointBodyOptions , model : string , endpoint : IChatEndpoint ) : IEndpointBody {
@@ -49,7 +50,7 @@ export function createResponsesRequestBody(accessor: ServicesAccessor, options:
4950
5051 const body : IEndpointBody = {
5152 model,
52- ...rawMessagesToResponseAPI ( model , options . messages , ! ! options . ignoreStatefulMarker , compactThreshold !== undefined ) ,
53+ ...rawMessagesToResponseAPI ( model , options . messages , ! ! options . ignoreStatefulMarker ) ,
5354 stream : true ,
5455 tools : options . requestOptions ?. tools ?. map ( ( tool ) : OpenAI . Responses . FunctionTool & OpenAiResponsesFunctionTool => ( {
5556 ...tool . function ,
@@ -124,19 +125,32 @@ interface ResponseOutputItemWithPhase {
124125 phase ?: string ;
125126}
126127
127- function rawMessagesToResponseAPI ( modelId : string , messages : readonly Raw . ChatMessage [ ] , ignoreStatefulMarker : boolean , compactionEnabled : boolean ) : { input : OpenAI . Responses . ResponseInputItem [ ] ; previous_response_id ?: string } {
/**
 * A compaction item located inside a Responses API `output` array, paired
 * with the position it was found at.
 */
interface LatestCompactionOutput {
	/** The compaction output item itself. */
	readonly item: OpenAIContextManagementResponse;
	/** Index of `item` within the `output` array that was searched. */
	readonly outputIndex: number;
}
132+
133+ function rawMessagesToResponseAPI ( modelId : string , messages : readonly Raw . ChatMessage [ ] , ignoreStatefulMarker : boolean ) : { input : OpenAI . Responses . ResponseInputItem [ ] ; previous_response_id ?: string } {
128134 const latestCompactionMessageIndex = getLatestCompactionMessageIndex ( messages ) ;
135+ const latestCompactionMessage = latestCompactionMessageIndex !== undefined ? createCompactionRoundTripMessage ( messages [ latestCompactionMessageIndex ] ) : undefined ;
129136 const statefulMarkerAndIndex = ! ignoreStatefulMarker && getStatefulMarkerAndIndex ( modelId , messages ) ;
130137
131138 let previousResponseId : string | undefined ;
132139 if ( statefulMarkerAndIndex ) {
133140 previousResponseId = statefulMarkerAndIndex . statefulMarker ;
134- if ( ! compactionEnabled ) {
135- messages = messages . slice ( statefulMarkerAndIndex . index + 1 ) ;
136- }
137- }
138141
139- if ( latestCompactionMessageIndex !== undefined ) {
142+ // Requests that resume from previous_response_id send only post-marker history,
143+ // but they still need the latest compaction item even when that item predates
144+ // the marker. This keeps both websocket and non-websocket traffic aligned.
145+ messages = messages . slice ( statefulMarkerAndIndex . index + 1 ) ;
146+ if ( latestCompactionMessageIndex !== undefined ) {
147+ if ( latestCompactionMessageIndex > statefulMarkerAndIndex . index ) {
148+ messages = messages . slice ( latestCompactionMessageIndex - ( statefulMarkerAndIndex . index + 1 ) ) ;
149+ } else if ( latestCompactionMessage ) {
150+ messages = [ latestCompactionMessage , ...messages ] ;
151+ }
152+ }
153+ } else if ( latestCompactionMessageIndex !== undefined ) {
140154 messages = messages . slice ( latestCompactionMessageIndex ) ;
141155 }
142156
@@ -200,6 +214,22 @@ function rawMessagesToResponseAPI(modelId: string, messages: readonly Raw.ChatMe
200214 return { input, previous_response_id : previousResponseId } ;
201215}
202216
/**
 * Derives an assistant message that carries only the compaction parts of
 * `message`, so the compaction item can be sent back without the rest of the
 * original message content.
 *
 * @param message The message to extract compaction parts from.
 * @returns A new assistant message holding just the opaque parts for which
 * `rawPartAsCompactionData` yields a truthy value, or `undefined` when
 * `message` is not an assistant message or has no such parts.
 */
function createCompactionRoundTripMessage(message: Raw.ChatMessage): Raw.ChatMessage | undefined {
	if (message.role !== Raw.ChatRole.Assistant) {
		return undefined;
	}

	// Keep only opaque parts that decode as compaction data.
	const compactionParts = message.content.filter(part => part.type === Raw.ChatCompletionContentPartKind.Opaque && rawPartAsCompactionData(part));

	return compactionParts.length
		? { role: Raw.ChatRole.Assistant, content: compactionParts }
		: undefined;
}
232+
203233function getLatestCompactionMessageIndex ( messages : readonly Raw . ChatMessage [ ] ) : number | undefined {
204234 for ( let idx = messages . length - 1 ; idx >= 0 ; idx -- ) {
205235 const message = messages [ idx ] ;
@@ -452,6 +482,39 @@ function responseFunctionOutputToRawContents(output: string | OpenAI.Responses.R
452482 return coalesce ( output . map ( responseContentToRawContent ) ) ;
453483}
454484
/**
 * Tells whether a Responses API output item is a context-management
 * compaction item.
 *
 * @param item The output item to classify.
 * @returns `true` when the item's `type`, read as a plain string, equals
 * `openAIContextManagementCompactionType`.
 */
function isCompactionOutputItem(item: OpenAI.Responses.ResponseOutputItem): boolean {
	// Compared as a plain string; presumably the SDK's `type` union does not
	// list the compaction type, so a direct literal comparison would not type-check.
	const itemType: string = item.type.toString();
	return itemType === openAIContextManagementCompactionType;
}
488+
/**
 * Finds the compaction item that should be treated as the latest one in a
 * Responses API `output` array.
 *
 * @param output The output items to search.
 * @param preferredOutputIndex Optional index (e.g. one remembered from
 * streaming) that is selected when it holds a compaction item and is not
 * earlier than the last compaction item found by scanning.
 * @returns The chosen compaction item with its index, or `undefined` when
 * `output` contains no compaction item.
 */
function getLatestCompactionOutput(output: OpenAI.Responses.ResponseOutputItem[], preferredOutputIndex: number | undefined): LatestCompactionOutput | undefined {
	// Walk backwards until the last compaction item (if any) is reached.
	let scanIdx = output.length - 1;
	while (scanIdx >= 0 && !isCompactionOutputItem(output[scanIdx])) {
		scanIdx--;
	}
	const scanned: LatestCompactionOutput | undefined = scanIdx >= 0
		? { item: output[scanIdx] as unknown as OpenAIContextManagementResponse, outputIndex: scanIdx }
		: undefined;

	if (preferredOutputIndex === undefined) {
		return scanned;
	}

	// NOTE(review): the scan above already yields the highest-indexed compaction
	// item, so this path can only ever select that same index. It looks
	// redundant — confirm before removing.
	const candidate = output[preferredOutputIndex];
	if (!candidate || !isCompactionOutputItem(candidate)) {
		return scanned;
	}
	if (scanned && preferredOutputIndex < scanned.outputIndex) {
		return scanned;
	}

	return { item: candidate as unknown as OpenAIContextManagementResponse, outputIndex: preferredOutputIndex };
}
508+
/**
 * Drops every compaction item from `output` except the latest one, leaving
 * all non-compaction items untouched and in their original order.
 *
 * @param output The output items to normalize.
 * @param preferredOutputIndex Forwarded to `getLatestCompactionOutput` when
 * choosing which compaction item to keep.
 * @returns The same `output` array when it holds no compaction item,
 * otherwise a new filtered array.
 */
function keepLatestCompactionOutput(output: OpenAI.Responses.ResponseOutputItem[], preferredOutputIndex: number | undefined): OpenAI.Responses.ResponseOutputItem[] {
	const survivor = getLatestCompactionOutput(output, preferredOutputIndex);
	if (survivor === undefined) {
		// Nothing to prune: hand back the input array itself.
		return output;
	}

	const survivorIndex = survivor.outputIndex;
	return output.filter((item, idx) => idx === survivorIndex || !isCompactionOutputItem(item));
}
517+
455518export async function processResponseFromChatEndpoint ( instantiationService : IInstantiationService , telemetryService : ITelemetryService , logService : ILogService , response : Response , expectedNumChoices : number , finishCallback : FinishedCallback , telemetryData : TelemetryData , compactionThreshold ?: number ) : Promise < AsyncIterableObject < ChatCompletion > > {
456519 return new AsyncIterableObject < ChatCompletion > ( async feed => {
457520 const requestId = response . headers . get ( 'X-Request-ID' ) ?? generateUuid ( ) ;
@@ -499,6 +562,8 @@ export class OpenAIResponsesProcessor {
499562 private textAccumulator : string = '' ;
500563 private hasReceivedReasoningSummary = false ;
501564 private sawCompactionMessage = false ;
565+ private latestCompactionOutputIndex : number | undefined ;
566+ private latestCompactionItem : OpenAIContextManagementResponse | undefined ;
502567 /** Maps output_index to { name, callId, arguments } for streaming tool call updates */
503568 private readonly toolCallInfo = new Map < number , { name : string ; callId : string ; arguments : string } > ( ) ;
504569
@@ -560,6 +625,12 @@ export class OpenAIResponsesProcessor {
560625 case 'response.output_item.done' :
561626 if ( chunk . item . type . toString ( ) === openAIContextManagementCompactionType ) {
562627 const compactionItem = chunk . item as unknown as OpenAIContextManagementResponse ;
628+ if ( this . latestCompactionOutputIndex !== undefined && chunk . output_index < this . latestCompactionOutputIndex ) {
629+ return ;
630+ }
631+
632+ this . latestCompactionOutputIndex = chunk . output_index ;
633+ this . latestCompactionItem = compactionItem ;
563634 this . sawCompactionMessage = true ;
564635 return onProgress ( {
565636 text : '' ,
@@ -617,33 +688,58 @@ export class OpenAIResponsesProcessor {
617688 id : chunk . item_id
618689 }
619690 } ) ;
620- case 'response.completed' :
691+ case 'response.completed' : {
692+ const normalizedOutput = keepLatestCompactionOutput ( chunk . response . output , this . latestCompactionOutputIndex ) ;
693+ const latestCompactionOutput = getLatestCompactionOutput ( normalizedOutput , this . latestCompactionOutputIndex ) ;
694+ const latestCompactionItem = latestCompactionOutput ?. item ;
695+ const previousCompactionItem = this . latestCompactionItem ;
696+ if ( latestCompactionItem ) {
697+ this . sawCompactionMessage = true ;
698+ this . latestCompactionOutputIndex = latestCompactionOutput . outputIndex ;
699+ }
700+
701+ const shouldEmitResolvedCompaction = latestCompactionItem && (
702+ ! previousCompactionItem ||
703+ previousCompactionItem . id !== latestCompactionItem . id ||
704+ previousCompactionItem . encrypted_content !== latestCompactionItem . encrypted_content
705+ ) ;
706+ if ( latestCompactionItem ) {
707+ this . latestCompactionItem = latestCompactionItem ;
708+ }
621709 if ( this . compactionThreshold !== undefined && this . sawCompactionMessage ) {
710+ const promptTokens = chunk . response . usage ?. input_tokens ?? 0 ;
711+ const totalTokens = chunk . response . usage ?. total_tokens ?? 0 ;
622712 sendResponsesApiCompactionTelemetry ( this . telemetryService , {
623713 outcome : 'compaction_returned' ,
624714 headerRequestId : this . requestId ,
625715 gitHubRequestId : this . ghRequestId ,
626716 model : chunk . response . model ,
627717 } , {
628718 compactThreshold : this . compactionThreshold ,
629- promptTokens : chunk . response . usage ?. input_tokens ?? 0 ,
630- totalTokens : chunk . response . usage ?. total_tokens ?? 0 ,
719+ promptTokens,
720+ totalTokens,
631721 } ) ;
632722 this . logService . debug ( `[responsesAPI_compaction] Compaction enabled. headerRequestId=${ this . requestId } ` ) ;
633723 } else if ( this . compactionThreshold !== undefined && ( chunk . response . usage ?. input_tokens ?? 0 ) >= this . compactionThreshold ) {
724+ const promptTokens = chunk . response . usage ?. input_tokens ?? 0 ;
725+ const totalTokens = chunk . response . usage ?. total_tokens ?? 0 ;
634726 sendResponsesApiCompactionTelemetry ( this . telemetryService , {
635727 outcome : 'threshold_met_no_compaction' ,
636728 headerRequestId : this . requestId ,
637729 gitHubRequestId : this . ghRequestId ,
638730 model : chunk . response . model ,
639731 } , {
640732 compactThreshold : this . compactionThreshold ,
641- promptTokens : chunk . response . usage ?. input_tokens ?? 0 ,
642- totalTokens : chunk . response . usage ?. total_tokens ?? 0 ,
733+ promptTokens,
734+ totalTokens,
643735 } ) ;
644- this . logService . debug ( `[responsesAPI_compaction] Compaction enabled but context not compacted after threshold was met. headerRequestId=${ this . requestId } ` ) ;
736+ this . logService . debug ( `[responsesAPI_compaction] Compaction enabled but context not compacted after threshold was met. headerRequestId=${ this . requestId } , gitHubRequestId= ${ this . ghRequestId } , promptTokens= ${ promptTokens } , totalTokens= ${ totalTokens } ` ) ;
645737 }
646- onProgress ( { text : '' , statefulMarker : chunk . response . id } ) ;
738+ onProgress ( {
739+ text : '' ,
740+ statefulMarker : chunk . response . id ,
741+ contextManagement : shouldEmitResolvedCompaction ? latestCompactionItem : undefined ,
742+ } ) ;
647743 return {
648744 blockFinished : true ,
649745 choiceIndex : 0 ,
@@ -667,7 +763,7 @@ export class OpenAIResponsesProcessor {
667763 finishReason : FinishedCompletionReason . Stop ,
668764 message : {
669765 role : Raw . ChatRole . Assistant ,
670- content : chunk . response . output . map ( ( item ) : Raw . ChatCompletionContentPart | undefined => {
766+ content : normalizedOutput . map ( ( item ) : Raw . ChatCompletionContentPart | undefined => {
671767 if ( item . type === 'message' ) {
672768 return { type : Raw . ChatCompletionContentPartKind . Text , text : item . content . map ( c => c . type === 'output_text' ? c . text : c . refusal ) . join ( '' ) } ;
673769 } else if ( item . type === 'image_generation_call' && item . result ) {
@@ -676,6 +772,7 @@ export class OpenAIResponsesProcessor {
676772 } ) . filter ( isDefined ) ,
677773 }
678774 } ;
775+ }
679776 }
680777 }
681778}
0 commit comments