From 2a30b060865ddef3e6b8f20e8f6ecf4c390144d1 Mon Sep 17 00:00:00 2001 From: Dileep Yavanamandha Date: Tue, 7 Apr 2026 22:32:27 -0700 Subject: [PATCH 1/2] fixing compaction --- .../node/oaiLanguageModelServer.ts | 4 +- .../extension/prompt/node/chatMLFetcher.ts | 4 +- .../platform/endpoint/node/chatEndpoint.ts | 5 +- .../platform/endpoint/node/responsesApi.ts | 183 ++++++- .../endpoint/node/test/responsesApi.spec.ts | 498 +++++++++++++++++- .../platform/networking/node/chatStream.ts | 39 ++ 6 files changed, 710 insertions(+), 23 deletions(-) diff --git a/extensions/copilot/src/extension/externalAgents/node/oaiLanguageModelServer.ts b/extensions/copilot/src/extension/externalAgents/node/oaiLanguageModelServer.ts index 4d6b879eb04b9..698b8f90dd820 100644 --- a/extensions/copilot/src/extension/externalAgents/node/oaiLanguageModelServer.ts +++ b/extensions/copilot/src/extension/externalAgents/node/oaiLanguageModelServer.ts @@ -10,7 +10,7 @@ import type OpenAI from 'openai'; import { IChatMLFetcher, Source } from '../../../platform/chat/common/chatMLFetcher'; import { ChatLocation, ChatResponse } from '../../../platform/chat/common/commonTypes'; import { CustomModel, EndpointEditToolName, IEndpointProvider } from '../../../platform/endpoint/common/endpointProvider'; -import { OpenAIResponsesProcessor, responseApiInputToRawMessagesForLogging } from '../../../platform/endpoint/node/responsesApi'; +import { getResponsesApiCompactionThresholdFromBody, OpenAIResponsesProcessor, responseApiInputToRawMessagesForLogging } from '../../../platform/endpoint/node/responsesApi'; import { ILogService } from '../../../platform/log/common/logService'; import { FinishedCallback, OptionalChatRequestParams } from '../../../platform/networking/common/fetch'; import { Response } from '../../../platform/networking/common/fetcherService'; @@ -455,7 +455,7 @@ class StreamingPassThroughEndpoint implements IChatEndpoint { // We parse the stream just to return a correct ChatCompletion for logging the response and token usage details. const requestId = response.headers.get('X-Request-ID') ?? generateUuid(); const ghRequestId = response.headers.get('x-github-request-id') ?? ''; - const processor = this.instantiationService.createInstance(OpenAIResponsesProcessor, telemetryData, requestId, ghRequestId); + const processor = this.instantiationService.createInstance(OpenAIResponsesProcessor, telemetryData, telemetryService, requestId, ghRequestId, getResponsesApiCompactionThresholdFromBody(this.requestBody)); const parser = new SSEParser((ev) => { try { logService.trace(`[StreamingPassThroughEndpoint] SSE: ${ev.data}`); diff --git a/extensions/copilot/src/extension/prompt/node/chatMLFetcher.ts b/extensions/copilot/src/extension/prompt/node/chatMLFetcher.ts index fc2e21895c909..57e9f766bf223 100644 --- a/extensions/copilot/src/extension/prompt/node/chatMLFetcher.ts +++ b/extensions/copilot/src/extension/prompt/node/chatMLFetcher.ts @@ -17,7 +17,7 @@ import { IInteractionService } from '../../../platform/chat/common/interactionSe import { ConfigKey, HARD_TOOL_LIMIT, IConfigurationService } from '../../../platform/configuration/common/configurationService'; import { ICAPIClientService } from '../../../platform/endpoint/common/capiClient'; import { isAutoModel } from '../../../platform/endpoint/node/autoChatEndpoint'; -import { OpenAIResponsesProcessor, responseApiInputToRawMessagesForLogging, sendCompletionOutputTelemetry } from '../../../platform/endpoint/node/responsesApi'; +import { getResponsesApiCompactionThresholdFromBody, OpenAIResponsesProcessor, responseApiInputToRawMessagesForLogging, sendCompletionOutputTelemetry } from '../../../platform/endpoint/node/responsesApi'; import { collectSingleLineErrorMessage, ILogService } from '../../../platform/log/common/logService'; import { isAnthropicToolSearchEnabled } from '../../../platform/networking/common/anthropic'; import { FinishedCallback, getRequestId, IResponseDelta, OptionalChatRequestParams, RequestId } from '../../../platform/networking/common/fetch'; @@ -1103,7 +1103,7 @@ export class ChatMLFetcherImpl extends AbstractChatMLFetcher { const handle = connection.sendRequest(request, { userInitiated: !!userInitiatedRequest, turnId }, cancellationToken); const extendedBaseTelemetryData = baseTelemetryData.extendedBy({ modelCallId }); - const processor = this._instantiationService.createInstance(OpenAIResponsesProcessor, extendedBaseTelemetryData, modelRequestId.headerRequestId, modelRequestId.gitHubRequestId); + const processor = this._instantiationService.createInstance(OpenAIResponsesProcessor, extendedBaseTelemetryData, this._telemetryService, modelRequestId.headerRequestId, modelRequestId.gitHubRequestId, getResponsesApiCompactionThresholdFromBody(request)); // Set up streaming first so event listeners are registered before we // await the first event — AsyncIterableObject runs its executor eagerly. diff --git a/extensions/copilot/src/platform/endpoint/node/chatEndpoint.ts b/extensions/copilot/src/platform/endpoint/node/chatEndpoint.ts index 0e03a3e013a0c..e2b168a810c45 100644 --- a/extensions/copilot/src/platform/endpoint/node/chatEndpoint.ts +++ b/extensions/copilot/src/platform/endpoint/node/chatEndpoint.ts @@ -34,7 +34,7 @@ import { isGeminiFamily, modelSupportsContextEditing, modelSupportsToolSearch } import { IDomainService } from '../common/domainService'; import { CustomModel, IChatModelInformation, ModelSupportedEndpoint } from '../common/endpointProvider'; import { createMessagesRequestBody, processResponseFromMessagesEndpoint } from './messagesApi'; -import { createResponsesRequestBody, processResponseFromChatEndpoint } from './responsesApi'; +import { createResponsesRequestBody, getResponsesApiCompactionThreshold, processResponseFromChatEndpoint } from './responsesApi'; /** * The default processor for the stream format from CAPI @@ -366,7 +366,8 @@ export class ChatEndpoint implements IChatEndpoint { cancellationToken?: CancellationToken | undefined ): Promise> { if (this.useResponsesApi) { - return processResponseFromChatEndpoint(this._instantiationService, telemetryService, logService, response, expectedNumChoices, finishCallback, telemetryData); + const compactionThreshold = getResponsesApiCompactionThreshold(this._configurationService, this._expService, this); + return processResponseFromChatEndpoint(this._instantiationService, telemetryService, logService, response, expectedNumChoices, finishCallback, telemetryData, compactionThreshold); } else if (this.useMessagesApi) { return processResponseFromMessagesEndpoint(this._instantiationService, telemetryService, logService, response, finishCallback, telemetryData); } else if (!this._supportsStreaming) { diff --git a/extensions/copilot/src/platform/endpoint/node/responsesApi.ts b/extensions/copilot/src/platform/endpoint/node/responsesApi.ts index d878b152289fe..f34e6f805ca7f 100644 --- a/extensions/copilot/src/platform/endpoint/node/responsesApi.ts +++ b/extensions/copilot/src/platform/endpoint/node/responsesApi.ts @@ -19,7 +19,7 @@ import { ILogService } from '../../log/common/logService'; import { FinishedCallback, IResponseDelta, OpenAiResponsesFunctionTool } from '../../networking/common/fetch'; import { IChatEndpoint, ICreateEndpointBodyOptions, IEndpointBody } from '../../networking/common/networking'; import { ChatCompletion, FinishedCompletionReason, modelsWithoutResponsesContextManagement, openAIContextManagementCompactionType, OpenAIContextManagementResponse, rawMessageToCAPI, TokenLogProb } from '../../networking/common/openai'; -import { sendEngineMessagesTelemetry } from '../../networking/node/chatStream'; +import { sendEngineMessagesTelemetry, sendResponsesApiCompactionTelemetry } from '../../networking/node/chatStream'; import { IExperimentationService } from '../../telemetry/common/nullExperimentationService'; import { ITelemetryService } from '../../telemetry/common/telemetry'; import { TelemetryData } from '../../telemetry/common/telemetryData'; @@ -29,10 +29,22 @@ import { rawPartAsPhaseData } from '../common/phaseDataContainer'; import { getStatefulMarkerAndIndex } from '../common/statefulMarkerContainer'; import { rawPartAsThinkingData } from '../common/thinkingDataContainer'; +export function getResponsesApiCompactionThreshold(configService: IConfigurationService, expService: IExperimentationService, endpoint: IChatEndpoint): number | undefined { + const contextManagementEnabled = configService.getExperimentBasedConfig(ConfigKey.ResponsesApiContextManagementEnabled, expService) && !modelsWithoutResponsesContextManagement.has(endpoint.family); + if (!contextManagementEnabled) { + return undefined; + } + + return endpoint.modelMaxPromptTokens > 0 + ? Math.floor(endpoint.modelMaxPromptTokens * 0.9) + : 50000; +} + export function createResponsesRequestBody(accessor: ServicesAccessor, options: ICreateEndpointBodyOptions, model: string, endpoint: IChatEndpoint): IEndpointBody { const configService = accessor.get(IConfigurationService); const expService = accessor.get(IExperimentationService); const verbosity = getVerbosityForModelSync(endpoint); + const compactThreshold = getResponsesApiCompactionThreshold(configService, expService, endpoint); // compaction supported for all the models but works well for codex models and any future models after 5.3 const body: IEndpointBody = { @@ -56,11 +68,7 @@ export function createResponsesRequestBody(accessor: ServicesAccessor, options: text: verbosity ? { verbosity } : undefined, }; - const contextManagementEnabled = configService.getExperimentBasedConfig(ConfigKey.ResponsesApiContextManagementEnabled, expService) && !modelsWithoutResponsesContextManagement.has(endpoint.family); - if (contextManagementEnabled) { - const compactThreshold = endpoint.modelMaxPromptTokens > 0 - ? Math.floor(endpoint.modelMaxPromptTokens * 0.9) - : 50000; + if (compactThreshold !== undefined) { body.context_management = [{ 'type': openAIContextManagementCompactionType, // Trigger compaction at 90% of the model max prompt context to keep headroom for active turns. @@ -95,6 +103,21 @@ export function createResponsesRequestBody(accessor: ServicesAccessor, options: return body; } +export function getResponsesApiCompactionThresholdFromBody(body: Pick): number | undefined { + const contextManagement = body.context_management; + if (!Array.isArray(contextManagement)) { + return undefined; + } + + for (const item of contextManagement) { + if (item.type === openAIContextManagementCompactionType && typeof item.compact_threshold === 'number') { + return item.compact_threshold; + } + } + + return undefined; +} + type ResponseOutputMessageWithPhase = OpenAI.Responses.ResponseOutputMessage & { phase?: string; }; @@ -103,17 +126,33 @@ interface ResponseOutputItemWithPhase { phase?: string; } +interface LatestCompactionOutput { + readonly item: OpenAIContextManagementResponse; + readonly outputIndex: number; +} + function rawMessagesToResponseAPI(modelId: string, messages: readonly Raw.ChatMessage[], ignoreStatefulMarker: boolean): { input: OpenAI.Responses.ResponseInputItem[]; previous_response_id?: string } { const latestCompactionMessageIndex = getLatestCompactionMessageIndex(messages); - if (latestCompactionMessageIndex !== undefined) { - messages = messages.slice(latestCompactionMessageIndex); - } - + const latestCompactionMessage = latestCompactionMessageIndex !== undefined ? createCompactionRoundTripMessage(messages[latestCompactionMessageIndex]) : undefined; const statefulMarkerAndIndex = !ignoreStatefulMarker && getStatefulMarkerAndIndex(modelId, messages); + let previousResponseId: string | undefined; - if (latestCompactionMessageIndex === undefined && statefulMarkerAndIndex) { + if (statefulMarkerAndIndex) { previousResponseId = statefulMarkerAndIndex.statefulMarker; + + // Requests that resume from previous_response_id send only post-marker history, + // but they still need the latest compaction item even when that item predates + // the marker. This keeps both websocket and non-websocket traffic aligned. messages = messages.slice(statefulMarkerAndIndex.index + 1); + if (latestCompactionMessageIndex !== undefined) { + if (latestCompactionMessageIndex > statefulMarkerAndIndex.index) { + messages = messages.slice(latestCompactionMessageIndex - (statefulMarkerAndIndex.index + 1)); + } else if (latestCompactionMessage) { + messages = [latestCompactionMessage, ...messages]; + } + } + } else if (latestCompactionMessageIndex !== undefined) { + messages = messages.slice(latestCompactionMessageIndex); } const input: OpenAI.Responses.ResponseInputItem[] = []; @@ -176,6 +215,22 @@ function rawMessagesToResponseAPI(modelId: string, messages: readonly Raw.ChatMe return { input, previous_response_id: previousResponseId }; } +function createCompactionRoundTripMessage(message: Raw.ChatMessage): Raw.ChatMessage | undefined { + if (message.role !== Raw.ChatRole.Assistant) { + return undefined; + } + + const content = message.content.filter(part => part.type === Raw.ChatCompletionContentPartKind.Opaque && rawPartAsCompactionData(part)); + if (!content.length) { + return undefined; + } + + return { + role: Raw.ChatRole.Assistant, + content, + }; +} + function getLatestCompactionMessageIndex(messages: readonly Raw.ChatMessage[]): number | undefined { for (let idx = messages.length - 1; idx >= 0; idx--) { const message = messages[idx]; @@ -428,11 +483,44 @@ function responseFunctionOutputToRawContents(output: string | OpenAI.Responses.R return coalesce(output.map(responseContentToRawContent)); } -export async function processResponseFromChatEndpoint(instantiationService: IInstantiationService, telemetryService: ITelemetryService, logService: ILogService, response: Response, expectedNumChoices: number, finishCallback: FinishedCallback, telemetryData: TelemetryData): Promise> { +function isCompactionOutputItem(item: OpenAI.Responses.ResponseOutputItem): boolean { + return item.type.toString() === openAIContextManagementCompactionType; +} + +function getLatestCompactionOutput(output: OpenAI.Responses.ResponseOutputItem[], preferredOutputIndex: number | undefined): LatestCompactionOutput | undefined { + let latestCompactionOutput: LatestCompactionOutput | undefined; + for (let idx = output.length - 1; idx >= 0; idx--) { + const item = output[idx]; + if (isCompactionOutputItem(item)) { + latestCompactionOutput = { item: item as unknown as OpenAIContextManagementResponse, outputIndex: idx }; + break; + } + } + + if (preferredOutputIndex !== undefined) { + const preferredItem = output[preferredOutputIndex]; + if (preferredItem && isCompactionOutputItem(preferredItem) && (!latestCompactionOutput || preferredOutputIndex >= latestCompactionOutput.outputIndex)) { + return { item: preferredItem as unknown as OpenAIContextManagementResponse, outputIndex: preferredOutputIndex }; + } + } + + return latestCompactionOutput; +} + +function keepLatestCompactionOutput(output: OpenAI.Responses.ResponseOutputItem[], preferredOutputIndex: number | undefined): OpenAI.Responses.ResponseOutputItem[] { + const latestCompactionOutput = getLatestCompactionOutput(output, preferredOutputIndex); + if (!latestCompactionOutput) { + return output; + } + + return output.filter((item, idx) => !isCompactionOutputItem(item) || idx === latestCompactionOutput.outputIndex); +} + +export async function processResponseFromChatEndpoint(instantiationService: IInstantiationService, telemetryService: ITelemetryService, logService: ILogService, response: Response, expectedNumChoices: number, finishCallback: FinishedCallback, telemetryData: TelemetryData, compactionThreshold?: number): Promise> { return new AsyncIterableObject(async feed => { const requestId = response.headers.get('X-Request-ID') ?? generateUuid(); const ghRequestId = response.headers.get('x-github-request-id') ?? ''; - const processor = instantiationService.createInstance(OpenAIResponsesProcessor, telemetryData, requestId, ghRequestId); + const processor = instantiationService.createInstance(OpenAIResponsesProcessor, telemetryData, telemetryService, requestId, ghRequestId, compactionThreshold); const parser = new SSEParser((ev) => { try { logService.trace(`SSE: ${ev.data}`); @@ -474,13 +562,19 @@ interface CapiResponsesTextDeltaEvent extends Omit(); constructor( private readonly telemetryData: TelemetryData, + private readonly telemetryService: ITelemetryService, private readonly requestId: string, private readonly ghRequestId: string, + private readonly compactionThreshold: number | undefined, + @ILogService private readonly logService: ILogService, ) { } public push(chunk: OpenAI.Responses.ResponseStreamEvent, _onProgress: FinishedCallback): ChatCompletion | undefined { @@ -532,6 +626,12 @@ export class OpenAIResponsesProcessor { case 'response.output_item.done': if (chunk.item.type.toString() === openAIContextManagementCompactionType) { const compactionItem = chunk.item as unknown as OpenAIContextManagementResponse; + if (this.latestCompactionOutputIndex !== undefined && chunk.output_index < this.latestCompactionOutputIndex) { + return; + } + this.latestCompactionOutputIndex = chunk.output_index; + this.latestCompactionItem = compactionItem; + this.sawCompactionMessage = true; return onProgress({ text: '', contextManagement: { @@ -588,8 +688,58 @@ export class OpenAIResponsesProcessor { id: chunk.item_id } }); - case 'response.completed': - onProgress({ text: '', statefulMarker: chunk.response.id }); + case 'response.completed': { + const normalizedOutput = keepLatestCompactionOutput(chunk.response.output, this.latestCompactionOutputIndex); + const latestCompactionOutput = getLatestCompactionOutput(normalizedOutput, this.latestCompactionOutputIndex); + const latestCompactionItem = latestCompactionOutput?.item; + const previousCompactionItem = this.latestCompactionItem; + if (latestCompactionItem) { + this.sawCompactionMessage = true; + this.latestCompactionOutputIndex = latestCompactionOutput.outputIndex; + } + + const shouldEmitResolvedCompaction = latestCompactionItem && ( + !previousCompactionItem || + previousCompactionItem.id !== latestCompactionItem.id || + previousCompactionItem.encrypted_content !== latestCompactionItem.encrypted_content + ); + if (latestCompactionItem) { + this.latestCompactionItem = latestCompactionItem; + } + if (this.compactionThreshold !== undefined && this.sawCompactionMessage) { + const promptTokens = chunk.response.usage?.input_tokens ?? 0; + const totalTokens = chunk.response.usage?.total_tokens ?? 0; + sendResponsesApiCompactionTelemetry(this.telemetryService, { + outcome: 'compaction_returned', + headerRequestId: this.requestId, + gitHubRequestId: this.ghRequestId, + model: chunk.response.model, + }, { + compactThreshold: this.compactionThreshold, + promptTokens, + totalTokens, + }); + this.logService.debug(`[responsesAPI_compaction] Compaction enabled. headerRequestId=${this.requestId}`); + } else if (this.compactionThreshold !== undefined && (chunk.response.usage?.input_tokens ?? 0) >= this.compactionThreshold) { + const promptTokens = chunk.response.usage?.input_tokens ?? 0; + const totalTokens = chunk.response.usage?.total_tokens ?? 0; + sendResponsesApiCompactionTelemetry(this.telemetryService, { + outcome: 'threshold_met_no_compaction', + headerRequestId: this.requestId, + gitHubRequestId: this.ghRequestId, + model: chunk.response.model, + }, { + compactThreshold: this.compactionThreshold, + promptTokens, + totalTokens, + }); + this.logService.debug(`[responsesAPI_compaction] Compaction enabled but context not compacted after threshold was met. headerRequestId=${this.requestId}, gitHubRequestId=${this.ghRequestId}, promptTokens=${promptTokens}, totalTokens=${totalTokens}`); + } + onProgress({ + text: '', + statefulMarker: chunk.response.id, + contextManagement: shouldEmitResolvedCompaction ? latestCompactionItem : undefined, + }); return { blockFinished: true, choiceIndex: 0, @@ -613,7 +763,7 @@ export class OpenAIResponsesProcessor { finishReason: FinishedCompletionReason.Stop, message: { role: Raw.ChatRole.Assistant, - content: chunk.response.output.map((item): Raw.ChatCompletionContentPart | undefined => { + content: normalizedOutput.map((item): Raw.ChatCompletionContentPart | undefined => { if (item.type === 'message') { return { type: Raw.ChatCompletionContentPartKind.Text, text: item.content.map(c => c.type === 'output_text' ? c.text : c.refusal).join('') }; } else if (item.type === 'image_generation_call' && item.result) { @@ -622,6 +772,7 @@ export class OpenAIResponsesProcessor { }).filter(isDefined), } }; + } } } } diff --git a/extensions/copilot/src/platform/endpoint/node/test/responsesApi.spec.ts b/extensions/copilot/src/platform/endpoint/node/test/responsesApi.spec.ts index 59d4498aa89bf..833ef99edcd6c 100644 --- a/extensions/copilot/src/platform/endpoint/node/test/responsesApi.spec.ts +++ b/extensions/copilot/src/platform/endpoint/node/test/responsesApi.spec.ts @@ -6,13 +6,94 @@ import { Raw } from '@vscode/prompt-tsx'; import type { OpenAI } from 'openai'; import { describe, expect, it } from 'vitest'; +import { TokenizerType } from '../../../../util/common/tokenizer'; import { IInstantiationService } from '../../../../util/vs/platform/instantiation/common/instantiation'; import { ILogService } from '../../../log/common/logService'; +import { isOpenAIContextManagementResponse } from '../../../networking/common/fetch'; +import { IChatEndpoint, ICreateEndpointBodyOptions } from '../../../networking/common/networking'; +import { openAIContextManagementCompactionType, OpenAIContextManagementResponse } from '../../../networking/common/openai'; import { TelemetryData } from '../../../telemetry/common/telemetryData'; import { SpyingTelemetryService } from '../../../telemetry/node/spyingTelemetryService'; import { createFakeStreamResponse } from '../../../test/node/fetcher'; import { createPlatformServices } from '../../../test/node/services'; -import { processResponseFromChatEndpoint, responseApiInputToRawMessagesForLogging } from '../responsesApi'; +import { CustomDataPartMimeTypes } from '../../common/endpointTypes'; +import { createResponsesRequestBody, getResponsesApiCompactionThresholdFromBody, processResponseFromChatEndpoint, responseApiInputToRawMessagesForLogging } from '../responsesApi'; + +const testEndpoint: IChatEndpoint = { + urlOrRequestMetadata: 'https://example.test/chat', + modelMaxPromptTokens: 128000, + name: 'Test Endpoint', + version: '1', + family: 'gpt-5-mini', + tokenizer: TokenizerType.O200K, + maxOutputTokens: 4096, + model: 'gpt-5-mini', + modelProvider: 'openai', + supportsToolCalls: true, + supportsVision: true, + supportsPrediction: true, + showInModelPicker: true, + isFallback: false, + acquireTokenizer() { + throw new Error('Not implemented in test'); + }, + async processResponseFromChatEndpoint() { + throw new Error('Not implemented in test'); + }, + async makeChatRequest() { + throw new Error('Not implemented in test'); + }, + async makeChatRequest2() { + throw new Error('Not implemented in test'); + }, + createRequestBody() { + throw new Error('Not implemented in test'); + }, + cloneWithTokenOverride() { + return this; + } +}; + +const createRequestOptions = (messages: Raw.ChatMessage[], useWebSocket: boolean): ICreateEndpointBodyOptions => ({ + debugName: 'test', + messages, + requestId: 'req-1', + postOptions: {}, + finishedCb: undefined, + location: undefined as any, + useWebSocket, +}); + +const createStatefulMarkerMessage = (modelId: string, marker: string): Raw.ChatMessage => ({ + role: Raw.ChatRole.Assistant, + content: [{ + type: Raw.ChatCompletionContentPartKind.Opaque, + value: { + type: CustomDataPartMimeTypes.StatefulMarker, + value: { + modelId, + marker, + } + } + }] +}); + +const createCompactionResponse = (id: string, encrypted_content: string): OpenAIContextManagementResponse => ({ + type: openAIContextManagementCompactionType, + id, + encrypted_content, +}); + +const createCompactionAssistantMessage = (compaction: OpenAIContextManagementResponse): Raw.ChatMessage => ({ + role: Raw.ChatRole.Assistant, + content: [{ + type: Raw.ChatCompletionContentPartKind.Opaque, + value: { + type: CustomDataPartMimeTypes.ContextManagement, + compaction, + } + }] +}); describe('responseApiInputToRawMessagesForLogging', () => { @@ -214,6 +295,146 @@ describe('responseApiInputToRawMessagesForLogging', () => { }); }); +describe('createResponsesRequestBody', () => { + it('extracts compaction threshold from request body context management', () => { + expect(getResponsesApiCompactionThresholdFromBody({ + context_management: [{ + type: openAIContextManagementCompactionType, + compact_threshold: 1234, + }] + })).toBe(1234); + }); + + it('still slices websocket requests by stateful marker index when compaction is disabled', () => { + const services = createPlatformServices(); + const accessor = services.createTestingAccessor(); + const instantiationService = accessor.get(IInstantiationService); + const endpointWithoutCompaction = { ...testEndpoint, family: 'gpt-5' as const }; + const messages: Raw.ChatMessage[] = [ + { + role: Raw.ChatRole.User, + content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'before marker' }], + }, + createStatefulMarkerMessage(testEndpoint.model, 'resp-prev'), + { + role: Raw.ChatRole.User, + content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'after marker' }], + }, + ]; + + const webSocketBody = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(servicesAccessor, createRequestOptions(messages, true), endpointWithoutCompaction.model, endpointWithoutCompaction)); + + expect(webSocketBody.previous_response_id).toBe('resp-prev'); + expect(webSocketBody.input).toHaveLength(1); + expect(webSocketBody.input?.[0]).toMatchObject({ + role: 'user', + content: [{ type: 'input_text', text: 'after marker' }], + }); + + accessor.dispose(); + services.dispose(); + }); + + it('includes the newest compaction item in websocket requests when it predates the stateful marker', () => { + const services = createPlatformServices(); + const accessor = services.createTestingAccessor(); + const instantiationService = accessor.get(IInstantiationService); + const latestCompaction = createCompactionResponse('cmp_ws', 'enc_ws'); + const messages: Raw.ChatMessage[] = [ + { + role: Raw.ChatRole.User, + content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'before compaction' }], + }, + createCompactionAssistantMessage(latestCompaction), + createStatefulMarkerMessage(testEndpoint.model, 'resp-prev'), + { + role: Raw.ChatRole.User, + content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'after marker' }], + }, + ]; + + const webSocketBody = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(servicesAccessor, createRequestOptions(messages, true), testEndpoint.model, testEndpoint)); + + expect(webSocketBody.previous_response_id).toBe('resp-prev'); + expect(webSocketBody.input).toContainEqual({ + type: openAIContextManagementCompactionType, + id: 'cmp_ws', + encrypted_content: 'enc_ws', + }); + expect(webSocketBody.input).toContainEqual({ + role: 'user', + content: [{ type: 'input_text', text: 'after marker' }], + }); + + accessor.dispose(); + services.dispose(); + }); + + it('includes the newest compaction item in non-websocket requests when it predates the stateful marker', () => { + const services = createPlatformServices(); + const accessor = services.createTestingAccessor(); + const instantiationService = accessor.get(IInstantiationService); + const latestCompaction = createCompactionResponse('cmp_http', 'enc_http'); + const messages: Raw.ChatMessage[] = [ + { + role: Raw.ChatRole.User, + content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'before compaction' }], + }, + createCompactionAssistantMessage(latestCompaction), + createStatefulMarkerMessage(testEndpoint.model, 'resp-prev'), + { + role: Raw.ChatRole.User, + content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'after marker' }], + }, + ]; + + const body = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(servicesAccessor, createRequestOptions(messages, false), testEndpoint.model, testEndpoint)); + + expect(body.previous_response_id).toBe('resp-prev'); + expect(body.input).toContainEqual({ + type: openAIContextManagementCompactionType, + id: 'cmp_http', + encrypted_content: 'enc_http', + }); + expect(body.input).toContainEqual({ + role: 'user', + content: [{ type: 'input_text', text: 'after marker' }], + }); + + accessor.dispose(); + services.dispose(); + }); + + it('round-trips the newest stored compaction item', () => { + const services = createPlatformServices(); + const accessor = services.createTestingAccessor(); + const instantiationService = accessor.get(IInstantiationService); + const latestCompaction = createCompactionResponse('cmp_new', 'enc_new'); + const messages: Raw.ChatMessage[] = [ + { + role: Raw.ChatRole.User, + content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'earlier turn' }], + }, + createCompactionAssistantMessage(latestCompaction), + { + role: Raw.ChatRole.User, + content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'follow up' }], + }, + ]; + + const body = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(servicesAccessor, createRequestOptions(messages, false), testEndpoint.model, testEndpoint)); + + expect(body.input).toContainEqual({ + type: openAIContextManagementCompactionType, + id: 'cmp_new', + encrypted_content: 'enc_new', + }); + + accessor.dispose(); + services.dispose(); + }); +}); + describe('processResponseFromChatEndpoint telemetry', () => { it('emits engine.messages for Responses API assistant output', async () => { const services = createPlatformServices(); @@ -273,4 +494,279 @@ describe('processResponseFromChatEndpoint telemetry', () => { accessor.dispose(); services.dispose(); }); + + it('reconciles the newest compaction item from response.completed for the next request', async () => { + const services = createPlatformServices(); + const accessor = services.createTestingAccessor(); + const instantiationService = accessor.get(IInstantiationService); + const logService = accessor.get(ILogService); + const telemetryService = new SpyingTelemetryService(); + const streamedCompactions: OpenAIContextManagementResponse[] = []; + + const olderCompaction = createCompactionResponse('cmp_old', 'enc_old'); + const newerCompaction = createCompactionResponse('cmp_new', 'enc_new'); + const compactionEvent = { + type: 'response.output_item.done', + output_index: 0, + item: olderCompaction, + }; + const completedEvent = { + type: 'response.completed', + response: { + id: 'resp_latest_compaction', + model: 'gpt-5-mini', + created_at: 123, + usage: { + input_tokens: 1200, + output_tokens: 9, + total_tokens: 1209, + input_tokens_details: { cached_tokens: 0 }, + output_tokens_details: { reasoning_tokens: 0 }, + }, + output: [ + olderCompaction, + { + type: 'message', + content: [{ type: 'output_text', text: 'reply' }], + }, + newerCompaction, + ], + } + }; + + const response = createFakeStreamResponse(`data: ${JSON.stringify(compactionEvent)}\n\ndata: ${JSON.stringify(completedEvent)}\n\n`); + const telemetryData = TelemetryData.createAndMarkAsIssued({ modelCallId: 'model-call-latest-compaction' }, {}); + + const stream = await processResponseFromChatEndpoint( + instantiationService, + telemetryService, + logService, + response, + 1, + async (_text, _unused, delta) => { + if (delta.contextManagement && isOpenAIContextManagementResponse(delta.contextManagement)) { + streamedCompactions.push(delta.contextManagement); + } + return undefined; + }, + telemetryData, + 1000 + ); + + for await (const _ of stream) { + // consume stream + } + + expect(streamedCompactions.map(item => item.id)).toEqual(['cmp_old', 'cmp_new']); + + const body = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(servicesAccessor, createRequestOptions([ + createCompactionAssistantMessage(streamedCompactions[streamedCompactions.length - 1]), + { + role: Raw.ChatRole.User, + content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'continue' }], + }, + ], false), testEndpoint.model, testEndpoint)); + + expect(body.input).toContainEqual({ + type: openAIContextManagementCompactionType, + id: 'cmp_new', + encrypted_content: 'enc_new', + }); + expect(body.input).not.toContainEqual({ + type: openAIContextManagementCompactionType, + id: 'cmp_old', + encrypted_content: 'enc_old', + }); + + accessor.dispose(); + services.dispose(); + }); + + it('does not emit compaction telemetry when compaction is disabled', async () => { + const services = createPlatformServices(); + const accessor = services.createTestingAccessor(); + const instantiationService = accessor.get(IInstantiationService); + const logService = accessor.get(ILogService); + const telemetryService = new SpyingTelemetryService(); + + const compactionEvent = { + type: 'response.output_item.done', + output_index: 0, + item: { + type: openAIContextManagementCompactionType, + id: 'cmp_disabled', + encrypted_content: 'enc', + } + }; + const completedEvent = { + type: 'response.completed', + response: { + id: 'resp_disabled', + model: 'gpt-5-mini', + created_at: 123, + usage: { + input_tokens: 1500, + output_tokens: 9, + total_tokens: 1509, + input_tokens_details: { cached_tokens: 0 }, + output_tokens_details: { reasoning_tokens: 0 }, + }, + output: [] + } + }; + + const response = createFakeStreamResponse(`data: ${JSON.stringify(compactionEvent)}\n\ndata: ${JSON.stringify(completedEvent)}\n\n`); + const telemetryData = TelemetryData.createAndMarkAsIssued({ modelCallId: 'model-call-4' }, {}); + + const stream = await processResponseFromChatEndpoint( + instantiationService, + telemetryService, + logService, + response, + 1, + async () => undefined, + telemetryData, + undefined + ); + + for await (const _ of stream) { + // consume stream + } + + const event = telemetryService.getEvents().telemetryServiceEvents.find(e => e.eventName === 'responsesApi.compactionOutcome'); + expect(event).toBeUndefined(); + + accessor.dispose(); + services.dispose(); + }); + + it('emits telemetry when the server returns a compaction item', async () => { + const services = createPlatformServices(); + const accessor = services.createTestingAccessor(); + const instantiationService = accessor.get(IInstantiationService); + const logService = accessor.get(ILogService); + const telemetryService = new SpyingTelemetryService(); + + const compactionEvent = { + type: 'response.output_item.done', + output_index: 0, + item: { + type: openAIContextManagementCompactionType, + id: 'cmp_123', + encrypted_content: 'enc', + } + }; + const completedEvent = { + type: 'response.completed', + response: { + id: 'resp_456', + model: 'gpt-5-mini', + created_at: 123, + usage: { + input_tokens: 1200, + output_tokens: 7, + total_tokens: 1207, + input_tokens_details: { cached_tokens: 0 }, + output_tokens_details: { reasoning_tokens: 0 }, + }, + output: [] + } + }; + + const response = createFakeStreamResponse(`data: ${JSON.stringify(compactionEvent)}\n\ndata: ${JSON.stringify(completedEvent)}\n\n`); + const telemetryData = TelemetryData.createAndMarkAsIssued({ modelCallId: 'model-call-2' }, {}); + + const stream = await processResponseFromChatEndpoint( + instantiationService, + telemetryService, + logService, + response, + 1, + async () => undefined, + telemetryData, + 1000 + ); + + for await (const _ of stream) { + // consume stream + } + + const event = telemetryService.getEvents().telemetryServiceEvents.find(e => e.eventName === 'responsesApi.compactionOutcome'); + expect(event).toBeDefined(); + expect(event?.properties).toMatchObject({ + outcome: 'compaction_returned', + model: 'gpt-5-mini', + }); + expect(event?.measurements).toMatchObject({ + compactThreshold: 1000, + promptTokens: 1200, + totalTokens: 1207, + }); + + accessor.dispose(); + services.dispose(); + }); + + it('emits telemetry when the server exceeds threshold without returning a compaction item', async () => { + const services = createPlatformServices(); + const accessor = services.createTestingAccessor(); + const instantiationService = accessor.get(IInstantiationService); + const logService = accessor.get(ILogService); + const telemetryService = new SpyingTelemetryService(); + + const completedEvent = { + type: 'response.completed', + response: { + id: 'resp_789', + model: 'gpt-5-mini', + created_at: 123, + usage: { + input_tokens: 1500, + output_tokens: 9, + total_tokens: 1509, + input_tokens_details: { cached_tokens: 0 }, + output_tokens_details: { reasoning_tokens: 0 }, + }, + output: [ + { + type: 'message', + content: [{ type: 'output_text', text: 'reply' }], + } + ] + } + }; + + const response = createFakeStreamResponse(`data: ${JSON.stringify(completedEvent)}\n\n`); + const telemetryData = TelemetryData.createAndMarkAsIssued({ modelCallId: 'model-call-3' }, {}); + + const stream = await processResponseFromChatEndpoint( + instantiationService, + telemetryService, + logService, + response, + 1, + async () => undefined, + telemetryData, + 1000 + ); + + for await (const _ of stream) { + // consume stream + } + + const event = telemetryService.getEvents().telemetryServiceEvents.find(e => e.eventName === 'responsesApi.compactionOutcome'); + expect(event).toBeDefined(); + expect(event?.properties).toMatchObject({ + outcome: 'threshold_met_no_compaction', + model: 'gpt-5-mini', + }); + expect(event?.measurements).toMatchObject({ + compactThreshold: 1000, + promptTokens: 1500, + totalTokens: 1509, + }); + + accessor.dispose(); + services.dispose(); + }); }); diff --git a/extensions/copilot/src/platform/networking/node/chatStream.ts b/extensions/copilot/src/platform/networking/node/chatStream.ts index 0d94539c06065..aee698b6d1956 100644 --- a/extensions/copilot/src/platform/networking/node/chatStream.ts +++ b/extensions/copilot/src/platform/networking/node/chatStream.ts @@ -460,6 +460,45 @@ export function sendEngineMessagesTelemetry(telemetryService: ITelemetryService, sendEngineMessagesLengthTelemetry(telemetryService, messages, telemetryData, isOutput, logService); } +export function sendResponsesApiCompactionTelemetry( + telemetryService: ITelemetryService, + properties: { + outcome: 'compaction_returned' | 'threshold_met_no_compaction'; + headerRequestId: string; + gitHubRequestId: string; + model: string; + }, + measurements: { + compactThreshold?: number; + promptTokens: number; + totalTokens: number; + } +): void { + /* __GDPR__ + "responsesApi.compactionOutcome" : { + "owner": "dileepy", + "comment": "Tracks server-side Responses API compaction outcomes.", + "outcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether the server returned a compaction item or exceeded the threshold without returning one." }, + "headerRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Request ID from the response headers." }, + "gitHubRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "GitHub request ID from the response headers if present." }, + "model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Model family reported by the response." }, + "compactThreshold": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Compaction threshold configured for the request." }, + "promptTokens": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Prompt token count reported by the response." }, + "totalTokens": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Total token count reported by the response." } + } + */ + telemetryService.sendGHTelemetryEvent('responsesApi.compactionOutcome', { + outcome: properties.outcome, + headerRequestId: properties.headerRequestId, + gitHubRequestId: properties.gitHubRequestId, + model: properties.model, + }, { + compactThreshold: measurements.compactThreshold, + promptTokens: measurements.promptTokens, + totalTokens: measurements.totalTokens, + }); +} + export function prepareChatCompletionForReturn( telemetryService: ITelemetryService, logService: ILogService, From 33f1d2a4716f87c881a4aa538eceeb3dad270f8f Mon Sep 17 00:00:00 2001 From: dileepyavan <52841896+dileepyavan@users.noreply.github.com> Date: Tue, 7 Apr 2026 22:47:21 -0700 Subject: [PATCH 2/2] Update extensions/copilot/src/platform/networking/node/chatStream.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- extensions/copilot/src/platform/networking/node/chatStream.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/copilot/src/platform/networking/node/chatStream.ts b/extensions/copilot/src/platform/networking/node/chatStream.ts index aee698b6d1956..385ba7bf53c53 100644 --- a/extensions/copilot/src/platform/networking/node/chatStream.ts +++ b/extensions/copilot/src/platform/networking/node/chatStream.ts @@ -481,7 +481,7 @@ export function sendResponsesApiCompactionTelemetry( "outcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether the server returned a compaction item or exceeded the threshold without returning one." }, "headerRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Request ID from the response headers." }, "gitHubRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "GitHub request ID from the response headers if present." }, - "model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Model family reported by the response." }, + "model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Model identifier reported by the response." }, "compactThreshold": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Compaction threshold configured for the request." }, "promptTokens": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Prompt token count reported by the response." }, "totalTokens": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Total token count reported by the response." }