Skip to content

Commit 2d70e5a

Browse files
committed
Fix: count all tool tokens in budget calculation, including deferred tools
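Deferred tools (defer_loading: true) still count against the API context window. The 3/30 change (#4834) excluded them from toolTokens, which made the message budget ~31K tokens too generous. That led to context_length_exceeded errors, followed by summarization failures ("No messages provided"). This commit counts every available tool in toolTokens again and, for Full-mode summarization, reserves budget for the tools that are sent alongside the summarization prompt.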
1 parent b6d44c3 commit 2d70e5a

6 files changed

Lines changed: 67 additions & 48 deletions

File tree

src/extension/intents/node/agentIntent.ts

Lines changed: 4 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ import { IEditLogService } from '../../../platform/multiFileEdit/common/editLogS
1919
import { CUSTOM_TOOL_SEARCH_NAME, isAnthropicCustomToolSearchEnabled, isAnthropicToolSearchEnabled } from '../../../platform/networking/common/anthropic';
2020
import { IChatEndpoint } from '../../../platform/networking/common/networking';
2121
import { modelsWithoutResponsesContextManagement } from '../../../platform/networking/common/openai';
22-
import { IToolDeferralService } from '../../../platform/networking/common/toolDeferralService';
2322
import { INotebookService } from '../../../platform/notebook/common/notebookService';
2423
import { GenAiMetrics } from '../../../platform/otel/common/genAiMetrics';
2524
import { IOTelService } from '../../../platform/otel/common/otelService';
@@ -379,7 +378,6 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
379378
@IExperimentationService private readonly expService: IExperimentationService,
380379
@IAutomodeService private readonly automodeService: IAutomodeService,
381380
@IOTelService override readonly otelService: IOTelService,
382-
@IToolDeferralService private readonly toolDeferralService: IToolDeferralService,
383381
) {
384382
super(intent, location, endpoint, request, intentOptions, instantiationService, codeMapperService, envService, promptPathRepresentationService, endpointProvider, workspaceService, toolsService, configurationService, editLogService, commandService, telemetryService, notebookService, otelService);
385383
}
@@ -405,15 +403,8 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
405403
}
406404

407405
const tools = promptContext.tools?.availableTools;
408-
// When Anthropic tool search is enabled, deferred tools are sent with
409-
// defer_loading: true and don't count against the context window until
410-
// the model loads them via tool_search. Only count non-deferred tools
411-
// so the budget isn't artificially reduced.
412406
const toolSearchEnabled = isAnthropicToolSearchEnabled(this.endpoint, this.configurationService);
413-
const effectiveTools = tools && toolSearchEnabled
414-
? tools.filter(t => this.toolDeferralService.isNonDeferredTool(t.name))
415-
: tools;
416-
const toolTokens = effectiveTools?.length ? await this.endpoint.acquireTokenizer().countToolTokens(effectiveTools) : 0;
407+
const toolTokens = tools?.length ? await this.endpoint.acquireTokenizer().countToolTokens(tools) : 0;
417408

418409
const summarizeThresholdOverride = this.configurationService.getConfig<number | undefined>(ConfigKey.Advanced.SummarizeAgentConversationHistoryThreshold);
419410
if (typeof summarizeThresholdOverride === 'number' && summarizeThresholdOverride < 100 && summarizeThresholdOverride > 0) {
@@ -441,7 +432,7 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
441432
const safeBudget = useTruncation ? Number.MAX_SAFE_INTEGER : messageBudget;
442433
const endpoint = toolTokens > 0 ? this.endpoint.cloneWithTokenOverride(safeBudget) : this.endpoint;
443434

444-
this.logService.debug(`AgentIntent: rendering with budget=${safeBudget} (baseBudget: ${baseBudget}, toolTokens: ${toolTokens}${toolSearchEnabled ? `, totalTools: ${tools?.length ?? 0}, nonDeferredTools: ${effectiveTools?.length ?? 0}` : ''}), summarizationEnabled=${summarizationEnabled}`);
435+
this.logService.debug(`AgentIntent: rendering with budget=${safeBudget} (baseBudget: ${baseBudget}, toolTokens: ${toolTokens}, totalTools: ${tools?.length ?? 0}, toolSearchEnabled: ${toolSearchEnabled}), summarizationEnabled=${summarizationEnabled}`);
445436
let result: RenderPromptResult;
446437
const props: AgentPromptProps = {
447438
endpoint,
@@ -595,7 +586,7 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
595586
const renderer = PromptRenderer.create(this.instantiationService, this.endpoint, this.prompt, {
596587
...renderProps,
597588
endpoint: this.endpoint,
598-
promptContext: this._buildSummarizationPromptContext(renderProps.promptContext),
589+
promptContext: renderProps.promptContext,
599590
triggerSummarize: true,
600591
});
601592
return await renderer.render(progress, token);
@@ -868,7 +859,7 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
868859
const bgRenderer = PromptRenderer.create(this.instantiationService, this.endpoint, this.prompt, {
869860
...snapshotProps,
870861
endpoint: this.endpoint,
871-
promptContext: this._buildSummarizationPromptContext(snapshotProps.promptContext),
862+
promptContext: snapshotProps.promptContext,
872863
triggerSummarize: true,
873864
summarizationSource: 'background',
874865
});
@@ -978,30 +969,6 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
978969
));
979970
}
980971

981-
/**
982-
* Build a promptContext for summarization that filters availableTools to
983-
* non-deferred tools when Anthropic tool search is enabled. Deferred tool
984-
* schemas are unnecessary in the summarization prompt (which uses
985-
* tool_choice: 'none') and can push the prompt over the token budget.
986-
*/
987-
private _buildSummarizationPromptContext(promptContext: IBuildPromptContext): IBuildPromptContext {
988-
if (!promptContext.tools?.availableTools) {
989-
return promptContext;
990-
}
991-
const toolSearchEnabled = isAnthropicToolSearchEnabled(this.endpoint, this.configurationService);
992-
if (!toolSearchEnabled) {
993-
return promptContext;
994-
}
995-
const nonDeferredTools = promptContext.tools.availableTools.filter(t => this.toolDeferralService.isNonDeferredTool(t.name));
996-
return {
997-
...promptContext,
998-
tools: {
999-
...promptContext.tools,
1000-
availableTools: nonDeferredTools,
1001-
},
1002-
};
1003-
}
1004-
1005972
/**
1006973
* Record a background compaction failure on the current turn's metadata,
1007974
* matching how foreground compaction records its failures.

src/extension/intents/node/askAgentIntent.ts

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ import { IEnvService } from '../../../platform/env/common/envService';
1212
import { ILogService } from '../../../platform/log/common/logService';
1313
import { IEditLogService } from '../../../platform/multiFileEdit/common/editLogService';
1414
import { IChatEndpoint } from '../../../platform/networking/common/networking';
15-
import { IToolDeferralService } from '../../../platform/networking/common/toolDeferralService';
1615
import { INotebookService } from '../../../platform/notebook/common/notebookService';
1716
import { IOTelService } from '../../../platform/otel/common/otelService';
1817
import { IPromptPathRepresentationService } from '../../../platform/prompts/common/promptPathRepresentationService';
@@ -129,9 +128,8 @@ export class AskAgentIntentInvocation extends AgentIntentInvocation {
129128
@IExperimentationService expService: IExperimentationService,
130129
@IAutomodeService automodeService: IAutomodeService,
131130
@IOTelService otelService: IOTelService,
132-
@IToolDeferralService toolDeferralService: IToolDeferralService,
133131
) {
134-
super(intent, location, endpoint, request, { processCodeblocks: true }, instantiationService, codeMapperService, envService, promptPathRepresentationService, endpointProvider, workspaceService, toolsService, configurationService, editLogService, commandService, telemetryService, notebookService, logService, expService, automodeService, otelService, toolDeferralService);
132+
super(intent, location, endpoint, request, { processCodeblocks: true }, instantiationService, codeMapperService, envService, promptPathRepresentationService, endpointProvider, workspaceService, toolsService, configurationService, editLogService, commandService, telemetryService, notebookService, logService, expService, automodeService, otelService);
135133
}
136134

137135
public override async getAvailableTools(): Promise<vscode.LanguageModelToolInformation[]> {

src/extension/intents/node/editCodeIntent2.ts

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ import { IEnvService } from '../../../platform/env/common/envService';
1313
import { ILogService } from '../../../platform/log/common/logService';
1414
import { IEditLogService } from '../../../platform/multiFileEdit/common/editLogService';
1515
import { IChatEndpoint } from '../../../platform/networking/common/networking';
16-
import { IToolDeferralService } from '../../../platform/networking/common/toolDeferralService';
1716
import { requestHasNotebookRefs } from '../../../platform/notebook/common/helpers';
1817
import { INotebookService } from '../../../platform/notebook/common/notebookService';
1918
import { IOTelService } from '../../../platform/otel/common/otelService';
@@ -90,9 +89,8 @@ export class EditCode2IntentInvocation extends AgentIntentInvocation {
9089
@IExperimentationService expService: IExperimentationService,
9190
@IAutomodeService automodeService: IAutomodeService,
9291
@IOTelService otelService: IOTelService,
93-
@IToolDeferralService toolDeferralService: IToolDeferralService,
9492
) {
95-
super(intent, location, endpoint, request, intentOptions, instantiationService, codeMapperService, envService, promptPathRepresentationService, endpointProvider, workspaceService, toolsService, configurationService, editLogService, commandService, telemetryService, notebookService, logService, expService, automodeService, otelService, toolDeferralService);
93+
super(intent, location, endpoint, request, intentOptions, instantiationService, codeMapperService, envService, promptPathRepresentationService, endpointProvider, workspaceService, toolsService, configurationService, editLogService, commandService, telemetryService, notebookService, logService, expService, automodeService, otelService);
9694
}
9795

9896
public override async getAvailableTools(): Promise<vscode.LanguageModelToolInformation[]> {

src/extension/intents/node/notebookEditorIntent.ts

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ import { IEnvService } from '../../../platform/env/common/envService';
1212
import { ILogService } from '../../../platform/log/common/logService';
1313
import { IEditLogService } from '../../../platform/multiFileEdit/common/editLogService';
1414
import { IChatEndpoint } from '../../../platform/networking/common/networking';
15-
import { IToolDeferralService } from '../../../platform/networking/common/toolDeferralService';
1615
import { IAlternativeNotebookContentService } from '../../../platform/notebook/common/alternativeContent';
1716
import { getCellId } from '../../../platform/notebook/common/helpers';
1817
import { INotebookService } from '../../../platform/notebook/common/notebookService';
@@ -108,9 +107,8 @@ export class NotebookEditorIntentInvocation extends EditCode2IntentInvocation {
108107
@IExperimentationService expService: IExperimentationService,
109108
@IAutomodeService automodeService: IAutomodeService,
110109
@IOTelService otelService: IOTelService,
111-
@IToolDeferralService toolDeferralService: IToolDeferralService,
112110
) {
113-
super(intent, location, endpoint, request, intentOptions, instantiationService, codeMapperService, envService, promptPathRepresentationService, endpointProvider, workspaceService, toolsService, configurationService, editLogService, commandService, telemetryService, notebookService, logService, expService, automodeService, otelService, toolDeferralService);
111+
super(intent, location, endpoint, request, intentOptions, instantiationService, codeMapperService, envService, promptPathRepresentationService, endpointProvider, workspaceService, toolsService, configurationService, editLogService, commandService, telemetryService, notebookService, logService, expService, automodeService, otelService);
114112
}
115113

116114
protected override prompt = NotebookInlinePrompt;

src/extension/prompts/node/agent/summarizedConversationHistory.tsx

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ export class ConversationHistorySummarizationPrompt extends PromptElement<Conver
170170
</SystemMessage>
171171
{history}
172172
{this.props.workingNotebook && <WorkingNotebookSummary priority={this.props.priority - 2} notebook={this.props.workingNotebook} />}
173-
<UserMessage>
173+
<UserMessage priority={this.props.priority}>
174174
Summarize the conversation history so far, paying special attention to the most recent agent commands and tool results that triggered this summarization. Structure your summary using the enhanced format provided in the system message.<br />
175175
{isOpus && <>
176176
<br />
@@ -664,7 +664,18 @@ class ConversationHistorySummarizer {
664664

665665
private async getSummary(mode: SummaryMode, propsInfo: ISummarizedConversationHistoryInfo): Promise<SummarizationResult> {
666666
const stopwatch = new StopWatch(false);
667-
const endpoint = this.props.endpoint;
667+
668+
// In Full mode, tools are sent alongside the summarization prompt with
669+
// tool_choice: 'none'. Reserve budget for them so the rendered messages
670+
// plus tools don't exceed the model's context window.
671+
const tools = this.props.tools;
672+
const toolTokens = mode === SummaryMode.Full && tools?.length
673+
? await this.props.endpoint.acquireTokenizer().countToolTokens(tools)
674+
: 0;
675+
const endpoint = toolTokens > 0
676+
? this.props.endpoint.cloneWithTokenOverride(
677+
Math.max(1, Math.floor((this.props.endpoint.modelMaxPromptTokens - toolTokens) * 0.9)))
678+
: this.props.endpoint;
668679

669680
let summarizationPrompt: ChatMessage[];
670681
const associatedRequestId = this.props.promptContext.conversation?.getLatestTurn().id;

src/extension/prompts/node/agent/test/summarization.spec.tsx

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,53 @@ suite('Agent Summarization', () => {
440440
}
441441
});
442442

443+
test('simple mode summarization with small token budget renders zero messages (repro for No messages provided)', async () => {
444+
// Repro for: "Prompt failed validation with the reason: No messages provided"
445+
//
446+
// Root cause: when modelMaxPromptTokens is small enough that the summarization
447+
// prompt content exceeds the budget, prompt-tsx prunes all child elements.
448+
// After pruning, toChatMessages() silently skips messages whose content is
449+
// empty (isEmpty check), producing an empty messages array — without throwing
450+
// BudgetExceededError. The downstream makeChatRequest2 then hits the
451+
// isValidChatPayload check: "No messages provided".
452+
const instaService = accessor.get(IInstantiationService);
453+
const endpoint = instaService.createInstance(MockEndpoint, 'claude-sonnet');
454+
endpoint.modelMaxPromptTokens = 5; // So small that even a single short message cannot fit
455+
456+
const toolCallRounds = [
457+
new ToolCallRound('ok', [createEditFileToolCall(1)]),
458+
new ToolCallRound('ok 2', [createEditFileToolCall(2)]),
459+
];
460+
461+
const turn = new Turn('turnId', { type: 'user', message: 'hello' });
462+
const testConversation = new Conversation('sessionId', [turn]);
463+
464+
const promptContext: IBuildPromptContext = {
465+
chatVariables: new ChatVariablesCollection([]),
466+
history: [],
467+
query: 'edit this file',
468+
toolCallRounds,
469+
toolCallResults: createEditFileToolResult(1, 2),
470+
tools,
471+
conversation: testConversation,
472+
};
473+
474+
const baseProps = {
475+
priority: 1,
476+
endpoint,
477+
location: ChatLocation.Panel,
478+
promptContext,
479+
maxToolResultLength: Infinity,
480+
};
481+
482+
const propsInfo = instaService.createInstance(SummarizedConversationHistoryPropsBuilder).getProps(baseProps);
483+
const renderer = PromptRenderer.create(instaService, endpoint, ConversationHistorySummarizationPrompt, { ...propsInfo.props, simpleMode: true });
484+
const result = await renderer.render();
485+
486+
// prompt-tsx prunes all content and silently drops empty messages → 0 messages
487+
expect(result.messages.length).toBe(0);
488+
});
489+
443490
test('failure metadata on turn prevents repeated foreground summarization attempts', async () => {
444491
// This test verifies the contract that agentIntent.ts relies on:
445492
// after a foreground summarization failure, setting SummarizedConversationHistoryMetadata

0 commit comments

Comments
 (0)