Skip to content

Commit 533b876

Browse files
authored
Fix premature compaction: decouple ratio from tool tokens, raise thresholds (#4812)
* Fix premature compaction: decouple ratio from tool tokens, raise thresholds
  - Compute compaction ratios against baseBudget instead of budgetThreshold, so users with many tools aren't penalized with earlier compaction
  - Reduce the safety margin from 15% to 10% by raising the multiplier from 0.85 to 0.90 (10% covers the observed tokenizer discrepancy without wasting 15% of the context)
  - Raise the background compaction kick-off from 75% to 80%

  Fixes microsoft/vscode#304475

* Address PR feedback: clamp messageBudget, clarify toolTokens guard comment
1 parent d105bd2 commit 533b876

1 file changed

Lines changed: 15 additions & 10 deletions

File tree

src/extension/intents/node/agentIntent.ts

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,6 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
395395
throw new Error(`Setting github.copilot.${ConfigKey.Advanced.SummarizeAgentConversationHistoryThreshold.id} is too low`);
396396
}
397397

398-
// Reserve extra space when tools are involved due to token counting issues
399398
const baseBudget = Math.min(
400399
this.configurationService.getConfig<number | undefined>(ConfigKey.Advanced.SummarizeAgentConversationHistoryThreshold) ?? this.endpoint.modelMaxPromptTokens,
401400
this.endpoint.modelMaxPromptTokens
@@ -405,8 +404,14 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
405404
const summarizationEnabled = this.configurationService.getConfig(ConfigKey.SummarizeAgentConversationHistory) && this.prompt === AgentPrompt && !responsesCompactionContextManagementEnabled;
406405
const backgroundCompactionEnabled = summarizationEnabled && this.configurationService.getExperimentBasedConfig(ConfigKey.BackgroundCompaction, this.expService);
407406

408-
const budgetThreshold = Math.floor((baseBudget - toolTokens) * 0.85);
409-
const safeBudget = useTruncation ? Number.MAX_SAFE_INTEGER : budgetThreshold;
407+
// When tools are present, apply a 10% safety margin on the message portion
408+
// to account for tokenizer discrepancies between our tool-token counter and
409+
// the model's actual tokenizer. Without this, an undercount could cause an
410+
// API-level context_length_exceeded error instead of a graceful
411+
// BudgetExceededError from prompt-tsx. When there are no tools the endpoint's
412+
// own modelMaxPromptTokens is used unchanged.
413+
const messageBudget = Math.max(1, Math.floor((baseBudget - toolTokens) * 0.9));
414+
const safeBudget = useTruncation ? Number.MAX_SAFE_INTEGER : messageBudget;
410415
const endpoint = toolTokens > 0 ? this.endpoint.cloneWithTokenOverride(safeBudget) : this.endpoint;
411416

412417
this.logService.debug(`AgentIntent: rendering with budget=${safeBudget} (baseBudget: ${baseBudget}, toolTokens: ${toolTokens}), summarizationEnabled=${summarizationEnabled}`);
@@ -436,12 +441,12 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
436441
// ≥ 95% + InProgress → block on the background compaction
437442
// completing, then apply before rendering.
438443
//
439-
// ≥ 75% + Idle (post-render) → kick off background compaction so
444+
// ≥ 80% + Idle (post-render) → kick off background compaction so
440445
// it is ready for a future iteration.
441446
//
442447
const backgroundSummarizer = backgroundCompactionEnabled ? this._getOrCreateBackgroundSummarizer(promptContext.conversation?.sessionId) : undefined;
443-
const contextRatio = backgroundSummarizer && budgetThreshold > 0
444-
? this._lastRenderTokenCount / budgetThreshold
448+
const contextRatio = backgroundSummarizer && baseBudget > 0
449+
? (this._lastRenderTokenCount + toolTokens) / baseBudget
445450
: 0;
446451

447452
// Track whether we applied a summary in this iteration so we don't
@@ -625,8 +630,8 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
625630

626631
// 3. Post-render background compaction checks.
627632
if (backgroundCompactionEnabled && backgroundSummarizer && !summaryAppliedThisIteration) {
628-
const postRenderRatio = budgetThreshold > 0
629-
? result.tokenCount / budgetThreshold
633+
const postRenderRatio = baseBudget > 0
634+
? (result.tokenCount + toolTokens) / baseBudget
630635
: 0;
631636

632637
if (postRenderRatio >= 0.95 && backgroundSummarizer.state === BackgroundSummarizationState.InProgress) {
@@ -654,8 +659,8 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
654659
this.logService.debug(`[Agent] post-render background compaction finished but produced no usable result`);
655660
this._sendBackgroundCompactionTelemetry('postRenderBlocked', 'noResult', postRenderRatio, promptContext);
656661
}
657-
} else if (postRenderRatio >= 0.75 && (backgroundSummarizer.state === BackgroundSummarizationState.Idle || backgroundSummarizer.state === BackgroundSummarizationState.Failed)) {
658-
// At ≥ 75% with no running compaction (or a previous failure) — kick off background work.
662+
} else if (postRenderRatio >= 0.80 && (backgroundSummarizer.state === BackgroundSummarizationState.Idle || backgroundSummarizer.state === BackgroundSummarizationState.Failed)) {
663+
// At ≥ 80% with no running compaction (or a previous failure) — kick off background work.
659664
this._startBackgroundSummarization(backgroundSummarizer, props, endpoint, token, postRenderRatio);
660665
}
661666
}

0 commit comments

Comments
 (0)