Skip to content

Commit 1dcef8c

Browse files
LaZzyMan and claude committed
feat(core)!: redesign auto-compaction thresholds with three-tier ladder
Replaces the single 70% proportional threshold with a three-tier ladder (warn/auto/hard) that combines proportional fallback with absolute reservation. Large-window models (>=128K) now reserve ~33K instead of 30% of the window, freeing tens of thousands of context tokens that the old formula wasted.

Other improvements bundled in the same redesign:

- Compression sideQuery now disables thinking and caps maxOutputTokens at 20K, matching claude-code so the buffer math is predictable across providers (Anthropic/OpenAI/Gemini handle thinking budgets inconsistently)
- Failure handling upgraded from one-shot permanent lock to a 3-strike circuit breaker; reactive overflow still latches immediately
- New estimatePromptTokens helper closes the lag-by-one-turn and first-send-is-0 gaps in lastPromptTokenCount
- Hard-tier rescue pulls reactive overflow recovery forward to before the API call, saving an oversized round-trip
- /context command displays the three-tier ladder + current tier
- tipRegistry's context-* tips track the new thresholds instead of fixed 50/80/95 percentages

BREAKING CHANGE: chatCompression.contextPercentageThreshold setting is removed. Settings files containing the field log a one-line deprecation warning at startup and the value is ignored; behaviour is now controlled by built-in thresholds via the new computeThresholds() function.

Design: docs/design/auto-compaction-threshold-redesign.md
Plan: docs/plans/2026-05-14-auto-compaction-threshold-redesign.md

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 796de4d commit 1dcef8c

20 files changed

Lines changed: 3689 additions & 210 deletions

docs/design/auto-compaction-threshold-redesign.md

Lines changed: 418 additions & 0 deletions
Large diffs are not rendered by default.

docs/plans/2026-05-14-auto-compaction-threshold-redesign.md

Lines changed: 1752 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
/**
2+
* @license
3+
* Copyright 2025 Google LLC
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
import { describe, it, expect } from 'vitest';
8+
import { tipRegistry, type TipContext } from './tipRegistry.js';
9+
10+
/**
 * Baseline context shared by the cases below: a 200K-token window with no
 * prompt tokens recorded yet and a session prompt count of 10. Tests spread
 * this object and override only the fields they exercise.
 */
const baseCtx: TipContext = {
  platform: 'darwin',
  sessionCount: 1,
  sessionPromptCount: 10,
  contextWindowSize: 200_000,
  lastPromptTokenCount: 0,
  // Threshold ladder listed from the widest bound down to the warn tier.
  thresholds: {
    effectiveWindow: 180_000,
    hard: 177_000,
    auto: 167_000,
    warn: 147_000,
  },
};
23+
24+
/**
 * Looks up a registered tip by its `id`.
 *
 * @param id - Identifier of the tip to fetch from `tipRegistry`.
 * @returns The first registry entry whose `id` matches.
 * @throws Error when no entry matches, so a renamed or removed tip fails the
 *   test with the offending id rather than with an opaque "cannot read
 *   properties of undefined" on a later `.isRelevant` call.
 */
function tipById(id: string) {
  const tip = tipRegistry.find((t) => t.id === id);
  if (tip === undefined) {
    throw new Error(`Tip "${id}" is not registered in tipRegistry`);
  }
  return tip;
}
27+
28+
// Fixture thresholds (see baseCtx): warn 147,000 / auto 167,000 / hard 177,000.
// Each tier's lower bound is inclusive and its upper bound exclusive, so every
// tier case pins the exact boundary values in addition to a mid-range sample;
// an off-by-one in a comparison operator would otherwise go unnoticed.
describe('context-* tip thresholds align with computeThresholds', () => {
  // Builds a context that differs from baseCtx only in prompt-token usage.
  const withUsage = (lastPromptTokenCount: number): TipContext => ({
    ...baseCtx,
    lastPromptTokenCount,
  });

  it('compress-intro fires between warn and auto', () => {
    const t = tipById('compress-intro');
    expect(t.isRelevant(withUsage(100_000))).toBe(false);
    expect(t.isRelevant(withUsage(150_000))).toBe(true);
    expect(t.isRelevant(withUsage(168_000))).toBe(false);
    // Boundaries: warn is inclusive, auto is exclusive.
    expect(t.isRelevant(withUsage(146_999))).toBe(false);
    expect(t.isRelevant(withUsage(147_000))).toBe(true);
    expect(t.isRelevant(withUsage(166_999))).toBe(true);
    expect(t.isRelevant(withUsage(167_000))).toBe(false);
  });

  it('context-high fires between auto and hard', () => {
    const t = tipById('context-high');
    expect(t.isRelevant(withUsage(150_000))).toBe(false);
    expect(t.isRelevant(withUsage(170_000))).toBe(true);
    expect(t.isRelevant(withUsage(178_000))).toBe(false);
    // Boundaries: auto is inclusive, hard is exclusive.
    expect(t.isRelevant(withUsage(166_999))).toBe(false);
    expect(t.isRelevant(withUsage(167_000))).toBe(true);
    expect(t.isRelevant(withUsage(176_999))).toBe(true);
    expect(t.isRelevant(withUsage(177_000))).toBe(false);
  });

  it('context-critical fires at or above hard', () => {
    const t = tipById('context-critical');
    expect(t.isRelevant(withUsage(170_000))).toBe(false);
    expect(t.isRelevant(withUsage(178_000))).toBe(true);
    // Boundary: exactly `hard` already counts as critical.
    expect(t.isRelevant(withUsage(176_999))).toBe(false);
    expect(t.isRelevant(withUsage(177_000))).toBe(true);
  });

  it('falls back gracefully when thresholds undefined (legacy callers)', () => {
    const ctx = { ...baseCtx, thresholds: undefined };
    // All three context-* tips return false when thresholds are missing
    // (the comparison would be unsafe without them).
    expect(tipById('compress-intro').isRelevant(ctx)).toBe(false);
    expect(tipById('context-high').isRelevant(ctx)).toBe(false);
    expect(tipById('context-critical').isRelevant(ctx)).toBe(false);
  });

  it('compress-intro additionally gates on sessionPromptCount > 5', () => {
    const t = tipById('compress-intro');
    // Usage sits above warn and below auto; only the session age varies.
    const atWarnTier = (sessionPromptCount: number): TipContext => ({
      ...withUsage(150_000),
      sessionPromptCount,
    });
    expect(t.isRelevant(atWarnTier(3))).toBe(false);
    // The gate is strict: exactly 5 prompts is still too new.
    expect(t.isRelevant(atWarnTier(5))).toBe(false);
    expect(t.isRelevant(atWarnTier(6))).toBe(true);
  });
});

packages/cli/src/services/tips/tipRegistry.ts

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@
88
* Contextual tip registry — defines tips, their conditions, and display rules.
99
*/
1010

11-
import { DEFAULT_TOKEN_LIMIT } from '@qwen-code/qwen-code-core';
11+
import {
12+
DEFAULT_TOKEN_LIMIT,
13+
type CompactionThresholds,
14+
} from '@qwen-code/qwen-code-core';
1215

1316
export type TipTrigger = 'startup' | 'post-response';
1417

@@ -18,6 +21,12 @@ export interface TipContext {
1821
sessionPromptCount: number;
1922
sessionCount: number;
2023
platform: string;
24+
/**
25+
* Three-tier auto-compaction thresholds, computed by callers via
26+
* `computeThresholds(contextWindowSize)`. Optional for backward compat;
27+
* context-* tip checks return false when missing.
28+
*/
29+
thresholds?: CompactionThresholds;
2130
}
2231

2332
export interface ContextualTip {
@@ -39,31 +48,34 @@ export const tipRegistry: ContextualTip[] = [
3948
{
4049
id: 'context-critical',
4150
content:
42-
'Context is almost full! Run /compress now or start /new to continue.',
51+
'Context near hard limit — auto-compact will force on next send. Consider /clear if you want to start fresh.',
4352
trigger: 'post-response',
44-
isRelevant: (ctx) => getContextUsagePercent(ctx) >= 95,
53+
isRelevant: (ctx) =>
54+
ctx.thresholds !== undefined &&
55+
ctx.lastPromptTokenCount >= ctx.thresholds.hard,
4556
cooldownPrompts: 3,
4657
priority: 100,
4758
},
4859
{
4960
id: 'context-high',
5061
content: 'Context is getting full. Use /compress to free up space.',
5162
trigger: 'post-response',
52-
isRelevant: (ctx) => {
53-
const pct = getContextUsagePercent(ctx);
54-
return pct >= 80 && pct < 95;
55-
},
63+
isRelevant: (ctx) =>
64+
ctx.thresholds !== undefined &&
65+
ctx.lastPromptTokenCount >= ctx.thresholds.auto &&
66+
ctx.lastPromptTokenCount < ctx.thresholds.hard,
5667
cooldownPrompts: 5,
5768
priority: 90,
5869
},
5970
{
6071
id: 'compress-intro',
6172
content: 'Long conversation? /compress summarizes history to free context.',
6273
trigger: 'post-response',
63-
isRelevant: (ctx) => {
64-
const pct = getContextUsagePercent(ctx);
65-
return pct >= 50 && pct < 80 && ctx.sessionPromptCount > 5;
66-
},
74+
isRelevant: (ctx) =>
75+
ctx.thresholds !== undefined &&
76+
ctx.lastPromptTokenCount >= ctx.thresholds.warn &&
77+
ctx.lastPromptTokenCount < ctx.thresholds.auto &&
78+
ctx.sessionPromptCount > 5,
6779
cooldownPrompts: 10,
6880
priority: 50,
6981
},

packages/cli/src/ui/commands/contextCommand.test.ts

Lines changed: 104 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,28 +6,59 @@
66

77
import { describe, it, expect, vi, beforeEach } from 'vitest';
88
import type { Config } from '@qwen-code/qwen-code-core';
9-
import { collectContextData } from './contextCommand.js';
9+
import {
10+
collectContextData,
11+
formatContextUsageText,
12+
} from './contextCommand.js';
1013

1114
// uiTelemetryService is consumed inside collectContextData via the
1215
// re-export from core; mock it here so the function returns deterministic
13-
// numbers without needing a real session.
16+
// numbers without needing a real session. The mock fns live inside
17+
// vi.hoisted so they are available when vi.mock's factory runs (vi.mock
18+
// is hoisted above module-level const declarations).
19+
const { mockGetLastPromptTokenCount, mockGetLastCachedContentTokenCount } =
20+
vi.hoisted(() => ({
21+
mockGetLastPromptTokenCount: vi.fn().mockReturnValue(0),
22+
mockGetLastCachedContentTokenCount: vi.fn().mockReturnValue(0),
23+
}));
24+
1425
vi.mock('@qwen-code/qwen-code-core', async (importOriginal) => {
1526
const original =
1627
await importOriginal<typeof import('@qwen-code/qwen-code-core')>();
1728
return {
1829
...original,
1930
uiTelemetryService: {
20-
getLastPromptTokenCount: vi.fn().mockReturnValue(0),
21-
getLastCachedContentTokenCount: vi.fn().mockReturnValue(0),
31+
getLastPromptTokenCount: mockGetLastPromptTokenCount,
32+
getLastCachedContentTokenCount: mockGetLastCachedContentTokenCount,
2233
},
2334
};
2435
});
2536

37+
/**
 * Builds a partial `Config` stub for these tests.
 *
 * Only `getModel`, `getContentGeneratorConfig`, `getToolRegistry`,
 * `getUserMemory`, `getSkillManager` and `getChatCompression` are provided,
 * each as a fresh `vi.fn()` with a fixed return value.
 *
 * @param contextWindowSize - Value reported through
 *   `getContentGeneratorConfig().contextWindowSize`; defaults to 32,000.
 * @returns The stub, cast to `Config`.
 */
function makeMockConfig(contextWindowSize = 32_000): Config {
  const generatorConfigStub = { contextWindowSize };
  const toolRegistryStub = {
    getAllTools: vi.fn().mockReturnValue([]),
    getFunctionDeclarations: vi.fn().mockReturnValue([]),
  };
  const skillManagerStub = {
    listSkills: vi.fn().mockResolvedValue([]),
  };

  const configStub = {
    getModel: vi.fn().mockReturnValue('test-model'),
    getContentGeneratorConfig: vi.fn().mockReturnValue(generatorConfigStub),
    getToolRegistry: vi.fn().mockReturnValue(toolRegistryStub),
    getUserMemory: vi.fn().mockReturnValue(''),
    getSkillManager: vi.fn().mockReturnValue(skillManagerStub),
    getChatCompression: vi.fn().mockReturnValue(undefined),
  };

  // The stub implements only a handful of getters, hence the double cast.
  return configStub as unknown as Config;
}
54+
2655
describe('collectContextData (contextCommand)', () => {
2756
let getFunctionDeclarationsSpy: ReturnType<typeof vi.fn>;
2857
let mockConfig: Config;
2958

3059
beforeEach(() => {
60+
mockGetLastPromptTokenCount.mockReturnValue(0);
61+
mockGetLastCachedContentTokenCount.mockReturnValue(0);
3162
getFunctionDeclarationsSpy = vi.fn().mockReturnValue([]);
3263
mockConfig = {
3364
getModel: vi.fn().mockReturnValue('test-model'),
@@ -62,3 +93,72 @@ describe('collectContextData (contextCommand)', () => {
6293
});
6394
});
6495
});
96+
97+
// Every case runs against a 200K window, for which the expected ladder is
// warn 147,000 / auto 167,000 / hard 177,000 / effectiveWindow 180,000.
describe('/context shows three-tier thresholds', () => {
  const WINDOW_SIZE = 200_000;

  // Arrange + act shared by all cases: report `promptTokens` as the last
  // prompt token count, then collect context data for the 200K mock config.
  async function collectWithUsage(promptTokens: number) {
    mockGetLastPromptTokenCount.mockReturnValue(promptTokens);
    return collectContextData(makeMockConfig(WINDOW_SIZE), false);
  }

  beforeEach(() => {
    mockGetLastPromptTokenCount.mockReturnValue(0);
    mockGetLastCachedContentTokenCount.mockReturnValue(0);
  });

  it('renders warn/auto/hard with the warn-tier marker when usage sits between warn and auto', async () => {
    // 150K lies between warn (147K) and auto (167K), so the tier is `warn`.
    const data = await collectWithUsage(150_000);
    const rendered = formatContextUsageText(data);

    expect(rendered).toMatch(/Effective window:\s+180,000/);
    expect(rendered).toMatch(/Warn threshold:\s+147,000/);
    expect(rendered).toMatch(/Auto threshold:\s+167,000/);
    expect(rendered).toMatch(/Hard threshold:\s+177,000/);
    expect(rendered).toMatch(/Current tier:\s+warn/);
    expect(data.breakdown.currentTier).toBe('warn');
    expect(data.breakdown.thresholds).toEqual({
      warn: 147_000,
      auto: 167_000,
      hard: 177_000,
      effectiveWindow: 180_000,
    });
  });

  it('classifies usage below the warn threshold as the safe tier', async () => {
    const data = await collectWithUsage(50_000);
    const rendered = formatContextUsageText(data);

    expect(rendered).toMatch(/Current tier:\s+safe/);
    expect(data.breakdown.currentTier).toBe('safe');
  });

  it('classifies usage at or above the hard threshold as the hard tier', async () => {
    const data = await collectWithUsage(180_000);
    expect(data.breakdown.currentTier).toBe('hard');
  });

  it('classifies usage between auto and hard as the auto tier', async () => {
    // 170K lies between auto (167K) and hard (177K), so the tier is `auto`.
    const data = await collectWithUsage(170_000);
    expect(data.breakdown.currentTier).toBe('auto');
    const rendered = formatContextUsageText(data);
    expect(rendered).toMatch(/Current tier:\s+auto/);
  });

  it('treats no-API-data sessions as safe and omits the threshold section from text', async () => {
    // A last prompt token count of 0 is the "no API data yet" case: the tier
    // is expected to be `safe` and the rendered text must not contain the
    // "Compaction thresholds" section.
    const data = await collectWithUsage(0);
    expect(data.breakdown.currentTier).toBe('safe');
    // The thresholds themselves are still expected on the breakdown even
    // though the text output leaves them out.
    expect(data.breakdown.thresholds.auto).toBe(167_000);
    const rendered = formatContextUsageText(data);
    expect(rendered).not.toMatch(/Compaction thresholds/);
  });
});

0 commit comments

Comments
 (0)