Skip to content

Commit 2d9c19d

Browse files
scidomino authored and gemini-cli-robot committed
Change default compression threshold (#12306)
# Conflicts: # packages/core/src/services/chatCompressionService.ts
1 parent 5213d9f commit 2d9c19d

1 file changed

Lines changed: 218 additions & 0 deletions

File tree

Lines changed: 218 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,218 @@
1+
/**
2+
* @license
3+
* Copyright 2025 Google LLC
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
import type { Content } from '@google/genai';
8+
import type { Config } from '../config/config.js';
9+
import type { GeminiChat } from '../core/geminiChat.js';
10+
import { type ChatCompressionInfo, CompressionStatus } from '../core/turn.js';
11+
import { uiTelemetryService } from '../telemetry/uiTelemetry.js';
12+
import { tokenLimit } from '../core/tokenLimits.js';
13+
import { getCompressionPrompt } from '../core/prompts.js';
14+
import { getResponseText } from '../utils/partUtils.js';
15+
import { logChatCompression } from '../telemetry/loggers.js';
16+
import { makeChatCompressionEvent } from '../telemetry/types.js';
17+
import { getInitialChatHistory } from '../utils/environmentContext.js';
18+
19+
/**
 * Fallback compression trigger, expressed as a fraction of the model's token
 * limit. It is used when the chat-compression configuration does not supply
 * its own `contextPercentageThreshold`: an unforced compression is skipped
 * while the last prompt's token count is below this fraction of the limit.
 */
export const DEFAULT_COMPRESSION_TOKEN_THRESHOLD = 0.2;

/**
 * Share of the most recent chat history that survives compression untouched.
 * With a value of 0.3, roughly the newest 30% of the history is kept and the
 * older remainder is handed to the summarizer.
 */
export const COMPRESSION_PRESERVE_THRESHOLD = 0.3;
30+
31+
/**
 * Picks where to cut a chat history in two: everything before the returned
 * index is meant to be compressed, everything from it onwards is kept.
 *
 * Sizes are measured as the length of each entry's JSON serialization. A cut
 * is only allowed immediately before a `user` entry that carries no
 * `functionResponse` part. The first such entry reached once at least
 * `fraction` of the total size lies before it wins.
 *
 * If no such entry exists, the whole history may be compressed (the result is
 * `contents.length`) provided the final entry is a `model` entry without a
 * `functionCall` part. Otherwise the latest allowed cut seen so far is
 * returned, which is 0 (compress nothing) when there was none.
 *
 * Exported for testing purposes.
 *
 * @param contents The chat history, oldest entry first.
 * @param fraction Portion of the history (by serialized size) to compress;
 *   must lie strictly between 0 and 1.
 * @returns Index of the oldest entry to keep, in the range
 *   `0..contents.length`.
 * @throws Error if `fraction` is `<= 0` or `>= 1`.
 */
export function findCompressSplitPoint(
  contents: Content[],
  fraction: number,
): number {
  if (fraction <= 0 || fraction >= 1) {
    throw new Error('Fraction must be between 0 and 1');
  }

  // Serialized size of every entry, measured once up front.
  const sizes = contents.map((entry) => JSON.stringify(entry).length);
  let totalSize = 0;
  for (const size of sizes) {
    totalSize += size;
  }
  const sizeBudget = totalSize * fraction;

  let fallbackIndex = 0; // Cutting at 0 compresses nothing and is always legal.
  let sizeBefore = 0; // Combined size of all entries preceding the current one.
  for (const [index, entry] of contents.entries()) {
    const isPlainUserTurn =
      entry.role === 'user' &&
      !entry.parts?.some((part) => !!part.functionResponse);

    if (isPlainUserTurn) {
      if (sizeBefore >= sizeBudget) {
        return index;
      }
      fallbackIndex = index;
    }
    sizeBefore += sizes[index];
  }

  // No legal cut lies beyond the budget. Compressing everything is only
  // acceptable when the history ends on a model entry with no function call.
  const finalEntry = contents[contents.length - 1];
  const endsOnPlainModelTurn =
    finalEntry?.role === 'model' &&
    !finalEntry.parts?.some((part) => part.functionCall);

  return endsOnPlainModelTurn ? contents.length : fallbackIndex;
}
78+
79+
/**
 * Replaces the older part of a chat's history with a model-written summary
 * once the conversation grows past a configured share of the model's token
 * limit (or whenever compression is explicitly forced).
 */
export class ChatCompressionService {
  /**
   * Attempts to compress the curated history of `chat`.
   *
   * Nothing is done (status `NOOP`, `newHistory: null`) when the history is
   * empty, when an earlier attempt failed and this one is not forced, when an
   * unforced call is still under the token threshold, or when no part of the
   * history can be split off for compression.
   *
   * Otherwise the older part of the history is summarized by the model and a
   * replacement history is built from the summary, a fixed model
   * acknowledgement and the preserved recent entries. If the estimated token
   * count of the result exceeds the original count, the attempt is reported
   * as `COMPRESSION_FAILED_INFLATED_TOKEN_COUNT` and no history is returned.
   *
   * @param chat Chat whose curated history is compressed.
   * @param promptId Identifier forwarded with the summarization request.
   * @param force When true, bypasses the threshold and prior-failure checks.
   * @param model Model used for the token limit and for summarization.
   * @param config Source of compression settings and the content generator.
   * @param hasFailedCompressionAttempt Whether a previous attempt failed.
   * @returns The replacement history (or `null` when unchanged or failed)
   *   together with before/after token counts and the resulting status.
   */
  async compress(
    chat: GeminiChat,
    promptId: string,
    force: boolean,
    model: string,
    config: Config,
    hasFailedCompressionAttempt: boolean,
  ): Promise<{ newHistory: Content[] | null; info: ChatCompressionInfo }> {
    // Every "left untouched" outcome has the same shape; build it in one place.
    const unchanged = (
      tokensBefore: number,
      tokensAfter: number,
    ): { newHistory: Content[] | null; info: ChatCompressionInfo } => ({
      newHistory: null,
      info: {
        originalTokenCount: tokensBefore,
        newTokenCount: tokensAfter,
        compressionStatus: CompressionStatus.NOOP,
      },
    });

    const history = chat.getHistory(true);

    // An empty history is never compressed, not even when forced. A previous
    // failure also blocks further attempts unless the caller forces one.
    const blockedByEarlierFailure = hasFailedCompressionAttempt && !force;
    if (history.length === 0 || blockedByEarlierFailure) {
      return unchanged(0, 0);
    }

    const originalTokenCount = uiTelemetryService.getLastPromptTokenCount();

    // Unforced calls only proceed once the threshold has been reached.
    if (!force) {
      const thresholdFraction =
        config.getChatCompression()?.contextPercentageThreshold ??
        DEFAULT_COMPRESSION_TOKEN_THRESHOLD;
      const tokenBudget = thresholdFraction * tokenLimit(model);
      if (originalTokenCount < tokenBudget) {
        return unchanged(originalTokenCount, originalTokenCount);
      }
    }

    // Divide the history into the part to summarize and the part to keep.
    const splitPoint = findCompressSplitPoint(
      history,
      1 - COMPRESSION_PRESERVE_THRESHOLD,
    );
    const olderEntries = history.slice(0, splitPoint);
    const recentEntries = history.slice(splitPoint);

    if (olderEntries.length === 0) {
      return unchanged(originalTokenCount, originalTokenCount);
    }

    // Ask the model to summarize the older entries.
    const summaryRequest: Content = {
      role: 'user',
      parts: [
        {
          text: 'First, reason in your scratchpad. Then, generate the <state_snapshot>.',
        },
      ],
    };
    const summaryResponse = await config.getContentGenerator().generateContent(
      {
        model,
        contents: [...olderEntries, summaryRequest],
        config: {
          systemInstruction: { text: getCompressionPrompt() },
        },
      },
      promptId,
    );
    const summary = getResponseText(summaryResponse) ?? '';

    // Summary + acknowledgement take the place of the older entries.
    const extraHistory: Content[] = [
      { role: 'user', parts: [{ text: summary }] },
      {
        role: 'model',
        parts: [{ text: 'Got it. Thanks for the additional context!' }],
      },
      ...recentEntries,
    ];

    // Use a shared utility to construct the initial history for an accurate
    // token count.
    const fullNewHistory = await getInitialChatHistory(config, extraHistory);

    // Estimate the token count, assuming 1 token ≈ 4 serialized characters.
    let serializedLength = 0;
    for (const content of fullNewHistory) {
      serializedLength += JSON.stringify(content).length;
    }
    const newTokenCount = Math.floor(serializedLength / 4);

    logChatCompression(
      config,
      makeChatCompressionEvent({
        tokens_before: originalTokenCount,
        tokens_after: newTokenCount,
      }),
    );

    // A "compressed" history that is larger than the original is rejected.
    if (newTokenCount > originalTokenCount) {
      return {
        newHistory: null,
        info: {
          originalTokenCount,
          newTokenCount,
          compressionStatus:
            CompressionStatus.COMPRESSION_FAILED_INFLATED_TOKEN_COUNT,
        },
      };
    }

    uiTelemetryService.setLastPromptTokenCount(newTokenCount);
    return {
      newHistory: extraHistory,
      info: {
        originalTokenCount,
        newTokenCount,
        compressionStatus: CompressionStatus.COMPRESSED,
      },
    };
  }
}

0 commit comments

Comments
 (0)