Skip to content

Commit e74be4f

Browse files
committed
fix(core): sanitize prompt inputs, propagate AbortSignal, lower distillation cap
Address Gemini Code Assist review feedback on #24736:
- Add sanitizePromptInput() to agentHistoryProvider, clusterSummarizer, and toolDistillationService to prevent prompt injection from untrusted message content and tool outputs.
- Thread abortSignal through distill() → performDistillation() → generateIntentSummary(), using AbortSignal.any() with a 15s timeout.
- Reduce MAX_DISTILLATION_SIZE from 1M to 64K chars.
- Fix broken import paths in chatCompressionService after rebase.
1 parent ae04768 commit e74be4f

4 files changed

Lines changed: 47 additions & 15 deletions

File tree

packages/core/src/context/agentHistoryProvider.ts

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,15 @@ import {
2020
normalizeFunctionResponse,
2121
} from './truncation.js';
2222

23+
/**
 * Serializes an untrusted value to JSON and neutralizes sequences that could
 * let it break out of the surrounding prompt text.
 *
 * The result is a single line: JSON newline escapes and raw line separators
 * become spaces, triple backticks become `'''`, angle brackets become HTML
 * entities, and remaining raw control characters are dropped.
 *
 * @param value - Any JSON-serializable value (e.g. message history).
 * @returns The sanitized JSON text, or `''` when the value has no JSON
 *   representation (`JSON.stringify` returned `undefined`).
 */
function sanitizePromptInput(value: unknown): string {
  const serialized = JSON.stringify(value) ?? '';
  return serialized
    // Consume JSON escapes as backslash+char pairs, left to right. This keeps
    // an escaped backslash followed by r/n (e.g. the path C:\new, serialized
    // as C:\\new) intact instead of misreading its tail as a newline escape.
    .replace(/\\([\s\S])/g, (pair: string, code: string) =>
      code === 'r' || code === 'n' ? ' ' : pair,
    )
    // JSON.stringify leaves U+2028/U+2029 unescaped, so strip raw separators too.
    .replace(/[\r\n\u2028\u2029]+/g, ' ')
    .replace(/```/g, "'''")
    .replace(/[<>]/g, (char) => (char === '<' ? '&lt;' : '&gt;'))
    .replace(/[\x00-\x1f\x7f]/g, ''); // eslint-disable-line no-control-regex
}
31+
2332
export class AgentHistoryProvider {
2433
// TODO(joshualitt): just pass the BaseLlmClient instead of the whole Config.
2534
constructor(
@@ -379,10 +388,10 @@ Distill these into a high-density Markdown block that orientates the agent on th
379388
- **Brevity:** Maximum 15 lines. No conversational preamble.
380389
381390
${hasPreviousSummary ? 'PREVIOUS SUMMARY AND TRUNCATED HISTORY:' : 'TRUNCATED HISTORY:'}
382-
${JSON.stringify(messagesToTruncate)}
391+
${sanitizePromptInput(messagesToTruncate)}
383392
384393
ACTIVE BRIDGE (LOOKAHEAD):
385-
${JSON.stringify(bridge)}`;
394+
${sanitizePromptInput(bridge)}`;
386395

387396
const summaryResponse = await this.config
388397
.getBaseLlmClient()

packages/core/src/context/chatCompressionService.ts

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@ import {
3333
PREVIEW_GEMINI_3_1_FLASH_LITE_MODEL,
3434
} from '../config/models.js';
3535
import { PreCompressTrigger } from '../hooks/types.js';
36-
import { ContextWindow } from './contextWindow.js';
37-
import { TFIDFEmbedder } from './embeddingService.js';
38-
import { ClusterSummarizer } from './clusterSummarizer.js';
36+
import { ContextWindow } from '../services/contextWindow.js';
37+
import { TFIDFEmbedder } from '../services/embeddingService.js';
38+
import { ClusterSummarizer } from '../services/clusterSummarizer.js';
3939

4040
/**
4141
* Default threshold for compression token count as a fraction of the model's
@@ -162,12 +162,12 @@ async function truncateHistoryToBudget(
162162
} else if (responseObj && typeof responseObj === 'object') {
163163
if (
164164
'output' in responseObj &&
165-
typeof responseObj['output'] === 'string'
165+
typeof responseObj['output'] === 'string' // eslint-disable-line no-restricted-syntax
166166
) {
167167
contentStr = responseObj['output'];
168168
} else if (
169169
'content' in responseObj &&
170-
typeof responseObj['content'] === 'string'
170+
typeof responseObj['content'] === 'string' // eslint-disable-line no-restricted-syntax
171171
) {
172172
contentStr = responseObj['content'];
173173
} else {

packages/core/src/context/toolDistillationService.ts

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,18 @@ import {
2727
normalizeFunctionResponse,
2828
} from './truncation.js';
2929

30+
/**
 * Neutralizes sequences in untrusted text that could let it break out of the
 * surrounding prompt.
 *
 * The result is a single line: textual `\r` / `\n` escapes and raw line
 * separators become spaces, triple backticks become `'''`, angle brackets
 * become HTML entities, and remaining raw control characters are dropped.
 *
 * @param value - Untrusted text (e.g. stringified tool output).
 * @returns The sanitized text.
 */
function sanitizePromptInput(value: string): string {
  return value
    // Read backslash sequences as backslash+char pairs, left to right, so a
    // doubled backslash followed by r/n (e.g. \\network or an escaped path)
    // is preserved. A lone backslash before r/n is still treated as an
    // escape and replaced, since it cannot be told apart from one.
    .replace(/\\([\s\S])/g, (pair: string, code: string) =>
      code === 'r' || code === 'n' ? ' ' : pair,
    )
    .replace(/[\r\n\u2028\u2029]+/g, ' ')
    .replace(/```/g, "'''")
    .replace(/[<>]/g, (char) => (char === '<' ? '&lt;' : '&gt;'))
    .replace(/[\x00-\x1f\x7f]/g, ''); // eslint-disable-line no-control-regex
}
38+
3039
// Skip structural map generation for outputs larger than this threshold (in characters)
3140
// as it consumes excessive tokens and may not be representative of the full content.
32-
const MAX_DISTILLATION_SIZE = 1_000_000;
41+
const MAX_DISTILLATION_SIZE = 64_000;
3342

3443
export interface DistilledToolOutput {
3544
truncatedContent: PartListUnion;
@@ -53,6 +62,7 @@ export class ToolOutputDistillationService {
5362
toolName: string,
5463
callId: string,
5564
content: PartListUnion,
65+
abortSignal?: AbortSignal,
5666
): Promise<DistilledToolOutput> {
5767
// Explicitly bypass escape hatches that natively handle large outputs
5868
if (this.isExemptFromDistillation(toolName)) {
@@ -74,6 +84,7 @@ export class ToolOutputDistillationService {
7484
content,
7585
originalContentLength,
7686
thresholdChars,
87+
abortSignal,
7788
);
7889
}
7990

@@ -119,6 +130,7 @@ export class ToolOutputDistillationService {
119130
content: PartListUnion,
120131
originalContentLength: number,
121132
threshold: number,
133+
abortSignal?: AbortSignal,
122134
): Promise<DistilledToolOutput> {
123135
const stringifiedContent = this.stringifyContent(content);
124136

@@ -145,6 +157,7 @@ export class ToolOutputDistillationService {
145157
toolName,
146158
stringifiedContent,
147159
Math.floor(MAX_DISTILLATION_SIZE),
160+
abortSignal,
148161
);
149162

150163
if (summary) {
@@ -254,10 +267,13 @@ export class ToolOutputDistillationService {
254267
toolName: string,
255268
stringifiedContent: string,
256269
maxPreviewLen: number,
270+
abortSignal?: AbortSignal,
257271
): Promise<string | undefined> {
258272
try {
259-
const controller = new AbortController();
260-
const timeoutId = setTimeout(() => controller.abort(), 15000); // 15s timeout
273+
const timeoutSignal = AbortSignal.timeout(15000);
274+
const summaryAbortSignal = abortSignal
275+
? AbortSignal.any([abortSignal, timeoutSignal])
276+
: timeoutSignal;
261277

262278
const promptText = `The following output from the tool '${toolName}' is large and has been truncated. Extract the most critical factual information from this output so the main agent doesn't lose context.
263279
@@ -269,17 +285,15 @@ Focus strictly on concrete data points:
269285
Do not philosophize about the strategic intent. Keep the extraction under 10 lines and use exact quotes where helpful.
270286
271287
Output to summarize:
272-
${stringifiedContent.slice(0, maxPreviewLen)}...`;
288+
${sanitizePromptInput(stringifiedContent.slice(0, maxPreviewLen))}...`;
273289

274290
const summaryResponse = await this.geminiClient.generateContent(
275291
{ model: 'agent-history-provider-summarizer' },
276292
[{ role: 'user', parts: [{ text: promptText }] }],
277-
controller.signal,
293+
summaryAbortSignal,
278294
LlmRole.UTILITY_COMPRESSOR,
279295
);
280296

281-
clearTimeout(timeoutId);
282-
283297
return summaryResponse.candidates?.[0]?.content?.parts?.[0]?.text;
284298
} catch (e) {
285299
// Fail gracefully, summarization is a progressive enhancement

packages/core/src/services/clusterSummarizer.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,15 @@ import { getResponseText } from '../utils/partUtils.js';
99
import { LlmRole } from '../telemetry/types.js';
1010
import type { Summarizer } from './contextWindow.js';
1111

12+
/**
 * Flattens and defuses one untrusted message before it is embedded in the
 * summarization prompt.
 *
 * Textual `\r` / `\n` escapes and raw line separators become spaces, triple
 * backticks become `'''`, angle brackets become HTML entities, and remaining
 * raw control characters are removed, so the output is always a single line.
 *
 * @param value - Untrusted message text.
 * @returns The sanitized, single-line text.
 */
function sanitizePromptInput(value: string): string {
  return value
    // Scan backslash sequences pairwise so that a doubled backslash followed
    // by r/n (e.g. \\new in an escaped file path) is kept verbatim. A lone
    // backslash before r/n is still replaced, as it is indistinguishable
    // from a newline escape.
    .replace(/\\([\s\S])/g, (pair: string, code: string) =>
      code === 'r' || code === 'n' ? ' ' : pair,
    )
    .replace(/[\r\n\u2028\u2029]+/g, ' ')
    .replace(/```/g, "'''")
    .replace(/[<>]/g, (char) => (char === '<' ? '&lt;' : '&gt;'))
    .replace(/[\x00-\x1f\x7f]/g, ''); // eslint-disable-line no-control-regex
}
20+
1221
/**
1322
* Cluster summarizer using BaseLlmClient for LLM-generated summaries.
1423
*
@@ -34,7 +43,7 @@ export class ClusterSummarizer implements Summarizer {
3443
role: 'user',
3544
parts: [
3645
{
37-
text: `Summarize the following conversation messages into a concise, information-dense paragraph. Preserve all specific technical details, file paths, tool results, variable names, and user constraints.\n\nMessages:\n${messages.map((m, i) => `[${i + 1}] ${m}`).join('\n\n')}`,
46+
text: `Summarize the following conversation messages into a concise, information-dense paragraph. Preserve all specific technical details, file paths, tool results, variable names, and user constraints.\n\nMessages:\n${messages.map((m, i) => `[${i + 1}] ${sanitizePromptInput(m)}`).join('\n')}`,
3847
},
3948
],
4049
},

0 commit comments

Comments
 (0)