
Commit 6b361ff

🤖 fix: preserve assistant markdown whitespace (#3209)

**Summary**
- Preserve assistant text separators when normalizing history for provider requests, preventing prior streamed Markdown from being re-sent as clobbered headings like `## VerdictThis`.

**Background**
- Streaming stores assistant text as separate deltas. The request sanitizer dropped whitespace-only text deltas before later coalescing, so a prior sequence like `## Verdict`, `\n\n`, `This...` could become `## VerdictThis...` in future prompts.

**Implementation**
- Coalesce consecutive assistant text parts before filtering whitespace-only blocks.
- Continue filtering assistant messages that are genuinely empty or whitespace-only.
- Add regression coverage for heading separators surviving the full sanitizer + provider transform path.

**Validation**
- `bun test src/node/services/messagePipeline.test.ts`
- `make typecheck`
- `make static-check`
- post-rebase `make static-check`

**Risks**
- Low. The change is request-only and preserves existing filtering of whitespace-only assistant messages while avoiding destructive separator deletion between meaningful text chunks.

---

_Generated with `mux` • Model: `openai:gpt-5.5` • Thinking: `xhigh` • Cost: `$7.32`_

<!-- mux-attribution: model=openai:gpt-5.5 thinking=xhigh costs=7.32 -->
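A minimal standalone sketch of the ordering issue described under Background, using illustrative names only (this is not code from the diff below):

```ts
// Illustrative only: shows why filtering whitespace-only deltas before
// coalescing clobbers Markdown, while coalescing first preserves it.
type TextPart = { type: "text"; text: string };

const deltas: TextPart[] = [
  { type: "text", text: "## Verdict" },
  { type: "text", text: "\n\n" },
  { type: "text", text: "This is now **strong evidence**." },
];

// Old order: drop whitespace-only parts first, then join what is left.
const clobbered = deltas
  .filter((part) => part.text.trim().length > 0)
  .map((part) => part.text)
  .join("");
// => "## VerdictThis is now **strong evidence**."

// Fixed order: coalesce adjacent text parts first, then drop the block only
// if the whole thing is whitespace.
const joined = deltas.map((part) => part.text).join("");
const preserved = joined.trim().length > 0 ? joined : "";
// => "## Verdict\n\nThis is now **strong evidence**."

console.log(JSON.stringify(clobbered));
console.log(JSON.stringify(preserved));
```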
1 parent 2b735e0 commit 6b361ff

2 files changed

Lines changed: 93 additions & 7 deletions


src/node/services/messagePipeline.test.ts

Lines changed: 55 additions & 0 deletions

```diff
@@ -0,0 +1,55 @@
+import { describe, expect, it } from "bun:test";
+import type { AssistantModelMessage, ModelMessage } from "ai";
+
+import { transformModelMessages } from "@/browser/utils/messages/modelMessageTransform";
+import { sanitizeAssistantModelMessages } from "./messagePipeline";
+
+function isAssistantMessage(message: ModelMessage | undefined): message is AssistantModelMessage {
+  return message?.role === "assistant";
+}
+
+describe("sanitizeAssistantModelMessages", () => {
+  it("preserves whitespace-only separators before later text coalescing", () => {
+    const messages: ModelMessage[] = [
+      {
+        role: "assistant",
+        content: [
+          { type: "text", text: "## Verdict" },
+          { type: "text", text: "\n\n" },
+          { type: "text", text: "This is now **strong evidence**." },
+        ],
+      },
+    ];
+
+    const sanitized = sanitizeAssistantModelMessages(messages);
+    const transformed = transformModelMessages(sanitized, "openai");
+
+    expect(isAssistantMessage(sanitized[0])).toBe(true);
+    if (isAssistantMessage(sanitized[0])) {
+      expect(sanitized[0].content).toEqual([
+        { type: "text", text: "## Verdict\n\nThis is now **strong evidence**." },
+      ]);
+    }
+
+    expect(isAssistantMessage(transformed[0])).toBe(true);
+    if (isAssistantMessage(transformed[0])) {
+      expect(transformed[0].content).toEqual([
+        { type: "text", text: "## Verdict\n\nThis is now **strong evidence**." },
+      ]);
+    }
+  });
+
+  it("still filters assistant messages that contain only whitespace text", () => {
+    const messages: ModelMessage[] = [
+      {
+        role: "assistant",
+        content: [
+          { type: "text", text: "\n" },
+          { type: "text", text: "\t " },
+        ],
+      },
+    ];
+
+    expect(sanitizeAssistantModelMessages(messages)).toEqual([]);
+  });
+});
```

src/node/services/messagePipeline.ts

Lines changed: 38 additions & 7 deletions
```diff
@@ -8,7 +8,7 @@
  * All contextual data is passed via the options object.
  */

-import { convertToModelMessages, type ModelMessage } from "ai";
+import { convertToModelMessages, type AssistantModelMessage, type ModelMessage } from "ai";
 import { applyToolOutputRedaction } from "@/browser/utils/messages/applyToolOutputRedaction";
 import { sanitizeToolInputs } from "@/browser/utils/messages/sanitizeToolInput";
 import { inlineSvgAsTextForProvider } from "@/node/utils/messages/inlineSvgAsTextForProvider";
@@ -210,6 +210,39 @@ export async function prepareMessagesForProvider(
   return finalMessages;
 }

+type AssistantContentArray = Exclude<AssistantModelMessage["content"], string>;
+type AssistantContentPart = AssistantContentArray[number];
+
+function isTextPart(
+  part: AssistantContentPart
+): part is Extract<AssistantContentPart, { type: "text" }> {
+  return part.type === "text";
+}
+
+function normalizeAssistantContent(content: AssistantContentArray): AssistantContentArray {
+  let changed = false;
+  const coalesced: AssistantContentArray = [];
+
+  for (const part of content) {
+    const lastPart = coalesced.at(-1);
+    if (isTextPart(part) && lastPart && isTextPart(lastPart)) {
+      // Preserve provider-emitted whitespace separators before filtering whitespace-only
+      // blocks; dropping a standalone "\n\n" delta can corrupt headings in future prompts.
+      lastPart.text += part.text;
+      changed = true;
+      continue;
+    }
+
+    coalesced.push(isTextPart(part) ? { ...part } : part);
+  }
+
+  const filtered = coalesced.filter(
+    (part): part is AssistantContentPart => !isTextPart(part) || part.text.trim().length > 0
+  );
+
+  return changed || filtered.length !== content.length ? filtered : content;
+}
+
 /**
  * Self-healing: filter empty or whitespace-only assistant model messages.
  *
@@ -241,20 +274,18 @@ export function sanitizeAssistantModelMessages(
       return [];
     }

-    const filteredContent = msg.content.filter(
-      (part) => part.type !== "text" || part.text.trim().length > 0
-    );
+    const normalizedContent = normalizeAssistantContent(msg.content);

-    if (filteredContent.length === 0) {
+    if (normalizedContent.length === 0) {
       return [];
     }

     // Avoid mutating the original message (which can be reused in debug logging).
-    if (filteredContent.length === msg.content.length) {
+    if (normalizedContent === msg.content) {
       return [msg];
     }

-    return [{ ...msg, content: filteredContent }];
+    return [{ ...msg, content: normalizedContent }];
   });

   if (result.length < messages.length) {
```
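For orientation, a minimal sketch of the composed call order that the regression test exercises: sanitize the assistant history first, then run the provider transform. The helper name `buildProviderHistory` and the import paths are assumptions for illustration; the real call site inside `prepareMessagesForProvider` is not shown in this diff.

```ts
// Sketch only: mirrors the order used in the regression test above.
// buildProviderHistory and these import paths are illustrative assumptions,
// not the actual call site in prepareMessagesForProvider.
import type { ModelMessage } from "ai";

import { transformModelMessages } from "@/browser/utils/messages/modelMessageTransform";
import { sanitizeAssistantModelMessages } from "@/node/services/messagePipeline";

function buildProviderHistory(history: ModelMessage[]) {
  // Coalesce and filter assistant text parts before the provider-specific
  // transform, so whitespace separators survive into the request payload.
  const sanitized = sanitizeAssistantModelMessages(history);
  return transformModelMessages(sanitized, "openai");
}
```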
