Skip to content

Commit 88fd62d

Browse files
committed
fix: improve control char stripping
1 parent fa39c97 commit 88fd62d

2 files changed

Lines changed: 30 additions & 39 deletions

File tree

src/utils/ansi.test.ts

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,12 @@ describe("stripAnsi", () => {
1111
});
1212

1313
it("strips carriage returns and other control chars that can corrupt TUI rendering", () => {
14-
const output = stripAnsi("step 1\rstep 2\b\b done");
15-
expect(output).toBe("step 1step 2 done");
16-
for (const char of output) {
17-
const code = char.codePointAt(0) ?? 0;
18-
const isDisallowedC0 =
19-
(code >= 0x00 && code <= 0x08) ||
20-
(code >= 0x0b && code <= 0x1f) ||
21-
code === 0x7f;
22-
expect(isDisallowedC0).toBe(false);
23-
}
14+
expect(stripAnsi("step 1\rstep 2\b\b done")).toBe("step 1step 2 done");
15+
expect(stripAnsi("null\u0000byte")).toBe("nullbyte");
16+
expect(stripAnsi("delete\u007fchar")).toBe("deletechar");
17+
});
18+
19+
it("preserves tabs and newlines", () => {
20+
expect(stripAnsi("one\ttwo\nthree")).toBe("one\ttwo\nthree");
2421
});
2522
});

src/utils/ansi.ts

Lines changed: 23 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -7,45 +7,39 @@
77
* - APC sequences (\x1b_...\x07 or \x1b_...\x1b\\)
88
* - Remaining C0 control chars and DEL (U+007F), except tab/newline
99
*/
10+
const ESC = String.fromCodePoint(0x001b);
11+
const BEL = String.fromCodePoint(0x0007);
12+
13+
const ANSI_REPLACEMENTS: RegExp[] = [
14+
// CSI sequences: SGR, cursor movement, erase, scroll, etc.
15+
new RegExp(`${ESC}\\[[0-9;]*[A-Za-z]`, "gu"),
16+
// OSC sequences: ESC]...<BEL> or ESC]...<ESC>\\.
17+
new RegExp(`${ESC}\\][^${BEL}${ESC}]*(?:${BEL}|${ESC}\\\\)`, "gu"),
18+
// APC sequences: ESC_...<BEL> or ESC_...<ESC>\\.
19+
new RegExp(`${ESC}_[^${BEL}${ESC}]*(?:${BEL}|${ESC}\\\\)`, "gu"),
20+
];
21+
22+
// Strip C0 terminal control characters (plus DEL) that can corrupt TUI layout when
23+
// rendered back into pi, such as carriage return and backspace. Keep tabs and
24+
// newlines because logs use them as printable whitespace/line breaks.
25+
// biome-ignore lint/suspicious/noControlCharactersInRegex: this regex intentionally targets terminal control characters.
26+
const TERMINAL_CONTROL_CHARS = /[\u0000-\u0008\u000b-\u001f\u007f]/gu;
27+
1028
/**
1129
* Check if a string contains ANSI escape codes.
1230
*/
1331
export function hasAnsi(str: string): boolean {
14-
return str.includes(String.fromCodePoint(0x001b));
32+
return str.includes(ESC);
1533
}
1634

1735
export function stripAnsi(str: string): string {
18-
// ESC = \u001b, BEL = \u0007
19-
const ESC = String.fromCodePoint(0x001b);
20-
const BEL = String.fromCodePoint(0x0007);
21-
2236
let clean = str;
2337

2438
if (str.includes(ESC)) {
25-
// Strip all CSI sequences (ESC[...X where X is any letter)
26-
clean = clean.replace(new RegExp(`${ESC}\\[[0-9;]*[A-Za-z]`, "gu"), "");
27-
// Strip OSC sequences: ESC]...<BEL> or ESC]...<ESC>\\
28-
clean = clean.replace(
29-
new RegExp(`${ESC}\\][^${BEL}${ESC}]*(?:${BEL}|${ESC}\\\\)`, "gu"),
30-
"",
31-
);
32-
// Strip APC sequences: ESC_...<BEL> or ESC_...<ESC>\\ (used for cursor marker)
33-
clean = clean.replace(
34-
new RegExp(`${ESC}_[^${BEL}${ESC}]*(?:${BEL}|${ESC}\\\\)`, "gu"),
35-
"",
36-
);
39+
for (const pattern of ANSI_REPLACEMENTS) {
40+
clean = clean.replace(pattern, "");
41+
}
3742
}
3843

39-
// Strip terminal control chars like carriage return/backspace that can
40-
// corrupt TUI layout when rendered back into pi.
41-
return Array.from(clean)
42-
.filter((char) => {
43-
const code = char.codePointAt(0) ?? 0;
44-
const isDisallowedC0 =
45-
(code >= 0x00 && code <= 0x08) ||
46-
(code >= 0x0b && code <= 0x1f) ||
47-
code === 0x7f;
48-
return !isDisallowedC0;
49-
})
50-
.join("");
44+
return clean.replace(TERMINAL_CONTROL_CHARS, "");
5145
}

0 commit comments

Comments
 (0)