Skip to content

Commit b2230cf

Browse files
committed
fix(chat): prevent panic on UTF-8 boundary when rendering response
When the AI response contains multi-byte characters (e.g. non-ASCII text adjacent to triple backticks), the byte offset accumulated via parsed.offset_from() can land in the middle of a multi-byte UTF-8 character, i.e. not on a character boundary. The subsequent &buf[offset..] slice then panics at runtime.

The same pattern existed in two places:
- crates/chat-cli/src/cli/chat/mod.rs (streaming response loop)
- crates/chat-cli/src/cli/chat/parse.rs (validate! test macro loop)

Replace the direct slice with .get(offset..), which returns None instead of panicking, and break out of the loop gracefully.

Add a regression test with Indonesian, Chinese, and emoji inputs adjacent to triple backticks to verify that no panic occurs.

Fixes #3715
1 parent e14ea18 commit b2230cf

2 files changed

Lines changed: 45 additions & 2 deletions

File tree

crates/chat-cli/src/cli/chat/mod.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3099,7 +3099,11 @@ impl ChatSession {
30993099

31003100
// Print the response for normal cases
31013101
loop {
3102-
let input = Partial::new(&buf[offset..]);
3102+
// Use `get` to avoid panicking if `offset` does not land on a UTF-8 character boundary,
3103+
// which can happen when the response contains multi-byte characters (e.g.
3104+
// non-ASCII text adjacent to triple backticks). See: #3715
3105+
let Some(slice) = buf.get(offset..) else { break };
3106+
let input = Partial::new(slice);
31033107
if self.stdout.should_send_structured_event {
31043108
match interpret_markdown(input, &mut temp_buf, &mut state) {
31053109
Ok(parsed) => {

crates/chat-cli/src/cli/chat/parse.rs

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -672,7 +672,8 @@ mod tests {
672672
let mut offset = 0;
673673

674674
loop {
675-
let input = Partial::new(&input[offset..]);
675+
let Some(slice) = input.get(offset..) else { break };
676+
let input = Partial::new(slice);
676677
match interpret_markdown(input, &mut presult, &mut state) {
677678
Ok(parsed) => {
678679
offset += parsed.offset_from(&input);
@@ -824,4 +825,42 @@ mod tests {
824825
[style::Print("+ % @ . ?")],
825826
true
826827
);
828+
829+
/// Regression test for #3715: multi-byte UTF-8 characters adjacent to triple backticks
830+
/// must not cause a panic from byte-index slicing.
831+
#[test]
832+
fn multibyte_utf8_adjacent_to_triple_backticks_does_not_panic() {
833+
// Indonesian / non-ASCII text followed by a code fence — the combination that
834+
// triggered "byte index N is out of bounds" in the wild.
835+
let inputs = [
836+
"Benar. Ganti dengan deskripsi langsung. Gunakan ini:\n\n```\ncontoh kode\n```",
837+
"移除 eagleeye-ec-databases 任務狀況確認\n```bash\necho ok\n```",
838+
"emoji 🎉 before ``` fence ```",
839+
];
840+
841+
for raw in inputs {
842+
let mut input = raw.to_owned();
843+
input.push_str(" "); // simulate incomplete stream sentinel
844+
845+
let mut state = ParseState::new(Some(80), Some(false));
846+
let mut out = vec![];
847+
let mut offset = 0;
848+
849+
loop {
850+
let Some(slice) = input.get(offset..) else { break };
851+
let partial = Partial::new(slice);
852+
match interpret_markdown(partial, &mut out, &mut state) {
853+
Ok(parsed) => {
854+
offset += parsed.offset_from(&partial);
855+
state.newline = state.set_newline;
856+
state.set_newline = false;
857+
},
858+
Err(err) => match err.into_inner() {
859+
Some(err) => panic!("parse error on input {:?}: {err}", raw),
860+
None => break,
861+
},
862+
}
863+
}
864+
}
865+
}
827866
}

0 commit comments

Comments
 (0)