Skip to content

Commit 8d3a166

Browse files
authored
fix: implement proper unicode-aware word wrapping for text overflow (#97)
- Add paragraph word wrapping in markdown renderer (handlers.rs)
- Track current line width for proper wrap decisions
- Replace chars().count() with UnicodeWidthStr::width() in text_utils.rs
- Replace chars().count() with UnicodeWidthStr::width() in wrapping.rs
- Add split_at_visual_width() for proper CJK/emoji handling
- Update tests to verify visual width calculations
1 parent f8c6f03 commit 8d3a166

4 files changed

Lines changed: 247 additions & 35 deletions

File tree

src/cortex-core/src/markdown/renderer/handlers.rs

Lines changed: 95 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
use pulldown_cmark::{CodeBlockKind, HeadingLevel};
44
use ratatui::style::Style;
55
use ratatui::text::{Line, Span};
6-
use unicode_width::UnicodeWidthStr;
6+
use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};
77

88
use crate::markdown::inline::{render_blockquote_prefix, render_hr};
99
use crate::markdown::list::ListContext;
@@ -20,6 +20,7 @@ impl<'a> RenderState<'a> {
2020
/// Begins a paragraph block.
///
/// Calls `add_blank_line_if_needed()`, marks the state as being inside a
/// paragraph, and resets the tracked line width to 0 so that wrapping of
/// the paragraph's text starts from an empty line.
pub(super) fn start_paragraph(&mut self) {
2121
self.add_blank_line_if_needed();
2222
self.in_paragraph = true;
23+
self.current_line_width = 0;
2324
}
2425

2526
pub(super) fn start_heading(&mut self, level: HeadingLevel) {
@@ -116,6 +117,7 @@ impl<'a> RenderState<'a> {
116117
self.flush_line();
117118
self.in_paragraph = false;
118119
self.needs_newline = true;
120+
self.current_line_width = 0;
119121
}
120122

121123
pub(super) fn end_heading(&mut self) {
@@ -338,11 +340,101 @@ impl<'a> RenderState<'a> {
338340
return;
339341
}
340342

341-
// Regular text
343+
// Paragraph text - apply word wrapping
344+
if self.in_paragraph {
345+
self.wrap_paragraph_text(text);
346+
return;
347+
}
348+
349+
// Regular text (headings, etc.) - no wrapping needed
342350
let style = self.current_style();
343351
self.push_span(Span::styled(text.to_string(), style));
344352
}
345353

354+
/// Wraps paragraph text to fit within the renderer's width.
355+
///
356+
/// Handles word boundaries, preserves inline styles across line breaks,
357+
/// and properly measures unicode character widths (CJK characters, emoji).
358+
fn wrap_paragraph_text(&mut self, text: &str) {
359+
let style = self.current_style();
360+
let max_width = self.renderer.width as usize;
361+
362+
// Account for blockquote prefix width
363+
let prefix_width = if self.blockquote_depth > 0 {
364+
// Each blockquote level adds "│ " (2 chars)
365+
self.blockquote_depth * 2
366+
} else {
367+
0
368+
};
369+
370+
let available_width = max_width.saturating_sub(prefix_width);
371+
if available_width == 0 {
372+
// No room for text
373+
return;
374+
}
375+
376+
// Process text word by word
377+
for word in text.split_inclusive(|c: char| c.is_whitespace()) {
378+
let word_width = UnicodeWidthStr::width(word);
379+
380+
// Check if word fits on current line
381+
if self.current_line_width + word_width <= available_width {
382+
// Word fits - add it
383+
self.push_span(Span::styled(word.to_string(), style));
384+
self.current_line_width += word_width;
385+
} else if self.current_line_width == 0 {
386+
// First word on line but too long - break it
387+
self.wrap_long_word(word, style, available_width);
388+
} else {
389+
// Word doesn't fit - start new line
390+
self.flush_line();
391+
self.current_line_width = 0;
392+
393+
// Trim leading whitespace from word when starting new line
394+
let trimmed = word.trim_start();
395+
let trimmed_width = UnicodeWidthStr::width(trimmed);
396+
397+
if trimmed_width <= available_width {
398+
self.push_span(Span::styled(trimmed.to_string(), style));
399+
self.current_line_width = trimmed_width;
400+
} else {
401+
// Even trimmed word is too long - break it
402+
self.wrap_long_word(trimmed, style, available_width);
403+
}
404+
}
405+
}
406+
}
407+
408+
/// Wraps a word that is too long to fit on a single line.
409+
///
410+
/// Breaks the word at visual width boundaries, handling multi-width
411+
/// unicode characters properly.
412+
fn wrap_long_word(&mut self, word: &str, style: Style, max_width: usize) {
413+
let mut current_chunk = String::new();
414+
let mut current_width = 0;
415+
416+
for ch in word.chars() {
417+
let ch_width = UnicodeWidthChar::width(ch).unwrap_or(0);
418+
419+
if current_width + ch_width > max_width && !current_chunk.is_empty() {
420+
// Flush current chunk as a line
421+
self.push_span(Span::styled(current_chunk.clone(), style));
422+
self.flush_line();
423+
current_chunk.clear();
424+
current_width = 0;
425+
}
426+
427+
current_chunk.push(ch);
428+
current_width += ch_width;
429+
}
430+
431+
// Handle remaining characters
432+
if !current_chunk.is_empty() {
433+
self.push_span(Span::styled(current_chunk, style));
434+
self.current_line_width = current_width;
435+
}
436+
}
437+
346438
pub(super) fn handle_code(&mut self, code: &str) {
347439
if self.table_builder.is_some() {
348440
self.current_cell.push_str(code);
@@ -423,6 +515,7 @@ impl<'a> RenderState<'a> {
423515
let mut spans = self.get_blockquote_prefix();
424516
spans.extend(self.current_spans.drain(..));
425517
self.lines.push(Line::from(spans));
518+
self.current_line_width = 0;
426519
}
427520

428521
/// Push a span to the current line.

src/cortex-core/src/markdown/renderer/state.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ pub(super) struct RenderState<'a> {
8787
pub(super) in_paragraph: bool,
8888
/// Whether we need a blank line before the next block.
8989
pub(super) needs_newline: bool,
90+
/// Current line width for paragraph wrapping (visual columns).
91+
pub(super) current_line_width: usize,
9092

9193
// Heading state
9294
/// Current heading level (if in a heading).
@@ -121,6 +123,7 @@ impl<'a> RenderState<'a> {
121123
current_cell: String::new(),
122124
in_paragraph: false,
123125
needs_newline: false,
126+
current_line_width: 0,
124127
current_heading_level: None,
125128
current_link_url: None,
126129
current_list_item: Vec::new(),

src/cortex-core/src/widgets/chat/wrapping.rs

Lines changed: 74 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
//! Text wrapping utilities.
22
//!
33
//! Provides functions for wrapping text to fit within specified widths.
4+
//! Uses unicode-width for proper handling of CJK characters and emoji.
45
5-
/// Wraps text to fit within the specified width.
6+
use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};
7+
8+
/// Wraps text to fit within the specified width (measured in visual columns).
69
pub fn wrap_text(text: &str, max_width: usize) -> Vec<String> {
710
if max_width == 0 {
811
return vec![text.to_string()];
@@ -19,15 +22,15 @@ pub fn wrap_text(text: &str, max_width: usize) -> Vec<String> {
1922
let mut current_width = 0;
2023

2124
for word in line.split_whitespace() {
22-
let word_width = word.chars().count();
25+
let word_width = UnicodeWidthStr::width(word);
2326

2427
if current_width == 0 {
2528
// First word on the line
2629
if word_width > max_width {
27-
// Word is longer than max width, split it
30+
// Word is longer than max width, split it by visual width
2831
let mut remaining = word;
2932
while !remaining.is_empty() {
30-
let (chunk, rest) = split_at_char_boundary(remaining, max_width);
33+
let (chunk, rest) = split_at_visual_width(remaining, max_width);
3134
lines.push(chunk.to_string());
3235
remaining = rest;
3336
}
@@ -36,21 +39,21 @@ pub fn wrap_text(text: &str, max_width: usize) -> Vec<String> {
3639
current_width = word_width;
3740
}
3841
} else if current_width + 1 + word_width <= max_width {
39-
// Word fits on current line
42+
// Word fits on current line (1 for space)
4043
current_line.push(' ');
4144
current_line.push_str(word);
4245
current_width += 1 + word_width;
4346
} else {
4447
// Word doesn't fit, start new line
4548
lines.push(std::mem::take(&mut current_line));
4649
if word_width > max_width {
47-
// Word is longer than max width, split it
50+
// Word is longer than max width, split it by visual width
4851
let mut remaining = word;
4952
while !remaining.is_empty() {
50-
let (chunk, rest) = split_at_char_boundary(remaining, max_width);
53+
let (chunk, rest) = split_at_visual_width(remaining, max_width);
5154
if rest.is_empty() {
5255
current_line = chunk.to_string();
53-
current_width = chunk.chars().count();
56+
current_width = UnicodeWidthStr::width(chunk);
5457
} else {
5558
lines.push(chunk.to_string());
5659
}
@@ -75,7 +78,35 @@ pub fn wrap_text(text: &str, max_width: usize) -> Vec<String> {
7578
lines
7679
}
7780

81+
/// Splits a string at a visual width boundary, returning (prefix, suffix).
82+
///
83+
/// Uses unicode-width to properly handle multi-width characters like CJK
84+
/// characters (2 columns) and emoji.
85+
pub fn split_at_visual_width(s: &str, max_width: usize) -> (&str, &str) {
86+
if max_width == 0 {
87+
return ("", s);
88+
}
89+
90+
let mut visual_width = 0;
91+
let mut byte_idx = s.len();
92+
93+
for (idx, ch) in s.char_indices() {
94+
let ch_width = UnicodeWidthChar::width(ch).unwrap_or(0);
95+
if visual_width + ch_width > max_width {
96+
byte_idx = idx;
97+
break;
98+
}
99+
visual_width += ch_width;
100+
}
101+
102+
s.split_at(byte_idx)
103+
}
104+
78105
/// Splits a string at a character boundary, returning (prefix, suffix).
106+
///
107+
/// Note: This function splits by character count, not visual width.
108+
/// For visual width-aware splitting, use `split_at_visual_width` instead.
109+
#[allow(dead_code)]
79110
pub fn split_at_char_boundary(s: &str, max_chars: usize) -> (&str, &str) {
80111
if max_chars == 0 {
81112
return ("", s);
@@ -127,7 +158,8 @@ mod tests {
127158
let wrapped = wrap_text(text, 10);
128159
assert!(wrapped.len() > 1);
129160
for line in &wrapped {
130-
assert!(line.chars().count() <= 10);
161+
// Visual width should be <= max_width
162+
assert!(UnicodeWidthStr::width(line.as_str()) <= 10);
131163
}
132164
}
133165

@@ -148,6 +180,28 @@ mod tests {
148180
assert_eq!(wrapped[2], "Line 3");
149181
}
150182

183+
#[test]
fn test_split_at_visual_width() {
    // (input, max visual columns, expected prefix, expected suffix)
    let cases = [
        // ASCII - 1 column per char
        ("hello", 3, "hel", "lo"),
        ("hello", 10, "hello", ""),
        // CJK - 2 columns per char, so "日本語" is 6 visual columns wide
        ("日本語", 4, "日本", "語"),
        ("日本語", 2, "日", "本語"),
    ];

    for (input, max_width, prefix, suffix) in cases {
        assert_eq!(split_at_visual_width(input, max_width), (prefix, suffix));
    }
}
204+
151205
#[test]
152206
fn test_split_at_char_boundary() {
153207
let (a, b) = split_at_char_boundary("hello", 3);
@@ -158,8 +212,19 @@ mod tests {
158212
assert_eq!(a, "hello");
159213
assert_eq!(b, "");
160214

215+
// Character-based split: 2 chars regardless of visual width
161216
let (a, b) = split_at_char_boundary("日本語", 2);
162217
assert_eq!(a, "日本");
163218
assert_eq!(b, "語");
164219
}
220+
221+
#[test]
fn test_wrap_cjk_text() {
    // "日本語テスト" is 12 visual columns wide, so a limit of 6 columns
    // yields exactly two lines of three double-width characters each.
    let wrapped = wrap_text("日本語テスト", 6);
    assert_eq!(wrapped, vec!["日本語", "テスト"]);
}
165230
}

0 commit comments

Comments
 (0)