Skip to content

Commit 1b496f7

Browse files
committed
Add new function ansi::slice_ansi_str
1 parent 0bf645d commit 1b496f7

2 files changed

Lines changed: 159 additions & 61 deletions

File tree

src/lib.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,8 @@ pub use crate::term::{
9595
pub use crate::utils::{
9696
colors_enabled, colors_enabled_stderr, measure_text_width, pad_str, pad_str_with,
9797
set_colors_enabled, set_colors_enabled_stderr, set_true_colors_enabled,
98-
set_true_colors_enabled_stderr, style, true_colors_enabled, true_colors_enabled_stderr,
99-
truncate_str, Alignment, Attribute, Color, Emoji, Style, StyledObject,
98+
set_true_colors_enabled_stderr, slice_str, style, true_colors_enabled,
99+
true_colors_enabled_stderr, truncate_str, Alignment, Color, Emoji, Style, StyledObject,
100100
};
101101

102102
#[cfg(all(feature = "ansi-parsing", feature = "alloc"))]

src/utils.rs

Lines changed: 157 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use core::{
44
sync::atomic::{AtomicBool, Ordering},
55
};
66
use std::env;
7+
use std::ops::Range;
78

89
use once_cell::sync::Lazy;
910

@@ -890,78 +891,124 @@ pub(crate) fn char_width(_c: char) -> usize {
890891
1
891892
}
892893

893-
/// Truncates a string to a certain number of characters.
894+
/// Slice a `&str` in terms of text width. This means that only the text
895+
/// columns strictly between `start` and `stop` will be kept.
894896
///
895-
/// This ensures that escape codes are not screwed up in the process.
896-
/// If the maximum length is hit the string will be truncated but
897-
/// escapes code will still be honored. If truncation takes place
898-
/// the tail string will be appended.
899-
pub fn truncate_str<'a>(s: &'a str, width: usize, tail: &str) -> Cow<'a, str> {
900-
if measure_text_width(s) <= width {
901-
return Cow::Borrowed(s);
902-
}
903-
897+
/// If a multi-column character overlaps with the end of the interval it will
898+
/// not be included. In such a case, the result will be less than `end - start`
899+
/// columns wide.
900+
///
901+
/// This ensures that escape codes are not screwed up in the process. And if
902+
/// non-empty head and tail are specified, they are inserted between the ANSI
903+
/// codes from truncated bounds and the slice.
904+
pub fn slice_str<'a>(s: &'a str, head: &str, bounds: Range<usize>, tail: &str) -> Cow<'a, str> {
904905
#[cfg(feature = "ansi-parsing")]
905906
{
906-
use core::cmp::Ordering;
907-
let mut iter = AnsiCodeIterator::new(s);
908-
let mut length = 0;
909-
let mut rv = None;
910-
911-
while let Some(item) = iter.next() {
912-
match item {
913-
(s, false) => {
914-
if rv.is_none() {
915-
if str_width(s) + length > width.saturating_sub(str_width(tail)) {
916-
let ts = iter.current_slice();
917-
918-
let mut s_byte = 0;
919-
let mut s_width = 0;
920-
let rest_width =
921-
width.saturating_sub(str_width(tail)).saturating_sub(length);
922-
for c in s.chars() {
923-
s_byte += c.len_utf8();
924-
s_width += char_width(c);
925-
match s_width.cmp(&rest_width) {
926-
Ordering::Equal => break,
927-
Ordering::Greater => {
928-
s_byte -= c.len_utf8();
929-
break;
930-
}
931-
Ordering::Less => continue,
932-
}
933-
}
934-
935-
let idx = ts.len() - s.len() + s_byte;
936-
let mut buf = ts[..idx].to_string();
937-
buf.push_str(tail);
938-
rv = Some(buf);
939-
}
940-
length += str_width(s);
907+
let mut pos = 0; // Current search index by width
908+
let mut code_iter = AnsiCodeIterator::new(s).peekable();
909+
910+
// Search for the beginning of the slice while collecting leading ANSI
911+
// codes
912+
let mut front_ansi = String::new(); // ANSI codes found before bound start
913+
let mut slice_start = 0; // Current search index by bytes
914+
915+
// Extract the leading slice, which *may be mutated* to remove just its first character.
916+
'search_slice_start: while pos < bounds.start {
917+
let Some((sub, is_ansi)) = code_iter.peek_mut() else {
918+
break;
919+
};
920+
921+
if *is_ansi {
922+
// Keep track of leading ANSI for later output.
923+
front_ansi.push_str(sub);
924+
slice_start += sub.len();
925+
} else {
926+
for (c_idx, c) in sub.char_indices() {
927+
if pos >= bounds.start {
928+
// Ensure we don't drop the remainder of the slice before searching for the
929+
// end bound.
930+
*sub = &sub[c_idx..];
931+
break 'search_slice_start;
941932
}
933+
934+
pos += char_width(c);
935+
slice_start += c.len_utf8();
942936
}
943-
(s, true) => {
944-
if let Some(ref mut rv) = rv {
945-
rv.push_str(s);
946-
}
937+
}
938+
939+
code_iter.next();
940+
}
941+
942+
// Search for the end of the slice. This loop is a bit simpler because we don't need to
943+
// keep track of remaining characters if we cut in the middle of a non-ANSI slice.
944+
let mut slice_end = slice_start;
945+
946+
'search_slice_end: for (sub, is_ansi) in &mut code_iter {
947+
if is_ansi {
948+
// Keep ANSI in the output slice but don't account for them in the total width.
949+
slice_end += sub.len();
950+
continue;
951+
}
952+
953+
for c in sub.chars() {
954+
let c_width = char_width(c);
955+
956+
if pos + c_width > bounds.end {
957+
// We will only search for ANSI codes after breaking this
958+
// loop, so we can safely drop the remainder of `sub`
959+
break 'search_slice_end;
947960
}
961+
962+
pos += c_width;
963+
slice_end += c.len_utf8();
948964
}
949965
}
950966

951-
if let Some(buf) = rv {
952-
Cow::Owned(buf)
953-
} else {
954-
Cow::Borrowed(s)
967+
// Initialise the result (before appending remaining ANSI slices)
968+
let slice = &s[slice_start..slice_end];
969+
970+
let mut result = {
971+
if front_ansi.is_empty() && head.is_empty() && tail.is_empty() {
972+
// No allocation is needed here when there is no leading ANSI code, head or tail to attach.
973+
Cow::Borrowed(slice)
974+
} else {
975+
Cow::Owned(front_ansi + head + slice + tail)
976+
}
977+
};
978+
979+
// Push back remaining ANSI codes to result
980+
for (sub, is_ansi) in code_iter {
981+
if is_ansi {
982+
result.to_mut().push_str(sub);
983+
}
955984
}
956-
}
957985

986+
result
987+
}
958988
#[cfg(not(feature = "ansi-parsing"))]
959989
{
960-
Cow::Owned(format!(
961-
"{}{}",
962-
&s[..width.saturating_sub(tail.len())],
963-
tail
964-
))
990+
let slice = s.get(bounds).unwrap_or("");
991+
992+
if head.is_empty() && tail.is_empty() {
993+
Cow::Borrowed(slice)
994+
} else {
995+
Cow::Owned(format!("{head}{slice}{tail}"))
996+
}
997+
}
998+
}
999+
1000+
/// Truncates a string to a certain number of characters.
1001+
///
1002+
/// This ensures that escape codes are not screwed up in the process.
1003+
/// If the maximum length is hit the string will be truncated but
1004+
/// escape codes will still be honored. If truncation takes place
1005+
/// the tail string will be appended.
1006+
pub fn truncate_str<'a>(s: &'a str, width: usize, tail: &str) -> Cow<'a, str> {
1007+
if measure_text_width(s) <= width {
1008+
Cow::Borrowed(s)
1009+
} else {
1010+
let tail_width = measure_text_width(tail);
1011+
slice_str(s, "", 0..width.saturating_sub(tail_width), tail)
9651012
}
9661013
}
9671014

@@ -1089,6 +1136,57 @@ fn test_truncate_str() {
10891136
);
10901137
}
10911138

1139+
#[test]
1140+
fn test_slice_ansi_str() {
1141+
// Note that 🐶 is two columns wide
1142+
let test_str = "Hello\x1b[31m🐶\x1b[1m🐶\x1b[0m world!";
1143+
assert_eq!(slice_str(test_str, "", 0..test_str.len(), ""), test_str);
1144+
1145+
assert_eq!(
1146+
slice_str(test_str, ">>>", 0..test_str.len(), "<<<"),
1147+
format!(">>>{test_str}<<<"),
1148+
);
1149+
1150+
if cfg!(feature = "unicode-width") && cfg!(feature = "ansi-parsing") {
1151+
assert_eq!(measure_text_width(test_str), 16);
1152+
1153+
assert_eq!(
1154+
slice_str(test_str, "", 5..5, ""),
1155+
"\u{1b}[31m\u{1b}[1m\u{1b}[0m"
1156+
);
1157+
1158+
assert_eq!(
1159+
slice_str(test_str, "", 0..5, ""),
1160+
"Hello\x1b[31m\x1b[1m\x1b[0m"
1161+
);
1162+
1163+
assert_eq!(
1164+
slice_str(test_str, "", 0..6, ""),
1165+
"Hello\x1b[31m\x1b[1m\x1b[0m"
1166+
);
1167+
1168+
assert_eq!(
1169+
slice_str(test_str, "", 0..7, ""),
1170+
"Hello\x1b[31m🐶\x1b[1m\x1b[0m"
1171+
);
1172+
1173+
assert_eq!(
1174+
slice_str(test_str, "", 4..9, ""),
1175+
"o\x1b[31m🐶\x1b[1m🐶\x1b[0m"
1176+
);
1177+
1178+
assert_eq!(
1179+
slice_str(test_str, "", 7..21, ""),
1180+
"\x1b[31m\x1b[1m🐶\x1b[0m world!"
1181+
);
1182+
1183+
assert_eq!(
1184+
slice_str(test_str, ">>>", 7..21, "<<<"),
1185+
"\x1b[31m>>>\x1b[1m🐶\x1b[0m world!<<<"
1186+
);
1187+
}
1188+
}
1189+
10921190
#[test]
10931191
fn test_truncate_str_no_ansi() {
10941192
assert_eq!(&truncate_str("foo bar", 7, "!"), "foo bar");

0 commit comments

Comments
 (0)