@@ -4,6 +4,7 @@ use core::{
44 sync:: atomic:: { AtomicBool , Ordering } ,
55} ;
66use std:: env;
7+ use std:: ops:: Range ;
78
89use once_cell:: sync:: Lazy ;
910
@@ -890,78 +891,124 @@ pub(crate) fn char_width(_c: char) -> usize {
890891 1
891892}
892893
893- /// Truncates a string to a certain number of characters.
894+ /// Slice a `&str` in terms of text width. This means that only the text
895+ /// columns strictly between `start` and `stop` will be kept.
894896///
895- /// This ensures that escape codes are not screwed up in the process.
896- /// If the maximum length is hit the string will be truncated but
897- /// escapes code will still be honored. If truncation takes place
898- /// the tail string will be appended.
899- pub fn truncate_str < ' a > ( s : & ' a str , width : usize , tail : & str ) -> Cow < ' a , str > {
900- if measure_text_width ( s) <= width {
901- return Cow :: Borrowed ( s) ;
902- }
903-
897+ /// If a multi-columns character overlaps with the end of the interval it will
898+ /// not be included. In such a case, the result will be less than `end - start`
899+ /// columns wide.
900+ ///
901+ /// This ensures that escape codes are not screwed up in the process. And if
902+ /// non-empty head and tail are specified, they are inserted between the ANSI
903+ /// codes from truncated bounds and the slice.
904+ pub fn slice_str < ' a > ( s : & ' a str , head : & str , bounds : Range < usize > , tail : & str ) -> Cow < ' a , str > {
904905 #[ cfg( feature = "ansi-parsing" ) ]
905906 {
906- use core:: cmp:: Ordering ;
907- let mut iter = AnsiCodeIterator :: new ( s) ;
908- let mut length = 0 ;
909- let mut rv = None ;
910-
911- while let Some ( item) = iter. next ( ) {
912- match item {
913- ( s, false ) => {
914- if rv. is_none ( ) {
915- if str_width ( s) + length > width. saturating_sub ( str_width ( tail) ) {
916- let ts = iter. current_slice ( ) ;
917-
918- let mut s_byte = 0 ;
919- let mut s_width = 0 ;
920- let rest_width =
921- width. saturating_sub ( str_width ( tail) ) . saturating_sub ( length) ;
922- for c in s. chars ( ) {
923- s_byte += c. len_utf8 ( ) ;
924- s_width += char_width ( c) ;
925- match s_width. cmp ( & rest_width) {
926- Ordering :: Equal => break ,
927- Ordering :: Greater => {
928- s_byte -= c. len_utf8 ( ) ;
929- break ;
930- }
931- Ordering :: Less => continue ,
932- }
933- }
934-
935- let idx = ts. len ( ) - s. len ( ) + s_byte;
936- let mut buf = ts[ ..idx] . to_string ( ) ;
937- buf. push_str ( tail) ;
938- rv = Some ( buf) ;
939- }
940- length += str_width ( s) ;
907+ let mut pos = 0 ; // Current search index by width
908+ let mut code_iter = AnsiCodeIterator :: new ( s) . peekable ( ) ;
909+
910+ // Search for the begining of the slice while collecting heading ANSI
911+ // codes
912+ let mut front_ansi = String :: new ( ) ; // ANSI codes found before bound start
913+ let mut slice_start = 0 ; // Current search index by bytes
914+
915+ // Extract the leading slice, which *may be mutated* to remove just its first character.
916+ ' search_slice_start: while pos < bounds. start {
917+ let Some ( ( sub, is_ansi) ) = code_iter. peek_mut ( ) else {
918+ break ;
919+ } ;
920+
921+ if * is_ansi {
922+ // Keep track of leading ANSI for later output.
923+ front_ansi. push_str ( sub) ;
924+ slice_start += sub. len ( ) ;
925+ } else {
926+ for ( c_idx, c) in sub. char_indices ( ) {
927+ if pos >= bounds. start {
928+ // Ensure we don't drop the remaining of the slice before searching for the
929+ // end bound.
930+ * sub = & sub[ c_idx..] ;
931+ break ' search_slice_start;
941932 }
933+
934+ pos += char_width ( c) ;
935+ slice_start += c. len_utf8 ( ) ;
942936 }
943- ( s, true ) => {
944- if let Some ( ref mut rv) = rv {
945- rv. push_str ( s) ;
946- }
937+ }
938+
939+ code_iter. next ( ) ;
940+ }
941+
942+ // Search for the end of the slice. This loop is a bit simpler because we don't need to
943+ // keep track of remaining characters if we cut in the middle of a non-ANSI slice.
944+ let mut slice_end = slice_start;
945+
946+ ' search_slice_end: for ( sub, is_ansi) in & mut code_iter {
947+ if is_ansi {
948+ // Keep ANSI in the output slice but don't account for them in the total width.
949+ slice_end += sub. len ( ) ;
950+ continue ;
951+ }
952+
953+ for c in sub. chars ( ) {
954+ let c_width = char_width ( c) ;
955+
956+ if pos + c_width > bounds. end {
957+ // We will only search for ANSI codes after breaking this
958+ // loop, so we can safely drop the remaining of `sub`
959+ break ' search_slice_end;
947960 }
961+
962+ pos += c_width;
963+ slice_end += c. len_utf8 ( ) ;
948964 }
949965 }
950966
951- if let Some ( buf) = rv {
952- Cow :: Owned ( buf)
953- } else {
954- Cow :: Borrowed ( s)
967+ // Initialise the result (before appending remaining ANSI slices)
968+ let slice = & s[ slice_start..slice_end] ;
969+
970+ let mut result = {
971+ if front_ansi. is_empty ( ) && head. is_empty ( ) && tail. is_empty ( ) {
972+ // No allocation may have to be performed if there are no bounds.
973+ Cow :: Borrowed ( slice)
974+ } else {
975+ Cow :: Owned ( front_ansi + head + slice + tail)
976+ }
977+ } ;
978+
979+ // Push back remaining ANSI codes to result
980+ for ( sub, is_ansi) in code_iter {
981+ if is_ansi {
982+ result. to_mut ( ) . push_str ( sub) ;
983+ }
955984 }
956- }
957985
986+ result
987+ }
958988 #[ cfg( not( feature = "ansi-parsing" ) ) ]
959989 {
960- Cow :: Owned ( format ! (
961- "{}{}" ,
962- & s[ ..width. saturating_sub( tail. len( ) ) ] ,
963- tail
964- ) )
990+ let slice = s. get ( bounds) . unwrap_or ( "" ) ;
991+
992+ if head. is_empty ( ) && tail. is_empty ( ) {
993+ Cow :: Borrowed ( slice)
994+ } else {
995+ Cow :: Owned ( format ! ( "{head}{slice}{tail}" ) )
996+ }
997+ }
998+ }
999+
1000+ /// Truncates a string to a certain number of characters.
1001+ ///
1002+ /// This ensures that escape codes are not screwed up in the process.
1003+ /// If the maximum length is hit the string will be truncated but
1004+ /// escapes code will still be honored. If truncation takes place
1005+ /// the tail string will be appended.
1006+ pub fn truncate_str < ' a > ( s : & ' a str , width : usize , tail : & str ) -> Cow < ' a , str > {
1007+ if measure_text_width ( s) <= width {
1008+ Cow :: Borrowed ( s)
1009+ } else {
1010+ let tail_width = measure_text_width ( tail) ;
1011+ slice_str ( s, "" , 0 ..width. saturating_sub ( tail_width) , tail)
9651012 }
9661013}
9671014
@@ -1089,6 +1136,57 @@ fn test_truncate_str() {
10891136 ) ;
10901137}
10911138
/// Checks `slice_str` on a string mixing plain text, ANSI escape codes and
/// double-width characters.
#[test]
fn test_slice_ansi_str() {
    // Note that 🐶 is two columns wide
    let test_str = "Hello\x1b[31m🐶\x1b[1m🐶\x1b[0m world!";

    // `test_str.len()` is a byte length; it is only used here as an end bound
    // that is guaranteed to be wider than the whole text.
    assert_eq!(slice_str(test_str, "", 0..test_str.len(), ""), test_str);

    assert_eq!(
        slice_str(test_str, ">>>", 0..test_str.len(), "<<<"),
        format!(">>>{test_str}<<<"),
    );

    // The remaining expectations rely on real character widths and on ANSI
    // codes being recognised.
    if cfg!(feature = "unicode-width") && cfg!(feature = "ansi-parsing") {
        assert_eq!(measure_text_width(test_str), 16);

        // Empty interval: only the escape codes survive.
        assert_eq!(
            slice_str(test_str, "", 5..5, ""),
            "\x1b[31m\x1b[1m\x1b[0m"
        );

        assert_eq!(
            slice_str(test_str, "", 0..5, ""),
            "Hello\x1b[31m\x1b[1m\x1b[0m"
        );

        // The first 🐶 overlaps the end bound, so it is left out.
        assert_eq!(
            slice_str(test_str, "", 0..6, ""),
            "Hello\x1b[31m\x1b[1m\x1b[0m"
        );

        assert_eq!(
            slice_str(test_str, "", 0..7, ""),
            "Hello\x1b[31m🐶\x1b[1m\x1b[0m"
        );

        assert_eq!(
            slice_str(test_str, "", 4..9, ""),
            "o\x1b[31m🐶\x1b[1m🐶\x1b[0m"
        );

        // End bound past the end of the text: everything after column 7 is kept.
        assert_eq!(
            slice_str(test_str, "", 7..21, ""),
            "\x1b[31m\x1b[1m🐶\x1b[0m world!"
        );

        // Head goes after the codes found before the start bound.
        assert_eq!(
            slice_str(test_str, ">>>", 7..21, "<<<"),
            "\x1b[31m>>>\x1b[1m🐶\x1b[0m world!<<<"
        );
    }
}
1189+
10921190#[ test]
10931191fn test_truncate_str_no_ansi ( ) {
10941192 assert_eq ! ( & truncate_str( "foo bar" , 7 , "!" ) , "foo bar" ) ;
0 commit comments