@@ -11,7 +11,6 @@ use std::fs::File;
1111use std:: io:: { self , BufReader , BufWriter , Read , Stdin , Stdout , Write , stdin, stdout} ;
1212use std:: num:: IntErrorKind ;
1313use std:: path:: Path ;
14- use std:: str:: from_utf8;
1514use thiserror:: Error ;
1615use uucore:: display:: Quotable ;
1716use uucore:: error:: { FromIo , UError , UResult , USimpleError , set_exit_code} ;
@@ -396,29 +395,14 @@ enum CharType {
396395 Other ,
397396}
398397
399- fn next_char_info ( utf8 : bool , buf : & [ u8 ] , byte : usize ) -> ( CharType , usize , usize ) {
398+ fn char_info ( c : char ) -> ( CharType , usize ) {
400399 use CharType :: { Backspace , Other , Space , Tab } ;
401- let b = buf[ byte] ;
402- if b. is_ascii ( ) {
403- return match b {
404- b' ' => ( Space , 0 , 1 ) ,
405- b'\t' => ( Tab , 0 , 1 ) ,
406- b'\x08' => ( Backspace , 0 , 1 ) ,
407- _ => ( Other , 1 , 1 ) ,
408- } ;
409- }
410-
411- if utf8 {
412- let nbytes = char:: from ( b) . len_utf8 ( ) ;
413- // don't overrun the buffer because of invalid UTF-8
414- if buf
415- . get ( byte..byte + nbytes)
416- . is_some_and ( |s| from_utf8 ( s) . is_ok ( ) )
417- {
418- return ( Other , nbytes, nbytes) ;
419- }
400+ match c {
401+ ' ' => ( Space , 1 ) ,
402+ '\t' => ( Tab , 0 ) ,
403+ '\x08' => ( Backspace , 0 ) ,
404+ _ => ( Other , 1 ) ,
420405 }
421- ( Other , 1 , 1 )
422406}
423407
424408// This struct is used to store the current state of printing the input buf.
@@ -506,56 +490,65 @@ fn unexpand_buf(
506490 }
507491 }
508492
509- while byte < buf. len ( ) {
510- // when we have a finite number of columns, never convert past the last column
511- if lastcol > 0 && print_state. col >= lastcol {
512- write_tabs ( output, tab_config, print_state, true ) ?;
513- output. write_all ( & buf[ byte..] ) ?;
514- print_state. scol = print_state. col ;
515- break ;
516- }
517-
518- // figure out how big the next char is, if it's UTF-8
519- let ( ctype, cwidth, nbytes) = next_char_info ( options. utf8 , buf, byte) ;
493+ for chunk in buf. utf8_chunks ( ) {
494+ for c in chunk. valid ( ) . chars ( ) {
495+ // when we have a finite number of columns, never convert past the last column
496+ if lastcol > 0 && print_state. col >= lastcol {
497+ write_tabs ( output, tab_config, print_state, true ) ?;
498+ output. write_all ( & buf[ byte..] ) ?;
499+ print_state. scol = print_state. col ;
500+ break ;
501+ }
520502
521- // now figure out how many columns this char takes up, and maybe print it
522- let tabs_buffered = print_state. leading || options. aflag ;
523- match ctype {
524- CharType :: Space | CharType :: Tab => {
525- // compute next col, but only write space or tab chars if not buffering
526- print_state. col += if ctype == CharType :: Space {
527- 1
528- } else {
529- next_tabstop ( tab_config, print_state. col ) . unwrap_or ( 1 )
530- } ;
503+ // figure out how big the next char is, if it's UTF-8
504+ let ( ctype, cwidth) = char_info ( c) ;
505+ let nbytes = c. len_utf8 ( ) ;
506+
507+ // now figure out how many columns this char takes up, and maybe print it
508+ let tabs_buffered = print_state. leading || options. aflag ;
509+ match ctype {
510+ CharType :: Space | CharType :: Tab => {
511+ // compute next col, but only write space or tab chars if not buffering
512+ print_state. col += if ctype == CharType :: Space {
513+ 1
514+ } else {
515+ next_tabstop ( tab_config, print_state. col ) . unwrap_or ( 1 )
516+ } ;
531517
532- if !tabs_buffered {
518+ if !tabs_buffered {
519+ output. write_all ( & buf[ byte..byte + nbytes] ) ?;
520+ print_state. scol = print_state. col ; // now printed up to this column
521+ }
522+ }
523+ CharType :: Other | CharType :: Backspace => {
524+ // always
525+ write_tabs ( output, tab_config, print_state, options. aflag ) ?;
526+ print_state. leading = false ; // no longer at the start of a line
527+ print_state. col = if ctype == CharType :: Other {
528+ // use computed width
529+ print_state. col + cwidth
530+ } else if print_state. col > 0 {
531+ // Backspace case, but only if col > 0
532+ print_state. col - 1
533+ } else {
534+ 0
535+ } ;
533536 output. write_all ( & buf[ byte..byte + nbytes] ) ?;
534- print_state. scol = print_state. col ; // now printed up to this column
537+ print_state. scol = print_state. col ; // we've now printed up to this column
535538 }
536539 }
537- CharType :: Other | CharType :: Backspace => {
538- // always
539- write_tabs ( output, tab_config, print_state, options. aflag ) ?;
540- print_state. leading = false ; // no longer at the start of a line
541- print_state. col = if ctype == CharType :: Other {
542- // use computed width
543- print_state. col + cwidth
544- } else if print_state. col > 0 {
545- // Backspace case, but only if col > 0
546- print_state. col - 1
547- } else {
548- 0
549- } ;
550- output. write_all ( & buf[ byte..byte + nbytes] ) ?;
551- print_state. scol = print_state. col ; // we've now printed up to this column
552- }
540+ print_state. pctype = ctype; // save the previous type
553541 }
554542
555- byte += nbytes; // move on to next char
556- print_state. pctype = ctype; // save the previous type
543+ for & b in chunk. invalid ( ) {
544+ write_tabs ( output, tab_config, print_state, options. aflag ) ?;
545+ print_state. leading = false ;
546+ print_state. col += 1 ;
547+ output. write_all ( & [ b] ) ?;
548+ print_state. scol = print_state. col ;
549+ print_state. pctype = CharType :: Other ;
550+ }
557551 }
558-
559552 Ok ( ( ) )
560553}
561554
0 commit comments