88use clap:: { Arg , ArgAction , Command } ;
99use std:: ffi:: OsString ;
1010use std:: fs:: File ;
11- use std:: io:: { BufRead , BufReader , BufWriter , Read , Stdout , Write , stdin, stdout} ;
11+ use std:: io:: { BufReader , BufWriter , Read , Stdout , Write , stdin, stdout} ;
1212use std:: num:: IntErrorKind ;
1313use std:: path:: Path ;
1414use std:: str:: from_utf8;
@@ -347,7 +347,7 @@ fn next_tabstop(tab_config: &TabConfig, col: usize) -> Option<usize> {
347347fn write_tabs (
348348 output : & mut BufWriter < Stdout > ,
349349 tab_config : & TabConfig ,
350- mut scol : usize ,
350+ scol : & mut usize ,
351351 col : usize ,
352352 prevtab : bool ,
353353 init : bool ,
@@ -357,20 +357,20 @@ fn write_tabs(
357357 // We never turn a single space before a non-blank into
358358 // a tab, unless it's at the start of the line.
359359 let ai = init || amode;
360- if ( ai && !prevtab && col > scol + 1 ) || ( col > scol && ( init || ai && prevtab) ) {
361- while let Some ( nts) = next_tabstop ( tab_config, scol) {
362- if col < scol + nts {
360+ if ( ai && !prevtab && col > * scol + 1 ) || ( col > * scol && ( init || ai && prevtab) ) {
361+ while let Some ( nts) = next_tabstop ( tab_config, * scol) {
362+ if col < * scol + nts {
363363 break ;
364364 }
365365
366366 output. write_all ( b"\t " ) ?;
367- scol += nts;
367+ * scol += nts;
368368 }
369369 }
370370
371- while col > scol {
371+ while col > * scol {
372372 output. write_all ( b" " ) ?;
373- scol += 1 ;
373+ * scol += 1 ;
374374 }
375375 Ok ( ( ) )
376376}
@@ -424,101 +424,118 @@ fn next_char_info(uflag: bool, buf: &[u8], byte: usize) -> (CharType, usize, usi
424424}
425425
426426#[ allow( clippy:: cognitive_complexity) ]
427- fn unexpand_line (
428- buf : & mut Vec < u8 > ,
427+ #[ allow( clippy:: too_many_arguments) ]
428+ fn unexpand_buf (
429+ buf : & [ u8 ] ,
429430 output : & mut BufWriter < Stdout > ,
430431 options : & Options ,
431432 lastcol : usize ,
432433 tab_config : & TabConfig ,
434+ col : & mut usize ,
435+ scol : & mut usize ,
436+ leading : & mut bool ,
437+ pctype : & mut CharType ,
433438) -> UResult < ( ) > {
434- // Fast path: if we're not converting all spaces (-a flag not set)
435- // and the line doesn't start with spaces, just write it directly
436- if !options. aflag && !buf. is_empty ( ) && buf[ 0 ] != b' ' && buf[ 0 ] != b'\t' {
437- output. write_all ( buf) ?;
438- buf. truncate ( 0 ) ;
439- return Ok ( ( ) ) ;
439+ // We can only fast forward if we don't need to calculate col/scol
440+ if let Some ( b'\n' ) = buf. last ( ) {
441+ // Fast path: if we're not converting all spaces (-a flag not set)
442+ // and the line doesn't start with spaces, just write it directly
443+ if !options. aflag && !buf. is_empty ( ) && ( ( buf[ 0 ] != b' ' && buf[ 0 ] != b'\t' ) || !* leading) {
444+ write_tabs (
445+ output,
446+ tab_config,
447+ scol,
448+ * col,
449+ * pctype == CharType :: Tab ,
450+ * leading,
451+ options. aflag ,
452+ ) ?;
453+ * scol = * col;
454+ * col += buf. len ( ) ;
455+ output. write_all ( buf) ?;
456+ return Ok ( ( ) ) ;
457+ }
440458 }
441459
442460 let mut byte = 0 ; // offset into the buffer
443- let mut col = 0 ; // the current column
444- let mut scol = 0 ; // the start col for the current span, i.e., the already-printed width
445- let mut init = true ; // are we at the start of the line?
446- let mut pctype = CharType :: Other ;
447461
448- // Fast path for leading spaces in non-UTF8 mode: count consecutive spaces/tabs at start
449- if !options. uflag && !options. aflag {
450- // In default mode (not -a), we only convert leading spaces
451- // So we can batch process them and then copy the rest
452- while byte < buf. len ( ) {
453- match buf[ byte] {
454- b' ' => {
455- col += 1 ;
456- byte += 1 ;
457- }
458- b'\t' => {
459- col += next_tabstop ( tab_config, col) . unwrap_or ( 1 ) ;
460- byte += 1 ;
461- pctype = CharType :: Tab ;
462+ // We can only fast forward if we don't need to calculate col/scol
463+ if let Some ( b'\n' ) = buf. last ( ) {
464+ // Fast path for leading spaces in non-UTF8 mode: count consecutive spaces/tabs at start
465+ if !options. uflag && !options. aflag && * leading {
466+ // In default mode (not -a), we only convert leading spaces
467+ // So we can batch process them and then copy the rest
468+ while byte < buf. len ( ) {
469+ match buf[ byte] {
470+ b' ' => {
471+ * col += 1 ;
472+ byte += 1 ;
473+ }
474+ b'\t' => {
475+ * col += next_tabstop ( tab_config, * col) . unwrap_or ( 1 ) ;
476+ byte += 1 ;
477+ * pctype = CharType :: Tab ;
478+ }
479+ _ => break ,
462480 }
463- _ => break ,
464481 }
465- }
466482
467- // If we found spaces/tabs, write them as tabs
468- if byte > 0 {
469- write_tabs (
470- output,
471- tab_config,
472- 0 ,
473- col,
474- pctype == CharType :: Tab ,
475- true ,
476- true ,
477- ) ?;
478- }
483+ // If we found spaces/tabs, write them as tabs
484+ if byte > 0 {
485+ write_tabs (
486+ output,
487+ tab_config,
488+ scol ,
489+ * col,
490+ * pctype == CharType :: Tab ,
491+ true ,
492+ options . aflag ,
493+ ) ?;
494+ }
479495
480- // Write the rest of the line directly (no more tab conversion needed)
481- if byte < buf. len ( ) {
482- output. write_all ( & buf[ byte..] ) ?;
496+ // Write the rest of the line directly (no more tab conversion needed)
497+ if byte < buf. len ( ) {
498+ * leading = false ;
499+ output. write_all ( & buf[ byte..] ) ?;
500+ }
501+ return Ok ( ( ) ) ;
483502 }
484- buf. truncate ( 0 ) ;
485- return Ok ( ( ) ) ;
486503 }
487504
488505 while byte < buf. len ( ) {
489506 // when we have a finite number of columns, never convert past the last column
490- if lastcol > 0 && col >= lastcol {
507+ if lastcol > 0 && * col >= lastcol {
491508 write_tabs (
492509 output,
493510 tab_config,
494511 scol,
495- col,
496- pctype == CharType :: Tab ,
497- init ,
512+ * col,
513+ * pctype == CharType :: Tab ,
514+ * leading ,
498515 true ,
499516 ) ?;
500517 output. write_all ( & buf[ byte..] ) ?;
501- scol = col;
518+ * scol = * col;
502519 break ;
503520 }
504521
505522 // figure out how big the next char is, if it's UTF-8
506523 let ( ctype, cwidth, nbytes) = next_char_info ( options. uflag , buf, byte) ;
507524
508525 // now figure out how many columns this char takes up, and maybe print it
509- let tabs_buffered = init || options. aflag ;
526+ let tabs_buffered = * leading || options. aflag ;
510527 match ctype {
511528 CharType :: Space | CharType :: Tab => {
512529 // compute next col, but only write space or tab chars if not buffering
513- col += if ctype == CharType :: Space {
530+ * col += if ctype == CharType :: Space {
514531 1
515532 } else {
516- next_tabstop ( tab_config, col) . unwrap_or ( 1 )
533+ next_tabstop ( tab_config, * col) . unwrap_or ( 1 )
517534 } ;
518535
519536 if !tabs_buffered {
520537 output. write_all ( & buf[ byte..byte + nbytes] ) ?;
521- scol = col; // now printed up to this column
538+ * scol = * col; // now printed up to this column
522539 }
523540 }
524541 CharType :: Other | CharType :: Backspace => {
@@ -527,42 +544,30 @@ fn unexpand_line(
527544 output,
528545 tab_config,
529546 scol,
530- col,
531- pctype == CharType :: Tab ,
532- init ,
547+ * col,
548+ * pctype == CharType :: Tab ,
549+ * leading ,
533550 options. aflag ,
534551 ) ?;
535- init = false ; // no longer at the start of a line
536- col = if ctype == CharType :: Other {
552+ * leading = false ; // no longer at the start of a line
553+ * col = if ctype == CharType :: Other {
537554 // use computed width
538- col + cwidth
539- } else if col > 0 {
555+ * col + cwidth
556+ } else if * col > 0 {
540557 // Backspace case, but only if col > 0
541- col - 1
558+ * col - 1
542559 } else {
543560 0
544561 } ;
545562 output. write_all ( & buf[ byte..byte + nbytes] ) ?;
546- scol = col; // we've now printed up to this column
563+ * scol = * col; // we've now printed up to this column
547564 }
548565 }
549566
550567 byte += nbytes; // move on to next char
551- pctype = ctype; // save the previous type
568+ * pctype = ctype; // save the previous type
552569 }
553570
554- // write out anything remaining
555- write_tabs (
556- output,
557- tab_config,
558- scol,
559- col,
560- pctype == CharType :: Tab ,
561- init,
562- true ,
563- ) ?;
564- buf. truncate ( 0 ) ; // clear out the buffer
565-
566571 Ok ( ( ) )
567572}
568573
@@ -573,15 +578,49 @@ fn unexpand_file(
573578 lastcol : usize ,
574579 tab_config : & TabConfig ,
575580) -> UResult < ( ) > {
576- let mut buf = Vec :: new ( ) ;
581+ let mut buf = [ 0u8 ; 4096 ] ;
577582 let mut input = open ( file) ?;
583+ let mut col = 0 ;
584+ let mut scol = 0 ;
585+ let mut leading = true ;
586+ let mut pctype = CharType :: Other ;
578587 loop {
579- match input. read_until ( b'\n' , & mut buf) {
588+ match input. read ( & mut buf) {
580589 Ok ( 0 ) => break ,
581- Ok ( _) => unexpand_line ( & mut buf, output, options, lastcol, tab_config) ?,
590+ Ok ( n) => {
591+ for line in buf[ ..n] . split_inclusive ( |b| * b == b'\n' ) {
592+ unexpand_buf (
593+ line,
594+ output,
595+ options,
596+ lastcol,
597+ tab_config,
598+ & mut col,
599+ & mut scol,
600+ & mut leading,
601+ & mut pctype,
602+ ) ?;
603+ if let Some ( b'\n' ) = line. last ( ) {
604+ col = 0 ;
605+ scol = 0 ;
606+ leading = true ;
607+ pctype = CharType :: Other ;
608+ }
609+ }
610+ }
582611 Err ( e) => return Err ( e. map_err_context ( || file. maybe_quote ( ) . to_string ( ) ) ) ,
583612 }
584613 }
614+ // write out anything remaining
615+ write_tabs (
616+ output,
617+ tab_config,
618+ & mut scol,
619+ col,
620+ pctype == CharType :: Tab ,
621+ leading,
622+ options. aflag ,
623+ ) ?;
585624 Ok ( ( ) )
586625}
587626
0 commit comments