88use clap:: { Arg , ArgAction , Command } ;
99use std:: ffi:: OsString ;
1010use std:: fs:: File ;
11- use std:: io:: { BufRead , BufReader , BufWriter , Read , Stdout , Write , stdin, stdout} ;
11+ use std:: io:: { BufReader , BufWriter , Read , Stdout , Write , stdin, stdout} ;
1212use std:: num:: IntErrorKind ;
1313use std:: path:: Path ;
1414use std:: str:: from_utf8;
@@ -347,7 +347,7 @@ fn next_tabstop(tab_config: &TabConfig, col: usize) -> Option<usize> {
347347fn write_tabs (
348348 output : & mut BufWriter < Stdout > ,
349349 tab_config : & TabConfig ,
350- mut scol : usize ,
350+ scol : & mut usize ,
351351 col : usize ,
352352 prevtab : bool ,
353353 init : bool ,
@@ -357,20 +357,20 @@ fn write_tabs(
357357 // We never turn a single space before a non-blank into
358358 // a tab, unless it's at the start of the line.
359359 let ai = init || amode;
360- if ( ai && !prevtab && col > scol + 1 ) || ( col > scol && ( init || ai && prevtab) ) {
361- while let Some ( nts) = next_tabstop ( tab_config, scol) {
362- if col < scol + nts {
360+ if ( ai && !prevtab && col > * scol + 1 ) || ( col > * scol && ( init || ai && prevtab) ) {
361+ while let Some ( nts) = next_tabstop ( tab_config, * scol) {
362+ if col < * scol + nts {
363363 break ;
364364 }
365365
366366 output. write_all ( b"\t " ) ?;
367- scol += nts;
367+ * scol += nts;
368368 }
369369 }
370370
371- while col > scol {
371+ while col > * scol {
372372 output. write_all ( b" " ) ?;
373- scol += 1 ;
373+ * scol += 1 ;
374374 }
375375 Ok ( ( ) )
376376}
@@ -424,101 +424,108 @@ fn next_char_info(uflag: bool, buf: &[u8], byte: usize) -> (CharType, usize, usi
424424}
425425
426426#[ allow( clippy:: cognitive_complexity) ]
427+ #[ allow( clippy:: too_many_arguments) ]
427428fn unexpand_line (
428- buf : & mut Vec < u8 > ,
429+ buf : & [ u8 ] ,
429430 output : & mut BufWriter < Stdout > ,
430431 options : & Options ,
431432 lastcol : usize ,
432433 tab_config : & TabConfig ,
434+ col : & mut usize ,
435+ scol : & mut usize ,
436+ leading : & mut bool ,
433437) -> UResult < ( ) > {
434- // Fast path: if we're not converting all spaces (-a flag not set)
435- // and the line doesn't start with spaces, just write it directly
436- if !options. aflag && !buf. is_empty ( ) && buf[ 0 ] != b' ' && buf[ 0 ] != b'\t' {
437- output. write_all ( buf) ?;
438- buf. truncate ( 0 ) ;
439- return Ok ( ( ) ) ;
438+ // We can only fast forward if we don't need to calculate col/scol
439+ if let Some ( b'\n' ) = buf. last ( ) {
440+ // Fast path: if we're not converting all spaces (-a flag not set)
441+ // and the line doesn't start with spaces, just write it directly
442+ if !options. aflag && !buf. is_empty ( ) && ( ( buf[ 0 ] != b' ' && buf[ 0 ] != b'\t' ) || !* leading) {
443+ * col += buf. len ( ) ;
444+ output. write_all ( buf) ?;
445+ return Ok ( ( ) ) ;
446+ }
440447 }
441448
442449 let mut byte = 0 ; // offset into the buffer
443- let mut col = 0 ; // the current column
444- let mut scol = 0 ; // the start col for the current span, i.e., the already-printed width
445- let mut init = true ; // are we at the start of the line?
446450 let mut pctype = CharType :: Other ;
447451
448- // Fast path for leading spaces in non-UTF8 mode: count consecutive spaces/tabs at start
449- if !options. uflag && !options. aflag {
450- // In default mode (not -a), we only convert leading spaces
451- // So we can batch process them and then copy the rest
452- while byte < buf. len ( ) {
453- match buf[ byte] {
454- b' ' => {
455- col += 1 ;
456- byte += 1 ;
457- }
458- b'\t' => {
459- col += next_tabstop ( tab_config, col) . unwrap_or ( 1 ) ;
460- byte += 1 ;
461- pctype = CharType :: Tab ;
452+ // We can only fast forward if we don't need to calculate col/scol
453+ if let Some ( b'\n' ) = buf. last ( ) {
454+ // Fast path for leading spaces in non-UTF8 mode: count consecutive spaces/tabs at start
455+ if !options. uflag && !options. aflag && * leading {
456+ // In default mode (not -a), we only convert leading spaces
457+ // So we can batch process them and then copy the rest
458+ while byte < buf. len ( ) {
459+ match buf[ byte] {
460+ b' ' => {
461+ * col += 1 ;
462+ byte += 1 ;
463+ }
464+ b'\t' => {
465+ * col += next_tabstop ( tab_config, * col) . unwrap_or ( 1 ) ;
466+ byte += 1 ;
467+ pctype = CharType :: Tab ;
468+ }
469+ _ => break ,
462470 }
463- _ => break ,
464471 }
465- }
466472
467- // If we found spaces/tabs, write them as tabs
468- if byte > 0 {
469- write_tabs (
470- output,
471- tab_config,
472- 0 ,
473- col,
474- pctype == CharType :: Tab ,
475- true ,
476- true ,
477- ) ?;
478- }
473+ // If we found spaces/tabs, write them as tabs
474+ if byte > 0 {
475+ write_tabs (
476+ output,
477+ tab_config,
478+ scol ,
479+ * col,
480+ pctype == CharType :: Tab ,
481+ true ,
482+ true ,
483+ ) ?;
484+ }
479485
480- // Write the rest of the line directly (no more tab conversion needed)
481- if byte < buf. len ( ) {
482- output. write_all ( & buf[ byte..] ) ?;
486+ // Write the rest of the line directly (no more tab conversion needed)
487+ if byte < buf. len ( ) {
488+ * leading = false ;
489+ output. write_all ( & buf[ byte..] ) ?;
490+ }
491+ return Ok ( ( ) ) ;
483492 }
484- buf. truncate ( 0 ) ;
485- return Ok ( ( ) ) ;
486493 }
487494
488495 while byte < buf. len ( ) {
489496 // when we have a finite number of columns, never convert past the last column
490- if lastcol > 0 && col >= lastcol {
497+ if lastcol > 0 && * col >= lastcol {
491498 write_tabs (
492499 output,
493500 tab_config,
494501 scol,
495- col,
502+ * col,
496503 pctype == CharType :: Tab ,
497- init ,
504+ * leading ,
498505 true ,
499506 ) ?;
500507 output. write_all ( & buf[ byte..] ) ?;
501- scol = col;
508+ * scol = * col;
502509 break ;
503510 }
504511
505512 // figure out how big the next char is, if it's UTF-8
506513 let ( ctype, cwidth, nbytes) = next_char_info ( options. uflag , buf, byte) ;
507514
508515 // now figure out how many columns this char takes up, and maybe print it
509- let tabs_buffered = init || options. aflag ;
516+ let tabs_buffered = * leading || options. aflag ;
510517 match ctype {
511518 CharType :: Space | CharType :: Tab => {
512519 // compute next col, but only write space or tab chars if not buffering
513- col += if ctype == CharType :: Space {
520+ * col += if ctype == CharType :: Space {
514521 1
515522 } else {
516- next_tabstop ( tab_config, col) . unwrap_or ( 1 )
523+ next_tabstop ( tab_config, * col) . unwrap_or ( 1 )
517524 } ;
518525
519526 if !tabs_buffered {
520527 output. write_all ( & buf[ byte..byte + nbytes] ) ?;
521- scol = col; // now printed up to this column
528+ * scol = * col; // now printed up to this column
522529 }
523530 }
524531 CharType :: Other | CharType :: Backspace => {
@@ -527,23 +534,23 @@ fn unexpand_line(
527534 output,
528535 tab_config,
529536 scol,
530- col,
537+ * col,
531538 pctype == CharType :: Tab ,
532- init ,
539+ * leading ,
533540 options. aflag ,
534541 ) ?;
535- init = false ; // no longer at the start of a line
536- col = if ctype == CharType :: Other {
542+ * leading = false ; // no longer at the start of a line
543+ * col = if ctype == CharType :: Other {
537544 // use computed width
538- col + cwidth
539- } else if col > 0 {
545+ * col + cwidth
546+ } else if * col > 0 {
540547 // Backspace case, but only if col > 0
541- col - 1
548+ * col - 1
542549 } else {
543550 0
544551 } ;
545552 output. write_all ( & buf[ byte..byte + nbytes] ) ?;
546- scol = col; // we've now printed up to this column
553+ * scol = * col; // we've now printed up to this column
547554 }
548555 }
549556
@@ -556,12 +563,11 @@ fn unexpand_line(
556563 output,
557564 tab_config,
558565 scol,
559- col,
566+ * col,
560567 pctype == CharType :: Tab ,
561- init ,
568+ * leading ,
562569 true ,
563570 ) ?;
564- buf. truncate ( 0 ) ; // clear out the buffer
565571
566572 Ok ( ( ) )
567573}
@@ -573,12 +579,33 @@ fn unexpand_file(
573579 lastcol : usize ,
574580 tab_config : & TabConfig ,
575581) -> UResult < ( ) > {
576- let mut buf = Vec :: new ( ) ;
582+ let mut buf = [ 0u8 ; 4096 ] ;
577583 let mut input = open ( file) ?;
584+ let mut col = 0 ;
585+ let mut scol = 0 ;
586+ let mut leading = true ;
578587 loop {
579- match input. read_until ( b'\n' , & mut buf) {
588+ match input. read ( & mut buf) {
580589 Ok ( 0 ) => break ,
581- Ok ( _) => unexpand_line ( & mut buf, output, options, lastcol, tab_config) ?,
590+ Ok ( n) => {
591+ for line in buf[ ..n] . split_inclusive ( |b| * b == b'\n' ) {
592+ unexpand_line (
593+ line,
594+ output,
595+ options,
596+ lastcol,
597+ tab_config,
598+ & mut col,
599+ & mut scol,
600+ & mut leading,
601+ ) ?;
602+ if let Some ( b'\n' ) = line. last ( ) {
603+ col = 0 ;
604+ scol = 0 ;
605+ leading = true ;
606+ }
607+ }
608+ }
582609 Err ( e) => return Err ( e. map_err_context ( || file. maybe_quote ( ) . to_string ( ) ) ) ,
583610 }
584611 }
0 commit comments