Skip to content

Commit 1f4a976

Browse files
authored
Revert "unexpand: use buffered read & improve performance by 34.66% (#10798)" (#10819)
This reverts commit 4bbd71f.
1 parent 4c1536b commit 1f4a976

22 files changed

Lines changed: 73 additions & 283 deletions

src/uu/unexpand/src/unexpand.rs

Lines changed: 73 additions & 100 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
use clap::{Arg, ArgAction, Command};
99
use std::ffi::OsString;
1010
use std::fs::File;
11-
use std::io::{BufReader, BufWriter, Read, Stdout, Write, stdin, stdout};
11+
use std::io::{BufRead, BufReader, BufWriter, Read, Stdout, Write, stdin, stdout};
1212
use std::num::IntErrorKind;
1313
use std::path::Path;
1414
use std::str::from_utf8;
@@ -347,7 +347,7 @@ fn next_tabstop(tab_config: &TabConfig, col: usize) -> Option<usize> {
347347
fn write_tabs(
348348
output: &mut BufWriter<Stdout>,
349349
tab_config: &TabConfig,
350-
scol: &mut usize,
350+
mut scol: usize,
351351
col: usize,
352352
prevtab: bool,
353353
init: bool,
@@ -357,20 +357,20 @@ fn write_tabs(
357357
// We never turn a single space before a non-blank into
358358
// a tab, unless it's at the start of the line.
359359
let ai = init || amode;
360-
if (ai && !prevtab && col > *scol + 1) || (col > *scol && (init || ai && prevtab)) {
361-
while let Some(nts) = next_tabstop(tab_config, *scol) {
362-
if col < *scol + nts {
360+
if (ai && !prevtab && col > scol + 1) || (col > scol && (init || ai && prevtab)) {
361+
while let Some(nts) = next_tabstop(tab_config, scol) {
362+
if col < scol + nts {
363363
break;
364364
}
365365

366366
output.write_all(b"\t")?;
367-
*scol += nts;
367+
scol += nts;
368368
}
369369
}
370370

371-
while col > *scol {
371+
while col > scol {
372372
output.write_all(b" ")?;
373-
*scol += 1;
373+
scol += 1;
374374
}
375375
Ok(())
376376
}
@@ -424,108 +424,101 @@ fn next_char_info(uflag: bool, buf: &[u8], byte: usize) -> (CharType, usize, usi
424424
}
425425

426426
#[allow(clippy::cognitive_complexity)]
427-
#[allow(clippy::too_many_arguments)]
428427
fn unexpand_line(
429-
buf: &[u8],
428+
buf: &mut Vec<u8>,
430429
output: &mut BufWriter<Stdout>,
431430
options: &Options,
432431
lastcol: usize,
433432
tab_config: &TabConfig,
434-
col: &mut usize,
435-
scol: &mut usize,
436-
leading: &mut bool,
437433
) -> UResult<()> {
438-
// We can only fast forward if we don't need to calculate col/scol
439-
if let Some(b'\n') = buf.last() {
440-
// Fast path: if we're not converting all spaces (-a flag not set)
441-
// and the line doesn't start with spaces, just write it directly
442-
if !options.aflag && !buf.is_empty() && ((buf[0] != b' ' && buf[0] != b'\t') || !*leading) {
443-
*col += buf.len();
444-
output.write_all(buf)?;
445-
return Ok(());
446-
}
434+
// Fast path: if we're not converting all spaces (-a flag not set)
435+
// and the line doesn't start with spaces, just write it directly
436+
if !options.aflag && !buf.is_empty() && buf[0] != b' ' && buf[0] != b'\t' {
437+
output.write_all(buf)?;
438+
buf.truncate(0);
439+
return Ok(());
447440
}
448441

449442
let mut byte = 0; // offset into the buffer
443+
let mut col = 0; // the current column
444+
let mut scol = 0; // the start col for the current span, i.e., the already-printed width
445+
let mut init = true; // are we at the start of the line?
450446
let mut pctype = CharType::Other;
451447

452-
// We can only fast forward if we don't need to calculate col/scol
453-
if let Some(b'\n') = buf.last() {
454-
// Fast path for leading spaces in non-UTF8 mode: count consecutive spaces/tabs at start
455-
if !options.uflag && !options.aflag && *leading {
456-
// In default mode (not -a), we only convert leading spaces
457-
// So we can batch process them and then copy the rest
458-
while byte < buf.len() {
459-
match buf[byte] {
460-
b' ' => {
461-
*col += 1;
462-
byte += 1;
463-
}
464-
b'\t' => {
465-
*col += next_tabstop(tab_config, *col).unwrap_or(1);
466-
byte += 1;
467-
pctype = CharType::Tab;
468-
}
469-
_ => break,
448+
// Fast path for leading spaces in non-UTF8 mode: count consecutive spaces/tabs at start
449+
if !options.uflag && !options.aflag {
450+
// In default mode (not -a), we only convert leading spaces
451+
// So we can batch process them and then copy the rest
452+
while byte < buf.len() {
453+
match buf[byte] {
454+
b' ' => {
455+
col += 1;
456+
byte += 1;
470457
}
458+
b'\t' => {
459+
col += next_tabstop(tab_config, col).unwrap_or(1);
460+
byte += 1;
461+
pctype = CharType::Tab;
462+
}
463+
_ => break,
471464
}
465+
}
472466

473-
// If we found spaces/tabs, write them as tabs
474-
if byte > 0 {
475-
write_tabs(
476-
output,
477-
tab_config,
478-
scol,
479-
*col,
480-
pctype == CharType::Tab,
481-
true,
482-
true,
483-
)?;
484-
}
467+
// If we found spaces/tabs, write them as tabs
468+
if byte > 0 {
469+
write_tabs(
470+
output,
471+
tab_config,
472+
0,
473+
col,
474+
pctype == CharType::Tab,
475+
true,
476+
true,
477+
)?;
478+
}
485479

486-
// Write the rest of the line directly (no more tab conversion needed)
487-
if byte < buf.len() {
488-
*leading = false;
489-
output.write_all(&buf[byte..])?;
490-
}
491-
return Ok(());
480+
// Write the rest of the line directly (no more tab conversion needed)
481+
if byte < buf.len() {
482+
output.write_all(&buf[byte..])?;
492483
}
484+
buf.truncate(0);
485+
return Ok(());
493486
}
494487

495488
while byte < buf.len() {
496489
// when we have a finite number of columns, never convert past the last column
497-
if lastcol > 0 && *col >= lastcol {
490+
if lastcol > 0 && col >= lastcol {
498491
write_tabs(
499492
output,
500493
tab_config,
501494
scol,
502-
*col,
495+
col,
503496
pctype == CharType::Tab,
504-
*leading,
497+
init,
505498
true,
506499
)?;
507500
output.write_all(&buf[byte..])?;
508-
*scol = *col;
501+
scol = col;
509502
break;
510503
}
511504

512505
// figure out how big the next char is, if it's UTF-8
513506
let (ctype, cwidth, nbytes) = next_char_info(options.uflag, buf, byte);
514507

515508
// now figure out how many columns this char takes up, and maybe print it
516-
let tabs_buffered = *leading || options.aflag;
509+
let tabs_buffered = init || options.aflag;
517510
match ctype {
518511
CharType::Space | CharType::Tab => {
519512
// compute next col, but only write space or tab chars if not buffering
520-
*col += if ctype == CharType::Space {
513+
col += if ctype == CharType::Space {
521514
1
522515
} else {
523-
next_tabstop(tab_config, *col).unwrap_or(1)
516+
next_tabstop(tab_config, col).unwrap_or(1)
524517
};
525518

526519
if !tabs_buffered {
527520
output.write_all(&buf[byte..byte + nbytes])?;
528-
*scol = *col; // now printed up to this column
521+
scol = col; // now printed up to this column
529522
}
530523
}
531524
CharType::Other | CharType::Backspace => {
@@ -534,23 +527,23 @@ fn unexpand_line(
534527
output,
535528
tab_config,
536529
scol,
537-
*col,
530+
col,
538531
pctype == CharType::Tab,
539-
*leading,
532+
init,
540533
options.aflag,
541534
)?;
542-
*leading = false; // no longer at the start of a line
543-
*col = if ctype == CharType::Other {
535+
init = false; // no longer at the start of a line
536+
col = if ctype == CharType::Other {
544537
// use computed width
545-
*col + cwidth
546-
} else if *col > 0 {
538+
col + cwidth
539+
} else if col > 0 {
547540
// Backspace case, but only if col > 0
548-
*col - 1
541+
col - 1
549542
} else {
550543
0
551544
};
552545
output.write_all(&buf[byte..byte + nbytes])?;
553-
*scol = *col; // we've now printed up to this column
546+
scol = col; // we've now printed up to this column
554547
}
555548
}
556549

@@ -563,11 +556,12 @@ fn unexpand_line(
563556
output,
564557
tab_config,
565558
scol,
566-
*col,
559+
col,
567560
pctype == CharType::Tab,
568-
*leading,
561+
init,
569562
true,
570563
)?;
564+
buf.truncate(0); // clear out the buffer
571565

572566
Ok(())
573567
}
@@ -579,33 +573,12 @@ fn unexpand_file(
579573
lastcol: usize,
580574
tab_config: &TabConfig,
581575
) -> UResult<()> {
582-
let mut buf = [0u8; 4096];
576+
let mut buf = Vec::new();
583577
let mut input = open(file)?;
584-
let mut col = 0;
585-
let mut scol = 0;
586-
let mut leading = true;
587578
loop {
588-
match input.read(&mut buf) {
579+
match input.read_until(b'\n', &mut buf) {
589580
Ok(0) => break,
590-
Ok(n) => {
591-
for line in buf[..n].split_inclusive(|b| *b == b'\n') {
592-
unexpand_line(
593-
line,
594-
output,
595-
options,
596-
lastcol,
597-
tab_config,
598-
&mut col,
599-
&mut scol,
600-
&mut leading,
601-
)?;
602-
if let Some(b'\n') = line.last() {
603-
col = 0;
604-
scol = 0;
605-
leading = true;
606-
}
607-
}
608-
}
581+
Ok(_) => unexpand_line(&mut buf, output, options, lastcol, tab_config)?,
609582
Err(e) => return Err(e.map_err_context(|| file.maybe_quote().to_string())),
610583
}
611584
}

0 commit comments

Comments
 (0)