Skip to content

Commit 41affdf

Browse files
committed
pr: don't convert to String when storing lines to print
1 parent de5e4f5 commit 41affdf

1 file changed

Lines changed: 22 additions & 27 deletions

File tree

src/uu/pr/src/pr.rs

Lines changed: 22 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,7 @@ struct FileLine {
9191
file_id: usize,
9292
page_number: usize,
9393
line_number: usize,
94-
line_content: String,
94+
line_content: Vec<u8>,
9595
}
9696

9797
impl FileLine {
@@ -101,29 +101,23 @@ impl FileLine {
101101
line_number: usize,
102102
buf: &[u8],
103103
options: &OutputOptions,
104-
) -> Result<Self, PrError> {
105-
// TODO Don't read bytes to String just to directly write them
106-
// out again anyway.
104+
) -> Self {
107105
let line_content = if let Some(expand_tabs) = &options.expand_tabs {
108-
// Anticipate a few expandable chars to reduce reallocations
109-
let mut line_content =
110-
String::with_capacity(buf.len() + buf.len() / 20 * expand_tabs.width as usize);
111-
// validate utf correctness
112-
let s = std::str::from_utf8(buf)?;
113-
for b in s.as_bytes() {
114-
apply_expand_tab(&mut line_content, *b, expand_tabs);
106+
let mut result = Vec::with_capacity(buf.len() + buf.len() / 20 * expand_tabs.width as usize);
107+
for b in buf {
108+
apply_expand_tab(&mut result, *b, expand_tabs);
115109
}
116-
line_content
110+
result
117111
} else {
118-
String::from_utf8(buf.to_vec())?
112+
buf.to_vec()
119113
};
120114

121-
Ok(Self {
115+
Self {
122116
file_id,
123117
page_number,
124118
line_number,
125119
line_content,
126-
})
120+
}
127121
}
128122
}
129123

@@ -858,24 +852,24 @@ fn read_to_end(path: &str) -> Result<Vec<u8>, std::io::Error> {
858852
}
859853
}
860854

861-
fn apply_expand_tab(chunk: &mut String, byte: u8, expand_options: &ExpandTabsOptions) {
855+
fn apply_expand_tab(chunk: &mut Vec<u8>, byte: u8, expand_options: &ExpandTabsOptions) {
862856
if byte == expand_options.input_char as u8 {
863857
// If the byte encountered is the input char we use width to calculate
864858
// the amount of spaces needed (if no input char given we stored '\t'
865859
// in our struct)
866860
let spaces_needed =
867861
expand_options.width as usize - (chunk.len() % expand_options.width as usize);
868-
chunk.extend(std::iter::repeat_n(' ', spaces_needed));
862+
chunk.extend(std::iter::repeat_n(b' ', spaces_needed));
869863
} else if byte == TAB as u8 {
870864
// If a byte got passed to the -e flag (eg -ea1) which is not '\t' GNU
871865
// still expands it but does not use an optionally given width parameter
872866
// but does the '\t' expansion with the default value (8)
873867
let spaces_needed = 8 - (chunk.len() % 8);
874-
chunk.extend(std::iter::repeat_n(' ', spaces_needed));
868+
chunk.extend(std::iter::repeat_n(b' ', spaces_needed));
875869
} else {
876870
// This arm means the byte is neither '\t' nor the bytes to be
877871
// expanded
878-
chunk.push(byte as char);
872+
chunk.push(byte);
879873
}
880874
}
881875

@@ -885,7 +879,7 @@ fn pr(path: &str, options: &OutputOptions) -> Result<i32, PrError> {
885879
// TODO Read incrementally.
886880
let buf = read_to_end(path)?;
887881

888-
let pages = get_pages(options, 0, &buf)?;
882+
let pages = get_pages(options, 0, &buf);
889883

890884
// Split the text into pages, and then print each line in each page.
891885
for page_with_page_number in pages {
@@ -908,7 +902,7 @@ fn get_pages(
908902
options: &OutputOptions,
909903
file_id: usize,
910904
buf: &[u8],
911-
) -> Result<Vec<(usize, Vec<FileLine>)>, PrError> {
905+
) -> Vec<(usize, Vec<FileLine>)> {
912906
let start_page = options.start_page;
913907
let end_page = options.end_page;
914908
let lines_needed_per_page = lines_to_read_for_page(options);
@@ -944,7 +938,7 @@ fn get_pages(
944938
// `\f` as its own line; instead ignore the empty line.
945939
} else {
946940
let file_line =
947-
FileLine::from_buf(file_id, page_num, line_num, &buf[prev..i], options)?;
941+
FileLine::from_buf(file_id, page_num, line_num, &buf[prev..i], options);
948942
page.push(file_line);
949943
}
950944

@@ -970,7 +964,7 @@ fn get_pages(
970964
// `\n` as its own line; instead ignore the empty line.
971965
} else {
972966
let file_line =
973-
FileLine::from_buf(file_id, page_num, line_num, &buf[prev..i], options)?;
967+
FileLine::from_buf(file_id, page_num, line_num, &buf[prev..i], options);
974968
page.push(file_line);
975969
line_num += 1;
976970
}
@@ -992,7 +986,7 @@ fn get_pages(
992986

993987
// Consider all trailing bytes as the last line.
994988
if prev < buf.len() {
995-
let file_line = FileLine::from_buf(file_id, page_num, line_num, &buf[prev..], options)?;
989+
let file_line = FileLine::from_buf(file_id, page_num, line_num, &buf[prev..], options);
996990
page.push(file_line);
997991
}
998992

@@ -1001,7 +995,7 @@ fn get_pages(
1001995
pages.push((page_num, page.clone()));
1002996
}
1003997

1004-
Ok(pages)
998+
pages
1005999
}
10061000

10071001
/// Key used to group lines together according to their file and page number.
@@ -1056,7 +1050,7 @@ fn get_file_line_groups(
10561050

10571051
// Split the text into pages and collect each line for
10581052
// subsequent grouping.
1059-
for (_, mut page) in get_pages(options, file_id, &buf)? {
1053+
for (_, mut page) in get_pages(options, file_id, &buf) {
10601054
all_lines.append(&mut page);
10611055
}
10621056
}
@@ -1291,7 +1285,8 @@ fn get_line_for_printing(
12911285
let blank_line = String::new();
12921286
let formatted_line_number = get_formatted_line_number(options, file_line.line_number, index);
12931287

1294-
let mut complete_line = format!("{formatted_line_number}{}", file_line.line_content);
1288+
let content = String::from_utf8_lossy(&file_line.line_content);
1289+
let mut complete_line = format!("{formatted_line_number}{content}");
12951290

12961291
let offset_spaces = &options.offset_spaces;
12971292

0 commit comments

Comments
 (0)