Skip to content

Commit bc58d3d

Browse files
authored
pr: don't convert to String when storing lines to print (#11327)
* pr: don't convert to String when storing lines to print
* pr: apply cargo fmt
* pr: add todo for non-UTF-8 support
* pr: cargo fmt fix
1 parent 309efd8 commit bc58d3d

1 file changed

Lines changed: 24 additions & 34 deletions

File tree

src/uu/pr/src/pr.rs

Lines changed: 24 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,7 @@ struct FileLine {
9191
file_id: usize,
9292
page_number: usize,
9393
line_number: usize,
94-
line_content: String,
94+
line_content: Vec<u8>,
9595
}
9696

9797
impl FileLine {
@@ -101,29 +101,24 @@ impl FileLine {
101101
line_number: usize,
102102
buf: &[u8],
103103
options: &OutputOptions,
104-
) -> Result<Self, PrError> {
105-
// TODO Don't read bytes to String just to directly write them
106-
// out again anyway.
104+
) -> Self {
107105
let line_content = if let Some(expand_tabs) = &options.expand_tabs {
108-
// Anticipate a few expandable chars to reduce reallocations
109-
let mut line_content =
110-
String::with_capacity(buf.len() + buf.len() / 20 * expand_tabs.width as usize);
111-
// validate utf correctness
112-
let s = std::str::from_utf8(buf)?;
113-
for b in s.as_bytes() {
114-
apply_expand_tab(&mut line_content, *b, expand_tabs);
106+
let mut result =
107+
Vec::with_capacity(buf.len() + buf.len() / 20 * expand_tabs.width as usize);
108+
for b in buf {
109+
apply_expand_tab(&mut result, *b, expand_tabs);
115110
}
116-
line_content
111+
result
117112
} else {
118-
String::from_utf8(buf.to_vec())?
113+
buf.to_vec()
119114
};
120115

121-
Ok(Self {
116+
Self {
122117
file_id,
123118
page_number,
124119
line_number,
125120
line_content,
126-
})
121+
}
127122
}
128123
}
129124

@@ -858,24 +853,24 @@ fn read_to_end(path: &str) -> Result<Vec<u8>, std::io::Error> {
858853
}
859854
}
860855

861-
fn apply_expand_tab(chunk: &mut String, byte: u8, expand_options: &ExpandTabsOptions) {
856+
fn apply_expand_tab(chunk: &mut Vec<u8>, byte: u8, expand_options: &ExpandTabsOptions) {
862857
if byte == expand_options.input_char as u8 {
863858
// If the byte encountered is the input char we use width to calculate
864859
// the amount of spaces needed (if no input char given we stored '\t'
865860
// in our struct)
866861
let spaces_needed =
867862
expand_options.width as usize - (chunk.len() % expand_options.width as usize);
868-
chunk.extend(std::iter::repeat_n(' ', spaces_needed));
863+
chunk.extend(std::iter::repeat_n(b' ', spaces_needed));
869864
} else if byte == TAB as u8 {
870865
// If a byte got passed to the -e flag (eg -ea1) which is not '\t' GNU
871866
// still expands it but does not use an optionally given width parameter
872867
// but does the '\t' expansion with the default value (8)
873868
let spaces_needed = 8 - (chunk.len() % 8);
874-
chunk.extend(std::iter::repeat_n(' ', spaces_needed));
869+
chunk.extend(std::iter::repeat_n(b' ', spaces_needed));
875870
} else {
876871
// This arm means the byte is neither '\t' nor the bytes to be
877872
// expanded
878-
chunk.push(byte as char);
873+
chunk.push(byte);
879874
}
880875
}
881876

@@ -885,7 +880,7 @@ fn pr(path: &str, options: &OutputOptions) -> Result<i32, PrError> {
885880
// TODO Read incrementally.
886881
let buf = read_to_end(path)?;
887882

888-
let pages = get_pages(options, 0, &buf)?;
883+
let pages = get_pages(options, 0, &buf);
889884

890885
// Split the text into pages, and then print each line in each page.
891886
for page_with_page_number in pages {
@@ -901,14 +896,7 @@ fn pr(path: &str, options: &OutputOptions) -> Result<i32, PrError> {
901896
///
902897
/// Returns a list of the form `(page_num, lines)`.
903898
///
904-
/// # Errors
905-
///
906-
/// Returns an error if the bytes are not a valid UTF-8 string.
907-
fn get_pages(
908-
options: &OutputOptions,
909-
file_id: usize,
910-
buf: &[u8],
911-
) -> Result<Vec<(usize, Vec<FileLine>)>, PrError> {
899+
fn get_pages(options: &OutputOptions, file_id: usize, buf: &[u8]) -> Vec<(usize, Vec<FileLine>)> {
912900
let start_page = options.start_page;
913901
let end_page = options.end_page;
914902
let lines_needed_per_page = lines_to_read_for_page(options);
@@ -944,7 +932,7 @@ fn get_pages(
944932
// `\f` as its own line; instead ignore the empty line.
945933
} else {
946934
let file_line =
947-
FileLine::from_buf(file_id, page_num, line_num, &buf[prev..i], options)?;
935+
FileLine::from_buf(file_id, page_num, line_num, &buf[prev..i], options);
948936
page.push(file_line);
949937
}
950938

@@ -970,7 +958,7 @@ fn get_pages(
970958
// `\n` as its own line; instead ignore the empty line.
971959
} else {
972960
let file_line =
973-
FileLine::from_buf(file_id, page_num, line_num, &buf[prev..i], options)?;
961+
FileLine::from_buf(file_id, page_num, line_num, &buf[prev..i], options);
974962
page.push(file_line);
975963
line_num += 1;
976964
}
@@ -992,7 +980,7 @@ fn get_pages(
992980

993981
// Consider all trailing bytes as the last line.
994982
if prev < buf.len() {
995-
let file_line = FileLine::from_buf(file_id, page_num, line_num, &buf[prev..], options)?;
983+
let file_line = FileLine::from_buf(file_id, page_num, line_num, &buf[prev..], options);
996984
page.push(file_line);
997985
}
998986

@@ -1001,7 +989,7 @@ fn get_pages(
1001989
pages.push((page_num, page.clone()));
1002990
}
1003991

1004-
Ok(pages)
992+
pages
1005993
}
1006994

1007995
/// Key used to group lines together according to their file and page number.
@@ -1056,7 +1044,7 @@ fn get_file_line_groups(
10561044

10571045
// Split the text into pages and collect each line for
10581046
// subsequent grouping.
1059-
for (_, mut page) in get_pages(options, file_id, &buf)? {
1047+
for (_, mut page) in get_pages(options, file_id, &buf) {
10601048
all_lines.append(&mut page);
10611049
}
10621050
}
@@ -1291,7 +1279,9 @@ fn get_line_for_printing(
12911279
let blank_line = String::new();
12921280
let formatted_line_number = get_formatted_line_number(options, file_line.line_number, index);
12931281

1294-
let mut complete_line = format!("{formatted_line_number}{}", file_line.line_content);
1282+
// TODO: support non-UTF-8 bytes (currently replaced with U+FFFD)
1283+
let content = String::from_utf8_lossy(&file_line.line_content);
1284+
let mut complete_line = format!("{formatted_line_number}{content}");
12951285

12961286
let offset_spaces = &options.offset_spaces;
12971287

0 commit comments

Comments
 (0)