@@ -91,7 +91,7 @@ struct FileLine {
9191 file_id : usize ,
9292 page_number : usize ,
9393 line_number : usize ,
94- line_content : String ,
94+ line_content : Vec < u8 > ,
9595}
9696
9797impl FileLine {
@@ -101,29 +101,24 @@ impl FileLine {
101101 line_number : usize ,
102102 buf : & [ u8 ] ,
103103 options : & OutputOptions ,
104- ) -> Result < Self , PrError > {
105- // TODO Don't read bytes to String just to directly write them
106- // out again anyway.
104+ ) -> Self {
107105 let line_content = if let Some ( expand_tabs) = & options. expand_tabs {
108- // Anticipate a few expandable chars to reduce reallocations
109- let mut line_content =
110- String :: with_capacity ( buf. len ( ) + buf. len ( ) / 20 * expand_tabs. width as usize ) ;
111- // validate utf correctness
112- let s = std:: str:: from_utf8 ( buf) ?;
113- for b in s. as_bytes ( ) {
114- apply_expand_tab ( & mut line_content, * b, expand_tabs) ;
106+ let mut result =
107+ Vec :: with_capacity ( buf. len ( ) + buf. len ( ) / 20 * expand_tabs. width as usize ) ;
108+ for b in buf {
109+ apply_expand_tab ( & mut result, * b, expand_tabs) ;
115110 }
116- line_content
111+ result
117112 } else {
118- String :: from_utf8 ( buf. to_vec ( ) ) ?
113+ buf. to_vec ( )
119114 } ;
120115
121- Ok ( Self {
116+ Self {
122117 file_id,
123118 page_number,
124119 line_number,
125120 line_content,
126- } )
121+ }
127122 }
128123}
129124
@@ -858,24 +853,24 @@ fn read_to_end(path: &str) -> Result<Vec<u8>, std::io::Error> {
858853 }
859854}
860855
861- fn apply_expand_tab ( chunk : & mut String , byte : u8 , expand_options : & ExpandTabsOptions ) {
856+ fn apply_expand_tab ( chunk : & mut Vec < u8 > , byte : u8 , expand_options : & ExpandTabsOptions ) {
862857 if byte == expand_options. input_char as u8 {
863858 // If the byte encountered is the input char we use width to calculate
864859 // the amount of spaces needed (if no input char given we stored '\t'
865860 // in our struct)
866861 let spaces_needed =
867862 expand_options. width as usize - ( chunk. len ( ) % expand_options. width as usize ) ;
868- chunk. extend ( std:: iter:: repeat_n ( ' ' , spaces_needed) ) ;
863+ chunk. extend ( std:: iter:: repeat_n ( b ' ', spaces_needed) ) ;
869864 } else if byte == TAB as u8 {
870865 // If a byte got passed to the -e flag (eg -ea1) which is not '\t' GNU
871866 // still expands it but does not use an optionally given width parameter
872867 // but does the '\t' expansion with the default value (8)
873868 let spaces_needed = 8 - ( chunk. len ( ) % 8 ) ;
874- chunk. extend ( std:: iter:: repeat_n ( ' ' , spaces_needed) ) ;
869+ chunk. extend ( std:: iter:: repeat_n ( b ' ', spaces_needed) ) ;
875870 } else {
876871 // This arm means the byte is neither '\t' nor the bytes to be
877872 // expanded
878- chunk. push ( byte as char ) ;
873+ chunk. push ( byte) ;
879874 }
880875}
881876
@@ -885,7 +880,7 @@ fn pr(path: &str, options: &OutputOptions) -> Result<i32, PrError> {
885880 // TODO Read incrementally.
886881 let buf = read_to_end ( path) ?;
887882
888- let pages = get_pages ( options, 0 , & buf) ? ;
883+ let pages = get_pages ( options, 0 , & buf) ;
889884
890885 // Split the text into pages, and then print each line in each page.
891886 for page_with_page_number in pages {
@@ -901,14 +896,7 @@ fn pr(path: &str, options: &OutputOptions) -> Result<i32, PrError> {
901896///
902897/// Returns a list of the form `(page_num, lines)`.
903898///
904- /// # Errors
905- ///
906- /// Returns an error if the bytes are not a valid UTF-8 string.
907- fn get_pages (
908- options : & OutputOptions ,
909- file_id : usize ,
910- buf : & [ u8 ] ,
911- ) -> Result < Vec < ( usize , Vec < FileLine > ) > , PrError > {
899+ fn get_pages ( options : & OutputOptions , file_id : usize , buf : & [ u8 ] ) -> Vec < ( usize , Vec < FileLine > ) > {
912900 let start_page = options. start_page ;
913901 let end_page = options. end_page ;
914902 let lines_needed_per_page = lines_to_read_for_page ( options) ;
@@ -944,7 +932,7 @@ fn get_pages(
944932 // `\f` as its own line; instead ignore the empty line.
945933 } else {
946934 let file_line =
947- FileLine :: from_buf ( file_id, page_num, line_num, & buf[ prev..i] , options) ? ;
935+ FileLine :: from_buf ( file_id, page_num, line_num, & buf[ prev..i] , options) ;
948936 page. push ( file_line) ;
949937 }
950938
@@ -970,7 +958,7 @@ fn get_pages(
970958 // `\n` as its own line; instead ignore the empty line.
971959 } else {
972960 let file_line =
973- FileLine :: from_buf ( file_id, page_num, line_num, & buf[ prev..i] , options) ? ;
961+ FileLine :: from_buf ( file_id, page_num, line_num, & buf[ prev..i] , options) ;
974962 page. push ( file_line) ;
975963 line_num += 1 ;
976964 }
@@ -992,7 +980,7 @@ fn get_pages(
992980
993981 // Consider all trailing bytes as the last line.
994982 if prev < buf. len ( ) {
995- let file_line = FileLine :: from_buf ( file_id, page_num, line_num, & buf[ prev..] , options) ? ;
983+ let file_line = FileLine :: from_buf ( file_id, page_num, line_num, & buf[ prev..] , options) ;
996984 page. push ( file_line) ;
997985 }
998986
@@ -1001,7 +989,7 @@ fn get_pages(
1001989 pages. push ( ( page_num, page. clone ( ) ) ) ;
1002990 }
1003991
1004- Ok ( pages)
992+ pages
1005993}
1006994
1007995/// Key used to group lines together according to their file and page number.
@@ -1056,7 +1044,7 @@ fn get_file_line_groups(
10561044
10571045 // Split the text into pages and collect each line for
10581046 // subsequent grouping.
1059- for ( _, mut page) in get_pages ( options, file_id, & buf) ? {
1047+ for ( _, mut page) in get_pages ( options, file_id, & buf) {
10601048 all_lines. append ( & mut page) ;
10611049 }
10621050 }
@@ -1291,7 +1279,9 @@ fn get_line_for_printing(
12911279 let blank_line = String :: new ( ) ;
12921280 let formatted_line_number = get_formatted_line_number ( options, file_line. line_number , index) ;
12931281
1294- let mut complete_line = format ! ( "{formatted_line_number}{}" , file_line. line_content) ;
1282+ // TODO: support non-UTF-8 bytes (currently replaced with U+FFFD)
1283+ let content = String :: from_utf8_lossy ( & file_line. line_content ) ;
1284+ let mut complete_line = format ! ( "{formatted_line_number}{content}" ) ;
12951285
12961286 let offset_spaces = & options. offset_spaces ;
12971287
0 commit comments