@@ -9,11 +9,13 @@ use std::ffi::OsString;
99use std:: fs:: File ;
1010use std:: io:: { BufRead , BufReader , Stdin , Write , stdin, stdout} ;
1111use std:: iter:: Cycle ;
12+ use std:: os:: unix:: ffi:: OsStringExt ;
1213use std:: path:: Path ;
1314use std:: rc:: Rc ;
1415use std:: slice:: Iter ;
1516use uucore:: error:: { UResult , USimpleError } ;
1617use uucore:: format_usage;
18+ use uucore:: i18n:: charmap:: mb_char_len;
1719use uucore:: line_ending:: LineEnding ;
1820use uucore:: translate;
1921
@@ -29,7 +31,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
2931 let matches = uucore:: clap_localization:: handle_clap_result ( uu_app ( ) , args) ?;
3032
3133 let serial = matches. get_flag ( options:: SERIAL ) ;
32- let delimiters = matches. get_one :: < String > ( options:: DELIMITER ) . unwrap ( ) ;
34+ let delimiters = matches. get_one :: < OsString > ( options:: DELIMITER ) . unwrap ( ) ;
3335 let files = matches
3436 . get_many :: < OsString > ( options:: FILE )
3537 . unwrap ( )
@@ -61,7 +63,8 @@ pub fn uu_app() -> Command {
6163 . help ( translate ! ( "paste-help-delimiter" ) )
6264 . value_name ( "LIST" )
6365 . default_value ( "\t " )
64- . hide_default_value ( true ) ,
66+ . hide_default_value ( true )
67+ . value_parser ( clap:: value_parser!( OsString ) ) ,
6568 )
6669 . arg (
6770 Arg :: new ( options:: FILE )
@@ -84,7 +87,7 @@ pub fn uu_app() -> Command {
8487fn paste (
8588 filenames : Vec < OsString > ,
8689 serial : bool ,
87- delimiters : & str ,
90+ delimiters : & OsString ,
8891 line_ending : LineEnding ,
8992) -> UResult < ( ) > {
9093 let unescaped_and_encoded_delimiters = parse_delimiters ( delimiters) ?;
@@ -185,65 +188,42 @@ fn paste(
185188 Ok ( ( ) )
186189}
187190
188- fn parse_delimiters ( delimiters : & str ) -> UResult < Box < [ Box < [ u8 ] > ] > > {
189- /// A single backslash char
190- const BACKSLASH : char = '\\' ;
191-
192- fn add_one_byte_single_char_delimiter ( vec : & mut Vec < Box < [ u8 ] > > , byte : u8 ) {
193- vec. push ( Box :: new ( [ byte] ) ) ;
194- }
195-
196- // a buffer of length four is large enough to encode any char
197- let mut buffer = [ 0 ; 4 ] ;
198-
199- let mut add_single_char_delimiter = |vec : & mut Vec < Box < [ u8 ] > > , ch : char | {
200- let delimiter_encoded = ch. encode_utf8 ( & mut buffer) ;
201-
202- vec. push ( Box :: < [ u8 ] > :: from ( delimiter_encoded. as_bytes ( ) ) ) ;
203- } ;
204-
205- let mut vec = Vec :: < Box < [ u8 ] > > :: with_capacity ( delimiters. len ( ) ) ;
206-
207- let mut chars = delimiters. chars ( ) ;
208-
209- // Unescape all special characters
210- while let Some ( char) = chars. next ( ) {
211- match char {
212- BACKSLASH => match chars. next ( ) {
213- // "Empty string (not a null character)"
214- // https://pubs.opengroup.org/onlinepubs/9799919799/utilities/paste.html
215- Some ( '0' ) => {
216- vec. push ( Box :: < [ u8 ; 0 ] > :: new ( [ ] ) ) ;
217- }
218- // "\\" to "\" (U+005C)
219- Some ( BACKSLASH ) => {
220- add_one_byte_single_char_delimiter ( & mut vec, b'\\' ) ;
221- }
222- // "\n" to U+000A
223- Some ( 'n' ) => {
224- add_one_byte_single_char_delimiter ( & mut vec, b'\n' ) ;
225- }
226- // "\t" to U+0009
227- Some ( 't' ) => {
228- add_one_byte_single_char_delimiter ( & mut vec, b'\t' ) ;
229- }
230- Some ( other_char) => {
231- // "If any other characters follow the <backslash>, the results are unspecified."
232- // https://pubs.opengroup.org/onlinepubs/9799919799/utilities/paste.html
233- // However, other implementations remove the backslash
234- // See "test_posix_unspecified_delimiter"
235- add_single_char_delimiter ( & mut vec, other_char) ;
236- }
237- None => {
238- return Err ( USimpleError :: new (
239- 1 ,
240- translate ! ( "paste-error-delimiter-unescaped-backslash" , "delimiters" => delimiters) ,
241- ) ) ;
191+ fn parse_delimiters ( delimiters : & OsString ) -> UResult < Box < [ Box < [ u8 ] > ] > > {
192+ let bytes = delimiters. clone ( ) . into_vec ( ) ;
193+ let mut vec = Vec :: < Box < [ u8 ] > > :: with_capacity ( bytes. len ( ) ) ;
194+ let mut i = 0 ;
195+
196+ while i < bytes. len ( ) {
197+ if bytes[ i] == b'\\' {
198+ i += 1 ;
199+ if i >= bytes. len ( ) {
200+ return Err ( USimpleError :: new (
201+ 1 ,
202+ translate ! ( "paste-error-delimiter-unescaped-backslash" , "delimiters" => delimiters. to_string_lossy( ) ) ,
203+ ) ) ;
204+ }
205+ match bytes[ i] {
206+ b'0' => vec. push ( Box :: new ( [ ] ) ) ,
207+ b'\\' => vec. push ( Box :: new ( [ b'\\' ] ) ) ,
208+ b'n' => vec. push ( Box :: new ( [ b'\n' ] ) ) ,
209+ b't' => vec. push ( Box :: new ( [ b'\t' ] ) ) ,
210+ b'b' => vec. push ( Box :: new ( [ b'\x08' ] ) ) ,
211+ b'f' => vec. push ( Box :: new ( [ b'\x0C' ] ) ) ,
212+ b'r' => vec. push ( Box :: new ( [ b'\r' ] ) ) ,
213+ b'v' => vec. push ( Box :: new ( [ b'\x0B' ] ) ) ,
214+ _ => {
215+ // Unknown escape: strip backslash, use the following character(s)
216+ let len = mb_char_len ( & bytes[ i..] ) ;
217+ vec. push ( Box :: from ( & bytes[ i..i + len] ) ) ;
218+ i += len;
219+ continue ;
242220 }
243- } ,
244- non_backslash_char => {
245- add_single_char_delimiter ( & mut vec, non_backslash_char) ;
246221 }
222+ i += 1 ;
223+ } else {
224+ let len = mb_char_len ( & bytes[ i..] ) ;
225+ vec. push ( Box :: from ( & bytes[ i..i + len] ) ) ;
226+ i += len;
247227 }
248228 }
249229
0 commit comments