@@ -14,6 +14,7 @@ use std::rc::Rc;
1414use std:: slice:: Iter ;
1515use uucore:: error:: { UResult , USimpleError } ;
1616use uucore:: format_usage;
17+ use uucore:: i18n:: charmap:: mb_char_len;
1718use uucore:: line_ending:: LineEnding ;
1819use uucore:: translate;
1920
@@ -29,7 +30,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
2930 let matches = uucore:: clap_localization:: handle_clap_result ( uu_app ( ) , args) ?;
3031
3132 let serial = matches. get_flag ( options:: SERIAL ) ;
32- let delimiters = matches. get_one :: < String > ( options:: DELIMITER ) . unwrap ( ) ;
33+ let delimiters = matches. get_one :: < OsString > ( options:: DELIMITER ) . unwrap ( ) ;
3334 let files = matches
3435 . get_many :: < OsString > ( options:: FILE )
3536 . unwrap ( )
@@ -61,7 +62,8 @@ pub fn uu_app() -> Command {
6162 . help ( translate ! ( "paste-help-delimiter" ) )
6263 . value_name ( "LIST" )
6364 . default_value ( "\t " )
64- . hide_default_value ( true ) ,
65+ . hide_default_value ( true )
66+ . value_parser ( clap:: value_parser!( OsString ) ) ,
6567 )
6668 . arg (
6769 Arg :: new ( options:: FILE )
@@ -84,7 +86,7 @@ pub fn uu_app() -> Command {
8486fn paste (
8587 filenames : Vec < OsString > ,
8688 serial : bool ,
87- delimiters : & str ,
89+ delimiters : & OsString ,
8890 line_ending : LineEnding ,
8991) -> UResult < ( ) > {
9092 let unescaped_and_encoded_delimiters = parse_delimiters ( delimiters) ?;
@@ -185,65 +187,42 @@ fn paste(
185187 Ok ( ( ) )
186188}
187189
188- fn parse_delimiters ( delimiters : & str ) -> UResult < Box < [ Box < [ u8 ] > ] > > {
189- /// A single backslash char
190- const BACKSLASH : char = '\\' ;
191-
192- fn add_one_byte_single_char_delimiter ( vec : & mut Vec < Box < [ u8 ] > > , byte : u8 ) {
193- vec. push ( Box :: new ( [ byte] ) ) ;
194- }
195-
196- // a buffer of length four is large enough to encode any char
197- let mut buffer = [ 0 ; 4 ] ;
198-
199- let mut add_single_char_delimiter = |vec : & mut Vec < Box < [ u8 ] > > , ch : char | {
200- let delimiter_encoded = ch. encode_utf8 ( & mut buffer) ;
201-
202- vec. push ( Box :: < [ u8 ] > :: from ( delimiter_encoded. as_bytes ( ) ) ) ;
203- } ;
204-
205- let mut vec = Vec :: < Box < [ u8 ] > > :: with_capacity ( delimiters. len ( ) ) ;
206-
207- let mut chars = delimiters. chars ( ) ;
208-
209- // Unescape all special characters
210- while let Some ( char) = chars. next ( ) {
211- match char {
212- BACKSLASH => match chars. next ( ) {
213- // "Empty string (not a null character)"
214- // https://pubs.opengroup.org/onlinepubs/9799919799/utilities/paste.html
215- Some ( '0' ) => {
216- vec. push ( Box :: < [ u8 ; 0 ] > :: new ( [ ] ) ) ;
217- }
218- // "\\" to "\" (U+005C)
219- Some ( BACKSLASH ) => {
220- add_one_byte_single_char_delimiter ( & mut vec, b'\\' ) ;
221- }
222- // "\n" to U+000A
223- Some ( 'n' ) => {
224- add_one_byte_single_char_delimiter ( & mut vec, b'\n' ) ;
225- }
226- // "\t" to U+0009
227- Some ( 't' ) => {
228- add_one_byte_single_char_delimiter ( & mut vec, b'\t' ) ;
229- }
230- Some ( other_char) => {
231- // "If any other characters follow the <backslash>, the results are unspecified."
232- // https://pubs.opengroup.org/onlinepubs/9799919799/utilities/paste.html
233- // However, other implementations remove the backslash
234- // See "test_posix_unspecified_delimiter"
235- add_single_char_delimiter ( & mut vec, other_char) ;
236- }
237- None => {
238- return Err ( USimpleError :: new (
239- 1 ,
240- translate ! ( "paste-error-delimiter-unescaped-backslash" , "delimiters" => delimiters) ,
241- ) ) ;
190+ fn parse_delimiters ( delimiters : & OsString ) -> UResult < Box < [ Box < [ u8 ] > ] > > {
191+ let bytes = uucore:: os_string_to_vec ( delimiters. clone ( ) ) ?;
192+ let mut vec = Vec :: < Box < [ u8 ] > > :: with_capacity ( bytes. len ( ) ) ;
193+ let mut i = 0 ;
194+
195+ while i < bytes. len ( ) {
196+ if bytes[ i] == b'\\' {
197+ i += 1 ;
198+ if i >= bytes. len ( ) {
199+ return Err ( USimpleError :: new (
200+ 1 ,
201+ translate ! ( "paste-error-delimiter-unescaped-backslash" , "delimiters" => delimiters. to_string_lossy( ) ) ,
202+ ) ) ;
203+ }
204+ match bytes[ i] {
205+ b'0' => vec. push ( Box :: new ( [ ] ) ) ,
206+ b'\\' => vec. push ( Box :: new ( [ b'\\' ] ) ) ,
207+ b'n' => vec. push ( Box :: new ( [ b'\n' ] ) ) ,
208+ b't' => vec. push ( Box :: new ( [ b'\t' ] ) ) ,
209+ b'b' => vec. push ( Box :: new ( [ b'\x08' ] ) ) ,
210+ b'f' => vec. push ( Box :: new ( [ b'\x0C' ] ) ) ,
211+ b'r' => vec. push ( Box :: new ( [ b'\r' ] ) ) ,
212+ b'v' => vec. push ( Box :: new ( [ b'\x0B' ] ) ) ,
213+ _ => {
214+ // Unknown escape: strip backslash, use the following character(s)
215+ let len = mb_char_len ( & bytes[ i..] ) ;
216+ vec. push ( Box :: from ( & bytes[ i..i + len] ) ) ;
217+ i += len;
218+ continue ;
242219 }
243- } ,
244- non_backslash_char => {
245- add_single_char_delimiter ( & mut vec, non_backslash_char) ;
246220 }
221+ i += 1 ;
222+ } else {
223+ let len = mb_char_len ( & bytes[ i..] ) ;
224+ vec. push ( Box :: from ( & bytes[ i..i + len] ) ) ;
225+ i += len;
247226 }
248227 }
249228
0 commit comments