diff --git a/src/uu/split/src/filenames.rs b/src/uu/split/src/filenames.rs index 83b43943bed..68dc35aa215 100644 --- a/src/uu/split/src/filenames.rs +++ b/src/uu/split/src/filenames.rs @@ -334,7 +334,7 @@ impl<'a> FilenameIterator<'a> { } impl Iterator for FilenameIterator<'_> { - type Item = String; + type Item = OsString; fn next(&mut self) -> Option { if self.first_iteration { @@ -344,12 +344,10 @@ impl Iterator for FilenameIterator<'_> { } // The first and third parts are just taken directly from the // struct parameters unchanged. - Some(format!( - "{}{}{}", - self.prefix.to_string_lossy(), - self.number, - self.additional_suffix.to_string_lossy() - )) + let mut filename = self.prefix.to_os_string(); + filename.push(self.number.to_string()); + filename.push(self.additional_suffix); + Some(filename) } } @@ -512,4 +510,29 @@ mod tests { let it = FilenameIterator::new(std::ffi::OsStr::new("chunk_"), &suffix); assert!(it.is_err()); } + #[test] + #[cfg(unix)] + fn test_filename_iterator_preserves_non_utf8_bytes() { + use std::ffi::OsStr; + use std::os::unix::ffi::{OsStrExt, OsStringExt}; + + let suffix = Suffix { + stype: SuffixType::Alphabetic, + length: 2, + start: 0, + auto_widening: false, + additional: std::ffi::OsString::from_vec(vec![0xFE]), + }; + + let mut it = FilenameIterator::new(OsStr::from_bytes(b"p\xFF"), &suffix) + .expect("valid fixed-width filename iterator"); + assert_eq!( + it.next().expect("first chunk filename exists").into_vec(), + b"p\xFFaa\xFE".to_vec() + ); + assert_eq!( + it.next().expect("second chunk filename exists").into_vec(), + b"p\xFFab\xFE".to_vec() + ); + } } diff --git a/src/uu/split/src/platform/unix.rs b/src/uu/split/src/platform/unix.rs index 656bd0109be..c9e0964cc31 100644 --- a/src/uu/split/src/platform/unix.rs +++ b/src/uu/split/src/platform/unix.rs @@ -3,11 +3,12 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. use std::env; -use std::ffi::OsStr; +use std::ffi::{OsStr, OsString}; use std::io::{BufWriter, Error, Result}; use std::io::{ErrorKind, Write}; use std::path::Path; use std::process::{Child, Command, Stdio}; +use uucore::display::Quotable; use uucore::error::USimpleError; use uucore::fs; use uucore::fs::FileInformation; @@ -45,12 +46,12 @@ struct WithEnvVarSet { /// Env var key previous_var_key: String, /// Previous value set to this key - previous_var_value: std::result::Result, + previous_var_value: Option, } impl WithEnvVarSet { /// Save previous value assigned to key, set key=value - fn new(key: &str, value: &str) -> Self { - let previous_env_value = env::var(key); + fn new(key: &str, value: &OsStr) -> Self { + let previous_env_value = env::var_os(key); unsafe { env::set_var(key, value); } @@ -64,7 +65,7 @@ impl WithEnvVarSet { impl Drop for WithEnvVarSet { /// Restore previous value now that this is being dropped by context fn drop(&mut self) { - if let Ok(ref prev_value) = self.previous_var_value { + if let Some(prev_value) = &self.previous_var_value { unsafe { env::set_var(&self.previous_var_key, prev_value); } @@ -82,7 +83,7 @@ impl FilterWriter { /// /// * `command` - The shell command to execute /// * `filepath` - Path of the output file (forwarded to command as $FILE) - fn new(command: &str, filepath: &str) -> Result { + fn new(command: &str, filepath: &OsStr) -> Result { // set $FILE, save previous value (if there was one) let _with_env_var_set = WithEnvVarSet::new("FILE", filepath); @@ -127,7 +128,7 @@ impl Drop for FilterWriter { /// Instantiate either a file writer or a "write to shell process's stdin" writer pub fn instantiate_current_writer( filter: Option<&str>, - filename: &str, + filename: &OsStr, is_new: bool, ) -> Result>> { match filter { @@ -138,24 +139,25 @@ pub fn instantiate_current_writer( .write(true) .create(true) .truncate(true) - .open(Path::new(&filename)) + .open(Path::new(filename)) .map_err(|e| match e.kind() { ErrorKind::IsADirectory => Error::other( - translate!("split-error-is-a-directory", "dir" => filename), + translate!("split-error-is-a-directory", "dir" => filename.quote()), ), _ => Error::other( - translate!("split-error-unable-to-open-file", "file" => filename), + translate!("split-error-unable-to-open-file", "file" => filename.quote()), ), })? } else { // re-open file that we previously created to append to it std::fs::OpenOptions::new() .append(true) - .open(Path::new(&filename)) + .open(Path::new(filename)) .map_err(|_| { - Error::other( - translate!("split-error-unable-to-reopen-file", "file" => filename), - ) + Error::other(translate!( + "split-error-unable-to-reopen-file", + "file" => filename.quote() + )) })? }; Ok(BufWriter::new(Box::new(file) as Box)) diff --git a/src/uu/split/src/platform/windows.rs b/src/uu/split/src/platform/windows.rs index 6693e4fe909..3efef2240b9 100644 --- a/src/uu/split/src/platform/windows.rs +++ b/src/uu/split/src/platform/windows.rs @@ -6,6 +6,7 @@ use std::ffi::OsStr; use std::io::{BufWriter, Error, Result}; use std::io::{ErrorKind, Write}; use std::path::Path; +use uucore::display::Quotable; use uucore::fs; use uucore::translate; @@ -15,7 +16,7 @@ use uucore::translate; /// a file writer pub fn instantiate_current_writer( _filter: Option<&str>, - filename: &str, + filename: &OsStr, is_new: bool, ) -> Result>> { let file = if is_new { @@ -24,22 +25,24 @@ pub fn instantiate_current_writer( .write(true) .create(true) .truncate(true) - .open(Path::new(&filename)) + .open(Path::new(filename)) .map_err(|e| match e.kind() { - ErrorKind::IsADirectory => { - Error::other(translate!("split-error-is-a-directory", "dir" => filename)) - } - _ => { - Error::other(translate!("split-error-unable-to-open-file", "file" => filename)) - } + ErrorKind::IsADirectory => Error::other( + translate!("split-error-is-a-directory", "dir" => filename.quote()), + ), + _ => Error::other( + translate!("split-error-unable-to-open-file", "file" => filename.quote()), + ), })? } else { // re-open file that we previously created to append to it std::fs::OpenOptions::new() .append(true) - .open(Path::new(&filename)) + .open(Path::new(filename)) .map_err(|_| { - Error::other(translate!("split-error-unable-to-reopen-file", "file" => filename)) + Error::other( + translate!("split-error-unable-to-reopen-file", "file" => filename.quote()), + ) })? }; Ok(BufWriter::new(Box::new(file) as Box)) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 923f66ac134..31b1ec3c9ea 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -14,7 +14,7 @@ use crate::filenames::{FilenameIterator, Suffix, SuffixError}; use crate::strategy::{NumberType, Strategy, StrategyError}; use clap::{Arg, ArgAction, ArgMatches, Command, ValueHint, parser::ValueSource}; use std::env; -use std::ffi::OsString; +use std::ffi::{OsStr, OsString}; use std::fs::{File, metadata}; use std::io; use std::io::{BufRead, BufReader, BufWriter, ErrorKind, Read, Seek, SeekFrom, Write, stdin}; @@ -540,10 +540,10 @@ impl Settings { fn instantiate_current_writer( &self, - filename: &str, + filename: &OsStr, is_new: bool, ) -> io::Result>> { - if platform::paths_refer_to_same_file(&self.input, filename.as_ref()) { + if platform::paths_refer_to_same_file(&self.input, filename) { return Err(io::Error::other( translate!("split-error-would-overwrite-input", "file" => filename.quote()), )); @@ -920,7 +920,7 @@ impl Write for LineChunkWriter<'_> { /// Output file parameters struct OutFile { - filename: String, + filename: OsString, maybe_writer: Option>>, is_new: bool, } @@ -974,7 +974,7 @@ impl ManageOutFiles for OutFiles { let maybe_writer = if is_writer_optional { None } else { - let instantiated = settings.instantiate_current_writer(filename.as_str(), true); + let instantiated = settings.instantiate_current_writer(&filename, true); // If there was an error instantiating the writer for a file, // it could be due to hitting the system limit of open files, // so record it as None and let [`get_writer`] function handle closing/re-opening @@ -1011,7 +1011,7 @@ impl ManageOutFiles for OutFiles { // might "steel" the freed fd and open a file on its side. Then it would be beneficial // if split would be able to close another fd before cancellation. 'loop1: loop { - let filename_to_open = self[idx].filename.as_str(); + let filename_to_open = &self[idx].filename; let file_to_open_is_new = self[idx].is_new; let maybe_writer = settings.instantiate_current_writer(filename_to_open, file_to_open_is_new); diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs index 73402dfa7c2..aa7c73ff9b8 100644 --- a/tests/by-util/test_split.rs +++ b/tests/by-util/test_split.rs @@ -2021,61 +2021,74 @@ fn test_split_non_utf8_paths() { #[test] #[cfg(target_os = "linux")] -fn test_split_non_utf8_prefix() { - use std::os::unix::ffi::OsStrExt; +fn test_split_non_utf8_prefix_is_byte_preserving() { + use std::ffi::OsStr; + use std::os::unix::ffi::{OsStrExt, OsStringExt}; + let (at, mut ucmd) = at_and_ucmd!(); + at.write("input.txt", "AB"); - at.write("input.txt", "line1\nline2\nline3\nline4\n"); + let invalid_prefix_bytes = b"p\xFF"; + at.write("p�aa", "keep-aa"); + at.write("p�ab", "keep-ab"); - let prefix = std::ffi::OsStr::from_bytes(b"\xFF\xFE"); - ucmd.arg("input.txt").arg(prefix).succeeds(); + ucmd.args(&["-b", "1", "input.txt"]) + .arg(OsStr::from_bytes(invalid_prefix_bytes)) + .succeeds(); - // Check that split files were created (functionality works) - // The actual filename may be converted due to lossy conversion, but the command should succeed - let entries: Vec<_> = fs::read_dir(at.as_string()).unwrap().collect(); - let split_files = entries - .iter() - .filter_map(|e| e.as_ref().ok()) - .filter(|entry| { - let name = entry.file_name(); - let name_str = name.to_string_lossy(); - name_str.starts_with("�") || name_str.len() > 2 // split files should exist + let mut produced_split_files: Vec> = fs::read_dir(at.as_string()) + .expect("temporary split directory should be readable") + .filter_map(|entry| { + let entry = entry.ok()?; + let name = entry.file_name().into_vec(); + if name.starts_with(invalid_prefix_bytes) { + Some(name) + } else { + None + } }) - .count(); - assert!( - split_files > 0, - "Expected at least one split file to be created" + .collect(); + produced_split_files.sort(); + + assert_eq!( + produced_split_files, + vec![b"p\xFFaa".to_vec(), b"p\xFFab".to_vec()] ); + assert_eq!(at.read("p�aa"), "keep-aa"); + assert_eq!(at.read("p�ab"), "keep-ab"); } #[test] #[cfg(target_os = "linux")] -fn test_split_non_utf8_additional_suffix() { - use std::os::unix::ffi::OsStrExt; - let (at, mut ucmd) = at_and_ucmd!(); +fn test_split_non_utf8_additional_suffix_is_byte_preserving() { + use std::ffi::OsStr; + use std::os::unix::ffi::{OsStrExt, OsStringExt}; - at.write("input.txt", "line1\nline2\nline3\nline4\n"); + let (at, mut ucmd) = at_and_ucmd!(); + at.write("input.txt", "AB"); - let suffix = std::ffi::OsStr::from_bytes(b"\xFF\xFE"); - ucmd.args(&["input.txt", "--additional-suffix"]) - .arg(suffix) + let suffix_bytes = b"\xFF\xFE"; + ucmd.args(&["-b", "1", "input.txt", "--additional-suffix"]) + .arg(OsStr::from_bytes(suffix_bytes)) .succeeds(); - // Check that split files were created (functionality works) - // The actual filename may be converted due to lossy conversion, but the command should succeed - let entries: Vec<_> = fs::read_dir(at.as_string()).unwrap().collect(); - let split_files = entries - .iter() - .filter_map(|e| e.as_ref().ok()) - .filter(|entry| { - let name = entry.file_name(); - let name_str = name.to_string_lossy(); - name_str.ends_with("�") || name_str.starts_with('x') // split files should exist + let mut produced_with_suffix: Vec> = fs::read_dir(at.as_string()) + .expect("temporary split directory should be readable") + .filter_map(|entry| { + let entry = entry.ok()?; + let name = entry.file_name().into_vec(); + if name.starts_with(b"xa") && name.ends_with(suffix_bytes) { + Some(name) + } else { + None + } }) - .count(); - assert!( - split_files > 0, - "Expected at least one split file to be created" + .collect(); + produced_with_suffix.sort(); + + assert_eq!( + produced_with_suffix, + vec![b"xaa\xFF\xFE".to_vec(), b"xab\xFF\xFE".to_vec()] ); } @@ -2089,5 +2102,5 @@ fn test_split_directory_already_exists() { ucmd.args(&["file"]) .fails_with_code(1) .no_stdout() - .stderr_is("split: xaa: Is a directory\n"); + .stderr_is("split: 'xaa': Is a directory\n"); }