Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 30 additions & 7 deletions src/uu/split/src/filenames.rs
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ impl<'a> FilenameIterator<'a> {
}

impl Iterator for FilenameIterator<'_> {
type Item = String;
type Item = OsString;

fn next(&mut self) -> Option<Self::Item> {
if self.first_iteration {
Expand All @@ -344,12 +344,10 @@ impl Iterator for FilenameIterator<'_> {
}
// The first and third parts are just taken directly from the
// struct parameters unchanged.
Some(format!(
"{}{}{}",
self.prefix.to_string_lossy(),
self.number,
self.additional_suffix.to_string_lossy()
))
let mut filename = self.prefix.to_os_string();
filename.push(self.number.to_string());
filename.push(self.additional_suffix);
Some(filename)
}
}

Expand Down Expand Up @@ -512,4 +510,29 @@ mod tests {
let it = FilenameIterator::new(std::ffi::OsStr::new("chunk_"), &suffix);
assert!(it.is_err());
}
#[test]
#[cfg(unix)]
fn test_filename_iterator_preserves_non_utf8_bytes() {
use std::ffi::OsStr;
use std::os::unix::ffi::{OsStrExt, OsStringExt};

let suffix = Suffix {
stype: SuffixType::Alphabetic,
length: 2,
start: 0,
auto_widening: false,
additional: std::ffi::OsString::from_vec(vec![0xFE]),
};

let mut it = FilenameIterator::new(OsStr::from_bytes(b"p\xFF"), &suffix)
.expect("valid fixed-width filename iterator");
assert_eq!(
it.next().expect("first chunk filename exists").into_vec(),
b"p\xFFaa\xFE".to_vec()
);
assert_eq!(
it.next().expect("second chunk filename exists").into_vec(),
b"p\xFFab\xFE".to_vec()
);
}
}
30 changes: 16 additions & 14 deletions src/uu/split/src/platform/unix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
use std::env;
use std::ffi::OsStr;
use std::ffi::{OsStr, OsString};
use std::io::{BufWriter, Error, Result};
use std::io::{ErrorKind, Write};
use std::path::Path;
use std::process::{Child, Command, Stdio};
use uucore::display::Quotable;
use uucore::error::USimpleError;
use uucore::fs;
use uucore::fs::FileInformation;
Expand Down Expand Up @@ -45,12 +46,12 @@ struct WithEnvVarSet {
/// Env var key
previous_var_key: String,
/// Previous value set to this key
previous_var_value: std::result::Result<String, env::VarError>,
previous_var_value: Option<OsString>,
}
impl WithEnvVarSet {
/// Save previous value assigned to key, set key=value
fn new(key: &str, value: &str) -> Self {
let previous_env_value = env::var(key);
fn new(key: &str, value: &OsStr) -> Self {
let previous_env_value = env::var_os(key);
unsafe {
env::set_var(key, value);
}
Expand All @@ -64,7 +65,7 @@ impl WithEnvVarSet {
impl Drop for WithEnvVarSet {
/// Restore previous value now that this is being dropped by context
fn drop(&mut self) {
if let Ok(ref prev_value) = self.previous_var_value {
if let Some(prev_value) = &self.previous_var_value {
unsafe {
env::set_var(&self.previous_var_key, prev_value);
}
Expand All @@ -82,7 +83,7 @@ impl FilterWriter {
///
/// * `command` - The shell command to execute
/// * `filepath` - Path of the output file (forwarded to command as $FILE)
fn new(command: &str, filepath: &str) -> Result<Self> {
fn new(command: &str, filepath: &OsStr) -> Result<Self> {
// set $FILE, save previous value (if there was one)
let _with_env_var_set = WithEnvVarSet::new("FILE", filepath);

Expand Down Expand Up @@ -127,7 +128,7 @@ impl Drop for FilterWriter {
/// Instantiate either a file writer or a "write to shell process's stdin" writer
pub fn instantiate_current_writer(
filter: Option<&str>,
filename: &str,
filename: &OsStr,
is_new: bool,
) -> Result<BufWriter<Box<dyn Write>>> {
match filter {
Expand All @@ -138,24 +139,25 @@ pub fn instantiate_current_writer(
.write(true)
.create(true)
.truncate(true)
.open(Path::new(&filename))
.open(Path::new(filename))
.map_err(|e| match e.kind() {
ErrorKind::IsADirectory => Error::other(
translate!("split-error-is-a-directory", "dir" => filename),
translate!("split-error-is-a-directory", "dir" => filename.quote()),
),
_ => Error::other(
translate!("split-error-unable-to-open-file", "file" => filename),
translate!("split-error-unable-to-open-file", "file" => filename.quote()),
),
})?
} else {
// re-open file that we previously created to append to it
std::fs::OpenOptions::new()
.append(true)
.open(Path::new(&filename))
.open(Path::new(filename))
.map_err(|_| {
Error::other(
translate!("split-error-unable-to-reopen-file", "file" => filename),
)
Error::other(translate!(
"split-error-unable-to-reopen-file",
"file" => filename.quote()
))
})?
};
Ok(BufWriter::new(Box::new(file) as Box<dyn Write>))
Expand Down
23 changes: 13 additions & 10 deletions src/uu/split/src/platform/windows.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use std::ffi::OsStr;
use std::io::{BufWriter, Error, Result};
use std::io::{ErrorKind, Write};
use std::path::Path;
use uucore::display::Quotable;
use uucore::fs;
use uucore::translate;

Expand All @@ -15,7 +16,7 @@ use uucore::translate;
/// a file writer
pub fn instantiate_current_writer(
_filter: Option<&str>,
filename: &str,
filename: &OsStr,
is_new: bool,
) -> Result<BufWriter<Box<dyn Write>>> {
let file = if is_new {
Expand All @@ -24,22 +25,24 @@ pub fn instantiate_current_writer(
.write(true)
.create(true)
.truncate(true)
.open(Path::new(&filename))
.open(Path::new(filename))
.map_err(|e| match e.kind() {
ErrorKind::IsADirectory => {
Error::other(translate!("split-error-is-a-directory", "dir" => filename))
}
_ => {
Error::other(translate!("split-error-unable-to-open-file", "file" => filename))
}
ErrorKind::IsADirectory => Error::other(
translate!("split-error-is-a-directory", "dir" => filename.quote()),
),
_ => Error::other(
translate!("split-error-unable-to-open-file", "file" => filename.quote()),
),
})?
} else {
// re-open file that we previously created to append to it
std::fs::OpenOptions::new()
.append(true)
.open(Path::new(&filename))
.open(Path::new(filename))
.map_err(|_| {
Error::other(translate!("split-error-unable-to-reopen-file", "file" => filename))
Error::other(
translate!("split-error-unable-to-reopen-file", "file" => filename.quote()),
)
})?
};
Ok(BufWriter::new(Box::new(file) as Box<dyn Write>))
Expand Down
12 changes: 6 additions & 6 deletions src/uu/split/src/split.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use crate::filenames::{FilenameIterator, Suffix, SuffixError};
use crate::strategy::{NumberType, Strategy, StrategyError};
use clap::{Arg, ArgAction, ArgMatches, Command, ValueHint, parser::ValueSource};
use std::env;
use std::ffi::OsString;
use std::ffi::{OsStr, OsString};
use std::fs::{File, metadata};
use std::io;
use std::io::{BufRead, BufReader, BufWriter, ErrorKind, Read, Seek, SeekFrom, Write, stdin};
Expand Down Expand Up @@ -540,10 +540,10 @@ impl Settings {

fn instantiate_current_writer(
&self,
filename: &str,
filename: &OsStr,
is_new: bool,
) -> io::Result<BufWriter<Box<dyn Write>>> {
if platform::paths_refer_to_same_file(&self.input, filename.as_ref()) {
if platform::paths_refer_to_same_file(&self.input, filename) {
return Err(io::Error::other(
translate!("split-error-would-overwrite-input", "file" => filename.quote()),
));
Expand Down Expand Up @@ -920,7 +920,7 @@ impl Write for LineChunkWriter<'_> {

/// Output file parameters
struct OutFile {
filename: String,
filename: OsString,
maybe_writer: Option<BufWriter<Box<dyn Write>>>,
is_new: bool,
}
Expand Down Expand Up @@ -974,7 +974,7 @@ impl ManageOutFiles for OutFiles {
let maybe_writer = if is_writer_optional {
None
} else {
let instantiated = settings.instantiate_current_writer(filename.as_str(), true);
let instantiated = settings.instantiate_current_writer(&filename, true);
// If there was an error instantiating the writer for a file,
// it could be due to hitting the system limit of open files,
// so record it as None and let [`get_writer`] function handle closing/re-opening
Expand Down Expand Up @@ -1011,7 +1011,7 @@ impl ManageOutFiles for OutFiles {
// might "steel" the freed fd and open a file on its side. Then it would be beneficial
// if split would be able to close another fd before cancellation.
'loop1: loop {
let filename_to_open = self[idx].filename.as_str();
let filename_to_open = &self[idx].filename;
let file_to_open_is_new = self[idx].is_new;
let maybe_writer =
settings.instantiate_current_writer(filename_to_open, file_to_open_is_new);
Expand Down
95 changes: 54 additions & 41 deletions tests/by-util/test_split.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2021,61 +2021,74 @@ fn test_split_non_utf8_paths() {

#[test]
#[cfg(target_os = "linux")]
fn test_split_non_utf8_prefix() {
use std::os::unix::ffi::OsStrExt;
fn test_split_non_utf8_prefix_is_byte_preserving() {
use std::ffi::OsStr;
use std::os::unix::ffi::{OsStrExt, OsStringExt};

let (at, mut ucmd) = at_and_ucmd!();
at.write("input.txt", "AB");

at.write("input.txt", "line1\nline2\nline3\nline4\n");
let invalid_prefix_bytes = b"p\xFF";
at.write("p�aa", "keep-aa");
at.write("p�ab", "keep-ab");

let prefix = std::ffi::OsStr::from_bytes(b"\xFF\xFE");
ucmd.arg("input.txt").arg(prefix).succeeds();
ucmd.args(&["-b", "1", "input.txt"])
.arg(OsStr::from_bytes(invalid_prefix_bytes))
.succeeds();

// Check that split files were created (functionality works)
// The actual filename may be converted due to lossy conversion, but the command should succeed
let entries: Vec<_> = fs::read_dir(at.as_string()).unwrap().collect();
let split_files = entries
.iter()
.filter_map(|e| e.as_ref().ok())
.filter(|entry| {
let name = entry.file_name();
let name_str = name.to_string_lossy();
name_str.starts_with("�") || name_str.len() > 2 // split files should exist
let mut produced_split_files: Vec<Vec<u8>> = fs::read_dir(at.as_string())
.expect("temporary split directory should be readable")
.filter_map(|entry| {
let entry = entry.ok()?;
let name = entry.file_name().into_vec();
if name.starts_with(invalid_prefix_bytes) {
Some(name)
} else {
None
}
})
.count();
assert!(
split_files > 0,
"Expected at least one split file to be created"
.collect();
produced_split_files.sort();

assert_eq!(
produced_split_files,
vec![b"p\xFFaa".to_vec(), b"p\xFFab".to_vec()]
);
assert_eq!(at.read("p�aa"), "keep-aa");
assert_eq!(at.read("p�ab"), "keep-ab");
}

#[test]
#[cfg(target_os = "linux")]
fn test_split_non_utf8_additional_suffix() {
use std::os::unix::ffi::OsStrExt;
let (at, mut ucmd) = at_and_ucmd!();
fn test_split_non_utf8_additional_suffix_is_byte_preserving() {
use std::ffi::OsStr;
use std::os::unix::ffi::{OsStrExt, OsStringExt};

at.write("input.txt", "line1\nline2\nline3\nline4\n");
let (at, mut ucmd) = at_and_ucmd!();
at.write("input.txt", "AB");

let suffix = std::ffi::OsStr::from_bytes(b"\xFF\xFE");
ucmd.args(&["input.txt", "--additional-suffix"])
.arg(suffix)
let suffix_bytes = b"\xFF\xFE";
ucmd.args(&["-b", "1", "input.txt", "--additional-suffix"])
.arg(OsStr::from_bytes(suffix_bytes))
.succeeds();

// Check that split files were created (functionality works)
// The actual filename may be converted due to lossy conversion, but the command should succeed
let entries: Vec<_> = fs::read_dir(at.as_string()).unwrap().collect();
let split_files = entries
.iter()
.filter_map(|e| e.as_ref().ok())
.filter(|entry| {
let name = entry.file_name();
let name_str = name.to_string_lossy();
name_str.ends_with("�") || name_str.starts_with('x') // split files should exist
let mut produced_with_suffix: Vec<Vec<u8>> = fs::read_dir(at.as_string())
.expect("temporary split directory should be readable")
.filter_map(|entry| {
let entry = entry.ok()?;
let name = entry.file_name().into_vec();
if name.starts_with(b"xa") && name.ends_with(suffix_bytes) {
Some(name)
} else {
None
}
})
.count();
assert!(
split_files > 0,
"Expected at least one split file to be created"
.collect();
produced_with_suffix.sort();

assert_eq!(
produced_with_suffix,
vec![b"xaa\xFF\xFE".to_vec(), b"xab\xFF\xFE".to_vec()]
);
}

Expand All @@ -2089,5 +2102,5 @@ fn test_split_directory_already_exists() {
ucmd.args(&["file"])
.fails_with_code(1)
.no_stdout()
.stderr_is("split: xaa: Is a directory\n");
.stderr_is("split: 'xaa': Is a directory\n");
}
Loading