Skip to content

Commit 17757b4

Browse files
committed
💥 handle CR/CRLF in -T/--files-from
1 parent 176cb4f commit 17757b4

5 files changed

Lines changed: 219 additions & 7 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cli/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ itertools = "0.14.0"
3333
jiff = "0.2.18"
3434
libc = "0.2.180"
3535
log = "0.4.29"
36+
memchr = "2.7.4"
3637
memmap2 = { version = "0.9.9", optional = true }
3738
nom = "8.0.0"
3839
parse_datetime = "0.13.3"

cli/src/ext/read_buf.rs

Lines changed: 143 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
1+
use memchr::memchr2;
12
use std::io::{self, BufRead};
23

4+
/// Converts a byte vector to a UTF-8 String, mapping encoding errors to `io::Error`.
5+
#[inline]
6+
fn bytes_to_string(bytes: Vec<u8>) -> io::Result<String> {
7+
String::from_utf8(bytes).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
8+
}
9+
310
#[derive(Clone, Debug)]
411
pub(crate) struct Delimited<'d, B> {
512
buf: B,
@@ -45,16 +52,82 @@ impl<'d, B: BufRead> Iterator for DelimitedString<'d, B> {
4552

4653
#[inline]
4754
fn next(&mut self) -> Option<Self::Item> {
48-
match self.inner.next()? {
49-
Err(e) => Some(Err(e)),
50-
Ok(bytes) => match String::from_utf8(bytes) {
51-
Ok(s) => Some(Ok(s)),
52-
Err(err) => Some(Err(io::Error::new(io::ErrorKind::InvalidData, err))),
53-
},
55+
Some(self.inner.next()?.and_then(bytes_to_string))
56+
}
57+
}
58+
59+
#[derive(Clone, Debug)]
60+
pub(crate) struct LineBreakDelimited<B> {
61+
buf: B,
62+
pending: Vec<u8>,
63+
done: bool,
64+
}
65+
66+
impl<B: BufRead> LineBreakDelimited<B> {
67+
#[inline]
68+
fn new(buf: B) -> Self {
69+
Self {
70+
buf,
71+
pending: Vec::new(),
72+
done: false,
73+
}
74+
}
75+
}
76+
77+
impl<B: BufRead> Iterator for LineBreakDelimited<B> {
78+
type Item = io::Result<Vec<u8>>;
79+
80+
#[inline]
81+
fn next(&mut self) -> Option<Self::Item> {
82+
if self.done {
83+
return None;
84+
}
85+
loop {
86+
let buf = match self.buf.fill_buf() {
87+
Ok(buf) => buf,
88+
Err(err) => return Some(Err(err)),
89+
};
90+
91+
if buf.is_empty() {
92+
self.done = true;
93+
return if self.pending.is_empty() {
94+
None
95+
} else {
96+
Some(Ok(std::mem::take(&mut self.pending)))
97+
};
98+
}
99+
100+
match memchr2(b'\n', b'\r', buf) {
101+
Some(idx) => {
102+
self.pending.extend_from_slice(&buf[..idx]);
103+
self.buf.consume(idx + 1);
104+
return Some(Ok(std::mem::take(&mut self.pending)));
105+
}
106+
None => {
107+
let len = buf.len();
108+
self.pending.extend_from_slice(buf);
109+
self.buf.consume(len);
110+
}
111+
}
54112
}
55113
}
56114
}
57115

116+
/// Adapter that wraps the line-break iterator and yields UTF-8 Strings.
117+
#[derive(Debug)]
118+
pub(crate) struct LineBreakDelimitedString<B> {
119+
inner: LineBreakDelimited<B>,
120+
}
121+
122+
impl<B: BufRead> Iterator for LineBreakDelimitedString<B> {
123+
type Item = io::Result<String>;
124+
125+
#[inline]
126+
fn next(&mut self) -> Option<Self::Item> {
127+
Some(self.inner.next()?.and_then(bytes_to_string))
128+
}
129+
}
130+
58131
/// Extension trait to add same utility methods to [`BufRead`].
59132
pub(crate) trait BufReadExt {
60133
/// Splits the reader by a delimiter, yielding Vec<u8>.
@@ -79,6 +152,21 @@ pub(crate) trait BufReadExt {
79152
inner: self.delimit_by(delimiter.as_bytes()),
80153
}
81154
}
155+
156+
/// Splits lines on CR, CRLF, or LF, yielding Strings.
157+
///
158+
/// Both `\r` and `\n` are treated as individual line terminators.
159+
/// This means `\r\n` (CRLF) will yield an empty string between the CR and LF.
160+
/// Use `.filter()` to remove empty lines if this is not desired.
161+
#[inline]
162+
fn split_lines(self) -> LineBreakDelimitedString<Self>
163+
where
164+
Self: Sized + BufRead,
165+
{
166+
LineBreakDelimitedString {
167+
inner: LineBreakDelimited::new(self),
168+
}
169+
}
82170
}
83171

84172
impl<B: BufRead> BufReadExt for B {}
@@ -162,4 +250,53 @@ mod tests {
162250
assert_eq!(delimited.next().unwrap().unwrap(), "a||");
163251
assert_eq!(delimited.next().unwrap().unwrap(), "|b|");
164252
}
253+
254+
#[test]
255+
fn split_lines_splits_mixed_endings() {
256+
let input = b"f\rd1/f1\r\nd1/d2/f4\nd1/d2/f6";
257+
let got = BufReader::new(&input[..])
258+
.split_lines()
259+
.collect::<io::Result<Vec<_>>>()
260+
.unwrap();
261+
assert_eq!(got, vec!["f", "d1/f1", "", "d1/d2/f4", "d1/d2/f6"]);
262+
}
263+
264+
#[test]
265+
fn split_lines_splits_crlf() {
266+
let input = b"\r\n";
267+
let got = BufReader::new(&input[..])
268+
.split_lines()
269+
.collect::<io::Result<Vec<_>>>()
270+
.unwrap();
271+
assert_eq!(got, vec!["", ""]);
272+
}
273+
274+
#[test]
275+
fn split_lines_empty_input() {
276+
let input = b"";
277+
let got = BufReader::new(&input[..])
278+
.split_lines()
279+
.collect::<io::Result<Vec<_>>>()
280+
.unwrap();
281+
assert!(got.is_empty());
282+
}
283+
284+
#[test]
285+
fn split_lines_invalid_utf8_returns_error() {
286+
let input = b"valid\n\xff\xfe\n";
287+
let mut iter = BufReader::new(&input[..]).split_lines();
288+
assert_eq!(iter.next().unwrap().unwrap(), "valid");
289+
let err = iter.next().unwrap().unwrap_err();
290+
assert_eq!(err.kind(), io::ErrorKind::InvalidData);
291+
}
292+
293+
#[test]
294+
fn split_lines_consecutive_delimiters() {
295+
let input = b"\n\n\n";
296+
let got = BufReader::new(&input[..])
297+
.split_lines()
298+
.collect::<io::Result<Vec<_>>>()
299+
.unwrap();
300+
assert_eq!(got, vec!["", "", ""]);
301+
}
165302
}

cli/src/utils/io.rs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,10 @@ pub(crate) fn is_pna<R: io::Read>(mut reader: R) -> io::Result<bool> {
99

1010
#[inline]
1111
pub(crate) fn read_to_lines<R: io::BufRead>(reader: R) -> io::Result<Vec<String>> {
12-
reader.lines().collect()
12+
reader
13+
.split_lines()
14+
.filter(|line| !line.as_ref().is_ok_and(|s| s.is_empty()))
15+
.collect()
1316
}
1417

1518
/// Reads a reader and splits its contents on null characters ('\0'), returning a Vec<String>.
@@ -75,4 +78,18 @@ mod tests {
7578
let got = read_to_nul(io::BufReader::new(&input[..])).unwrap();
7679
assert_eq!(got, vec!["d1/d2/f3", "d1/d2/f5"]);
7780
}
81+
82+
#[test]
83+
fn read_to_lines_supports_cr_crlf_lf() {
84+
let input = b"f\rd1/f1\r\nd1/d2/f4\nd1/d2/f6";
85+
let got = read_to_lines(io::BufReader::new(&input[..])).unwrap();
86+
assert_eq!(got, vec!["f", "d1/f1", "d1/d2/f4", "d1/d2/f6"]);
87+
}
88+
89+
#[test]
90+
fn read_to_lines_ignores_empty_lines() {
91+
let input = b"\n\r\n";
92+
let got = read_to_lines(io::BufReader::new(&input[..])).unwrap();
93+
assert!(got.is_empty());
94+
}
7895
}

cli/tests/cli/create/files_from.rs

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,59 @@ fn create_with_files_from() {
7272
)
7373
.unwrap();
7474
}
75+
76+
#[test]
77+
fn create_with_files_from_crlf() {
78+
setup();
79+
TestResources::extract_in("raw/", "create_with_files_from_crlf/src/").unwrap();
80+
81+
// Use CRLF line endings (Windows-style)
82+
let list_path = "create_with_files_from_crlf/files.txt";
83+
fs::write(
84+
list_path,
85+
"create_with_files_from_crlf/src/raw/empty.txt\r\ncreate_with_files_from_crlf/src/raw/text.txt\r\n",
86+
)
87+
.unwrap();
88+
89+
cli::Cli::try_parse_from([
90+
"pna",
91+
"--quiet",
92+
"c",
93+
"create_with_files_from_crlf/test.pna",
94+
"--overwrite",
95+
"--files-from",
96+
list_path,
97+
"--unstable",
98+
])
99+
.unwrap()
100+
.execute()
101+
.unwrap();
102+
}
103+
104+
#[test]
105+
fn create_with_files_from_cr() {
106+
setup();
107+
TestResources::extract_in("raw/", "create_with_files_from_cr/src/").unwrap();
108+
109+
// Use CR line endings (old Mac-style)
110+
let list_path = "create_with_files_from_cr/files.txt";
111+
fs::write(
112+
list_path,
113+
"create_with_files_from_cr/src/raw/empty.txt\rcreate_with_files_from_cr/src/raw/text.txt\r",
114+
)
115+
.unwrap();
116+
117+
cli::Cli::try_parse_from([
118+
"pna",
119+
"--quiet",
120+
"c",
121+
"create_with_files_from_cr/test.pna",
122+
"--overwrite",
123+
"--files-from",
124+
list_path,
125+
"--unstable",
126+
])
127+
.unwrap()
128+
.execute()
129+
.unwrap();
130+
}

0 commit comments

Comments
 (0)