Skip to content

Commit e5b34d1

Browse files
committed
✨ Add mtree CRLF/content compatibility normalization
Normalize mtree input before parsing with mtree2 to handle bsdtar-style manifests that use CRLF line endings or the non-standard `content=` keyword shorthand.
1 parent 74989f3 commit e5b34d1

2 files changed

Lines changed: 218 additions & 1 deletion

File tree

cli/src/command/core/mtree.rs

Lines changed: 153 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,9 @@ pub(crate) fn transform_mtree_entries<R: Read>(
3030
filter: &PathFilter<'_>,
3131
time_filters: &TimeFilters,
3232
) -> io::Result<Vec<io::Result<Option<NormalEntry>>>> {
33+
let normalized = normalize_mtree_input(reader)?;
3334
// Use empty cwd to avoid mtree2 joining paths with current working directory
34-
let mtree = MTree::from_reader_with_cwd(reader, PathBuf::new());
35+
let mtree = MTree::from_reader_with_cwd(io::Cursor::new(normalized), PathBuf::new());
3536
let mut results = Vec::new();
3637

3738
for entry_result in mtree {
@@ -85,6 +86,108 @@ pub(crate) fn transform_mtree_entries<R: Read>(
8586
Ok(results)
8687
}
8788

89+
/// Normalizes mtree input so bsdtar-style manifests can be parsed by mtree2.
90+
///
91+
/// - Converts CRLF/CR line endings to LF
92+
/// - Rewrites `content=` to `contents=` (`content=` is a non-standard shorthand
93+
/// accepted by bsdtar but not recognized by mtree2)
94+
fn normalize_mtree_input(mut reader: impl Read) -> io::Result<Vec<u8>> {
95+
let mut raw = Vec::new();
96+
reader.read_to_end(&mut raw)?;
97+
let normalized_line_endings = normalize_line_endings(&raw);
98+
Ok(rewrite_content_keyword_alias(&normalized_line_endings))
99+
}
100+
101+
/// Returns a copy of `input` in which every CRLF pair and every lone CR byte
/// has been replaced by a single LF. All other bytes are copied unchanged.
fn normalize_line_endings(input: &[u8]) -> Vec<u8> {
    let mut normalized = Vec::with_capacity(input.len());
    let mut bytes = input.iter().copied().peekable();

    while let Some(byte) = bytes.next() {
        if byte != b'\r' {
            normalized.push(byte);
            continue;
        }
        // A CR directly followed by LF is one line break, so swallow the LF
        // to avoid emitting two newlines for a single CRLF pair.
        if bytes.peek() == Some(&b'\n') {
            bytes.next();
        }
        normalized.push(b'\n');
    }

    normalized
}
117+
118+
fn rewrite_content_keyword_alias(input: &[u8]) -> Vec<u8> {
119+
let mut out = Vec::with_capacity(input.len());
120+
let mut line_start = 0;
121+
while line_start < input.len() {
122+
let mut line_end = line_start;
123+
while line_end < input.len() && input[line_end] != b'\n' {
124+
line_end += 1;
125+
}
126+
127+
rewrite_content_keyword_line(&input[line_start..line_end], &mut out);
128+
if line_end < input.len() {
129+
out.push(b'\n');
130+
line_end += 1;
131+
}
132+
line_start = line_end;
133+
}
134+
out
135+
}
136+
137+
/// Rewrites `content=` tokens to `contents=` within a single physical line.
138+
///
139+
/// Skips comment lines and preserves the first token (entry name or directive like `/set`).
140+
/// `/set` directives are intentionally processed because they carry keyword defaults.
141+
///
142+
/// Limitation: operates on physical lines, so `content=` as the first token on a
143+
/// backslash-continuation line will not be rewritten. This is acceptable because such
144+
/// formatting is uncommon, and the case was already broken before normalization (mtree2
145+
/// does not recognize `content=`).
146+
fn rewrite_content_keyword_line(line: &[u8], out: &mut Vec<u8>) {
147+
if line.is_empty() || line[0] == b'#' {
148+
out.extend_from_slice(line);
149+
return;
150+
}
151+
152+
let mut idx = 0;
153+
while idx < line.len() && is_mtree_whitespace(line[idx]) {
154+
idx += 1;
155+
}
156+
while idx < line.len() && !is_mtree_whitespace(line[idx]) {
157+
idx += 1;
158+
}
159+
out.extend_from_slice(&line[..idx]);
160+
161+
while idx < line.len() {
162+
let ws_start = idx;
163+
while idx < line.len() && is_mtree_whitespace(line[idx]) {
164+
idx += 1;
165+
}
166+
out.extend_from_slice(&line[ws_start..idx]);
167+
168+
let token_start = idx;
169+
while idx < line.len() && !is_mtree_whitespace(line[idx]) {
170+
idx += 1;
171+
}
172+
if token_start == idx {
173+
break;
174+
}
175+
176+
let token = &line[token_start..idx];
177+
if token.starts_with(b"content=") {
178+
out.extend_from_slice(b"contents=");
179+
out.extend_from_slice(&token[b"content=".len()..]);
180+
} else {
181+
out.extend_from_slice(token);
182+
}
183+
}
184+
}
185+
186+
/// Returns `true` when `byte` is a space or a horizontal tab, the two bytes
/// this module treats as separators between tokens on a manifest line.
#[inline]
fn is_mtree_whitespace(byte: u8) -> bool {
    byte == b' ' || byte == b'\t'
}
190+
88191
/// Creates a single archive entry from an mtree entry.
89192
fn create_entry_from_mtree(
90193
mtree_entry: &MtreeEntry,
@@ -434,4 +537,53 @@ mod tests {
434537
assert_eq!(entry2.path().to_str(), Some("file2.txt"));
435538
assert!(entry2.optional(), "file2.txt should be marked as optional");
436539
}
540+
541+
/// CRLF line endings and the `content=` alias are both normalized.
#[test]
fn normalize_mtree_input_converts_crlf_and_content_alias() {
    let manifest: &[u8] = b"#mtree\r\nf type=file content=bar/foo\r\n";
    let expected: &[u8] = b"#mtree\nf type=file contents=bar/foo\n";

    let actual = normalize_mtree_input(manifest).unwrap();

    assert_eq!(actual, expected);
}
547+
548+
/// A manifest that already uses LF and `contents=` comes back unchanged.
#[test]
fn normalize_mtree_input_keeps_existing_contents_keyword() {
    let manifest: &[u8] = b"#mtree\nf type=file contents=bar/foo\n";

    let actual = normalize_mtree_input(manifest).unwrap();

    assert_eq!(actual, manifest);
}
554+
555+
/// An entry whose name happens to start with `content=` must not be rewritten,
/// because the first token on a line is the entry name rather than a keyword.
#[test]
fn normalize_mtree_input_preserves_first_token() {
    let manifest: &[u8] = b"#mtree\ncontent=file type=file contents=bar/foo\n";

    let actual = normalize_mtree_input(manifest).unwrap();

    assert_eq!(actual, manifest);
}
561+
562+
/// A backslash-continued entry with CRLF endings keeps its continuation marker
/// while the line endings and the `content=` alias are normalized.
#[test]
fn normalize_mtree_input_handles_wrapped_crlf_line() {
    let manifest: &[u8] = b"#mtree\r\nf uname=\\\r\nroot content=bar/foo\r\n";
    let expected: &[u8] = b"#mtree\nf uname=\\\nroot contents=bar/foo\n";

    let actual = normalize_mtree_input(manifest).unwrap();

    assert_eq!(actual, expected);
}
568+
569+
/// Lone CR bytes (no following LF) are treated as line breaks as well.
#[test]
fn normalize_line_endings_converts_standalone_cr() {
    let manifest: &[u8] = b"#mtree\rf type=file content=bar/foo\r";
    let expected: &[u8] = b"#mtree\nf type=file contents=bar/foo\n";

    let actual = normalize_mtree_input(manifest).unwrap();

    assert_eq!(actual, expected);
}
575+
576+
/// Tabs separate tokens just like spaces do, and are preserved in the output.
#[test]
fn normalize_mtree_input_rewrites_tab_separated_content() {
    let manifest: &[u8] = b"#mtree\nf\ttype=file\tcontent=bar/foo\n";
    let expected: &[u8] = b"#mtree\nf\ttype=file\tcontents=bar/foo\n";

    let actual = normalize_mtree_input(manifest).unwrap();

    assert_eq!(actual, expected);
}
582+
583+
/// The last line is rewritten even without a terminating newline, and no
/// newline is added to the output.
#[test]
fn normalize_mtree_input_handles_no_trailing_newline() {
    let manifest: &[u8] = b"#mtree\nf type=file content=bar/foo";
    let expected: &[u8] = b"#mtree\nf type=file contents=bar/foo";

    let actual = normalize_mtree_input(manifest).unwrap();

    assert_eq!(actual, expected);
}
437589
}

cli/tests/cli/stdio/mtree.rs

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,71 @@ fn stdio_mtree_contents_keyword() {
157157
assert_eq!(entry_names.len(), 1);
158158
}
159159

160+
/// Precondition: An mtree manifest uses CRLF line endings, a backslash-wrapped
/// line, and the `content=` keyword alias.
/// Action: Create an archive from the manifest, then extract it.
/// Expectation: Both commands succeed, the archive holds exactly the entries
/// `f` and `g`, and each extracted file carries the payload the manifest
/// pointed at.
#[test]
fn stdio_mtree_crlf_wrapped_and_content_alias() {
    setup();

    // Payload files referenced by the manifest's `content=` keywords.
    let base = PathBuf::from("stdio_mtree_crlf_wrapped_and_content_alias");
    let payload_dir = base.join("bar");
    fs::create_dir_all(&payload_dir).unwrap();
    fs::write(payload_dir.join("foo"), "abc").unwrap();
    fs::write(payload_dir.join("goo"), "xyz").unwrap();

    // Entry `f` is wrapped across two physical lines; entry `g` is on one line.
    let manifest = "#mtree\r\nf type=file uname=\\\r\nroot gname=root mode=0755 content=bar/foo\r\ng type=file uname=root gname=root mode=0755 content=bar/goo\r\n";
    fs::write(base.join("manifest.mtree"), manifest).unwrap();

    let output_archive = base.join("output.pna");
    let archive_arg = output_archive.to_str().unwrap();
    let base_arg = base.to_str().unwrap();
    cargo_bin_cmd!("pna")
        .args([
            "--quiet",
            "experimental",
            "stdio",
            "--create",
            "--unstable",
            "--overwrite",
            "-f",
            archive_arg,
            "-C",
            base_arg,
            "@manifest.mtree",
        ])
        .assert()
        .success();

    let entry_names = get_archive_entry_names(&output_archive)
        .into_iter()
        .collect::<HashSet<String>>();
    assert!(entry_names.contains("f"), "Missing f");
    assert!(entry_names.contains("g"), "Missing g");
    assert_eq!(entry_names.len(), 2);

    let out_dir = base.join("out");
    fs::create_dir_all(&out_dir).unwrap();
    let out_dir_arg = out_dir.to_str().unwrap();
    cargo_bin_cmd!("pna")
        .args([
            "--quiet",
            "experimental",
            "stdio",
            "--extract",
            "--unstable",
            "--overwrite",
            "-f",
            archive_arg,
            "--out-dir",
            out_dir_arg,
        ])
        .assert()
        .success();

    // The extracted entries must contain the bytes of the referenced files.
    let extracted_f = fs::read(out_dir.join("f")).unwrap();
    let extracted_g = fs::read(out_dir.join("g")).unwrap();
    assert_eq!(extracted_f, b"abc");
    assert_eq!(extracted_g, b"xyz");
}
224+
160225
/// Precondition: An mtree manifest specifies directory and file entries.
161226
/// Action: Create archive from the mtree manifest.
162227
/// Expectation: The archive contains both directory and file entries.

0 commit comments

Comments
 (0)