Skip to content

Commit acaeb31

Browse files
committed
fix!: reject mtree specifications on archive entry points
libarchive's mtree format handler matches arbitrary free-form text, so inputs as innocuous as a plain gunzip'd text file were being parsed as single-entry mtree "archives". This defeats the guarantee added alongside the raw-handler removal, where archive entry points are supposed to reliably reject non-archive bytes.

- After each successful archive_read_next_header, check archive_format and treat an mtree match the same way we treat the raw handler: return an Extraction error instead of yielding bogus entries
- Wire the check into list_archive_files, list_archive_entries, uncompress_archive, uncompress_archive_file, and ArchiveIterator (covering their _with_encoding and async siblings via the shared helpers)
- Expose archive_format plus ARCHIVE_FORMAT_BASE_MASK / ARCHIVE_FORMAT_MTREE through the FFI bindings and generate-ffi script
- Extend CHANGES.md and the crate-level docs to describe the mtree rejection alongside the existing raw-handler note
- Add regression tests that feed tests/fixtures/file.txt.gz through list_archive_files and ArchiveIterator to confirm both surfaces reject the mtree interpretation
1 parent a4a18d9 commit acaeb31

6 files changed

Lines changed: 67 additions & 11 deletions

File tree

CHANGES.md

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,16 @@
55
## [Unreleased] - ReleaseDate
66

77
* **Breaking:** libarchive's "raw" format handler is no longer registered on
8-
the archive code paths. `list_archive_files`, `list_archive_entries`,
9-
`uncompress_archive`, `uncompress_archive_file`, `ArchiveIterator`, and
10-
their `_with_encoding` and async variants now return an error for input
11-
that isn't a real archive, instead of treating it as a single-entry
12-
archive named `data`. `uncompress_data` still supports raw streams
13-
(that is its purpose). Callers that want the old iterator behavior can
14-
opt back in with `ArchiveIteratorBuilder::raw_format(true)` [#77]
8+
the archive code paths, and entries produced by the "mtree" handler are
9+
rejected (libarchive matches mtree on arbitrary free-form text, so a plain
10+
gunzip'd text file used to parse as an mtree archive). `list_archive_files`,
11+
`list_archive_entries`, `uncompress_archive`, `uncompress_archive_file`,
12+
`ArchiveIterator`, and their `_with_encoding` and async variants now return
13+
an error for input that isn't a real archive, instead of treating it as a
14+
single-entry archive named `data` or as an mtree specification.
15+
`uncompress_data` still supports raw streams (that is its purpose).
16+
Callers that want the old iterator behavior for non-archive bytes can opt
17+
back in with `ArchiveIteratorBuilder::raw_format(true)` [#77]
1518
* Rewind the source reader at the start of every seekable entry point
1619
(`list_archive_files`, `list_archive_entries`, `uncompress_archive`,
1720
`uncompress_archive_file`, `ArchiveIterator`, and their `_with_encoding`

scripts/generate-ffi

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ bindgen \
4040
--allowlist-var "ARCHIVE_EXTRACT_OWNER" \
4141
--allowlist-var "ARCHIVE_EXTRACT_FFLAGS" \
4242
--allowlist-var "ARCHIVE_EXTRACT_XATTR" \
43+
--allowlist-var "ARCHIVE_FORMAT_BASE_MASK" \
44+
--allowlist-var "ARCHIVE_FORMAT_MTREE" \
45+
--allowlist-function "archive_format" \
4346
--allowlist-function "archive_read_new" \
4447
--allowlist-function "archive_read_set_seek_callback" \
4548
--allowlist-function "archive_read_support_filter_all" \

src/ffi/generated.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ pub(crate) const ARCHIVE_EXTRACT_TIME: u32 = 4;
1515
pub(crate) const ARCHIVE_EXTRACT_ACL: u32 = 32;
1616
pub(crate) const ARCHIVE_EXTRACT_FFLAGS: u32 = 64;
1717
pub(crate) const ARCHIVE_EXTRACT_XATTR: u32 = 128;
18+
pub(crate) const ARCHIVE_FORMAT_BASE_MASK: ::std::os::raw::c_int = 0xff0000;
19+
pub(crate) const ARCHIVE_FORMAT_MTREE: ::std::os::raw::c_int = 0x80000;
1820
pub(crate) type __dev_t = ::std::os::raw::c_ulong;
1921
pub(crate) type __uid_t = ::std::os::raw::c_uint;
2022
pub(crate) type __gid_t = ::std::os::raw::c_uint;
@@ -158,6 +160,9 @@ extern "C" {
158160
extern "C" {
159161
pub(crate) fn archive_errno(arg1: *mut archive) -> ::std::os::raw::c_int;
160162
}
163+
extern "C" {
164+
pub(crate) fn archive_format(arg1: *mut archive) -> ::std::os::raw::c_int;
165+
}
161166
extern "C" {
162167
pub(crate) fn archive_error_string(arg1: *mut archive) -> *const ::std::os::raw::c_char;
163168
}

src/iterator.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,9 @@ impl<R: Read + Seek> ArchiveIterator<R> {
380380
match ffi::archive_read_next_header(self.archive_reader, &mut self.archive_entry) {
381381
ffi::ARCHIVE_EOF => ArchiveContents::EndOfEntry,
382382
ffi::ARCHIVE_OK | ffi::ARCHIVE_WARN => {
383+
if let Err(e) = crate::reject_mtree_format(self.archive_reader) {
384+
return ArchiveContents::Err(e);
385+
}
383386
let _utf8_guard = ffi::WindowsUTF8LocaleGuard::new();
384387
let cstr = CStr::from_ptr(ffi::archive_entry_pathname(self.archive_entry));
385388
let file_name = match (self.decode)(cstr.to_bytes()) {

src/lib.rs

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,10 @@
5252
//! Archive-listing and archive-extraction entry points (`list_archive_files`,
5353
//! `list_archive_entries`, `uncompress_archive`, `uncompress_archive_file`,
5454
//! `ArchiveIterator`, and their async/`_with_encoding` siblings) no longer
55-
//! register libarchive's "raw" format handler. They return an error for
56-
//! input that isn't a real archive instead of yielding a single entry
57-
//! called `data`, so callers can reliably distinguish archives from other
58-
//! files.
55+
//! accept libarchive's two promiscuous format handlers: "raw" (which
56+
//! matches arbitrary bytes) and "mtree" (which matches free-form text).
57+
//! They return an error for input that isn't a real archive, so callers
58+
//! can reliably distinguish archives from other files.
5959
//!
6060
//! Use [`uncompress_data`] for decompressing a single stream (gzip, xz, …)
6161
//! — it continues to support raw input because that is its purpose. For
@@ -241,6 +241,7 @@ where
241241
ffi::ARCHIVE_EOF => return Ok(entries),
242242
value => archive_result(value, archive_reader)?,
243243
}
244+
reject_mtree_format(archive_reader)?;
244245

245246
let _utf8_guard = ffi::WindowsUTF8LocaleGuard::new();
246247
let cstr = libarchive_entry_pathname(entry)?;
@@ -363,6 +364,7 @@ where
363364
ffi::ARCHIVE_EOF => return Ok(()),
364365
value => archive_result(value, archive_reader)?,
365366
}
367+
reject_mtree_format(archive_reader)?;
366368

367369
let _utf8_guard = ffi::WindowsUTF8LocaleGuard::new();
368370
let cstr = libarchive_entry_pathname(entry)?;
@@ -478,6 +480,7 @@ where
478480
}
479481
value => archive_result(value, archive_reader)?,
480482
}
483+
reject_mtree_format(archive_reader)?;
481484

482485
let _utf8_guard = ffi::WindowsUTF8LocaleGuard::new();
483486
let cstr = libarchive_entry_pathname(entry)?;
@@ -681,6 +684,23 @@ fn sanitize_destination_path(dest: &Path) -> Result<&Path> {
681684
})
682685
}
683686

687+
// libarchive's mtree format handler parses free-form text specifications and
688+
// willingly matches innocuous content (a plain gunzip'd text file, for
689+
// instance). Treat an mtree match the same way we treat raw: not an archive.
690+
// Must be called after a successful `archive_read_next_header`, which is
691+
// when libarchive populates the format code.
692+
pub(crate) unsafe fn reject_mtree_format(archive_reader: *mut ffi::archive) -> Result<()> {
693+
if ffi::archive_format(archive_reader) & ffi::ARCHIVE_FORMAT_BASE_MASK
694+
== ffi::ARCHIVE_FORMAT_MTREE
695+
{
696+
return Err(Error::Extraction {
697+
code: None,
698+
details: "mtree specifications are not treated as archives".to_string(),
699+
});
700+
}
701+
Ok(())
702+
}
703+
684704
fn libarchive_copy_data(
685705
archive_reader: *mut ffi::archive,
686706
archive_writer: *mut ffi::archive,

tests/integration_test.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1416,6 +1416,28 @@ fn iterator_default_rejects_non_archive_bytes() {
14161416
);
14171417
}
14181418

1419+
#[test]
1420+
fn list_archive_files_rejects_plain_gzip_as_mtree() {
1421+
let source = std::fs::File::open("tests/fixtures/file.txt.gz").unwrap();
1422+
assert!(
1423+
list_archive_files(source).is_err(),
1424+
"a plain gzip'd text stream must not be parsed as an mtree archive",
1425+
);
1426+
}
1427+
1428+
#[test]
1429+
fn iterator_default_rejects_plain_gzip_as_mtree() {
1430+
let source = std::fs::File::open("tests/fixtures/file.txt.gz").unwrap();
1431+
let saw_err = match ArchiveIterator::from_read(source) {
1432+
Err(_) => true,
1433+
Ok(iter) => iter.into_iter().any(|c| matches!(c, ArchiveContents::Err(_))),
1434+
};
1435+
assert!(
1436+
saw_err,
1437+
"strict iterator must surface an error on mtree-looking input"
1438+
);
1439+
}
1440+
14191441
#[test]
14201442
fn iterator_raw_format_opt_in_accepts_non_archive_bytes() {
14211443
let source = Cursor::new(NON_ARCHIVE_BYTES);

0 commit comments

Comments
 (0)