Skip to content

Commit 715e03e

Browse files
committed
feat: expose ArchiveIteratorBuilder::mtree_format opt-out
libarchive's mtree format handler is permissive — it matches free-form text such as a plain gunzip'd text file and yields bogus entries for input that isn't really an mtree specification. The default behavior preserves libarchive's output (including on the raw entry points), but strict callers iterating with ArchiveIterator may want to reject the match instead of acting on the invalid entries.

- Add ArchiveIteratorBuilder::mtree_format(bool); the default, true, preserves libarchive's permissive behavior, while false rejects entries whose archive_format base mask indicates ARCHIVE_FORMAT_MTREE
- Gate the rejection inside ArchiveIterator::unsafe_next_header on the new flag, delegating to a module-private reject_mtree_format helper only when the caller opted out
- Expose archive_format plus ARCHIVE_FORMAT_BASE_MASK / ARCHIVE_FORMAT_MTREE through the FFI bindings and generate-ffi script so the rejection can consult libarchive's format code
- Extend CHANGES.md and the crate-level docs to describe the iterator-only opt-out
- Add an integration test that feeds tests/fixtures/file.txt.gz through the iterator with mtree_format(false) and asserts that the iterator surfaces an error
1 parent 33cc6c2 commit 715e03e

6 files changed

Lines changed: 76 additions & 11 deletions

File tree

CHANGES.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,12 @@
99
`uncompress_archive`, `uncompress_archive_file`, `ArchiveIterator`, and
1010
their `_with_encoding` and async variants now return an error for input
1111
that isn't a real archive, instead of treating it as a single-entry
12-
archive named `data`. `uncompress_data` still supports raw streams
13-
(that is its purpose). Callers that want the old iterator behavior can
14-
opt back in with `ArchiveIteratorBuilder::raw_format(true)` [#77]
12+
archive named `data`. `uncompress_data` still supports raw streams (that
13+
is its purpose). Callers that want the old iterator behavior for
14+
non-archive bytes can opt back in with
15+
`ArchiveIteratorBuilder::raw_format(true)`. Callers iterating with
16+
`ArchiveIterator` can additionally opt out of libarchive's permissive
17+
mtree matching with `ArchiveIteratorBuilder::mtree_format(false)` [#77]
1518
* Rewind the source reader at the start of every seekable entry point
1619
(`list_archive_files`, `list_archive_entries`, `uncompress_archive`,
1720
`uncompress_archive_file`, `ArchiveIterator`, and their `_with_encoding`

scripts/generate-ffi

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ bindgen \
4040
--allowlist-var "ARCHIVE_EXTRACT_OWNER" \
4141
--allowlist-var "ARCHIVE_EXTRACT_FFLAGS" \
4242
--allowlist-var "ARCHIVE_EXTRACT_XATTR" \
43+
--allowlist-var "ARCHIVE_FORMAT_BASE_MASK" \
44+
--allowlist-var "ARCHIVE_FORMAT_MTREE" \
45+
--allowlist-function "archive_format" \
4346
--allowlist-function "archive_read_new" \
4447
--allowlist-function "archive_read_set_seek_callback" \
4548
--allowlist-function "archive_read_support_filter_all" \

src/ffi/generated.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ pub(crate) const ARCHIVE_EXTRACT_TIME: u32 = 4;
1515
pub(crate) const ARCHIVE_EXTRACT_ACL: u32 = 32;
1616
pub(crate) const ARCHIVE_EXTRACT_FFLAGS: u32 = 64;
1717
pub(crate) const ARCHIVE_EXTRACT_XATTR: u32 = 128;
18+
pub(crate) const ARCHIVE_FORMAT_BASE_MASK: ::std::os::raw::c_int = 0xff0000;
19+
pub(crate) const ARCHIVE_FORMAT_MTREE: ::std::os::raw::c_int = 0x80000;
1820
pub(crate) type __dev_t = ::std::os::raw::c_ulong;
1921
pub(crate) type __uid_t = ::std::os::raw::c_uint;
2022
pub(crate) type __gid_t = ::std::os::raw::c_uint;
@@ -158,6 +160,9 @@ extern "C" {
158160
extern "C" {
159161
pub(crate) fn archive_errno(arg1: *mut archive) -> ::std::os::raw::c_int;
160162
}
163+
extern "C" {
164+
pub(crate) fn archive_format(arg1: *mut archive) -> ::std::os::raw::c_int;
165+
}
161166
extern "C" {
162167
pub(crate) fn archive_error_string(arg1: *mut archive) -> *const ::std::os::raw::c_char;
163168
}

src/iterator.rs

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ pub struct ArchiveIterator<R: Read + Seek> {
7979
current_is_dir: bool,
8080
closed: bool,
8181
error: bool,
82+
mtree_format: bool,
8283
filter: Option<Box<EntryFilterCallbackFn>>,
8384

8485
_pipe: Box<HeapReadSeekerPipe<R>>,
@@ -176,6 +177,7 @@ impl<R: Read + Seek> ArchiveIterator<R> {
176177
filter: Option<Box<EntryFilterCallbackFn>>,
177178
password: Option<ArchivePassword>,
178179
raw_format: bool,
180+
mtree_format: bool,
179181
) -> Result<ArchiveIterator<R>>
180182
where
181183
R: Read + Seek,
@@ -252,6 +254,7 @@ impl<R: Read + Seek> ArchiveIterator<R> {
252254
current_is_dir: false,
253255
closed: false,
254256
error: false,
257+
mtree_format,
255258
filter,
256259

257260
_pipe: pipe,
@@ -307,7 +310,7 @@ impl<R: Read + Seek> ArchiveIterator<R> {
307310
where
308311
R: Read + Seek,
309312
{
310-
Self::new(source, decode, None, None, false)
313+
Self::new(source, decode, None, None, false, true)
311314
}
312315

313316
/// Iterate over the contents of an archive, streaming the contents of each
@@ -346,7 +349,7 @@ impl<R: Read + Seek> ArchiveIterator<R> {
346349
where
347350
R: Read + Seek,
348351
{
349-
Self::new(source, crate::decode_utf8, None, None, false)
352+
Self::new(source, crate::decode_utf8, None, None, false, true)
350353
}
351354

352355
/// Close the iterator, freeing up the associated resources.
@@ -380,6 +383,11 @@ impl<R: Read + Seek> ArchiveIterator<R> {
380383
match ffi::archive_read_next_header(self.archive_reader, &mut self.archive_entry) {
381384
ffi::ARCHIVE_EOF => ArchiveContents::EndOfEntry,
382385
ffi::ARCHIVE_OK | ffi::ARCHIVE_WARN => {
386+
if !self.mtree_format {
387+
if let Err(e) = reject_mtree_format(self.archive_reader) {
388+
return ArchiveContents::Err(e);
389+
}
390+
}
383391
let _utf8_guard = ffi::WindowsUTF8LocaleGuard::new();
384392
let cstr = CStr::from_ptr(ffi::archive_entry_pathname(self.archive_entry));
385393
let file_name = match (self.decode)(cstr.to_bytes()) {
@@ -428,6 +436,20 @@ impl<R: Read + Seek> ArchiveIterator<R> {
428436
}
429437
}
430438

439+
// Must be called after a successful `archive_read_next_header`, since
440+
// libarchive only populates the format code once a header has been read.
441+
unsafe fn reject_mtree_format(archive_reader: *mut ffi::archive) -> Result<()> {
442+
if ffi::archive_format(archive_reader) & ffi::ARCHIVE_FORMAT_BASE_MASK
443+
== ffi::ARCHIVE_FORMAT_MTREE
444+
{
445+
return Err(Error::Extraction {
446+
code: None,
447+
details: "mtree specifications are not treated as archives".to_string(),
448+
});
449+
}
450+
Ok(())
451+
}
452+
431453
unsafe extern "C" fn libarchive_heap_seek_callback<R: Read + Seek>(
432454
_: *mut ffi::archive,
433455
client_data: *mut c_void,
@@ -483,6 +505,7 @@ where
483505
filter: Option<Box<EntryFilterCallbackFn>>,
484506
password: Option<ArchivePassword>,
485507
raw_format: bool,
508+
mtree_format: bool,
486509
}
487510

488511
/// A builder to generate an archive iterator over the contents of an
@@ -523,6 +546,7 @@ where
523546
filter: None,
524547
password: None,
525548
raw_format: false,
549+
mtree_format: true,
526550
}
527551
}
528552

@@ -560,6 +584,16 @@ where
560584
self
561585
}
562586

587+
/// Accept entries from libarchive's "mtree" format handler (default).
588+
///
589+
/// libarchive's mtree parser is permissive and will match free-form
590+
/// text (a plain gunzip'd text file is enough); pass `false` to
591+
/// reject those matches and error out instead.
592+
pub fn mtree_format(mut self, enable: bool) -> ArchiveIteratorBuilder<R> {
593+
self.mtree_format = enable;
594+
self
595+
}
596+
563597
/// Finish the builder and generate the configured `ArchiveIterator`.
564598
pub fn build(self) -> Result<ArchiveIterator<R>> {
565599
ArchiveIterator::new(
@@ -568,6 +602,7 @@ where
568602
self.filter,
569603
self.password,
570604
self.raw_format,
605+
self.mtree_format,
571606
)
572607
}
573608
}

src/lib.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,15 +52,16 @@
5252
//! Archive-listing and archive-extraction entry points (`list_archive_files`,
5353
//! `list_archive_entries`, `uncompress_archive`, `uncompress_archive_file`,
5454
//! `ArchiveIterator`, and their async/`_with_encoding` siblings) no longer
55-
//! register libarchive's "raw" format handler. They return an error for
56-
//! input that isn't a real archive instead of yielding a single entry
57-
//! called `data`, so callers can reliably distinguish archives from other
58-
//! files.
55+
//! register libarchive's "raw" format handler, so input that isn't a real
56+
//! archive errors out instead of yielding a single `data` entry.
5957
//!
6058
//! Use [`uncompress_data`] for decompressing a single stream (gzip, xz, …)
6159
//! — it continues to support raw input because that is its purpose. For
6260
//! streaming iteration that should accept arbitrary bytes, opt back in
63-
//! with [`ArchiveIteratorBuilder::raw_format`].
61+
//! with [`ArchiveIteratorBuilder::raw_format`]. libarchive's "mtree"
62+
//! handler remains enabled on all entry points; pass
63+
//! `ArchiveIteratorBuilder::mtree_format(false)` to the iterator if you
64+
//! need to reject mtree matches.
6465
6566
#[cfg(feature = "async_support")]
6667
pub mod async_support;

tests/integration_test.rs

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1408,14 +1408,32 @@ fn iterator_default_rejects_non_archive_bytes() {
14081408
let source = Cursor::new(NON_ARCHIVE_BYTES);
14091409
let saw_err = match ArchiveIterator::from_read(source) {
14101410
Err(_) => true,
1411-
Ok(iter) => iter.into_iter().any(|c| matches!(c, ArchiveContents::Err(_))),
1411+
Ok(iter) => iter
1412+
.into_iter()
1413+
.any(|c| matches!(c, ArchiveContents::Err(_))),
14121414
};
14131415
assert!(
14141416
saw_err,
14151417
"strict iterator must surface an error on non-archive input"
14161418
);
14171419
}
14181420

1421+
#[test]
1422+
fn iterator_mtree_format_opt_out_rejects_gzip_text() {
1423+
let source = std::fs::File::open("tests/fixtures/file.txt.gz").unwrap();
1424+
let saw_err = match ArchiveIteratorBuilder::new(source)
1425+
.mtree_format(false)
1426+
.build()
1427+
{
1428+
Err(_) => true,
1429+
Ok(mut iter) => iter.any(|c| matches!(c, ArchiveContents::Err(_))),
1430+
};
1431+
assert!(
1432+
saw_err,
1433+
"mtree_format(false) must reject libarchive's permissive mtree match on plain text"
1434+
);
1435+
}
1436+
14191437
#[test]
14201438
fn iterator_raw_format_opt_in_accepts_non_archive_bytes() {
14211439
let source = Cursor::new(NON_ARCHIVE_BYTES);

0 commit comments

Comments
 (0)