Skip to content

Commit a4a18d9

Browse files
committed
feat!: require real archives on archive entry points
- Stop registering libarchive's "raw" format handler in list_archive_files, list_archive_entries, uncompress_archive, uncompress_archive_file, ArchiveIterator, and their _with_encoding and async variants, so non-archive input now errors instead of yielding a single "data" entry.
- Add ArchiveIteratorBuilder::raw_format(bool) so callers that relied on the old permissive behavior can opt back in explicitly.
- Keep uncompress_data unchanged since handling raw compressed streams (gzip, xz, ...) is its purpose.
- Document the stricter behavior in the crate docs and CHANGES.md, update affected tests to opt in where needed, and bump the version to 0.16.0 to reflect the breaking change.
1 parent 0139bff commit a4a18d9

5 files changed

Lines changed: 116 additions & 16 deletions

File tree

CHANGES.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,14 @@
44

55
## [Unreleased] - ReleaseDate
66

7+
* **Breaking:** libarchive's "raw" format handler is no longer registered on
8+
the archive code paths. `list_archive_files`, `list_archive_entries`,
9+
`uncompress_archive`, `uncompress_archive_file`, `ArchiveIterator`, and
10+
their `_with_encoding` and async variants now return an error for input
11+
that isn't a real archive, instead of treating it as a single-entry
12+
archive named `data`. `uncompress_data` still supports raw streams
13+
(that is its purpose). Callers that want the old iterator behavior can
14+
opt back in with `ArchiveIteratorBuilder::raw_format(true)` [#77]
715
* Rewind the source reader at the start of every seekable entry point
816
(`list_archive_files`, `list_archive_entries`, `uncompress_archive`,
917
`uncompress_archive_file`, `ArchiveIterator`, and their `_with_encoding`

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "compress-tools"
3-
version = "0.15.1"
3+
version = "0.16.0"
44
authors = ["Jonathas-Conceicao <jadoliveira@inf.ufpel.edu.br>"]
55
description = "Utility functions for compressed and archive files handling"
66
repository = "https://github.com/OSSystems/compress-tools-rs"

src/iterator.rs

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ impl<R: Read + Seek> ArchiveIterator<R> {
175175
decode: DecodeCallback,
176176
filter: Option<Box<EntryFilterCallbackFn>>,
177177
password: Option<ArchivePassword>,
178+
raw_format: bool,
178179
) -> Result<ArchiveIterator<R>>
179180
where
180181
R: Read + Seek,
@@ -204,10 +205,12 @@ impl<R: Read + Seek> ArchiveIterator<R> {
204205
archive_reader,
205206
)?;
206207

207-
archive_result(
208-
ffi::archive_read_support_format_raw(archive_reader),
209-
archive_reader,
210-
)?;
208+
if raw_format {
209+
archive_result(
210+
ffi::archive_read_support_format_raw(archive_reader),
211+
archive_reader,
212+
)?;
213+
}
211214

212215
archive_result(
213216
ffi::archive_read_set_seek_callback(
@@ -304,7 +307,7 @@ impl<R: Read + Seek> ArchiveIterator<R> {
304307
where
305308
R: Read + Seek,
306309
{
307-
Self::new(source, decode, None, None)
310+
Self::new(source, decode, None, None, false)
308311
}
309312

310313
/// Iterate over the contents of an archive, streaming the contents of each
@@ -343,7 +346,7 @@ impl<R: Read + Seek> ArchiveIterator<R> {
343346
where
344347
R: Read + Seek,
345348
{
346-
Self::new(source, crate::decode_utf8, None, None)
349+
Self::new(source, crate::decode_utf8, None, None, false)
347350
}
348351

349352
/// Close the iterator, freeing up the associated resources.
@@ -479,6 +482,7 @@ where
479482
decoder: DecodeCallback,
480483
filter: Option<Box<EntryFilterCallbackFn>>,
481484
password: Option<ArchivePassword>,
485+
raw_format: bool,
482486
}
483487

484488
/// A builder to generate an archive iterator over the contents of an
@@ -518,6 +522,7 @@ where
518522
decoder: crate::decode_utf8,
519523
filter: None,
520524
password: None,
525+
raw_format: false,
521526
}
522527
}
523528

@@ -544,8 +549,25 @@ where
544549
self
545550
}
546551

552+
/// Enable libarchive's "raw" format handler, which parses any byte
553+
/// stream as a single-entry archive with pathname `data`.
554+
///
555+
/// Disabled by default so the iterator rejects input that isn't a real
556+
/// archive. Enable it only when you intentionally want to iterate over
557+
/// arbitrary non-archive streams (e.g. a standalone gzip file).
558+
pub fn raw_format(mut self, enable: bool) -> ArchiveIteratorBuilder<R> {
559+
self.raw_format = enable;
560+
self
561+
}
562+
547563
/// Finish the builder and generate the configured `ArchiveIterator`.
548564
pub fn build(self) -> Result<ArchiveIterator<R>> {
549-
ArchiveIterator::new(self.source, self.decoder, self.filter, self.password)
565+
ArchiveIterator::new(
566+
self.source,
567+
self.decoder,
568+
self.filter,
569+
self.password,
570+
self.raw_format,
571+
)
550572
}
551573
}

src/lib.rs

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,21 @@
4646
//! # Ok(())
4747
//! # }
4848
//! ```
49+
//!
50+
//! # Strict archive parsing
51+
//!
52+
//! Archive-listing and archive-extraction entry points (`list_archive_files`,
53+
//! `list_archive_entries`, `uncompress_archive`, `uncompress_archive_file`,
54+
//! `ArchiveIterator`, and their async/`_with_encoding` siblings) no longer
55+
//! register libarchive's "raw" format handler. They return an error for
56+
//! input that isn't a real archive instead of yielding a single entry
57+
//! called `data`, so callers can reliably distinguish archives from other
58+
//! files.
59+
//!
60+
//! Use [`uncompress_data`] for decompressing a single stream (gzip, xz, …)
61+
//! — it continues to support raw input because that is its purpose. For
62+
//! streaming iteration that should accept arbitrary bytes, opt back in
63+
//! with [`ArchiveIteratorBuilder::raw_format`].
4964
5065
#[cfg(feature = "async_support")]
5166
pub mod async_support;
@@ -526,11 +541,6 @@ where
526541
archive_reader,
527542
)?;
528543

529-
archive_result(
530-
ffi::archive_read_support_format_raw(archive_reader),
531-
archive_reader,
532-
)?;
533-
534544
archive_result(
535545
ffi::archive_read_set_seek_callback(archive_reader, Some(libarchive_seek_callback)),
536546
archive_reader,

tests/integration_test.rs

Lines changed: 63 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1064,7 +1064,11 @@ fn iterate_zip_with_cjk_pathname() {
10641064
fn iterate_truncated_archive() {
10651065
let source = std::fs::File::open("tests/fixtures/truncated.log.gz").unwrap();
10661066

1067-
for content in ArchiveIterator::from_read(source).unwrap() {
1067+
for content in ArchiveIteratorBuilder::new(source)
1068+
.raw_format(true)
1069+
.build()
1070+
.unwrap()
1071+
{
10681072
if let ArchiveContents::Err(Error::Unknown) = content {
10691073
return;
10701074
}
@@ -1076,7 +1080,11 @@ fn iterate_truncated_archive() {
10761080
fn uncompress_bytes_helper(bytes: &[u8]) {
10771081
let wrapper = Cursor::new(bytes);
10781082

1079-
for content in ArchiveIterator::from_read(wrapper).unwrap() {
1083+
for content in ArchiveIteratorBuilder::new(wrapper)
1084+
.raw_format(true)
1085+
.build()
1086+
.unwrap()
1087+
{
10801088
if let ArchiveContents::Err(Error::Unknown) = content {
10811089
return;
10821090
}
@@ -1128,7 +1136,7 @@ fn uncompress_archive_absolute_path() {
11281136

11291137
#[test]
11301138
fn decode_failure() {
1131-
let source = std::fs::File::open("tests/fixtures/file.txt.gz").unwrap();
1139+
let source = std::fs::File::open("tests/fixtures/tree.tar").unwrap();
11321140
let decode_fail = |_bytes: &[u8]| Err(Error::Io(std::io::Error::from(ErrorKind::BrokenPipe)));
11331141

11341142
for content in ArchiveIterator::from_read_with_encoding(source, decode_fail).unwrap() {
@@ -1371,3 +1379,55 @@ fn archive_password_with_nul_byte_rejected() {
13711379
"passwords containing NUL must be rejected, not panic",
13721380
);
13731381
}
1382+
1383+
// Arbitrary binary blob that matches no libarchive format handler. Used to
1384+
// prove the "raw" handler is the thing making this parseable: strict mode
1385+
// errors, raw_format(true) accepts it as a single "data" entry.
1386+
const NON_ARCHIVE_BYTES: &[u8] = &[
1387+
0x00, 0x01, 0x02, 0x03, 0xff, 0xfe, 0xfd, 0xfc, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80,
1388+
];
1389+
1390+
#[test]
1391+
fn list_archive_files_rejects_non_archive_bytes() {
1392+
let source = Cursor::new(NON_ARCHIVE_BYTES);
1393+
assert!(
1394+
list_archive_files(source).is_err(),
1395+
"arbitrary bytes must no longer be listed as a single \"data\" entry",
1396+
);
1397+
}
1398+
1399+
#[test]
1400+
fn uncompress_archive_rejects_non_archive_bytes() {
1401+
let source = Cursor::new(NON_ARCHIVE_BYTES);
1402+
let dir = tempfile::TempDir::new().unwrap();
1403+
assert!(uncompress_archive(source, dir.path(), Ownership::Ignore).is_err());
1404+
}
1405+
1406+
#[test]
1407+
fn iterator_default_rejects_non_archive_bytes() {
1408+
let source = Cursor::new(NON_ARCHIVE_BYTES);
1409+
let saw_err = match ArchiveIterator::from_read(source) {
1410+
Err(_) => true,
1411+
Ok(iter) => iter.into_iter().any(|c| matches!(c, ArchiveContents::Err(_))),
1412+
};
1413+
assert!(
1414+
saw_err,
1415+
"strict iterator must surface an error on non-archive input"
1416+
);
1417+
}
1418+
1419+
#[test]
1420+
fn iterator_raw_format_opt_in_accepts_non_archive_bytes() {
1421+
let source = Cursor::new(NON_ARCHIVE_BYTES);
1422+
let mut names = Vec::new();
1423+
for content in ArchiveIteratorBuilder::new(source)
1424+
.raw_format(true)
1425+
.build()
1426+
.expect("raw_format(true) should accept arbitrary bytes")
1427+
{
1428+
if let ArchiveContents::StartOfEntry(name, _) = content {
1429+
names.push(name);
1430+
}
1431+
}
1432+
assert_eq!(names, vec!["data".to_string()]);
1433+
}

0 commit comments

Comments
 (0)