Skip to content

Commit a34e9bf

Browse files
committed
feat: expose per-entry sizes via list_archive_entries
Adds list_archive_entries (and _with_encoding variant) returning ArchiveEntryInfo { path, size } so callers can obtain uncompressed sizes from the archive header without extracting the contents. Mirrored across async_support, futures_support, and tokio_support. list_archive_files_with_encoding is reimplemented on top of list_archive_entries_with_encoding to share the header-walk loop. ArchiveIterator already surfaces the same stat.st_size on StartOfEntry for streaming users; its docs now call this out. Closes #134
1 parent 59dd62c commit a34e9bf

7 files changed

Lines changed: 199 additions & 9 deletions

File tree

CHANGES.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@
44

55
## [Unreleased] - ReleaseDate
66

7+
* Add `list_archive_entries` (and `_with_encoding` variant) returning path and
8+
uncompressed size per entry, so callers can obtain per-file sizes without
9+
extracting the archive. Mirrored in `async_support`, `futures_support`, and
10+
`tokio_support`. `ArchiveIterator`'s `StartOfEntry` already exposes the same
11+
`stat.st_size` for streaming users [#134]
712
* Fix partial unfinished archive reads [#133]
813
* Add missing `advapi32` link library on Windows builds [#141]
914
* Propagate `ENOSPC` during archive extraction instead of silently truncating files [#144]

src/async_support.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,36 @@ where
301301
wrap_async_seek_read(blocking_executor, source, crate::list_archive_files).await?
302302
}
303303

304+
/// Async version of
305+
/// [`list_archive_entries_with_encoding`](crate::
306+
/// list_archive_entries_with_encoding).
307+
pub async fn list_archive_entries_with_encoding<B, R>(
308+
blocking_executor: B,
309+
source: R,
310+
decode: DecodeCallback,
311+
) -> Result<Vec<crate::ArchiveEntryInfo>>
312+
where
313+
B: BlockingExecutor,
314+
R: AsyncRead + AsyncSeek + Unpin,
315+
{
316+
wrap_async_seek_read(blocking_executor, source, move |source| {
317+
crate::list_archive_entries_with_encoding(source, decode)
318+
})
319+
.await?
320+
}
321+
322+
/// Async version of [`list_archive_entries`](crate::list_archive_entries).
323+
pub async fn list_archive_entries<B, R>(
324+
blocking_executor: B,
325+
source: R,
326+
) -> Result<Vec<crate::ArchiveEntryInfo>>
327+
where
328+
B: BlockingExecutor,
329+
R: AsyncRead + AsyncSeek + Unpin,
330+
{
331+
wrap_async_seek_read(blocking_executor, source, crate::list_archive_entries).await?
332+
}
333+
304334
/// Async version of [`uncompress_data`](crate::uncompress_data).
305335
pub async fn uncompress_data<B, R, W>(blocking_executor: B, source: R, target: W) -> Result<usize>
306336
where

src/futures_support.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,28 @@ where
4747
async_support::list_archive_files(FUTURES_BLOCKING_EXECUTOR, source).await
4848
}
4949

50+
/// Async version of
51+
/// [`list_archive_entries_with_encoding`](crate::
52+
/// list_archive_entries_with_encoding).
53+
pub async fn list_archive_entries_with_encoding<R>(
54+
source: R,
55+
decode: DecodeCallback,
56+
) -> Result<Vec<crate::ArchiveEntryInfo>>
57+
where
58+
R: AsyncRead + AsyncSeek + Unpin,
59+
{
60+
async_support::list_archive_entries_with_encoding(FUTURES_BLOCKING_EXECUTOR, source, decode)
61+
.await
62+
}
63+
64+
/// Async version of [`list_archive_entries`](crate::list_archive_entries).
65+
pub async fn list_archive_entries<R>(source: R) -> Result<Vec<crate::ArchiveEntryInfo>>
66+
where
67+
R: AsyncRead + AsyncSeek + Unpin,
68+
{
69+
async_support::list_archive_entries(FUTURES_BLOCKING_EXECUTOR, source).await
70+
}
71+
5072
/// Async version of [`uncompress_data`](crate::uncompress_data).
5173
pub async fn uncompress_data<R, W>(source: R, target: W) -> Result<usize>
5274
where

src/iterator.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,10 @@ impl<R: Read + Seek> ArchiveIterator<R> {
259259
/// Iterate over the contents of an archive, streaming the contents of each
260260
/// entry in small chunks.
261261
///
262+
/// The [`ArchiveContents::StartOfEntry`] variant carries the entry's
263+
/// `stat` struct, so `stat.st_size` gives the uncompressed size reported
264+
/// by the archive header without having to consume the data chunks.
265+
///
262266
/// ```no_run
263267
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
264268
/// use compress_tools::*;
@@ -273,7 +277,10 @@ impl<R: Read + Seek> ArchiveIterator<R> {
273277
///
274278
/// for content in &mut iter {
275279
/// match content {
276-
/// ArchiveContents::StartOfEntry(s, _) => name = s,
280+
/// ArchiveContents::StartOfEntry(s, stat) => {
281+
/// name = s;
282+
/// println!("header reports {} bytes for {}", stat.st_size, name);
283+
/// }
277284
/// ArchiveContents::DataChunk(v) => size += v.len(),
278285
/// ArchiveContents::EndOfEntry => {
279286
/// println!("Entry {} was {} bytes", name, size);

src/lib.rs

Lines changed: 88 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,17 @@ pub struct stat {
101101
pub st_ctime: libc::time_t,
102102
}
103103

104+
/// Path and uncompressed size for a single archive entry.
///
/// `size` comes from the archive header and may be `0` for formats that do
/// not record it there (some raw compressed streams, ZIP entries using a
/// data descriptor). Tar and standard ZIP populate it reliably.
///
/// The type implements `PartialEq`, `Eq` and `Hash`, so entry lists can be
/// compared directly or collected into sets and maps.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct ArchiveEntryInfo {
    /// Path of the entry inside the archive, as returned by the decode
    /// callback used while listing.
    pub path: String,
    /// Uncompressed size in bytes as reported by the archive header.
    pub size: u64,
}
114+
104115
/// Determine the ownership behavior when unpacking the archive.
105116
#[derive(Clone, Copy, Debug)]
106117
pub enum Ownership {
@@ -146,6 +157,61 @@ pub(crate) fn decode_utf8(bytes: &[u8]) -> Result<String> {
146157
/// # }
147158
/// ```
148159
pub fn list_archive_files_with_encoding<R>(source: R, decode: DecodeCallback) -> Result<Vec<String>>
160+
where
161+
R: Read + Seek,
162+
{
163+
Ok(list_archive_entries_with_encoding(source, decode)?
164+
.into_iter()
165+
.map(|e| e.path)
166+
.collect())
167+
}
168+
169+
/// Get all files in a archive using `source` as a reader.
170+
/// # Example
171+
///
172+
/// ```no_run
173+
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
174+
/// use compress_tools::*;
175+
/// use std::fs::File;
176+
///
177+
/// let mut source = File::open("tree.tar")?;
178+
///
179+
/// let file_list = list_archive_files(&mut source)?;
180+
/// # Ok(())
181+
/// # }
182+
/// ```
183+
pub fn list_archive_files<R>(source: R) -> Result<Vec<String>>
184+
where
185+
R: Read + Seek,
186+
{
187+
list_archive_files_with_encoding(source, decode_utf8)
188+
}
189+
190+
/// Get entry metadata (path and uncompressed size) for every entry in an
191+
/// archive without extracting their contents.
192+
///
193+
/// See [`ArchiveEntryInfo`] for caveats on `size` reporting across formats.
194+
///
195+
/// # Example
196+
///
197+
/// ```no_run
198+
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
199+
/// use compress_tools::*;
200+
/// use std::fs::File;
201+
///
202+
/// let mut source = File::open("tree.tar")?;
203+
/// let decode_utf8 = |bytes: &[u8]| Ok(std::str::from_utf8(bytes)?.to_owned());
204+
///
205+
/// for entry in list_archive_entries_with_encoding(&mut source, decode_utf8)? {
206+
/// println!("{}: {} bytes", entry.path, entry.size);
207+
/// }
208+
/// # Ok(())
209+
/// # }
210+
/// ```
211+
pub fn list_archive_entries_with_encoding<R>(
212+
source: R,
213+
decode: DecodeCallback,
214+
) -> Result<Vec<ArchiveEntryInfo>>
149215
where
150216
R: Read + Seek,
151217
{
@@ -154,23 +220,28 @@ where
154220
Ownership::Ignore,
155221
source,
156222
|archive_reader, _, mut entry| unsafe {
157-
let mut file_list = Vec::new();
223+
let mut entries = Vec::new();
158224
loop {
159225
match ffi::archive_read_next_header(archive_reader, &mut entry) {
160-
ffi::ARCHIVE_EOF => return Ok(file_list),
226+
ffi::ARCHIVE_EOF => return Ok(entries),
161227
value => archive_result(value, archive_reader)?,
162228
}
163229

164230
let _utf8_guard = ffi::WindowsUTF8LocaleGuard::new();
165231
let cstr = libarchive_entry_pathname(entry)?;
166-
let file_name = decode(cstr.to_bytes())?;
167-
file_list.push(file_name);
232+
let path = decode(cstr.to_bytes())?;
233+
let size = libarchive_entry_size(entry);
234+
entries.push(ArchiveEntryInfo { path, size });
168235
}
169236
},
170237
)
171238
}
172239

173-
/// Get all files in a archive using `source` as a reader.
240+
/// Get entry metadata (path and uncompressed size) for every entry in an
241+
/// archive without extracting their contents.
242+
///
243+
/// See [`ArchiveEntryInfo`] for caveats on `size` reporting across formats.
244+
///
174245
/// # Example
175246
///
176247
/// ```no_run
@@ -180,15 +251,17 @@ where
180251
///
181252
/// let mut source = File::open("tree.tar")?;
182253
///
183-
/// let file_list = list_archive_files(&mut source)?;
254+
/// for entry in list_archive_entries(&mut source)? {
255+
/// println!("{}: {} bytes", entry.path, entry.size);
256+
/// }
184257
/// # Ok(())
185258
/// # }
186259
/// ```
187-
pub fn list_archive_files<R>(source: R) -> Result<Vec<String>>
260+
pub fn list_archive_entries<R>(source: R) -> Result<Vec<ArchiveEntryInfo>>
188261
where
189262
R: Read + Seek,
190263
{
191-
list_archive_files_with_encoding(source, decode_utf8)
264+
list_archive_entries_with_encoding(source, decode_utf8)
192265
}
193266

194267
/// Uncompress a file using `source` as the reader and `target` as a
@@ -621,6 +694,13 @@ fn libarchive_copy_data(
621694
}
622695
}
623696

697+
/// Read the size recorded in the `stat` record libarchive exposes for
/// `entry`.
///
/// Returns `0` when libarchive hands back no `stat` record (null pointer) or
/// when the recorded size is negative.
///
/// `entry` is assumed to be a valid entry pointer, such as the one filled in
/// by `archive_read_next_header` — NOTE(review): confirm for any new caller.
fn libarchive_entry_size(entry: *mut ffi::archive_entry) -> u64 {
    // SAFETY: relies on `entry` being a valid libarchive entry (see above).
    let entry_stat = unsafe { ffi::archive_entry_stat(entry) };
    // Guard against a null record instead of dereferencing it, mirroring the
    // null check done for the entry pathname.
    if entry_stat.is_null() {
        return 0;
    }

    // `st_size` is `i32` on Windows (see the crate's `stat` struct) and `i64`
    // on Unix. Widen through `i64` to keep the cast platform-agnostic.
    // SAFETY: `entry_stat` was checked to be non-null just above.
    let size = unsafe { (*entry_stat).st_size } as i64;
    // Clamp first so a negative value cannot wrap around in the unsigned cast.
    size.max(0) as u64
}
703+
624704
fn libarchive_entry_pathname<'a>(entry: *mut ffi::archive_entry) -> Result<&'a CStr> {
625705
let pathname = unsafe { ffi::archive_entry_pathname(entry) };
626706
if pathname.is_null() {

src/tokio_support.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,32 @@ where
5353
async_support::list_archive_files(TOKIO_BLOCKING_EXECUTOR, source.compat()).await
5454
}
5555

56+
/// Async version of
57+
/// [`list_archive_entries_with_encoding`](crate::
58+
/// list_archive_entries_with_encoding).
59+
pub async fn list_archive_entries_with_encoding<R>(
60+
source: R,
61+
decode: DecodeCallback,
62+
) -> Result<Vec<crate::ArchiveEntryInfo>>
63+
where
64+
R: AsyncRead + AsyncSeek + Unpin,
65+
{
66+
async_support::list_archive_entries_with_encoding(
67+
TOKIO_BLOCKING_EXECUTOR,
68+
source.compat(),
69+
decode,
70+
)
71+
.await
72+
}
73+
74+
/// Async version of [`list_archive_entries`](crate::list_archive_entries).
75+
pub async fn list_archive_entries<R>(source: R) -> Result<Vec<crate::ArchiveEntryInfo>>
76+
where
77+
R: AsyncRead + AsyncSeek + Unpin,
78+
{
79+
async_support::list_archive_entries(TOKIO_BLOCKING_EXECUTOR, source.compat()).await
80+
}
81+
5682
/// Async version of [`uncompress_data`](crate::uncompress_data).
5783
pub async fn uncompress_data<R, W>(source: R, target: W) -> Result<usize>
5884
where

tests/integration_test.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,26 @@ fn successfully_list_archive_files() {
153153
);
154154
}
155155

156+
#[test]
fn successfully_list_archive_entries() {
    // Expected (path, size) pairs, in the order the archive lists them.
    let expected: Vec<(String, u64)> = vec![
        ("tree/", 0),
        ("tree/branch1/", 0),
        ("tree/branch1/leaf", 12),
        ("tree/branch2/", 0),
        ("tree/branch2/leaf", 14),
    ]
    .into_iter()
    .map(|(path, size)| (path.to_string(), size))
    .collect();

    let archive = std::fs::File::open("tests/fixtures/tree.tar").unwrap();
    let observed: Vec<(String, u64)> = list_archive_entries(archive)
        .unwrap()
        .into_iter()
        .map(|entry| (entry.path, entry.size))
        .collect();

    assert_eq!(
        observed, expected,
        "entry list (path, size) did not match"
    );
}
175+
156176
#[test]
157177
fn list_archive_zip() {
158178
let source = std::fs::File::open("tests/fixtures/test.zip").unwrap();

0 commit comments

Comments
 (0)