|
| 1 | +use std::collections::HashSet; |
| 2 | +use std::io::Read; |
| 3 | +use std::path::Component; |
| 4 | +use std::path::Path; |
| 5 | +use std::path::PathBuf; |
| 6 | + |
| 7 | +use flate2::read::GzDecoder; |
| 8 | + |
| 9 | +/// Unpack a gzipped tarball into `dir`, dropping its top-level directory and marking |
| 10 | +/// every file read only |
| 11 | +/// |
| 12 | +/// CRAN package tarballs and R source tarballs both wrap their content in a single |
| 13 | +/// top-level directory (`{package}/` or `R-{version}/`). We strip it so the content lands |
| 14 | +/// directly under `dir`. |
| 15 | +/// |
| 16 | +/// Files are marked as read only to discourage accidental edits. |
| 17 | +pub(crate) fn extract(reader: impl Read, dir: &Path) -> anyhow::Result<()> { |
| 18 | + let gz = GzDecoder::new(reader); |
| 19 | + let mut archive = tar::Archive::new(gz); |
| 20 | + |
| 21 | + // Parent directories we've already created |
| 22 | + let mut created: HashSet<PathBuf> = HashSet::new(); |
| 23 | + |
| 24 | + for entry in archive.entries()? { |
| 25 | + let mut entry = entry?; |
| 26 | + let is_file = entry.header().entry_type().is_file(); |
| 27 | + |
| 28 | + let path = entry.path()?.into_owned(); |
| 29 | + let Some(relative) = strip_top_level(&path) else { |
| 30 | + // The top-level directory entry itself, or an unsafe path, nothing to unpack |
| 31 | + continue; |
| 32 | + }; |
| 33 | + |
| 34 | + let destination = dir.join(relative); |
| 35 | + |
| 36 | + // We must create parent directories before unpacking into them. We remember ones |
| 37 | + // we've already created to avoid thousands of redundant `create_dir_all()` calls. |
| 38 | + if let Some(parent) = destination.parent() { |
| 39 | + if !created.contains(parent) { |
| 40 | + std::fs::create_dir_all(parent)?; |
| 41 | + created.insert(parent.to_path_buf()); |
| 42 | + } |
| 43 | + } |
| 44 | + |
| 45 | + entry.unpack(&destination)?; |
| 46 | + |
| 47 | + if is_file { |
| 48 | + oak_fs::permissions::set_readonly(&destination)?; |
| 49 | + } |
| 50 | + } |
| 51 | + |
| 52 | + Ok(()) |
| 53 | +} |
| 54 | + |
/// Strip the single top-level directory from a tarball entry path
///
/// A leading `.` (as in `./pkg/file`) is ignored, so the component that gets stripped is
/// always the real top-level directory name.
///
/// Returns `None` for the top-level directory entry itself, or for any unsafe path
/// (absolute, or containing `..`) that could escape the destination. The stripped
/// top-level component is validated too, so `/pkg/file` and `../pkg/file` are rejected
/// rather than silently losing their first component.
fn strip_top_level(path: &Path) -> Option<&Path> {
    let mut components = path.components();

    // Consume the top-level directory. It must be a plain name; a root, prefix, or `..`
    // in that position means the path is not of the `{top}/{content}` shape we expect.
    loop {
        match components.next()? {
            Component::CurDir => continue,
            Component::Normal(_) => break,
            Component::Prefix(_) | Component::RootDir | Component::ParentDir => return None,
        }
    }

    let rest = components.as_path();

    if rest.as_os_str().is_empty() {
        // The top-level directory entry itself
        return None;
    }

    if !rest.components().all(|c| matches!(c, Component::Normal(_))) {
        // Something would be strange here!
        return None;
    }

    Some(rest)
}
0 commit comments