Skip to content

Commit 336f2dd

Browse files
authored
fix(dl): verify download integrity (#168)
1 parent c9db71d commit 336f2dd

10 files changed

Lines changed: 227 additions & 18 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ semver = "1.0.27"
5858
serde = { version = "1.0.228", features = ["derive"] }
5959
serde_json = { version = "1.0.149", features = ["indexmap"] }
6060
serial_test = "3.3.1"
61+
sha2 = "0.10.9"
6162
soar-config = { version = "0.7.0", path = "crates/soar-config" }
6263
soar-core = { version = "0.15.0", path = "crates/soar-core" }
6364
soar-db = { version = "0.5.1", path = "crates/soar-db" }

crates/soar-core/src/package/install.rs

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -576,11 +576,13 @@ impl PackageInstaller {
576576
callback(Progress::Aborted);
577577
}
578578
// Return error after max retries
579-
return Err(last_error.unwrap_or_else(|| {
580-
DownloadError::Multiple {
581-
errors: vec!["Download failed after 5 retries".into()],
582-
}
583-
}))?;
579+
return Err(last_error
580+
.unwrap_or_else(|| {
581+
DownloadError::Multiple {
582+
errors: vec!["Download failed after 5 retries".into()],
583+
}
584+
})
585+
.into());
584586
}
585587
match dl.clone().execute() {
586588
Ok(_) => {
@@ -605,7 +607,7 @@ impl PackageInstaller {
605607
}
606608
last_error = Some(err);
607609
} else {
608-
return Err(err)?;
610+
return Err(err.into());
609611
}
610612
}
611613
}
@@ -634,6 +636,10 @@ impl PackageInstaller {
634636
.extract(should_extract)
635637
.extract_to(&extract_dir);
636638

639+
if let Some(ref bsum) = self.package.bsum {
640+
dl = dl.checksum(bsum);
641+
}
642+
637643
if let Some(ref cb) = self.progress_callback {
638644
let cb = cb.clone();
639645
dl = dl.progress(move |p| {

crates/soar-dl/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ percent-encoding = { workspace = true }
1717
regex = { workspace = true }
1818
serde = { workspace = true }
1919
serde_json = { workspace = true }
20+
sha2 = { workspace = true }
2021
soar-utils = { workspace = true }
2122
thiserror = { workspace = true }
2223
tracing = { workspace = true }

crates/soar-dl/src/download.rs

Lines changed: 87 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ pub struct Download {
3333
pub extract_to: Option<PathBuf>,
3434
pub on_progress: Option<Arc<dyn Fn(Progress) + Send + Sync>>,
3535
pub ghcr_blob: bool,
36+
pub expected_checksum: Option<String>,
3637
}
3738

3839
impl Download {
@@ -63,9 +64,25 @@ impl Download {
6364
extract_to: None,
6465
on_progress: None,
6566
ghcr_blob: false,
67+
expected_checksum: None,
6668
}
6769
}
6870

71+
/// Sets the expected blake3 checksum (hex) to verify the downloaded file
72+
/// against before it is made executable or extracted.
73+
///
74+
/// # Examples
75+
///
76+
/// ```
77+
/// use soar_dl::download::Download;
78+
///
79+
/// let _ = Download::new("https://example.com/file").checksum("abcdef123456");
80+
/// ```
81+
pub fn checksum(mut self, checksum: impl Into<String>) -> Self {
82+
self.expected_checksum = Some(checksum.into());
83+
self
84+
}
85+
6986
/// Turns on GHCR blob support.
7087
///
7188
/// When enabled, the `Authorization` header is set to `Bearer QQ==`
@@ -257,10 +274,16 @@ impl Download {
257274
// Only skip if there's no resume info (complete download)
258275
// If resume info exists, it's a partial download that should continue
259276
if resume_info.is_none() {
260-
debug!(path = %output_path.display(), "file exists, skipping download");
261-
return Ok(output_path);
277+
if self.verify_checksum(&output_path).is_ok() {
278+
debug!(path = %output_path.display(), "file exists, skipping download");
279+
return Ok(output_path);
280+
}
281+
warn!(path = %output_path.display(), "cached file failed checksum, re-downloading");
282+
fs::remove_file(&output_path)?;
283+
resume_info = None;
284+
} else {
285+
debug!(path = %output_path.display(), "file exists but is partial, resuming download");
262286
}
263-
debug!(path = %output_path.display(), "file exists but is partial, resuming download");
264287
}
265288
OverwriteMode::Force => {
266289
debug!(path = %output_path.display(), "file exists, forcing overwrite");
@@ -287,6 +310,11 @@ impl Download {
287310

288311
self.download_to_file(&output_path, resume_info)?;
289312

313+
if let Err(e) = self.verify_checksum(&output_path) {
314+
fs::remove_file(&output_path).ok();
315+
return Err(e);
316+
}
317+
290318
if is_elf(&output_path) {
291319
trace!(path = %output_path.display(), "detected ELF binary, setting executable permissions");
292320
std::fs::set_permissions(&output_path, Permissions::from_mode(0o755))?;
@@ -310,6 +338,22 @@ impl Download {
310338
Ok(output_path)
311339
}
312340

341+
fn verify_checksum(&self, path: &Path) -> Result<(), DownloadError> {
342+
let Some(ref expected) = self.expected_checksum else {
343+
return Ok(());
344+
};
345+
let actual = soar_utils::hash::calculate_checksum(path)
346+
.map_err(|e| DownloadError::Io(std::io::Error::other(e.to_string())))?;
347+
if actual.eq_ignore_ascii_case(expected) {
348+
Ok(())
349+
} else {
350+
Err(DownloadError::ChecksumMismatch {
351+
expected: expected.clone(),
352+
got: actual,
353+
})
354+
}
355+
}
356+
313357
/// Streams the HTTP response body for this download's URL to standard output.
314358
///
315359
/// # Examples
@@ -502,3 +546,43 @@ fn prompt_overwrite(path: &Path) -> std::io::Result<bool> {
502546

503547
Ok(matches!(line.trim().to_lowercase().as_str(), "y" | "yes"))
504548
}
549+
550+
#[cfg(test)]
551+
mod tests {
552+
use std::io::Write;
553+
554+
use super::*;
555+
556+
fn temp_with(contents: &[u8]) -> tempfile::NamedTempFile {
557+
let mut f = tempfile::NamedTempFile::new().unwrap();
558+
f.write_all(contents).unwrap();
559+
f.flush().unwrap();
560+
f
561+
}
562+
563+
#[test]
564+
fn verify_checksum_ok_when_absent_or_matching() {
565+
let f = temp_with(b"hello soar");
566+
let expected = soar_utils::hash::calculate_checksum(f.path()).unwrap();
567+
568+
let dl = Download::new("https://example.com/x");
569+
assert!(dl.verify_checksum(f.path()).is_ok());
570+
571+
let dl = Download::new("https://example.com/x").checksum(expected.to_uppercase());
572+
assert!(dl.verify_checksum(f.path()).is_ok());
573+
}
574+
575+
#[test]
576+
fn verify_checksum_rejects_mismatch() {
577+
let f = temp_with(b"hello soar");
578+
let dl = Download::new("https://example.com/x").checksum("deadbeef");
579+
match dl.verify_checksum(f.path()) {
580+
Err(DownloadError::ChecksumMismatch {
581+
expected, ..
582+
}) => {
583+
assert_eq!(expected, "deadbeef");
584+
}
585+
other => panic!("expected ChecksumMismatch, got {other:?}"),
586+
}
587+
}
588+
}

crates/soar-dl/src/error.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,14 @@ pub enum DownloadError {
4545
#[diagnostic(code(soar_dl::unsafe_layer_path))]
4646
UnsafeLayerPath { title: String },
4747

48+
#[error("Checksum mismatch: expected {expected}, got {got}")]
49+
#[diagnostic(code(soar_dl::checksum_mismatch))]
50+
ChecksumMismatch { expected: String, got: String },
51+
52+
#[error("Digest mismatch: expected {expected}, got {got}")]
53+
#[diagnostic(code(soar_dl::digest_mismatch))]
54+
DigestMismatch { expected: String, got: String },
55+
4856
#[error("Invalid response from server")]
4957
#[diagnostic(code(soar_dl::invalid_response))]
5058
InvalidResponse,

crates/soar-dl/src/oci.rs

Lines changed: 77 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use std::{
1111
};
1212

1313
use serde::Deserialize;
14+
use sha2::{Digest, Sha256};
1415
use soar_utils::fs::is_elf;
1516
use tracing::{debug, trace};
1617
use ureq::http::header::{ACCEPT, AUTHORIZATION, ETAG, IF_RANGE, RANGE};
@@ -153,14 +154,46 @@ impl OciLayer {
153154
/// Only the final path component of the title is used, so titles such as
154155
/// `../../etc/passwd`, `/etc/passwd`, or `..` cannot escape `output_dir`.
155156
fn safe_layer_path(output_dir: &Path, title: &str) -> Result<PathBuf, DownloadError> {
156-
let name = Path::new(title)
157-
.file_name()
158-
.ok_or_else(|| DownloadError::UnsafeLayerPath {
157+
let name = Path::new(title).file_name().ok_or_else(|| {
158+
DownloadError::UnsafeLayerPath {
159159
title: title.to_string(),
160-
})?;
160+
}
161+
})?;
161162
Ok(output_dir.join(name))
162163
}
163164

165+
/// Verifies the file at `path` against an OCI content-addressable digest
166+
/// (e.g. `sha256:<hex>`). On mismatch the file is removed and an error returned.
167+
fn verify_layer_digest(path: &Path, digest: &str) -> Result<(), DownloadError> {
168+
let expected = digest.strip_prefix("sha256:").unwrap_or(digest);
169+
170+
let mut file = File::open(path)?;
171+
let mut hasher = Sha256::new();
172+
let mut buffer = [0u8; 8192];
173+
loop {
174+
let n = file.read(&mut buffer)?;
175+
if n == 0 {
176+
break;
177+
}
178+
hasher.update(&buffer[..n]);
179+
}
180+
let got: String = hasher
181+
.finalize()
182+
.iter()
183+
.map(|b| format!("{b:02x}"))
184+
.collect();
185+
186+
if got.eq_ignore_ascii_case(expected) {
187+
Ok(())
188+
} else {
189+
std::fs::remove_file(path).ok();
190+
Err(DownloadError::DigestMismatch {
191+
expected: expected.to_string(),
192+
got,
193+
})
194+
}
195+
}
196+
164197
#[derive(Clone)]
165198
pub struct OciDownload {
166199
reference: OciReference,
@@ -483,6 +516,7 @@ impl OciDownload {
483516
if path.is_file() {
484517
if let Ok(metadata) = path.metadata() {
485518
if metadata.len() == layer.size {
519+
verify_layer_digest(&path, &layer.digest)?;
486520
downloaded += layer.size;
487521
if let Some(ref cb) = self.on_progress {
488522
cb(Progress::Chunk {
@@ -590,6 +624,10 @@ impl OciDownload {
590624
if path.is_file() {
591625
if let Ok(metadata) = path.metadata() {
592626
if metadata.len() == layer.size {
627+
if let Err(e) = verify_layer_digest(&path, &layer.digest) {
628+
errors.lock().unwrap().push(format!("{e}"));
629+
continue;
630+
}
593631
let current = downloaded
594632
.fetch_add(layer.size, Ordering::Relaxed)
595633
+ layer.size;
@@ -766,6 +804,8 @@ impl OciDownload {
766804

767805
let path = dl.execute()?;
768806

807+
verify_layer_digest(&path, &self.reference.tag)?;
808+
769809
Ok(vec![path])
770810
}
771811

@@ -957,6 +997,8 @@ fn download_layer_impl(
957997
}
958998
}
959999

1000+
verify_layer_digest(path, &layer.digest)?;
1001+
9601002
if is_elf(path) {
9611003
trace!(path = %path.display(), "setting executable permissions on ELF binary");
9621004
std::fs::set_permissions(path, Permissions::from_mode(0o755))?;
@@ -975,6 +1017,37 @@ fn download_layer_impl(
9751017
mod tests {
9761018
use super::*;
9771019

1020+
#[test]
1021+
fn verify_layer_digest_accepts_correct_and_rejects_wrong() {
1022+
use std::io::Write;
1023+
1024+
let dir = tempfile::tempdir().unwrap();
1025+
let path = dir.path().join("layer.bin");
1026+
{
1027+
let mut f = File::create(&path).unwrap();
1028+
f.write_all(b"layer-bytes").unwrap();
1029+
}
1030+
1031+
let mut hasher = Sha256::new();
1032+
hasher.update(b"layer-bytes");
1033+
let hex: String = hasher
1034+
.finalize()
1035+
.iter()
1036+
.map(|b| format!("{b:02x}"))
1037+
.collect();
1038+
1039+
assert!(verify_layer_digest(&path, &format!("sha256:{hex}")).is_ok());
1040+
assert!(path.exists());
1041+
1042+
match verify_layer_digest(&path, "sha256:00ff") {
1043+
Err(DownloadError::DigestMismatch {
1044+
..
1045+
}) => {}
1046+
other => panic!("expected DigestMismatch, got {other:?}"),
1047+
}
1048+
assert!(!path.exists(), "file should be removed on digest mismatch");
1049+
}
1050+
9781051
#[test]
9791052
fn safe_layer_path_keeps_plain_name_inside_output_dir() {
9801053
let out = Path::new("/tmp/soar-out");

0 commit comments

Comments
 (0)