Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions ci/fixture-images.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,9 @@ docker://registry.access.redhat.com/ubi10/ubi:10.1-1772441712 docker://ghcr.io/c
# centos-bootc stream10, pinned by manifest digest.
# Used in: crates/integration-tests/src/tests/digest_stability.rs
docker://quay.io/centos-bootc/centos-bootc@sha256:d1913e3d616b9acb7fc2e3331be8baf844048bca2681a23d34e53e75eb18f3d0 docker://ghcr.io/composefs/ci-fixture-centos-bootc:stream10-d1913e3d

# Ubuntu 26.04 (resolute), pinned by manifest digest.
# Uses umoci/PAX format tars with GNU record padding — specifically exercises
# the trailing-padding preservation fix in split_async().
# Used in: crates/integration-tests/src/tests/digest_stability.rs
docker://docker.io/library/ubuntu@sha256:d31acef2a964b6df1f2b7e20a1525c4f2378024e087a4f8a8a9a4247e6a79573 docker://ghcr.io/composefs/ci-fixture-ubuntu-resolute:26.04-d31acef2
64 changes: 61 additions & 3 deletions crates/composefs-integration-tests/src/tests/digest_stability.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,17 @@ use crate::{cfsctl, integration_test};
struct ContainerImage {
/// Human-readable label for test output.
label: &'static str,
/// OCI image reference (docker:// prefix).
/// Primary OCI image reference — the ghcr.io mirror (docker:// prefix).
///
/// The mirror is populated by the `mirror-fixture-images.yml` workflow after
/// a PR that adds a new entry to `ci/fixture-images.txt` is merged to main.
/// During the PR itself the mirror does not exist yet, so the test falls back
/// to `upstream_ref` when this pull fails.
image_ref: &'static str,
/// Upstream OCI image reference used as a fallback when `image_ref` is
/// unavailable (e.g. a PR that adds a new mirror entry before it has been
/// pushed). Should be pinned by digest for reproducibility.
upstream_ref: &'static str,
/// Expected composefs image ID without `--bootable`.
expected_id: &'static str,
/// Expected composefs image ID with `--bootable`, or `None` if the
Expand All @@ -25,6 +34,7 @@ struct ContainerImage {
const UBI10: ContainerImage = ContainerImage {
label: "ubi10",
image_ref: "docker://ghcr.io/composefs/ci-fixture-ubi10:10.1-1772441712",
upstream_ref: "docker://registry.access.redhat.com/ubi10/ubi:10.1-1772441712",
expected_id: "ff8dad033a3e6015d63d6b00c16918da27bf96cc8ddd824e521549db01013227\
87c30a3f49e5716f8f6052d78b46308dfaaccf0dfc504d26fe58d468810c0b0e",
expected_bootable_id: None,
Expand All @@ -37,6 +47,7 @@ const UBI10: ContainerImage = ContainerImage {
const CENTOS_BOOTC: ContainerImage = ContainerImage {
label: "centos-bootc",
image_ref: "docker://ghcr.io/composefs/ci-fixture-centos-bootc:stream10-d1913e3d",
upstream_ref: "docker://quay.io/centos-bootc/centos-bootc@sha256:d1913e3d616b9acb7fc2e3331be8baf844048bca2681a23d34e53e75eb18f3d0",
expected_id: "ad575e0570dfb74cbc837f41715d3fba890dd983d992332eaeee93493ce112ee\
50d3dc5f6f2a3214cc92412fe3ae936e2e9c0eac24ea787e83ef13c0a718a193",
expected_bootable_id: Some(
Expand All @@ -45,16 +56,63 @@ const CENTOS_BOOTC: ContainerImage = ContainerImage {
),
};

// Ubuntu 26.04 (resolute), pinned by manifest digest.
// Mirrored from docker.io/library/ubuntu via ci/fixture-images.txt.
// Ubuntu 26.04 uses umoci/PAX format tars produced by Rockcraft, which emit
// GNU-style record padding after the end-of-archive blocks. This exercises
// the trailing-padding preservation fix in split_async() — without it the
// reconstructed tar is shorter than the original and the diff_id checksum
// fails in the @digest code path of create_filesystem().
const UBUNTU_RESOLUTE: ContainerImage = ContainerImage {
label: "ubuntu-resolute",
image_ref: "docker://ghcr.io/composefs/ci-fixture-ubuntu-resolute:26.04-d31acef2",
upstream_ref: "docker://docker.io/library/ubuntu@sha256:d31acef2a964b6df1f2b7e20a1525c4f2378024e087a4f8a8a9a4247e6a79573",
expected_id: "150caabb982d7005db1a1d0480d57a95e84b160aa2b1159f9aae66e92ba07b36\
11ea38e1836eff923dc3a1a617c18494757be0f5e3db16cc7a522981b3f42d40",
expected_bootable_id: None,
};

/// All container images to test.
const CONTAINER_IMAGES: &[&ContainerImage] = &[&UBI10, &CENTOS_BOOTC];
const CONTAINER_IMAGES: &[&ContainerImage] = &[&UBI10, &CENTOS_BOOTC, &UBUNTU_RESOLUTE];

/// Return `true` if network tests should be skipped.
fn skip_network() -> bool {
std::env::var_os("COMPOSEFS_SKIP_NETWORK").is_some()
}

/// Pull an OCI image and return the config digest from the pull output.
///
/// Tries `image.image_ref` (the ghcr.io mirror) first. If that fails —
/// which is expected for PRs that add a new mirror entry before it has been
/// pushed — falls back to `image.upstream_ref` with a warning.
fn pull_image(
sh: &Shell,
cfsctl: &std::path::Path,
repo: &std::path::Path,
image: &ContainerImage,
name: &str,
) -> Result<String> {
let candidates = [(image.image_ref, false), (image.upstream_ref, true)];
let mut last_err = None;
for (image_ref, is_fallback) in candidates {
if is_fallback {
eprintln!(
"WARNING: mirror pull failed for {}; falling back to upstream {image_ref}",
image.label,
);
}
match try_pull_image(sh, cfsctl, repo, image_ref, name) {
Ok(config) => return Ok(config),
Err(e) => {
eprintln!("Pull of {image_ref} failed: {e:#}");
last_err = Some(e);
}
}
}
Err(last_err.unwrap())
}

fn try_pull_image(
sh: &Shell,
cfsctl: &std::path::Path,
repo: &std::path::Path,
Expand Down Expand Up @@ -126,7 +184,7 @@ fn test_oci_container_digest_stability() -> Result<()> {
cmd!(sh, "{cfsctl} --repo {repo} init --insecure").read()?;

eprintln!("Pulling {} (this may take a while)...", image.label);
let config = pull_image(&sh, &cfsctl, repo, image.image_ref, image.label)?;
let config = pull_image(&sh, &cfsctl, repo, image, image.label)?;

// Plain (non-bootable) image ID
let plain_id = compute_id(&sh, &cfsctl, repo, &config, false)?;
Expand Down
133 changes: 101 additions & 32 deletions crates/composefs-oci/src/tar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,26 @@ pub async fn split_async<ObjectID: FsVerityHashValue>(
}
ParseEvent::End { consumed } => {
builder.push_inline(&buf.split_to(consumed));
// GNU tar pads archives to a "record size" (typically 20×512 = 10240 bytes).
// After the two end-of-archive zero blocks (consumed above), there may be
// additional zero-padding blocks before EOF. We must store them to reproduce
// the original byte stream faithfully for diff_id checksum verification.
//
// Note: ideally tar-core would surface these extra bytes through
// ParseEvent::End::consumed so callers don't need to know about record
// granularity; this drain is a workaround until that is addressed upstream.
// See https://github.com/composefs/tar-core/pull/24 which will obviate this.
if !buf.is_empty() {
builder.push_inline(&buf.split());
}
loop {
buf.reserve(IO_BUF_CAPACITY);
let n = tar_stream.read_buf(&mut buf).await?;
if n == 0 {
break;
}
builder.push_inline(&buf.split());
}
break;
}
ParseEvent::SparseEntry { .. } => {
Expand Down Expand Up @@ -582,6 +602,56 @@ mod tests {
assert!(get_entry(&mut reader).unwrap().is_none());
}

/// Verify that a tar without any trailing record padding survives a byte-exact
/// roundtrip. This is the common case for tars produced by the Rust `tar` crate
/// and most standard tooling; it forms a baseline paired with the padding test below.
#[test]
fn test_no_record_padding_roundtrip() {
let mut tar_data = Vec::new();
{
let mut builder = Builder::new(&mut tar_data);
append_file(&mut builder, "hello.txt", b"hello world").unwrap();
builder.finish().unwrap();
}
// Confirm the Rust tar crate did not add GNU record padding.
const GNU_RECORD_SIZE: usize = 20 * 512;
assert_ne!(
tar_data.len() % GNU_RECORD_SIZE,
0,
"expected tar without GNU record padding for this test"
);
roundtrip_tar_bytes(&tar_data);
}

/// Verify that GNU-style record padding (zero bytes after the two end-of-archive
/// blocks, filling the archive out to a 20×512 record boundary) is preserved
/// byte-for-bit through split_async → cat(). Without the fix, the reconstructed
/// tar was shorter than the original, causing diff_id checksum failures for images
/// produced by umoci/Rockcraft (e.g. Ubuntu 26.04).
#[test]
fn test_gnu_record_padding_roundtrip() {
const GNU_RECORD_SIZE: usize = 20 * 512; // 10240 bytes

let mut tar_data = Vec::new();
{
let mut builder = Builder::new(&mut tar_data);
append_file(&mut builder, "hello.txt", b"hello world").unwrap();
builder.finish().unwrap();
}

// Simulate GNU record padding: extend to the next record boundary with zeros.
let remainder = tar_data.len() % GNU_RECORD_SIZE;
if remainder != 0 {
tar_data.resize(tar_data.len() + (GNU_RECORD_SIZE - remainder), 0);
}

// The tar length must now be a multiple of the record size.
assert_eq!(tar_data.len() % GNU_RECORD_SIZE, 0);

// roundtrip_tar_bytes asserts byte-exact reproduction through the splitstream.
roundtrip_tar_bytes(&tar_data);
}

#[tokio::test]
async fn test_single_small_file() {
let mut tar_data = Vec::new();
Expand Down Expand Up @@ -1387,6 +1457,37 @@ mod tests {
assert_eq!(entries[0].path, PathBuf::from(format!("/{full_path}")));
}

/// Byte-exact roundtrip: original tar bytes -> split_async -> splitstream -> cat()
/// -> assert bytes match. Catches any corruption in either the inline or
/// external code paths, including missing padding or off-by-one errors.
fn roundtrip_tar_bytes(tar_data: &[u8]) {
let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async {
let repo = create_test_repository().unwrap();
let (object_id, _stats) = split_async(tar_data, repo.clone(), TAR_LAYER_CONTENT_TYPE)
.await
.unwrap();

let mut reader: SplitStreamReader<Sha256HashValue> = SplitStreamReader::new(
repo.open_object(&object_id).unwrap().into(),
Some(TAR_LAYER_CONTENT_TYPE),
)
.unwrap();

let mut reassembled = Vec::new();
reader.cat(&repo, &mut reassembled).unwrap();
assert_eq!(
reassembled.len(),
tar_data.len(),
"reassembled tar length mismatch"
);
assert_eq!(
reassembled, tar_data,
"reassembled tar bytes differ from original"
);
});
}

/// Property-based tests for tar path handling.
mod proptest_tests {
use super::*;
Expand Down Expand Up @@ -1527,38 +1628,6 @@ mod tests {
tar_data
}

/// Byte-exact roundtrip: original tar -> split_async -> splitstream -> cat()
/// -> assert bytes match. Catches any corruption in either the inline or
/// external code paths, including missing padding or off-by-one errors.
fn roundtrip_tar_bytes(tar_data: &[u8]) {
let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async {
let repo = create_test_repository().unwrap();
let (object_id, _stats) =
split_async(tar_data, repo.clone(), TAR_LAYER_CONTENT_TYPE)
.await
.unwrap();

let mut reader: SplitStreamReader<Sha256HashValue> = SplitStreamReader::new(
repo.open_object(&object_id).unwrap().into(),
Some(TAR_LAYER_CONTENT_TYPE),
)
.unwrap();

let mut reassembled = Vec::new();
reader.cat(&repo, &mut reassembled).unwrap();
assert_eq!(
reassembled.len(),
tar_data.len(),
"reassembled tar length mismatch"
);
assert_eq!(
reassembled, tar_data,
"reassembled tar bytes differ from original"
);
});
}

proptest! {
#![proptest_config(ProptestConfig::with_cases(64))]

Expand Down