diff --git a/ci/fixture-images.txt b/ci/fixture-images.txt index 07d1de4d..3e70b911 100644 --- a/ci/fixture-images.txt +++ b/ci/fixture-images.txt @@ -16,3 +16,9 @@ docker://registry.access.redhat.com/ubi10/ubi:10.1-1772441712 docker://ghcr.io/c # centos-bootc stream10, pinned by manifest digest. # Used in: crates/integration-tests/src/tests/digest_stability.rs docker://quay.io/centos-bootc/centos-bootc@sha256:d1913e3d616b9acb7fc2e3331be8baf844048bca2681a23d34e53e75eb18f3d0 docker://ghcr.io/composefs/ci-fixture-centos-bootc:stream10-d1913e3d + +# Ubuntu 26.04 (resolute), pinned by manifest digest. +# Uses umoci/PAX format tars with GNU record padding — specifically exercises +# the trailing-padding preservation fix in split_async(). +# Used in: crates/integration-tests/src/tests/digest_stability.rs +docker://docker.io/library/ubuntu@sha256:d31acef2a964b6df1f2b7e20a1525c4f2378024e087a4f8a8a9a4247e6a79573 docker://ghcr.io/composefs/ci-fixture-ubuntu-resolute:26.04-d31acef2 diff --git a/crates/composefs-integration-tests/src/tests/digest_stability.rs b/crates/composefs-integration-tests/src/tests/digest_stability.rs index 704f7dad..11fb5b95 100644 --- a/crates/composefs-integration-tests/src/tests/digest_stability.rs +++ b/crates/composefs-integration-tests/src/tests/digest_stability.rs @@ -9,8 +9,17 @@ use crate::{cfsctl, integration_test}; struct ContainerImage { /// Human-readable label for test output. label: &'static str, - /// OCI image reference (docker:// prefix). + /// Primary OCI image reference — the ghcr.io mirror (docker:// prefix). + /// + /// The mirror is populated by the `mirror-fixture-images.yml` workflow after + /// a PR that adds a new entry to `ci/fixture-images.txt` is merged to main. + /// During the PR itself the mirror does not exist yet, so the test falls back + /// to `upstream_ref` when this pull fails. image_ref: &'static str, + /// Upstream OCI image reference used as a fallback when `image_ref` is + /// unavailable (e.g. a PR that adds a new mirror entry before it has been + /// pushed). Should be pinned by digest for reproducibility. + upstream_ref: &'static str, /// Expected composefs image ID without `--bootable`. expected_id: &'static str, /// Expected composefs image ID with `--bootable`, or `None` if the @@ -25,6 +34,7 @@ struct ContainerImage { const UBI10: ContainerImage = ContainerImage { label: "ubi10", image_ref: "docker://ghcr.io/composefs/ci-fixture-ubi10:10.1-1772441712", + upstream_ref: "docker://registry.access.redhat.com/ubi10/ubi:10.1-1772441712", expected_id: "ff8dad033a3e6015d63d6b00c16918da27bf96cc8ddd824e521549db01013227\ 87c30a3f49e5716f8f6052d78b46308dfaaccf0dfc504d26fe58d468810c0b0e", expected_bootable_id: None, @@ -37,6 +47,7 @@ const UBI10: ContainerImage = ContainerImage { const CENTOS_BOOTC: ContainerImage = ContainerImage { label: "centos-bootc", image_ref: "docker://ghcr.io/composefs/ci-fixture-centos-bootc:stream10-d1913e3d", + upstream_ref: "docker://quay.io/centos-bootc/centos-bootc@sha256:d1913e3d616b9acb7fc2e3331be8baf844048bca2681a23d34e53e75eb18f3d0", expected_id: "ad575e0570dfb74cbc837f41715d3fba890dd983d992332eaeee93493ce112ee\ 50d3dc5f6f2a3214cc92412fe3ae936e2e9c0eac24ea787e83ef13c0a718a193", expected_bootable_id: Some( @@ -45,8 +56,24 @@ const CENTOS_BOOTC: ContainerImage = ContainerImage { ), }; +// Ubuntu 26.04 (resolute), pinned by manifest digest. +// Mirrored from docker.io/library/ubuntu via ci/fixture-images.txt. +// Ubuntu 26.04 uses umoci/PAX format tars produced by Rockcraft, which emit +// GNU-style record padding after the end-of-archive blocks. This exercises +// the trailing-padding preservation fix in split_async() — without it the +// reconstructed tar is shorter than the original and the diff_id checksum +// fails in the @digest code path of create_filesystem(). +const UBUNTU_RESOLUTE: ContainerImage = ContainerImage { + label: "ubuntu-resolute", + image_ref: "docker://ghcr.io/composefs/ci-fixture-ubuntu-resolute:26.04-d31acef2", + upstream_ref: "docker://docker.io/library/ubuntu@sha256:d31acef2a964b6df1f2b7e20a1525c4f2378024e087a4f8a8a9a4247e6a79573", + expected_id: "150caabb982d7005db1a1d0480d57a95e84b160aa2b1159f9aae66e92ba07b36\ + 11ea38e1836eff923dc3a1a617c18494757be0f5e3db16cc7a522981b3f42d40", + expected_bootable_id: None, +}; + /// All container images to test. -const CONTAINER_IMAGES: &[&ContainerImage] = &[&UBI10, &CENTOS_BOOTC]; +const CONTAINER_IMAGES: &[&ContainerImage] = &[&UBI10, &CENTOS_BOOTC, &UBUNTU_RESOLUTE]; /// Return `true` if network tests should be skipped. fn skip_network() -> bool { @@ -54,7 +81,38 @@ fn skip_network() -> bool { } /// Pull an OCI image and return the config digest from the pull output. +/// +/// Tries `image.image_ref` (the ghcr.io mirror) first. If that fails — +/// which is expected for PRs that add a new mirror entry before it has been +/// pushed — falls back to `image.upstream_ref` with a warning. fn pull_image( + sh: &Shell, + cfsctl: &std::path::Path, + repo: &std::path::Path, + image: &ContainerImage, + name: &str, +) -> Result { + let candidates = [(image.image_ref, false), (image.upstream_ref, true)]; + let mut last_err = None; + for (image_ref, is_fallback) in candidates { + if is_fallback { + eprintln!( + "WARNING: mirror pull failed for {}; falling back to upstream {image_ref}", + image.label, + ); + } + match try_pull_image(sh, cfsctl, repo, image_ref, name) { + Ok(config) => return Ok(config), + Err(e) => { + eprintln!("Pull of {image_ref} failed: {e:#}"); + last_err = Some(e); + } + } + } + Err(last_err.unwrap()) +} + +fn try_pull_image( sh: &Shell, cfsctl: &std::path::Path, repo: &std::path::Path, @@ -126,7 +184,7 @@ fn test_oci_container_digest_stability() -> Result<()> { cmd!(sh, "{cfsctl} --repo {repo} init --insecure").read()?; eprintln!("Pulling {} (this may take a while)...", image.label); - let config = pull_image(&sh, &cfsctl, repo, image.image_ref, image.label)?; + let config = pull_image(&sh, &cfsctl, repo, image, image.label)?; // Plain (non-bootable) image ID let plain_id = compute_id(&sh, &cfsctl, repo, &config, false)?; diff --git a/crates/composefs-oci/src/tar.rs b/crates/composefs-oci/src/tar.rs index 9d72fb07..1fde0246 100644 --- a/crates/composefs-oci/src/tar.rs +++ b/crates/composefs-oci/src/tar.rs @@ -203,6 +203,26 @@ pub async fn split_async( } ParseEvent::End { consumed } => { builder.push_inline(&buf.split_to(consumed)); + // GNU tar pads archives to a "record size" (typically 20×512 = 10240 bytes). + // After the two end-of-archive zero blocks (consumed above), there may be + // additional zero-padding blocks before EOF. We must store them to reproduce + // the original byte stream faithfully for diff_id checksum verification. + // + // Note: ideally tar-core would surface these extra bytes through + // ParseEvent::End::consumed so callers don't need to know about record + // granularity; this drain is a workaround until that is addressed upstream. + // See https://github.com/composefs/tar-core/pull/24 which will obviate this. + if !buf.is_empty() { + builder.push_inline(&buf.split()); + } + loop { + buf.reserve(IO_BUF_CAPACITY); + let n = tar_stream.read_buf(&mut buf).await?; + if n == 0 { + break; + } + builder.push_inline(&buf.split()); + } break; } ParseEvent::SparseEntry { .. } => { @@ -582,6 +602,56 @@ mod tests { assert!(get_entry(&mut reader).unwrap().is_none()); } + /// Verify that a tar without any trailing record padding survives a byte-exact + /// roundtrip. This is the common case for tars produced by the Rust `tar` crate + /// and most standard tooling; it forms a baseline paired with the padding test below. + #[test] + fn test_no_record_padding_roundtrip() { + let mut tar_data = Vec::new(); + { + let mut builder = Builder::new(&mut tar_data); + append_file(&mut builder, "hello.txt", b"hello world").unwrap(); + builder.finish().unwrap(); + } + // Confirm the Rust tar crate did not add GNU record padding. + const GNU_RECORD_SIZE: usize = 20 * 512; + assert_ne!( + tar_data.len() % GNU_RECORD_SIZE, + 0, + "expected tar without GNU record padding for this test" + ); + roundtrip_tar_bytes(&tar_data); + } + + /// Verify that GNU-style record padding (zero bytes after the two end-of-archive + /// blocks, filling the archive out to a 20×512 record boundary) is preserved + /// byte-for-bit through split_async → cat(). Without the fix, the reconstructed + /// tar was shorter than the original, causing diff_id checksum failures for images + /// produced by umoci/Rockcraft (e.g. Ubuntu 26.04). + #[test] + fn test_gnu_record_padding_roundtrip() { + const GNU_RECORD_SIZE: usize = 20 * 512; // 10240 bytes + + let mut tar_data = Vec::new(); + { + let mut builder = Builder::new(&mut tar_data); + append_file(&mut builder, "hello.txt", b"hello world").unwrap(); + builder.finish().unwrap(); + } + + // Simulate GNU record padding: extend to the next record boundary with zeros. + let remainder = tar_data.len() % GNU_RECORD_SIZE; + if remainder != 0 { + tar_data.resize(tar_data.len() + (GNU_RECORD_SIZE - remainder), 0); + } + + // The tar length must now be a multiple of the record size. + assert_eq!(tar_data.len() % GNU_RECORD_SIZE, 0); + + // roundtrip_tar_bytes asserts byte-exact reproduction through the splitstream. + roundtrip_tar_bytes(&tar_data); + } + #[tokio::test] async fn test_single_small_file() { let mut tar_data = Vec::new(); @@ -1387,6 +1457,37 @@ mod tests { assert_eq!(entries[0].path, PathBuf::from(format!("/{full_path}"))); } + /// Byte-exact roundtrip: original tar bytes -> split_async -> splitstream -> cat() + /// -> assert bytes match. Catches any corruption in either the inline or + /// external code paths, including missing padding or off-by-one errors. + fn roundtrip_tar_bytes(tar_data: &[u8]) { + let rt = tokio::runtime::Runtime::new().unwrap(); + rt.block_on(async { + let repo = create_test_repository().unwrap(); + let (object_id, _stats) = split_async(tar_data, repo.clone(), TAR_LAYER_CONTENT_TYPE) + .await + .unwrap(); + + let mut reader: SplitStreamReader = SplitStreamReader::new( + repo.open_object(&object_id).unwrap().into(), + Some(TAR_LAYER_CONTENT_TYPE), + ) + .unwrap(); + + let mut reassembled = Vec::new(); + reader.cat(&repo, &mut reassembled).unwrap(); + assert_eq!( + reassembled.len(), + tar_data.len(), + "reassembled tar length mismatch" + ); + assert_eq!( + reassembled, tar_data, + "reassembled tar bytes differ from original" + ); + }); + } + /// Property-based tests for tar path handling. mod proptest_tests { use super::*; @@ -1527,38 +1628,6 @@ mod tests { tar_data } - /// Byte-exact roundtrip: original tar -> split_async -> splitstream -> cat() - /// -> assert bytes match. Catches any corruption in either the inline or - /// external code paths, including missing padding or off-by-one errors. - fn roundtrip_tar_bytes(tar_data: &[u8]) { - let rt = tokio::runtime::Runtime::new().unwrap(); - rt.block_on(async { - let repo = create_test_repository().unwrap(); - let (object_id, _stats) = - split_async(tar_data, repo.clone(), TAR_LAYER_CONTENT_TYPE) - .await - .unwrap(); - - let mut reader: SplitStreamReader = SplitStreamReader::new( - repo.open_object(&object_id).unwrap().into(), - Some(TAR_LAYER_CONTENT_TYPE), - ) - .unwrap(); - - let mut reassembled = Vec::new(); - reader.cat(&repo, &mut reassembled).unwrap(); - assert_eq!( - reassembled.len(), - tar_data.len(), - "reassembled tar length mismatch" - ); - assert_eq!( - reassembled, tar_data, - "reassembled tar bytes differ from original" - ); - }); - } - proptest! { #![proptest_config(ProptestConfig::with_cases(64))]