@@ -203,6 +203,26 @@ pub async fn split_async<ObjectID: FsVerityHashValue>(
203203 }
204204 ParseEvent :: End { consumed } => {
205205 builder. push_inline ( & buf. split_to ( consumed) ) ;
206+ // GNU tar pads archives to a "record size" (typically 20×512 = 10240 bytes).
207+ // After the two end-of-archive zero blocks (consumed above), there may be
208+ // additional zero-padding blocks before EOF. We must store them to reproduce
209+ // the original byte stream faithfully for diff_id checksum verification.
210+ //
211+ // Note: ideally tar-core would surface these extra bytes through
212+ // ParseEvent::End::consumed so callers don't need to know about record
213+ // granularity; this drain is a workaround until that is addressed upstream.
214+ // See https://github.com/composefs/tar-core/pull/24 which will obviate this.
215+ if !buf. is_empty ( ) {
216+ builder. push_inline ( & buf. split ( ) ) ;
217+ }
218+ loop {
219+ buf. reserve ( IO_BUF_CAPACITY ) ;
220+ let n = tar_stream. read_buf ( & mut buf) . await ?;
221+ if n == 0 {
222+ break ;
223+ }
224+ builder. push_inline ( & buf. split ( ) ) ;
225+ }
206226 break ;
207227 }
208228 ParseEvent :: SparseEntry { .. } => {
@@ -582,6 +602,56 @@ mod tests {
582602 assert ! ( get_entry( & mut reader) . unwrap( ) . is_none( ) ) ;
583603 }
584604
605+ /// Verify that a tar without any trailing record padding survives a byte-exact
606+ /// roundtrip. This is the common case for tars produced by the Rust `tar` crate
607+ /// and most standard tooling; it forms a baseline paired with the padding test below.
608+ #[ test]
609+ fn test_no_record_padding_roundtrip ( ) {
610+ let mut tar_data = Vec :: new ( ) ;
611+ {
612+ let mut builder = Builder :: new ( & mut tar_data) ;
613+ append_file ( & mut builder, "hello.txt" , b"hello world" ) . unwrap ( ) ;
614+ builder. finish ( ) . unwrap ( ) ;
615+ }
616+ // Confirm the Rust tar crate did not add GNU record padding.
617+ const GNU_RECORD_SIZE : usize = 20 * 512 ;
618+ assert_ne ! (
619+ tar_data. len( ) % GNU_RECORD_SIZE ,
620+ 0 ,
621+ "expected tar without GNU record padding for this test"
622+ ) ;
623+ roundtrip_tar_bytes ( & tar_data) ;
624+ }
625+
626+ /// Verify that GNU-style record padding (zero bytes after the two end-of-archive
627+ /// blocks, filling the archive out to a 20×512 record boundary) is preserved
628+ /// byte-for-bit through split_async → cat(). Without the fix, the reconstructed
629+ /// tar was shorter than the original, causing diff_id checksum failures for images
630+ /// produced by umoci/Rockcraft (e.g. Ubuntu 26.04).
631+ #[ test]
632+ fn test_gnu_record_padding_roundtrip ( ) {
633+ const GNU_RECORD_SIZE : usize = 20 * 512 ; // 10240 bytes
634+
635+ let mut tar_data = Vec :: new ( ) ;
636+ {
637+ let mut builder = Builder :: new ( & mut tar_data) ;
638+ append_file ( & mut builder, "hello.txt" , b"hello world" ) . unwrap ( ) ;
639+ builder. finish ( ) . unwrap ( ) ;
640+ }
641+
642+ // Simulate GNU record padding: extend to the next record boundary with zeros.
643+ let remainder = tar_data. len ( ) % GNU_RECORD_SIZE ;
644+ if remainder != 0 {
645+ tar_data. resize ( tar_data. len ( ) + ( GNU_RECORD_SIZE - remainder) , 0 ) ;
646+ }
647+
648+ // The tar length must now be a multiple of the record size.
649+ assert_eq ! ( tar_data. len( ) % GNU_RECORD_SIZE , 0 ) ;
650+
651+ // roundtrip_tar_bytes asserts byte-exact reproduction through the splitstream.
652+ roundtrip_tar_bytes ( & tar_data) ;
653+ }
654+
585655 #[ tokio:: test]
586656 async fn test_single_small_file ( ) {
587657 let mut tar_data = Vec :: new ( ) ;
@@ -1387,6 +1457,37 @@ mod tests {
13871457 assert_eq ! ( entries[ 0 ] . path, PathBuf :: from( format!( "/{full_path}" ) ) ) ;
13881458 }
13891459
1460+ /// Byte-exact roundtrip: original tar bytes -> split_async -> splitstream -> cat()
1461+ /// -> assert bytes match. Catches any corruption in either the inline or
1462+ /// external code paths, including missing padding or off-by-one errors.
1463+ fn roundtrip_tar_bytes ( tar_data : & [ u8 ] ) {
1464+ let rt = tokio:: runtime:: Runtime :: new ( ) . unwrap ( ) ;
1465+ rt. block_on ( async {
1466+ let repo = create_test_repository ( ) . unwrap ( ) ;
1467+ let ( object_id, _stats) = split_async ( tar_data, repo. clone ( ) , TAR_LAYER_CONTENT_TYPE )
1468+ . await
1469+ . unwrap ( ) ;
1470+
1471+ let mut reader: SplitStreamReader < Sha256HashValue > = SplitStreamReader :: new (
1472+ repo. open_object ( & object_id) . unwrap ( ) . into ( ) ,
1473+ Some ( TAR_LAYER_CONTENT_TYPE ) ,
1474+ )
1475+ . unwrap ( ) ;
1476+
1477+ let mut reassembled = Vec :: new ( ) ;
1478+ reader. cat ( & repo, & mut reassembled) . unwrap ( ) ;
1479+ assert_eq ! (
1480+ reassembled. len( ) ,
1481+ tar_data. len( ) ,
1482+ "reassembled tar length mismatch"
1483+ ) ;
1484+ assert_eq ! (
1485+ reassembled, tar_data,
1486+ "reassembled tar bytes differ from original"
1487+ ) ;
1488+ } ) ;
1489+ }
1490+
13901491 /// Property-based tests for tar path handling.
13911492 mod proptest_tests {
13921493 use super :: * ;
@@ -1527,38 +1628,6 @@ mod tests {
15271628 tar_data
15281629 }
15291630
1530- /// Byte-exact roundtrip: original tar -> split_async -> splitstream -> cat()
1531- /// -> assert bytes match. Catches any corruption in either the inline or
1532- /// external code paths, including missing padding or off-by-one errors.
1533- fn roundtrip_tar_bytes ( tar_data : & [ u8 ] ) {
1534- let rt = tokio:: runtime:: Runtime :: new ( ) . unwrap ( ) ;
1535- rt. block_on ( async {
1536- let repo = create_test_repository ( ) . unwrap ( ) ;
1537- let ( object_id, _stats) =
1538- split_async ( tar_data, repo. clone ( ) , TAR_LAYER_CONTENT_TYPE )
1539- . await
1540- . unwrap ( ) ;
1541-
1542- let mut reader: SplitStreamReader < Sha256HashValue > = SplitStreamReader :: new (
1543- repo. open_object ( & object_id) . unwrap ( ) . into ( ) ,
1544- Some ( TAR_LAYER_CONTENT_TYPE ) ,
1545- )
1546- . unwrap ( ) ;
1547-
1548- let mut reassembled = Vec :: new ( ) ;
1549- reader. cat ( & repo, & mut reassembled) . unwrap ( ) ;
1550- assert_eq ! (
1551- reassembled. len( ) ,
1552- tar_data. len( ) ,
1553- "reassembled tar length mismatch"
1554- ) ;
1555- assert_eq ! (
1556- reassembled, tar_data,
1557- "reassembled tar bytes differ from original"
1558- ) ;
1559- } ) ;
1560- }
1561-
15621631 proptest ! {
15631632 #![ proptest_config( ProptestConfig :: with_cases( 64 ) ) ]
15641633
0 commit comments