Skip to content

Commit d90b9ed

Browse files
MagicalTux and claude committed
feat(zstd): validate Content_Checksum frames (XXH64)
The zstd decoder rejected any frame whose Frame_Header set Content_Checksum_Flag with Error::Unsupported, because no XXH64 was implemented. Since the zstd CLI writes a content checksum by default, default `zstd` output could not be decoded; only output produced with `--no-check` could.

Add a streaming XXH64 (canonical, seed 0; verified against reference vectors and against checksums produced by the zstd CLI). The decoder now feeds every decompressed byte through it at each emit site (raw, RLE, and compressed blocks) and validates the 4-byte little-endian trailer — the low 32 bits of XXH64 over the decompressed content — at end of frame, reporting Error::ChecksumMismatch on a mismatch.

Replaces the obsolete decode_rejects_checksum_flag test with decode_validates_correct_checksum / decode_rejects_bad_checksum, and adds XXH64 unit tests. Verified end-to-end against `zstd` CLI output across levels 1-19 (checksummed and --no-check). Our encoder still does not emit a content checksum.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 3f62896 commit d90b9ed

5 files changed

Lines changed: 278 additions & 27 deletions

File tree

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
### Added
11+
12+
- *(zstd)* decode and validate frames carrying a `Content_Checksum` (the `zstd`
13+
CLI writes one by default). Previously any such frame was refused with
14+
`Unsupported`, so default `zstd` output only decoded with `--no-check`. Adds a
15+
streaming XXH64 implementation; the decompressed output is hashed and checked
16+
against the 4-byte frame trailer, reporting `ChecksumMismatch` on corruption.
17+
1018
### Fixed
1119

1220
- *(decoder bridge)* a decoder that buffers a whole block internally (notably

src/zstd/decoder.rs

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,17 @@
44
//! `Compressed_Block` (Block_Type=2). See the module-level `mod.rs` docs for
55
//! a full list of supported literal / sequence sub-modes.
66
//!
7-
//! The decoder also refuses frames whose Frame_Header sets the
8-
//! `Content_Checksum_Flag` — we do not implement XXH64 in this crate, so we
9-
//! cannot validate the trailing 4-byte checksum.
7+
//! Frames whose Frame_Header sets the `Content_Checksum_Flag` are decoded and
8+
//! the trailing 4-byte XXH64 checksum is validated against the decompressed
9+
//! output (see [`crate::zstd`]).
1010
1111
use alloc::vec::Vec;
1212

1313
use crate::error::Error;
1414
use crate::traits::{RawDecoder, RawProgress};
1515
use crate::zstd::literals::{LiteralsState, decode_literals};
1616
use crate::zstd::sequences::{SequencesState, decode_sequences, execute_sequences};
17+
use crate::zstd::xxhash::Xxh64;
1718

1819
const MAGIC: [u8; 4] = [0x28, 0xB5, 0x2F, 0xFD];
1920

@@ -52,9 +53,8 @@ enum DecPhase {
5253
/// Emitting the bytes decoded out of a Compressed_Block (held in
5354
/// `emit_buf`).
5455
CompressedEmit,
55-
/// Reading 4-byte Content_Checksum trailer (only entered if we somehow
56-
/// allowed a checksummed frame — currently we refuse such frames in
57-
/// `Fhd`).
56+
/// Reading and validating the 4-byte Content_Checksum trailer (entered
57+
/// after the last block when the Frame_Header set `Content_Checksum_Flag`).
5858
ContentChecksum,
5959
/// Frame fully consumed; subsequent input is ignored (we do not handle
6060
/// concatenated frames).
@@ -116,6 +116,11 @@ pub struct Decoder {
116116
/// Carry-over state for sequence FSE tables (Repeat_Mode) and the
117117
/// previous-offsets stack.
118118
seq_state: SequencesState,
119+
120+
/// Running XXH64 over the decompressed output, fed at every emit site.
121+
/// Only consulted (and the per-byte update only performed) when
122+
/// `has_content_checksum` is set; finalized against the frame trailer.
123+
content_hash: Xxh64,
119124
}
120125

121126
impl Decoder {
@@ -142,6 +147,7 @@ impl Decoder {
142147
history_emitted: 0,
143148
lit_state: LiteralsState::default(),
144149
seq_state: SequencesState::new(),
150+
content_hash: Xxh64::new(),
145151
}
146152
}
147153

@@ -182,12 +188,6 @@ impl Decoder {
182188
self.single_segment = ss_flag != 0;
183189
self.has_content_checksum = cchk_flag != 0;
184190

185-
// We don't implement XXH64 in this build, so checksummed frames are
186-
// unsupported (per task spec).
187-
if self.has_content_checksum {
188-
return Err(self.poison(Error::Unsupported));
189-
}
190-
191191
self.dict_id_field_size = match dict_id_flag {
192192
0 => 0,
193193
1 => 1,
@@ -490,6 +490,9 @@ impl RawDecoder for Decoder {
490490
});
491491
}
492492
output[written..written + n].copy_from_slice(&input[consumed..consumed + n]);
493+
if self.has_content_checksum {
494+
self.content_hash.update(&output[written..written + n]);
495+
}
493496
// Mirror into history so subsequent Compressed_Blocks can
494497
// back-reference these bytes.
495498
self.history
@@ -526,6 +529,9 @@ impl RawDecoder for Decoder {
526529
for slot in &mut output[written..written + n] {
527530
*slot = self.rle_byte;
528531
}
532+
if self.has_content_checksum {
533+
self.content_hash.update(&output[written..written + n]);
534+
}
529535
// Mirror into history.
530536
for _ in 0..n {
531537
self.history.push(self.rle_byte);
@@ -582,22 +588,31 @@ impl RawDecoder for Decoder {
582588
output[written..written + n].copy_from_slice(
583589
&self.history[self.history_emitted..self.history_emitted + n],
584590
);
591+
if self.has_content_checksum {
592+
self.content_hash.update(&output[written..written + n]);
593+
}
585594
self.history_emitted += n;
586595
written += n;
587596
if self.history_emitted == self.history.len() {
588597
self.advance_after_block();
589598
}
590599
}
591600
DecPhase::ContentChecksum => {
592-
// Currently unreachable — we reject checksummed frames
593-
// in `parse_fhd`. Kept as a state for future XXH64 work.
601+
// The 4-byte trailer is the low 32 bits of XXH64 over the
602+
// decompressed content (little-endian). Validate it against
603+
// the running hash we fed at every emit site.
594604
if !self.fill_scratch(input, &mut consumed) {
595605
return Ok(RawProgress {
596606
consumed,
597607
written,
598608
done: false,
599609
});
600610
}
611+
let expected = u32::from_le_bytes(self.scratch[..4].try_into().unwrap());
612+
let actual = self.content_hash.digest() as u32;
613+
if expected != actual {
614+
return Err(self.poison(Error::ChecksumMismatch));
615+
}
601616
self.phase = DecPhase::Done;
602617
}
603618
DecPhase::Done => {

src/zstd/mod.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,13 @@
5050
//! RLE_Mode for sequence FSE tables, multi-frame output, content checksum,
5151
//! or dictionaries.
5252
//!
53-
//! # What does NOT work
53+
//! # Content checksum
5454
//!
55-
//! - **Content_Checksum_Flag** in the Frame_Header. The 4-byte trailer is the
56-
//! low 32 bits of XXH64 over the decompressed data; we do not ship an
57-
//! XXH64 implementation, so any frame that advertises a content checksum
58-
//! is refused with [`crate::Error::Unsupported`].
55+
//! Frames with `Content_Checksum_Flag` set (the `zstd` CLI writes one by
56+
//! default) are decoded and the 4-byte trailer — the low 32 bits of XXH64 over
57+
//! the decompressed data — is validated; a mismatch is reported as
58+
//! [`crate::Error::ChecksumMismatch`]. Our encoder does not yet emit a content
59+
//! checksum.
5960
//!
//! # What does NOT work
//!
6061
//! - **Skippable_Frame** magic numbers (`0x184D2A50..=0x184D2A5F`) are
6162
//! detected and rejected as unsupported rather than silently skipped.
@@ -80,6 +81,7 @@ mod huffman;
8081
mod literals;
8182
mod matcher;
8283
mod sequences;
84+
mod xxhash;
8385

8486
pub use decoder::Decoder;
8587
pub use encoder::{Encoder, EncoderConfig};

src/zstd/xxhash.rs

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
//! Streaming XXH64, the hash zstd uses for the optional frame
2+
//! `Content_Checksum`.
3+
//!
4+
//! A zstd frame whose `Content_Checksum_Flag` is set (the `zstd` CLI writes one
5+
//! by default) appends the low 32 bits of `XXH64(decompressed_content, seed=0)`,
6+
//! little-endian, after the last block. The decoder feeds every decompressed
7+
//! byte through [`Xxh64::update`] and compares [`Xxh64::digest`] against that
8+
//! trailer.
9+
//!
10+
//! This is the canonical XXH64 (Yann Collet) with seed 0; verified against the
11+
//! reference test vectors below and, end-to-end, against checksums produced by
12+
//! the `zstd` CLI.
13+
14+
const PRIME64_1: u64 = 0x9E37_79B1_85EB_CA87;
const PRIME64_2: u64 = 0xC2B2_AE3D_27D4_EB4F;
const PRIME64_3: u64 = 0x1656_67B1_9E37_79F9;
const PRIME64_4: u64 = 0x85EB_CA77_C2B2_AE63;
const PRIME64_5: u64 = 0x27D4_EB2F_1656_67C5;

/// Width of one XXH64 stripe in bytes: four 8-byte lanes, one per accumulator.
const STRIPE_LEN: usize = 32;

/// Running XXH64 state (seed fixed at 0, which is all zstd needs).
///
/// Feed data with [`Xxh64::update`] in chunks of any size, then read the hash
/// with [`Xxh64::digest`]. `digest` takes `&self`, so it can be called at any
/// point without ending the stream.
#[derive(Clone, Debug)]
pub(crate) struct Xxh64 {
    /// Four parallel accumulators; they only contribute to the digest once
    /// `total_len >= 32`.
    acc: [u64; 4],
    /// Total bytes consumed across all `update` calls (wrapping).
    total_len: u64,
    /// Partial stripe carried between `update` calls; the first `buf_len`
    /// bytes are valid.
    buf: [u8; STRIPE_LEN],
    /// Number of valid bytes in `buf`, always `< STRIPE_LEN` between calls.
    buf_len: usize,
}

impl Default for Xxh64 {
    /// Same as [`Xxh64::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl Xxh64 {
    /// Create a fresh hasher with the seed-0 initial accumulators.
    #[must_use]
    pub(crate) fn new() -> Self {
        Self {
            acc: [
                PRIME64_1.wrapping_add(PRIME64_2),
                PRIME64_2,
                0,
                0u64.wrapping_sub(PRIME64_1),
            ],
            total_len: 0,
            buf: [0u8; STRIPE_LEN],
            buf_len: 0,
        }
    }

    /// Mix one 64-bit `lane` into accumulator `acc`.
    #[inline]
    fn round(acc: u64, lane: u64) -> u64 {
        acc.wrapping_add(lane.wrapping_mul(PRIME64_2))
            .rotate_left(31)
            .wrapping_mul(PRIME64_1)
    }

    /// Fold accumulator value `lane` into the converging hash `acc`.
    #[inline]
    fn merge_round(acc: u64, lane: u64) -> u64 {
        (acc ^ Self::round(0, lane))
            .wrapping_mul(PRIME64_1)
            .wrapping_add(PRIME64_4)
    }

    /// Read the first 8 bytes of `bytes` as a little-endian `u64`.
    ///
    /// Panics if `bytes` is shorter than 8 bytes; every caller slices at
    /// least 8 bytes first.
    #[inline]
    fn read_u64(bytes: &[u8]) -> u64 {
        let mut lane = [0u8; 8];
        lane.copy_from_slice(&bytes[..8]);
        u64::from_le_bytes(lane)
    }

    /// Consume one full 32-byte stripe into the four accumulators, lane `i`
    /// of the stripe going into `acc[i]`.
    #[inline]
    fn process_stripe(acc: &mut [u64; 4], stripe: &[u8]) {
        debug_assert_eq!(stripe.len(), STRIPE_LEN);
        for (lane_acc, lane) in acc.iter_mut().zip(stripe.chunks_exact(8)) {
            *lane_acc = Self::round(*lane_acc, Self::read_u64(lane));
        }
    }

    /// Feed `data` into the running hash.
    ///
    /// Chunk boundaries do not matter: any split of the same byte sequence
    /// across calls yields the same digest. An empty `data` is a no-op.
    pub(crate) fn update(&mut self, mut data: &[u8]) {
        // `usize` -> `u64` is lossless on every supported target width.
        self.total_len = self.total_len.wrapping_add(data.len() as u64);

        // Top off a partially filled stripe first.
        if self.buf_len > 0 {
            let need = STRIPE_LEN - self.buf_len;
            if data.len() < need {
                // Still not a full stripe: stash and wait for more input.
                let end = self.buf_len + data.len();
                self.buf[self.buf_len..end].copy_from_slice(data);
                self.buf_len = end;
                return;
            }
            let (head, rest) = data.split_at(need);
            self.buf[self.buf_len..].copy_from_slice(head);
            // `acc` and `buf` are distinct fields, so they can be borrowed
            // together without copying the buffer out first.
            Self::process_stripe(&mut self.acc, &self.buf);
            self.buf_len = 0;
            data = rest;
        }

        // Bulk stripes straight from the input.
        let mut stripes = data.chunks_exact(STRIPE_LEN);
        for stripe in &mut stripes {
            Self::process_stripe(&mut self.acc, stripe);
        }

        // Carry the trailing partial stripe (possibly empty). `buf_len` is
        // always 0 at this point, so an unconditional store is equivalent.
        let tail = stripes.remainder();
        self.buf[..tail.len()].copy_from_slice(tail);
        self.buf_len = tail.len();
    }

    /// Finalize without disturbing the running state, returning the full 64-bit
    /// digest. zstd compares the low 32 bits.
    #[must_use]
    pub(crate) fn digest(&self) -> u64 {
        let mut h = if self.total_len >= STRIPE_LEN as u64 {
            // At least one stripe was processed: converge the accumulators.
            let seed = self.acc[0]
                .rotate_left(1)
                .wrapping_add(self.acc[1].rotate_left(7))
                .wrapping_add(self.acc[2].rotate_left(12))
                .wrapping_add(self.acc[3].rotate_left(18));
            self.acc
                .iter()
                .fold(seed, |hash, &lane| Self::merge_round(hash, lane))
        } else {
            // Short input: only the seed-derived constant participates.
            PRIME64_5
        };

        h = h.wrapping_add(self.total_len);

        // Consume the leftover (< 32) bytes: 8 at a time, then 4, then 1.
        let mut lanes = self.buf[..self.buf_len].chunks_exact(8);
        for lane in &mut lanes {
            let k = Self::round(0, Self::read_u64(lane));
            h = (h ^ k)
                .rotate_left(27)
                .wrapping_mul(PRIME64_1)
                .wrapping_add(PRIME64_4);
        }
        let mut rest = lanes.remainder();
        if rest.len() >= 4 {
            let mut word = [0u8; 4];
            word.copy_from_slice(&rest[..4]);
            let k = u64::from(u32::from_le_bytes(word));
            h = (h ^ k.wrapping_mul(PRIME64_1))
                .rotate_left(23)
                .wrapping_mul(PRIME64_2)
                .wrapping_add(PRIME64_3);
            rest = &rest[4..];
        }
        for &byte in rest {
            h = (h ^ u64::from(byte).wrapping_mul(PRIME64_5))
                .rotate_left(11)
                .wrapping_mul(PRIME64_1);
        }

        // Final avalanche.
        h ^= h >> 33;
        h = h.wrapping_mul(PRIME64_2);
        h ^= h >> 29;
        h = h.wrapping_mul(PRIME64_3);
        h ^= h >> 32;
        h
    }
}
162+
163+
#[cfg(test)]
mod tests {
    use super::*;

    /// One-shot helper: hash `bytes` with a fresh state in a single `update`.
    fn xxh64(bytes: &[u8]) -> u64 {
        let mut state = Xxh64::new();
        state.update(bytes);
        state.digest()
    }

    /// Checks the hasher against fixed seed-0 XXH64 digests.
    #[test]
    fn reference_vectors() {
        // 64 bytes ⇒ exercises the multi-stripe accumulator path.
        let two_stripes: alloc::vec::Vec<u8> = (0..64u8).collect();

        // Canonical XXH64 vectors (seed 0) from the reference implementation.
        let cases: [(&[u8], u64); 4] = [
            (&b""[..], 0xEF46_DB37_51D8_E999),
            (&b"a"[..], 0xD24E_C4F1_A98C_6E5B),
            (&b"abc"[..], 0x44BC_2CF5_AD77_0999),
            (&two_stripes[..], 0xF7C6_7301_DB67_13F0),
        ];
        for (input, want) in cases {
            assert_eq!(xxh64(input), want);
        }
    }

    /// Feeding the same bytes in pieces must give the one-shot digest.
    #[test]
    fn streaming_matches_one_shot() {
        let data: alloc::vec::Vec<u8> = (0..250u32).map(|i| i.wrapping_mul(37) as u8).collect();
        let one = xxh64(&data);
        // Piece sizes chosen to land before, on, and after stripe boundaries.
        for chunk in [1usize, 3, 7, 8, 16, 31, 32, 33] {
            let mut streamed = Xxh64::new();
            data.chunks(chunk).for_each(|part| streamed.update(part));
            assert_eq!(streamed.digest(), one, "chunk size {chunk}");
        }
    }
}

0 commit comments

Comments
 (0)