fix: audit parser hardening — bounded sections, x509 depth, DSSE fuzz, mime-type (#98)

avrabe · claude · web-flow · commit ae3a0a0bcc2b · 2026-04-30T00:14:38.000-05:00
Closes 4 findings from the 2026-04-30 audit:
  H-1 — bound WASM SectionsIterator at MAX_SECTIONS=4096
  H-2 — bound x509 chain depth at MAX_CHAIN_DEPTH=8
  H-6 — PAYLOAD_TYPE_SLSA → application/vnd.slsa.provenance+json
  H-7 — add fuzz_dsse_envelope target with round-trip oracle

Fixes: H-1, H-2, H-6, H-7
Verifies: CR-8 (bounded resource consumption)

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
@@ -94,3 +94,10 @@ path = "fuzz_targets/fuzz_format_detection.rs"
 test = false
 doc = false
 bench = false
+
+[[bin]]
+name = "fuzz_dsse_envelope"
+path = "fuzz_targets/fuzz_dsse_envelope.rs"
+test = false
+doc = false
+bench = false
diff --git a/fuzz/fuzz_targets/fuzz_dsse_envelope.rs b/fuzz/fuzz_targets/fuzz_dsse_envelope.rs
@@ -0,0 +1,55 @@
+//! Fuzz target for DSSE envelope JSON parsing.
+//!
+//! `wsc::dsse::DsseEnvelope` is a central attestation parser: it accepts
+//! untrusted JSON whose `signatures` field is an unbounded `Vec<DsseSignature>`,
+//! and the envelope is consumed by every downstream verifier.
+//!
+//! Security concerns this target exercises:
+//! - JSON denial-of-service (deeply nested structures, oversize signatures).
+//! - serde_json error handling on malformed input.
+//! - Round-trip stability: parse → serialize → parse must yield equal
+//!   structural data, otherwise an attacker may craft an envelope whose
+//!   re-serialized form differs from the bytes that were actually verified.
+//!
+//! Oracle: not just "doesn't crash" — also a structural round-trip equality
+//! check on any successfully parsed envelope.
+
+#![no_main]
+
+use libfuzzer_sys::fuzz_target;
+use wsc::dsse::DsseEnvelope;
+
+fuzz_target!(|data: &[u8]| {
+    // Treat input as candidate UTF-8 JSON. Skip non-UTF-8 inputs early so
+    // the deserializer is not asked to do work on bytes that can never be
+    // valid JSON (serde_json would reject them anyway, but this keeps
+    // corpus mutations focused on JSON-shaped inputs).
+    let s = match std::str::from_utf8(data) {
+        Ok(s) => s,
+        Err(_) => return,
+    };
+
+    let envelope = match DsseEnvelope::from_json(s) {
+        Ok(e) => e,
+        Err(_) => return,
+    };
+
+    // Round-trip oracle: serialize back to JSON, parse again, and assert
+    // that the two parsed envelopes are structurally identical. A divergence
+    // here would indicate a serde quirk an attacker could exploit (e.g. a
+    // field that survives the first parse but is dropped on the second).
+    let json = envelope
+        .to_json()
+        .expect("serialization of a successfully parsed envelope must succeed");
+
+    let envelope2 = DsseEnvelope::from_json(&json)
+        .expect("re-parse of self-serialized envelope must succeed");
+
+    assert_eq!(envelope.payload, envelope2.payload);
+    assert_eq!(envelope.payload_type, envelope2.payload_type);
+    assert_eq!(envelope.signatures.len(), envelope2.signatures.len());
+    for (a, b) in envelope.signatures.iter().zip(envelope2.signatures.iter()) {
+        assert_eq!(a.keyid, b.keyid);
+        assert_eq!(a.sig, b.sig);
+    }
+});
diff --git a/src/attestation/src/dsse.rs b/src/attestation/src/dsse.rs
@@ -26,7 +26,7 @@ use serde::{Deserialize, Serialize};
 pub const PAYLOAD_TYPE_INTOTO: &str = "application/vnd.in-toto+json";
 
 /// DSSE payload type for SLSA provenance
-pub const PAYLOAD_TYPE_SLSA: &str = "application/vnd.in-toto+json";
+pub const PAYLOAD_TYPE_SLSA: &str = "application/vnd.slsa.provenance+json";
 
 /// Dead Simple Signing Envelope
 ///
diff --git a/src/lib/src/error.rs b/src/lib/src/error.rs
@@ -64,6 +64,12 @@ pub enum WSError {
     #[error("Too many certificates (max: {0})")]
     TooManyCertificates(usize),
 
+    #[error("Too many sections (max: {0})")]
+    TooManySections(usize),
+
+    #[error("Certificate chain too deep (max: {0})")]
+    ChainTooDeep(usize),
+
     #[error("Usage error: {0}")]
     UsageError(&'static str),
 
diff --git a/src/lib/src/signature/keyless/format.rs b/src/lib/src/signature/keyless/format.rs
@@ -18,6 +18,14 @@ pub const KEYLESS_SIG_TYPE: u8 = 0x02;
 /// Standard signature type identifier
 pub const STANDARD_SIG_TYPE: u8 = 0x01;
 
+/// Maximum accepted depth of an embedded X.509 certificate chain.
+///
+/// Real-world Fulcio chains are length 2–3 (leaf + intermediate(s) + root).
+/// Industry CAs ship at most 4–5. We cap at 8 — generous headroom while
+/// rejecting adversarial 1000-cert chains that would trigger heap exhaustion
+/// in `x509_parser` / WebPKI before any signature work begins.
+pub const MAX_CHAIN_DEPTH: usize = 8;
+
 /// Keyless signature custom section format
 ///
 /// Binary format (extends existing wasmsig format):
@@ -367,6 +375,13 @@ impl KeylessSignature {
             ));
         }
 
+        // SECURITY: bound chain depth before invoking x509_parser/WebPKI.
+        // An adversarial 1000-cert chain would otherwise trigger heap
+        // exhaustion during PEM/DER decoding.
+        if self.cert_chain.len() > MAX_CHAIN_DEPTH {
+            return Err(WSError::ChainTooDeep(MAX_CHAIN_DEPTH));
+        }
+
         // Load Fulcio trusted roots
         let cert_pool = CertificatePool::from_embedded_trust_root().map_err(|e| {
             WSError::CertificateError(format!("Failed to load trusted roots: {}", e))
@@ -695,6 +710,50 @@ mod tests {
         assert_eq!(deserialized.signature, sig.signature);
     }
 
+    #[test]
+    fn test_verify_cert_chain_rejects_too_deep() {
+        // A 100-cert synthetic chain must be rejected before any x509 parsing.
+        // This exercises the MAX_CHAIN_DEPTH guard in verify_cert_chain.
+        let mut sig = create_test_signature();
+        sig.cert_chain = (0..100)
+            .map(|i| {
+                format!(
+                    "-----BEGIN CERTIFICATE-----\nfake-cert-{}\n-----END CERTIFICATE-----",
+                    i
+                )
+            })
+            .collect();
+
+        let result = sig.verify_cert_chain();
+        match result {
+            Err(WSError::ChainTooDeep(max)) => assert_eq!(max, MAX_CHAIN_DEPTH),
+            Err(other) => panic!("expected ChainTooDeep, got {:?}", other),
+            Ok(_) => panic!("expected ChainTooDeep, got Ok"),
+        }
+    }
+
+    #[test]
+    fn test_verify_cert_chain_at_max_depth_proceeds_to_parser() {
+        // A chain of exactly MAX_CHAIN_DEPTH bogus PEMs must NOT be rejected by
+        // the depth check; it should fall through to PEM/X.509 parsing and fail
+        // there. This pins the guard as `len > MAX_CHAIN_DEPTH`, not `>=`.
+        let mut sig = create_test_signature();
+        sig.cert_chain = (0..MAX_CHAIN_DEPTH)
+            .map(|i| {
+                format!(
+                    "-----BEGIN CERTIFICATE-----\nfake-cert-{}\n-----END CERTIFICATE-----",
+                    i
+                )
+            })
+            .collect();
+
+        let result = sig.verify_cert_chain();
+        // Must not be rejected by depth guard
+        assert!(!matches!(result, Err(WSError::ChainTooDeep(_))));
+        // But it must still fail (these aren't real Fulcio certs)
+        assert!(result.is_err());
+    }
+
     #[test]
     fn test_large_module_hash() {
         let mut sig = create_test_signature();
diff --git a/src/lib/src/wasm_module/mod.rs b/src/lib/src/wasm_module/mod.rs
@@ -24,6 +24,13 @@ const WASM_HEADER: [u8; 8] = [0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00];
 const WASM_COMPONENT_HEADER: [u8; 8] = [0x00, 0x61, 0x73, 0x6d, 0x0d, 0x00, 0x01, 0x00];
 pub type Header = [u8; 8];
 
+/// Maximum number of sections accepted by `SectionsIterator` before the parser
+/// aborts with `WSError::TooManySections`. 4096 is generous for any legitimate
+/// module (typical modules carry on the order of 100 sections or fewer, and the
+/// Component Model adds a handful more) while bounding worst-case work for
+/// adversarial inputs that contain millions of empty sections.
+pub const MAX_SECTIONS: usize = 4096;
+
 /// A section identifier.
 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
 #[repr(u8)]
@@ -452,14 +459,17 @@ impl Module {
         Ok(ModuleStreamReader { reader, header })
     }
 
-    /// Return an iterator over the sections of a WebAssembly module.    
+    /// Return an iterator over the sections of a WebAssembly module.
     ///
     /// The module is read in a streaming fashion, and doesn't have to be fully loaded into memory.
+    /// The iterator caps total emitted sections at [`MAX_SECTIONS`] to prevent
+    /// adversarial modules from causing unbounded work.
     pub fn iterate<T: Read>(
         module_stream: ModuleStreamReader<T>,
     ) -> Result<SectionsIterator<T>, WSError> {
         Ok(SectionsIterator {
             reader: module_stream.reader,
+            count: 0,
         })
     }
 }
@@ -470,18 +480,31 @@ pub struct ModuleStreamReader<'t, T: Read> {
 }
 
 /// An iterator over the sections of a WebAssembly module.
+///
+/// Yields at most [`MAX_SECTIONS`] sections; once the cap is reached, every
+/// further call returns `Some(Err(WSError::TooManySections(MAX_SECTIONS)))`
+/// rather than `None`, so callers must stop iterating at the first error.
 pub struct SectionsIterator<'t, T: Read> {
     reader: &'t mut T,
+    count: usize,
 }
 
 impl<'t, T: Read> Iterator for SectionsIterator<'t, T> {
     type Item = Result<Section, WSError>;
 
     fn next(&mut self) -> Option<Self::Item> {
+        if self.count >= MAX_SECTIONS {
+            // Bound iteration so a malformed module declaring millions of
+            // empty sections cannot loop the parser indefinitely.
+            return Some(Err(WSError::TooManySections(MAX_SECTIONS)));
+        }
         match Section::deserialize(self.reader) {
             Err(e) => Some(Err(e)),
             Ok(None) => None,
-            Ok(Some(section)) => Some(Ok(section)),
+            Ok(Some(section)) => {
+                self.count += 1;
+                Some(Ok(section))
+            }
         }
     }
 }
@@ -965,6 +988,39 @@ mod tests {
             "tampered component must fail verification"
         );
     }
+
+    #[test]
+    fn test_sections_iterator_max_sections_cap() {
+        // Construct a WASM module: header + (MAX_SECTIONS + 1) empty Type sections.
+        // Each empty section is two bytes: id=1 (Type), len=0.
+        // The iterator must reject once it has yielded MAX_SECTIONS sections.
+        let mut bytes = Vec::with_capacity(8 + 2 * (MAX_SECTIONS + 1));
+        bytes.extend_from_slice(&WASM_HEADER);
+        for _ in 0..(MAX_SECTIONS + 1) {
+            bytes.push(0x01); // SectionId::Type
+            bytes.push(0x00); // payload length 0
+        }
+
+        let mut reader = io::Cursor::new(&bytes);
+        let stream = Module::init_from_reader(&mut reader).expect("header parses");
+        let it = Module::iterate(stream).expect("iterator constructs");
+
+        let mut seen = 0usize;
+        let mut hit_cap = false;
+        for item in it {
+            match item {
+                Ok(_) => seen += 1,
+                Err(WSError::TooManySections(max)) => {
+                    assert_eq!(max, MAX_SECTIONS);
+                    hit_cap = true;
+                    break;
+                }
+                Err(e) => panic!("unexpected error before cap: {:?}", e),
+            }
+        }
+        assert_eq!(seen, MAX_SECTIONS, "should yield exactly MAX_SECTIONS first");
+        assert!(hit_cap, "iterator must error with TooManySections after the cap");
+    }
 }
 
 // ============================================================================