From 18547282536393a9145b37a1f746875bd30e87cc Mon Sep 17 00:00:00 2001 From: Dennis Felsing Date: Thu, 18 Jun 2026 09:25:32 +0000 Subject: [PATCH] repr: fix panic decoding an empty Avro decimal value `twos_complement_be_to_numeric_inner` read `input[0]` to determine the sign before checking that the input was non-empty, so a zero-length two's-complement byte run panicked with an out-of-bounds index. This is reachable from Avro source ingestion: a `decimal` logical type backed by `bytes` (or `fixed`) whose value is encoded as a zero-length byte array is a valid wire form, and that data arrives from external, possibly hostile, schema registries and producers. A conformant producer encodes zero as `[0x00]`, but a non-conformant one can send empty bytes, crashing the decoder and so the source. An empty big-endian two's-complement run canonically denotes 0. Guarding the sign check with `!input.is_empty()` makes the existing logic produce 0 for that case (`head == 0`, no chunks), with no other change needed. The only caller is the Avro decimal decode path. Found by the avro_decode_fuzzed_schema cargo-fuzz target. Tests: adds `test_twos_complement_empty_is_zero`, covering the wrapper at several scales and the generic inner at both numeric widths. Release note: Fix a panic in Avro-formatted sources when decoding a `decimal` column whose value was encoded as a zero-length byte array. Co-Authored-By: Claude Opus 4.8 --- src/repr/src/adt/numeric.rs | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/src/repr/src/adt/numeric.rs b/src/repr/src/adt/numeric.rs index 52b601cfaa193..5ca84efda41d0 100644 --- a/src/repr/src/adt/numeric.rs +++ b/src/repr/src/adt/numeric.rs @@ -399,7 +399,12 @@ pub fn twos_complement_be_to_numeric( pub fn twos_complement_be_to_numeric_inner, const N: usize>( input: &mut [u8], ) -> Result, anyhow::Error> { - let is_neg = if (input[0] & 0x80) != 0 { + // An empty big-endian two's-complement run encodes the value 0 (Avro + // permits a zero-length `bytes`/`fixed` for a `decimal`, and such data + // arrives from external schema registries). Treat it as non-negative; the + // logic below then naturally yields 0 (`head == 0`, no chunks). Without this + // guard the `input[0]` sign check panics with an out-of-bounds index. + let is_neg = if !input.is_empty() && (input[0] & 0x80) != 0 { // byte-level negate all negative values, guaranteeing all bytes are // readable as unsigned. negate_twos_complement_le(input.iter_mut().rev()); @@ -469,6 +474,32 @@ fn test_twos_complement_roundtrip() { inner("-7.2e-35"); } +#[mz_ore::test] +#[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function `decNumberFromInt32` on OS `linux` +fn test_twos_complement_empty_is_zero() { + // A zero-length two's-complement run encodes 0. This arrives from external + // Avro producers (an empty `bytes`/`fixed` decimal), so it must not panic on + // the `input[0]` sign check. Regression for the avro_decode_fuzzed_schema + // finding (numeric.rs out-of-bounds index on empty decimal input). + for scale in [0u8, 5, 38] { + let n = twos_complement_be_to_numeric(&mut [], scale) + .expect("empty two's-complement run decodes"); + assert!( + n.is_zero(), + "empty run at scale {scale} should be zero, got {n}" + ); + } + // Same through the generic inner at both widths used by the wrapper. + let zero_datum = + twos_complement_be_to_numeric_inner::(&mut []) + .expect("empty run decodes at datum width"); + assert!(zero_datum.is_zero()); + let zero_agg = + twos_complement_be_to_numeric_inner::(&mut []) + .expect("empty run decodes at agg width"); + assert!(zero_agg.is_zero()); +} + #[mz_ore::test] #[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function `decNumberFromInt32` on OS `linux` fn test_twos_comp_numeric_primitive() {