Skip to content

Commit 2bd0f58

Browse files
def-claudeantiguru
authored
repr: Harden proto/Row decoding against malformed input (#36984)
Hardens `mz-repr` proto/Row decoding against malformed/untrusted bytes — these paths are reachable from persisted state and the wire, so a panic is an availability bug. Replaces panics/asserts with proper decode errors and validates ranges (Date, CheckedTimestamp, ProtoNumeric, ProtoRange, ProtoRelationDesc, ProtoRow dict ordering, uuid parsing, leap-second truncation, Avro decimal). Found by the cargo-fuzz suite ([separate infra PR](#36982)). Each fix has a regression test. 🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com> Co-authored-by: Moritz Hoffmann <antiguru@gmail.com>
1 parent 042229b commit 2bd0f58

14 files changed

Lines changed: 235 additions & 48 deletions

File tree

src/adapter/src/error.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -426,7 +426,8 @@ fn eval_error_code(err: &EvalError) -> SqlState {
426426
| InvalidRangeError::InvalidRangeBoundFlags
427427
| InvalidRangeError::NullRangeBoundFlags
428428
| InvalidRangeError::DiscontiguousUnion
429-
| InvalidRangeError::DiscontiguousDifference => SqlState::DATA_EXCEPTION,
429+
| InvalidRangeError::DiscontiguousDifference
430+
| InvalidRangeError::InvalidRangeData => SqlState::DATA_EXCEPTION,
430431
},
431432

432433
// Cardinality violations from scalar subqueries.

src/pgrepr/src/value.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -725,7 +725,7 @@ impl Value {
725725
Type::TimeTz { .. } => return Err("input of timetz types is not implemented".into()),
726726
Type::Timestamp { .. } => Value::Timestamp(strconv::parse_timestamp(s)?),
727727
Type::TimestampTz { .. } => Value::TimestampTz(strconv::parse_timestamptz(s)?),
728-
Type::Uuid => Value::Uuid(Uuid::parse_str(s)?),
728+
Type::Uuid => Value::Uuid(strconv::parse_uuid(s)?),
729729
Type::MzTimestamp => Value::MzTimestamp(strconv::parse_mz_timestamp(s)?),
730730
Type::Range { element_type } => Value::Range(strconv::parse_range(s, |elem_text| {
731731
Value::decode_text(element_type, elem_text.as_bytes()).map(Box::new)
@@ -831,7 +831,7 @@ impl Value {
831831
Type::TimestampTz { .. } => {
832832
packer.push(Datum::TimestampTz(strconv::parse_timestamptz(s)?))
833833
}
834-
Type::Uuid => packer.push(Datum::Uuid(Uuid::parse_str(s)?)),
834+
Type::Uuid => packer.push(Datum::Uuid(strconv::parse_uuid(s)?)),
835835
Type::MzTimestamp => packer.push(Datum::MzTimestamp(strconv::parse_mz_timestamp(s)?)),
836836
Type::Range { element_type } => {
837837
let range = strconv::parse_range(s, |elem_text| {

src/repr/src/adt/date.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,11 @@ impl RustType<ProtoDate> for Date {
5454
}
5555

5656
fn from_proto(proto: ProtoDate) -> Result<Self, TryFromProtoError> {
57-
Ok(Date { days: proto.days })
57+
// Go through `from_pg_epoch` so out-of-range days are rejected here.
58+
// Pushing them into a `Row` succeeds, but `read_datum` would later
59+
// panic when reconstructing the date.
60+
Date::from_pg_epoch(proto.days)
61+
.map_err(|err| TryFromProtoError::InvalidFieldError(err.to_string()))
5862
}
5963
}
6064

src/repr/src/adt/numeric.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,14 @@ pub fn twos_complement_be_to_numeric(
399399
pub fn twos_complement_be_to_numeric_inner<D: Dec<N>, const N: usize>(
400400
input: &mut [u8],
401401
) -> Result<Decimal<N>, anyhow::Error> {
402+
if input.is_empty() {
403+
// An empty byte string is not a valid two's-complement integer. Our own
404+
// encoder never emits one (zero is the single byte `0x00`), so this only
405+
// arises from untrusted input. One example is an Avro `decimal` field
406+
// whose unscaled value was encoded as zero-length `bytes`. Reject it
407+
// rather than indexing `input[0]` below and panicking.
408+
bail!("cannot parse a numeric value from an empty byte string");
409+
}
402410
let is_neg = if (input[0] & 0x80) != 0 {
403411
// byte-level negate all negative values, guaranteeing all bytes are
404412
// readable as unsigned.
@@ -469,6 +477,21 @@ fn test_twos_complement_roundtrip() {
469477
inner("-7.2e-35");
470478
}
471479

480+
#[mz_ore::test]
481+
#[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function `decNumberFromInt32` on OS `linux`
482+
fn test_twos_complement_empty_is_error() {
483+
// An empty byte string is not a valid two's-complement integer and used to
484+
// panic with an out-of-bounds index. It only arrives from untrusted input
485+
// (e.g. an Avro `decimal` encoded as zero-length `bytes`), so it must be a
486+
// clean error, not a panic. Regression test for that fix.
487+
for scale in [0u8, 1, 38] {
488+
assert!(twos_complement_be_to_numeric(&mut [], scale).is_err());
489+
}
490+
assert!(
491+
twos_complement_be_to_numeric_inner::<Numeric, NUMERIC_DATUM_WIDTH_USIZE>(&mut []).is_err()
492+
);
493+
}
494+
472495
#[mz_ore::test]
473496
#[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function `decNumberFromInt32` on OS `linux`
474497
fn test_twos_comp_numeric_primitive() {

src/repr/src/adt/range.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,5 +21,6 @@ message ProtoInvalidRangeError {
2121
google.protobuf.Empty discontiguous_union = 4;
2222
google.protobuf.Empty discontiguous_difference = 5;
2323
google.protobuf.Empty null_range_bound_flags = 6;
24+
google.protobuf.Empty invalid_range_data = 7;
2425
}
2526
}

src/repr/src/adt/range.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -796,6 +796,10 @@ pub enum InvalidRangeError {
796796
DiscontiguousUnion,
797797
DiscontiguousDifference,
798798
NullRangeBoundFlags,
799+
/// The encoded range data is structurally invalid (e.g. a null bound,
800+
/// bounds of inconsistent types, or the wrong number of bounds). Only
801+
/// reachable by decoding untrusted/corrupted bytes.
802+
InvalidRangeData,
799803
}
800804

801805
impl Display for InvalidRangeError {
@@ -817,6 +821,7 @@ impl Display for InvalidRangeError {
817821
InvalidRangeError::NullRangeBoundFlags => {
818822
f.write_str("range constructor flags argument must not be null")
819823
}
824+
InvalidRangeError::InvalidRangeData => f.write_str("invalid range data"),
820825
}
821826
}
822827
}
@@ -847,6 +852,7 @@ impl RustType<ProtoInvalidRangeError> for InvalidRangeError {
847852
InvalidRangeError::DiscontiguousUnion => DiscontiguousUnion(()),
848853
InvalidRangeError::DiscontiguousDifference => DiscontiguousDifference(()),
849854
InvalidRangeError::NullRangeBoundFlags => NullRangeBoundFlags(()),
855+
InvalidRangeError::InvalidRangeData => InvalidRangeData(()),
850856
};
851857
ProtoInvalidRangeError { kind: Some(kind) }
852858
}
@@ -863,6 +869,7 @@ impl RustType<ProtoInvalidRangeError> for InvalidRangeError {
863869
DiscontiguousUnion(()) => InvalidRangeError::DiscontiguousUnion,
864870
DiscontiguousDifference(()) => InvalidRangeError::DiscontiguousDifference,
865871
NullRangeBoundFlags(()) => InvalidRangeError::NullRangeBoundFlags,
872+
InvalidRangeData(()) => InvalidRangeError::InvalidRangeData,
866873
}),
867874
None => Err(TryFromProtoError::missing_field(
868875
"`ProtoInvalidRangeError::kind`",

src/repr/src/adt/timestamp.rs

Lines changed: 39 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -258,25 +258,22 @@ pub trait TimestampLike:
258258
}
259259

260260
fn truncate_microseconds(&self) -> Self {
261-
let time = NaiveTime::from_hms_micro_opt(
262-
self.hour(),
263-
self.minute(),
264-
self.second(),
265-
self.nanosecond() / 1_000,
266-
)
267-
.unwrap();
261+
// Use `with_nanosecond` rather than `from_hms_micro_opt`: the latter only
262+
// accepts a leap-second sub-second (>= 1s) when `sec == 59`, so a value
263+
// carrying chrono's leap representation at any other second (reachable
264+
// from a parsed `:60` literal) would be `None` and panic. `with_nanosecond`
265+
// accepts the whole [0, 2s) range, preserving the value.
266+
let time = NaiveTime::from_hms_opt(self.hour(), self.minute(), self.second())
267+
.and_then(|t| t.with_nanosecond((self.nanosecond() / 1_000) * 1_000))
268+
.expect("hour/minute/second/nanosecond came from a valid time");
268269

269270
Self::new(self.date(), time)
270271
}
271272

272273
fn truncate_milliseconds(&self) -> Self {
273-
let time = NaiveTime::from_hms_milli_opt(
274-
self.hour(),
275-
self.minute(),
276-
self.second(),
277-
self.nanosecond() / 1_000_000,
278-
)
279-
.unwrap();
274+
let time = NaiveTime::from_hms_opt(self.hour(), self.minute(), self.second())
275+
.and_then(|t| t.with_nanosecond((self.nanosecond() / 1_000_000) * 1_000_000))
276+
.expect("hour/minute/second/nanosecond came from a valid time");
280277

281278
Self::new(self.date(), time)
282279
}
@@ -901,9 +898,11 @@ impl RustType<ProtoNaiveDateTime> for CheckedTimestamp<NaiveDateTime> {
901898
}
902899

903900
fn from_proto(proto: ProtoNaiveDateTime) -> Result<Self, TryFromProtoError> {
904-
Ok(Self {
905-
t: NaiveDateTime::from_proto(proto)?,
906-
})
901+
// Go through `from_timestamplike` so out-of-range values are
902+
// rejected here. Pushing them into a `Row` succeeds, but
903+
// `read_datum` would panic when reconstructing the timestamp.
904+
CheckedTimestamp::from_timestamplike(NaiveDateTime::from_proto(proto)?)
905+
.map_err(|err| TryFromProtoError::InvalidFieldError(err.to_string()))
907906
}
908907
}
909908

@@ -913,9 +912,8 @@ impl RustType<ProtoNaiveDateTime> for CheckedTimestamp<DateTime<Utc>> {
913912
}
914913

915914
fn from_proto(proto: ProtoNaiveDateTime) -> Result<Self, TryFromProtoError> {
916-
Ok(Self {
917-
t: DateTime::<Utc>::from_proto(proto)?,
918-
})
915+
CheckedTimestamp::from_timestamplike(DateTime::<Utc>::from_proto(proto)?)
916+
.map_err(|err| TryFromProtoError::InvalidFieldError(err.to_string()))
919917
}
920918
}
921919

@@ -1117,6 +1115,27 @@ mod test {
11171115
assert_round_to_precision(high, 6, 8210266790400123457);
11181116
}
11191117

1118+
#[mz_ore::test]
1119+
fn test_round_to_precision_leap_second_off_minute() {
1120+
// Regression: parsing a `:60` literal can leave chrono's leap-second
1121+
// representation (sub-second >= 1s) on a second other than `:59` (after
1122+
// time-zone math). Rounding such a value, as the string->timestamp cast
1123+
// does, must not panic. `truncate_microseconds`/`truncate_milliseconds`
1124+
// previously rebuilt the time with `from_hms_{micro,milli}_opt`, whose
1125+
// leap sub-second range is only valid at `:59`, and unwrapped the `None`.
1126+
let leap = NaiveDate::from_ymd_opt(3, 3, 17)
1127+
.unwrap()
1128+
.and_hms_opt(12, 30, 56)
1129+
.unwrap()
1130+
.with_nanosecond(1_000_000_000)
1131+
.unwrap();
1132+
let ts = CheckedTimestamp::try_from(leap).unwrap();
1133+
for precision in [None, Some(0), Some(3), Some(6)] {
1134+
ts.round_to_precision(precision.map(TimestampPrecision))
1135+
.unwrap();
1136+
}
1137+
}
1138+
11201139
#[mz_ore::test]
11211140
fn test_precision_edge_cases() {
11221141
#[allow(clippy::disallowed_methods)] // not using enhanced panic handler in tests

src/repr/src/relation.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1034,6 +1034,16 @@ impl RustType<ProtoRelationDesc> for RelationDesc {
10341034
};
10351035
Box::new(itertools::repeat_n(val, proto.names.len()))
10361036
} else {
1037+
// Reject mismatched lengths explicitly rather than panicking via
1038+
// `zip_eq` below, since this branch is reachable from untrusted
1039+
// proto bytes.
1040+
if proto.names.len() != proto.metadata.len() {
1041+
return Err(TryFromProtoError::InvalidFieldError(format!(
1042+
"ProtoRelationDesc: names ({}) and metadata ({}) length mismatch",
1043+
proto.names.len(),
1044+
proto.metadata.len()
1045+
)));
1046+
}
10371047
Box::new(proto.metadata.into_iter())
10381048
};
10391049

src/repr/src/row.rs

Lines changed: 34 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2808,17 +2808,23 @@ impl RowPacker<'_> {
28082808
let mut dataz = &self.row.data[datum_check..];
28092809
while !dataz.is_empty() {
28102810
let d = unsafe { read_datum(&mut dataz) };
2811-
assert!(d != Datum::Null, "cannot push Datum::Null into range");
2811+
// These checks only fail when decoding untrusted/corrupted bytes;
2812+
// valid callers always push consistent, non-null bounds. Return an
2813+
// error rather than asserting so a crafted proto doesn't panic.
2814+
if d == Datum::Null {
2815+
self.row.data.truncate(start);
2816+
return Err(InvalidRangeError::InvalidRangeData.into());
2817+
}
28122818

28132819
match seen {
28142820
None => seen = Some(d),
28152821
Some(seen) => {
28162822
let seen_kind = DatumKind::from(seen);
28172823
let d_kind = DatumKind::from(d);
2818-
assert!(
2819-
seen_kind == d_kind,
2820-
"range contains inconsistent data; expected {seen_kind:?} but got {d_kind:?}"
2821-
);
2824+
if seen_kind != d_kind {
2825+
self.row.data.truncate(start);
2826+
return Err(InvalidRangeError::InvalidRangeData.into());
2827+
}
28222828

28232829
if seen > d {
28242830
self.row.data.truncate(start);
@@ -2829,10 +2835,10 @@ impl RowPacker<'_> {
28292835
actual_datums += 1;
28302836
}
28312837

2832-
assert!(
2833-
actual_datums == expected_datums,
2834-
"finite values must each push exactly one value; expected {expected_datums} but got {actual_datums}"
2835-
);
2838+
if actual_datums != expected_datums {
2839+
self.row.data.truncate(start);
2840+
return Err(InvalidRangeError::InvalidRangeData.into());
2841+
}
28362842

28372843
Ok(())
28382844
}
@@ -3992,8 +3998,23 @@ mod tests {
39923998
r
39933999
}
39944000

4001+
// A finite bound whose closure pushes zero values violates the
4002+
// `push_range_with` caller contract and still panics. This is
4003+
// unreachable when decoding a `ProtoRow`: each decoded bound pushes
4004+
// exactly one datum (or fails), so only an in-process caller can hit it.
39954005
for panicking_case in [
39964006
vec![vec![Datum::Int32(1)], vec![]],
4007+
vec![vec![Datum::Int32(1), Datum::Int32(2)], vec![]],
4008+
] {
4009+
#[allow(clippy::disallowed_methods)] // not using enhanced panic handler in tests
4010+
let result = std::panic::catch_unwind(|| test_range_errors_inner(panicking_case));
4011+
assert_err!(result);
4012+
}
4013+
4014+
// Inconsistent bound counts, mismatched datum kinds, and Null bounds are
4015+
// all reachable from a crafted/corrupted `ProtoRow`, so they return an
4016+
// error instead of panicking.
4017+
for error_case in [
39974018
vec![
39984019
vec![Datum::Int32(1), Datum::Int32(2)],
39994020
vec![Datum::Int32(3)],
@@ -4002,14 +4023,14 @@ mod tests {
40024023
vec![Datum::Int32(1)],
40034024
vec![Datum::Int32(2), Datum::Int32(3)],
40044025
],
4005-
vec![vec![Datum::Int32(1), Datum::Int32(2)], vec![]],
40064026
vec![vec![Datum::Int32(1)], vec![Datum::UInt16(2)]],
40074027
vec![vec![Datum::Null], vec![Datum::Int32(2)]],
40084028
vec![vec![Datum::Int32(1)], vec![Datum::Null]],
40094029
] {
4010-
#[allow(clippy::disallowed_methods)] // not using enhanced panic handler in tests
4011-
let result = std::panic::catch_unwind(|| test_range_errors_inner(panicking_case));
4012-
assert_err!(result);
4030+
assert_eq!(
4031+
test_range_errors_inner(error_case),
4032+
Err(InvalidRangeError::InvalidRangeData)
4033+
);
40134034
}
40144035

40154036
let e = test_range_errors_inner(vec![vec![Datum::Int32(2)], vec![Datum::Int32(1)]]);

0 commit comments

Comments
 (0)