Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/adapter/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,8 @@ fn eval_error_code(err: &EvalError) -> SqlState {
| InvalidRangeError::InvalidRangeBoundFlags
| InvalidRangeError::NullRangeBoundFlags
| InvalidRangeError::DiscontiguousUnion
| InvalidRangeError::DiscontiguousDifference => SqlState::DATA_EXCEPTION,
| InvalidRangeError::DiscontiguousDifference
| InvalidRangeError::InvalidRangeData => SqlState::DATA_EXCEPTION,
},

// Cardinality violations from scalar subqueries.
Expand Down
4 changes: 2 additions & 2 deletions src/pgrepr/src/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -725,7 +725,7 @@ impl Value {
Type::TimeTz { .. } => return Err("input of timetz types is not implemented".into()),
Type::Timestamp { .. } => Value::Timestamp(strconv::parse_timestamp(s)?),
Type::TimestampTz { .. } => Value::TimestampTz(strconv::parse_timestamptz(s)?),
Type::Uuid => Value::Uuid(Uuid::parse_str(s)?),
Type::Uuid => Value::Uuid(strconv::parse_uuid(s)?),
Type::MzTimestamp => Value::MzTimestamp(strconv::parse_mz_timestamp(s)?),
Type::Range { element_type } => Value::Range(strconv::parse_range(s, |elem_text| {
Value::decode_text(element_type, elem_text.as_bytes()).map(Box::new)
Expand Down Expand Up @@ -831,7 +831,7 @@ impl Value {
Type::TimestampTz { .. } => {
packer.push(Datum::TimestampTz(strconv::parse_timestamptz(s)?))
}
Type::Uuid => packer.push(Datum::Uuid(Uuid::parse_str(s)?)),
Type::Uuid => packer.push(Datum::Uuid(strconv::parse_uuid(s)?)),
Type::MzTimestamp => packer.push(Datum::MzTimestamp(strconv::parse_mz_timestamp(s)?)),
Type::Range { element_type } => {
let range = strconv::parse_range(s, |elem_text| {
Expand Down
6 changes: 5 additions & 1 deletion src/repr/src/adt/date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,11 @@ impl RustType<ProtoDate> for Date {
}

fn from_proto(proto: ProtoDate) -> Result<Self, TryFromProtoError> {
Ok(Date { days: proto.days })
// Go through `from_pg_epoch` so out-of-range days are rejected here.
// Pushing them into a `Row` succeeds, but `read_datum` would later
// panic when reconstructing the date.
Date::from_pg_epoch(proto.days)
.map_err(|err| TryFromProtoError::InvalidFieldError(err.to_string()))
}
}

Expand Down
23 changes: 23 additions & 0 deletions src/repr/src/adt/numeric.rs
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,14 @@ pub fn twos_complement_be_to_numeric(
pub fn twos_complement_be_to_numeric_inner<D: Dec<N>, const N: usize>(
input: &mut [u8],
) -> Result<Decimal<N>, anyhow::Error> {
if input.is_empty() {
// An empty byte string is not a valid two's-complement integer. Our own
// encoder never emits one (zero is the single byte `0x00`), so this only
// arises from untrusted input. One example is an Avro `decimal` field
// whose unscaled value was encoded as zero-length `bytes`. Reject it
// rather than indexing `input[0]` below and panicking.
bail!("cannot parse a numeric value from an empty byte string");
}
let is_neg = if (input[0] & 0x80) != 0 {
// byte-level negate all negative values, guaranteeing all bytes are
// readable as unsigned.
Expand Down Expand Up @@ -469,6 +477,21 @@ fn test_twos_complement_roundtrip() {
inner("-7.2e-35");
}

#[mz_ore::test]
#[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function `decNumberFromInt32` on OS `linux`
fn test_twos_complement_empty_is_error() {
// An empty byte string is not a valid two's-complement integer and used to
// panic with an out-of-bounds index. It only arrives from untrusted input
// (e.g. an Avro `decimal` encoded as zero-length `bytes`), so it must be a
// clean error, not a panic. Regression test for that fix.
for scale in [0u8, 1, 38] {
assert!(twos_complement_be_to_numeric(&mut [], scale).is_err());
}
assert!(
twos_complement_be_to_numeric_inner::<Numeric, NUMERIC_DATUM_WIDTH_USIZE>(&mut []).is_err()
);
}

#[mz_ore::test]
#[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function `decNumberFromInt32` on OS `linux`
fn test_twos_comp_numeric_primitive() {
Expand Down
1 change: 1 addition & 0 deletions src/repr/src/adt/range.proto
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,6 @@ message ProtoInvalidRangeError {
google.protobuf.Empty discontiguous_union = 4;
google.protobuf.Empty discontiguous_difference = 5;
google.protobuf.Empty null_range_bound_flags = 6;
google.protobuf.Empty invalid_range_data = 7;
}
}
7 changes: 7 additions & 0 deletions src/repr/src/adt/range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,10 @@ pub enum InvalidRangeError {
DiscontiguousUnion,
DiscontiguousDifference,
NullRangeBoundFlags,
/// The encoded range data is structurally invalid (e.g. a null bound,
/// bounds of inconsistent types, or the wrong number of bounds). Only
/// reachable by decoding untrusted/corrupted bytes.
InvalidRangeData,
}

impl Display for InvalidRangeError {
Expand All @@ -817,6 +821,7 @@ impl Display for InvalidRangeError {
InvalidRangeError::NullRangeBoundFlags => {
f.write_str("range constructor flags argument must not be null")
}
InvalidRangeError::InvalidRangeData => f.write_str("invalid range data"),
}
}
}
Expand Down Expand Up @@ -847,6 +852,7 @@ impl RustType<ProtoInvalidRangeError> for InvalidRangeError {
InvalidRangeError::DiscontiguousUnion => DiscontiguousUnion(()),
InvalidRangeError::DiscontiguousDifference => DiscontiguousDifference(()),
InvalidRangeError::NullRangeBoundFlags => NullRangeBoundFlags(()),
InvalidRangeError::InvalidRangeData => InvalidRangeData(()),
};
ProtoInvalidRangeError { kind: Some(kind) }
}
Expand All @@ -863,6 +869,7 @@ impl RustType<ProtoInvalidRangeError> for InvalidRangeError {
DiscontiguousUnion(()) => InvalidRangeError::DiscontiguousUnion,
DiscontiguousDifference(()) => InvalidRangeError::DiscontiguousDifference,
NullRangeBoundFlags(()) => InvalidRangeError::NullRangeBoundFlags,
InvalidRangeData(()) => InvalidRangeError::InvalidRangeData,
}),
None => Err(TryFromProtoError::missing_field(
"`ProtoInvalidRangeError::kind`",
Expand Down
59 changes: 39 additions & 20 deletions src/repr/src/adt/timestamp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -258,25 +258,22 @@ pub trait TimestampLike:
}

fn truncate_microseconds(&self) -> Self {
let time = NaiveTime::from_hms_micro_opt(
self.hour(),
self.minute(),
self.second(),
self.nanosecond() / 1_000,
)
.unwrap();
// Use `with_nanosecond` rather than `from_hms_micro_opt`: the latter only
// accepts a leap-second sub-second (>= 1s) when `sec == 59`, so a value
// carrying chrono's leap representation at any other second (reachable
// from a parsed `:60` literal) would be `None` and panic. `with_nanosecond`
// accepts the whole [0, 2s) range, preserving the value.
let time = NaiveTime::from_hms_opt(self.hour(), self.minute(), self.second())
.and_then(|t| t.with_nanosecond((self.nanosecond() / 1_000) * 1_000))
.expect("hour/minute/second/nanosecond came from a valid time");

Self::new(self.date(), time)
}

fn truncate_milliseconds(&self) -> Self {
let time = NaiveTime::from_hms_milli_opt(
self.hour(),
self.minute(),
self.second(),
self.nanosecond() / 1_000_000,
)
.unwrap();
let time = NaiveTime::from_hms_opt(self.hour(), self.minute(), self.second())
.and_then(|t| t.with_nanosecond((self.nanosecond() / 1_000_000) * 1_000_000))
.expect("hour/minute/second/nanosecond came from a valid time");

Self::new(self.date(), time)
}
Expand Down Expand Up @@ -901,9 +898,11 @@ impl RustType<ProtoNaiveDateTime> for CheckedTimestamp<NaiveDateTime> {
}

fn from_proto(proto: ProtoNaiveDateTime) -> Result<Self, TryFromProtoError> {
Ok(Self {
t: NaiveDateTime::from_proto(proto)?,
})
// Go through `from_timestamplike` so out-of-range values are
// rejected here. Pushing them into a `Row` succeeds, but
// `read_datum` would panic when reconstructing the timestamp.
CheckedTimestamp::from_timestamplike(NaiveDateTime::from_proto(proto)?)
.map_err(|err| TryFromProtoError::InvalidFieldError(err.to_string()))
}
}

Expand All @@ -913,9 +912,8 @@ impl RustType<ProtoNaiveDateTime> for CheckedTimestamp<DateTime<Utc>> {
}

fn from_proto(proto: ProtoNaiveDateTime) -> Result<Self, TryFromProtoError> {
Ok(Self {
t: DateTime::<Utc>::from_proto(proto)?,
})
CheckedTimestamp::from_timestamplike(DateTime::<Utc>::from_proto(proto)?)
.map_err(|err| TryFromProtoError::InvalidFieldError(err.to_string()))
}
}

Expand Down Expand Up @@ -1117,6 +1115,27 @@ mod test {
assert_round_to_precision(high, 6, 8210266790400123457);
}

#[mz_ore::test]
fn test_round_to_precision_leap_second_off_minute() {
// Regression: parsing a `:60` literal can leave chrono's leap-second
// representation (sub-second >= 1s) on a second other than `:59` (after
// time-zone math). Rounding such a value, as the string->timestamp cast
// does, must not panic. `truncate_microseconds`/`truncate_milliseconds`
// previously rebuilt the time with `from_hms_{micro,milli}_opt`, whose
// leap sub-second range is only valid at `:59`, and unwrapped the `None`.
let leap = NaiveDate::from_ymd_opt(3, 3, 17)
.unwrap()
.and_hms_opt(12, 30, 56)
.unwrap()
.with_nanosecond(1_000_000_000)
.unwrap();
let ts = CheckedTimestamp::try_from(leap).unwrap();
for precision in [None, Some(0), Some(3), Some(6)] {
ts.round_to_precision(precision.map(TimestampPrecision))
.unwrap();
}
}

#[mz_ore::test]
fn test_precision_edge_cases() {
#[allow(clippy::disallowed_methods)] // not using enhanced panic handler in tests
Expand Down
10 changes: 10 additions & 0 deletions src/repr/src/relation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1034,6 +1034,16 @@ impl RustType<ProtoRelationDesc> for RelationDesc {
};
Box::new(itertools::repeat_n(val, proto.names.len()))
} else {
// Reject mismatched lengths explicitly rather than panicking via
// `zip_eq` below, since this branch is reachable from untrusted
// proto bytes.
if proto.names.len() != proto.metadata.len() {
return Err(TryFromProtoError::InvalidFieldError(format!(
"ProtoRelationDesc: names ({}) and metadata ({}) length mismatch",
proto.names.len(),
proto.metadata.len()
)));
}
Box::new(proto.metadata.into_iter())
};

Expand Down
47 changes: 34 additions & 13 deletions src/repr/src/row.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2808,17 +2808,23 @@ impl RowPacker<'_> {
let mut dataz = &self.row.data[datum_check..];
while !dataz.is_empty() {
let d = unsafe { read_datum(&mut dataz) };
assert!(d != Datum::Null, "cannot push Datum::Null into range");
// These checks only fail when decoding untrusted/corrupted bytes;
// valid callers always push consistent, non-null bounds. Return an
// error rather than asserting so a crafted proto doesn't panic.
if d == Datum::Null {
self.row.data.truncate(start);
return Err(InvalidRangeError::InvalidRangeData.into());
}

match seen {
None => seen = Some(d),
Some(seen) => {
let seen_kind = DatumKind::from(seen);
let d_kind = DatumKind::from(d);
assert!(
seen_kind == d_kind,
"range contains inconsistent data; expected {seen_kind:?} but got {d_kind:?}"
);
if seen_kind != d_kind {
self.row.data.truncate(start);
return Err(InvalidRangeError::InvalidRangeData.into());
}

if seen > d {
self.row.data.truncate(start);
Expand All @@ -2829,10 +2835,10 @@ impl RowPacker<'_> {
actual_datums += 1;
}

assert!(
actual_datums == expected_datums,
"finite values must each push exactly one value; expected {expected_datums} but got {actual_datums}"
);
if actual_datums != expected_datums {
self.row.data.truncate(start);
return Err(InvalidRangeError::InvalidRangeData.into());
}

Ok(())
}
Expand Down Expand Up @@ -3992,8 +3998,23 @@ mod tests {
r
}

// A finite bound whose closure pushes zero values violates the
// `push_range_with` caller contract and still panics. This is
// unreachable when decoding a `ProtoRow`: each decoded bound pushes
// exactly one datum (or fails), so only an in-process caller can hit it.
for panicking_case in [
vec![vec![Datum::Int32(1)], vec![]],
vec![vec![Datum::Int32(1), Datum::Int32(2)], vec![]],
] {
#[allow(clippy::disallowed_methods)] // not using enhanced panic handler in tests
let result = std::panic::catch_unwind(|| test_range_errors_inner(panicking_case));
assert_err!(result);
}

// Inconsistent bound counts, mismatched datum kinds, and Null bounds are
// all reachable from a crafted/corrupted `ProtoRow`, so they return an
// error instead of panicking.
for error_case in [
vec![
vec![Datum::Int32(1), Datum::Int32(2)],
vec![Datum::Int32(3)],
Expand All @@ -4002,14 +4023,14 @@ mod tests {
vec![Datum::Int32(1)],
vec![Datum::Int32(2), Datum::Int32(3)],
],
vec![vec![Datum::Int32(1), Datum::Int32(2)], vec![]],
vec![vec![Datum::Int32(1)], vec![Datum::UInt16(2)]],
vec![vec![Datum::Null], vec![Datum::Int32(2)]],
vec![vec![Datum::Int32(1)], vec![Datum::Null]],
] {
#[allow(clippy::disallowed_methods)] // not using enhanced panic handler in tests
let result = std::panic::catch_unwind(|| test_range_errors_inner(panicking_case));
assert_err!(result);
assert_eq!(
test_range_errors_inner(error_case),
Err(InvalidRangeError::InvalidRangeData)
);
}

let e = test_range_errors_inner(vec![vec![Datum::Int32(2)], vec![Datum::Int32(1)]]);
Expand Down
Loading
Loading