Skip to content

Commit 7ed3b69

Browse files
Guard to_timestamp decimal overflow (#22307)
## Which issue does this PR close? - Closes #22213 ## Rationale for this change `to_timestamp` converted Decimal128 inputs to nanoseconds with unchecked `i128` multiplication followed by an `as i64` cast. Large Decimal128 values could overflow during nanosecond scaling, causing a panic in debug builds or a wrapped timestamp value in release builds. ## What changes are included in this PR? - Convert Decimal128-to-nanoseconds scaling to checked arithmetic. - Return a DataFusion error when the scaled value cannot fit in timestamp nanoseconds. - Cover both scalar and array Decimal128 overflow paths. - Add a sqllogictest regression for the reported query shape. ## Are these changes tested? - `cargo fmt --check` - `git diff --check` - `CARGO_TARGET_DIR=/home/sean/Projects/datafusion-runtime-set-nonascii/target CARGO_BUILD_JOBS=2 cargo test -p datafusion-functions --lib to_timestamp_decimal128` - `CARGO_TARGET_DIR=/home/sean/Projects/datafusion-runtime-set-nonascii/target CARGO_BUILD_JOBS=2 cargo clippy -p datafusion-functions --lib -- -D warnings`
1 parent 626da1e commit 7ed3b69

2 files changed

Lines changed: 66 additions & 12 deletions

File tree

datafusion/functions/src/datetime/to_timestamp.rs

Lines changed: 60 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ use arrow::datatypes::{
3030
TimestampNanosecondType, TimestampSecondType,
3131
};
3232
use datafusion_common::config::ConfigOptions;
33-
use datafusion_common::{Result, ScalarType, ScalarValue, exec_err};
33+
use datafusion_common::{Result, ScalarType, ScalarValue, exec_datafusion_err, exec_err};
3434
use datafusion_expr::{
3535
ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl,
3636
Signature, Volatility,
@@ -332,14 +332,31 @@ impl_to_timestamp_constructors!(ToTimestampMillisFunc);
332332
impl_to_timestamp_constructors!(ToTimestampMicrosFunc);
333333
impl_to_timestamp_constructors!(ToTimestampNanosFunc);
334334

335-
fn decimal_to_nanoseconds(value: i128, scale: i8) -> i64 {
335+
fn decimal_to_nanoseconds(value: i128, scale: i8) -> Result<i64> {
336336
let nanos_exponent = 9_i16 - scale as i16;
337+
let power = 10_i128
338+
.checked_pow(nanos_exponent.unsigned_abs() as u32)
339+
.ok_or_else(|| {
340+
exec_datafusion_err!(
341+
"Decimal value {value} with scale {scale} overflows timestamp nanoseconds"
342+
)
343+
})?;
344+
337345
let timestamp_nanos = if nanos_exponent >= 0 {
338-
value * 10_i128.pow(nanos_exponent as u32)
346+
value.checked_mul(power).ok_or_else(|| {
347+
exec_datafusion_err!(
348+
"Decimal value {value} with scale {scale} overflows timestamp nanoseconds"
349+
)
350+
})?
339351
} else {
340-
value / 10_i128.pow(nanos_exponent.unsigned_abs() as u32)
352+
value / power
341353
};
342-
timestamp_nanos as i64
354+
355+
i64::try_from(timestamp_nanos).map_err(|_| {
356+
exec_datafusion_err!(
357+
"Decimal value {value} with scale {scale} overflows timestamp nanoseconds"
358+
)
359+
})
343360
}
344361

345362
fn decimal128_to_timestamp_nanos(
@@ -348,7 +365,7 @@ fn decimal128_to_timestamp_nanos(
348365
) -> Result<ColumnarValue> {
349366
match arg {
350367
ColumnarValue::Scalar(ScalarValue::Decimal128(Some(value), _, scale)) => {
351-
let timestamp_nanos = decimal_to_nanoseconds(*value, *scale);
368+
let timestamp_nanos = decimal_to_nanoseconds(*value, *scale)?;
352369
Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
353370
Some(timestamp_nanos),
354371
tz,
@@ -362,8 +379,8 @@ fn decimal128_to_timestamp_nanos(
362379
let scale = decimal_arr.scale();
363380
let result: TimestampNanosecondArray = decimal_arr
364381
.iter()
365-
.map(|v| v.map(|val| decimal_to_nanoseconds(val, scale)))
366-
.collect();
382+
.map(|v| v.map(|val| decimal_to_nanoseconds(val, scale)).transpose())
383+
.collect::<Result<_>>()?;
367384
let result = result.with_timezone_opt(tz);
368385
Ok(ColumnarValue::Array(Arc::new(result)))
369386
}
@@ -947,6 +964,37 @@ mod tests {
947964
Ok(())
948965
}
949966

967+
#[test]
968+
fn to_timestamp_decimal128_overflow_returns_error() {
969+
let value = "99999999999999999999999999999999999999"
970+
.parse::<i128>()
971+
.unwrap();
972+
let err = decimal128_to_timestamp_nanos(
973+
&ColumnarValue::Scalar(ScalarValue::Decimal128(Some(value), 38, 0)),
974+
None,
975+
)
976+
.unwrap_err()
977+
.to_string();
978+
979+
assert_contains!(err, "overflows timestamp nanoseconds");
980+
}
981+
982+
#[test]
983+
fn to_timestamp_decimal128_array_overflow_returns_error() {
984+
let value = "99999999999999999999999999999999999999"
985+
.parse::<i128>()
986+
.unwrap();
987+
let array = Decimal128Array::from(vec![Some(value)])
988+
.with_precision_and_scale(38, 0)
989+
.unwrap();
990+
let err =
991+
decimal128_to_timestamp_nanos(&ColumnarValue::Array(Arc::new(array)), None)
992+
.unwrap_err()
993+
.to_string();
994+
995+
assert_contains!(err, "overflows timestamp nanoseconds");
996+
}
997+
950998
#[test]
951999
fn to_timestamp_with_formats_arrays_and_nulls() -> Result<()> {
9521000
// ensure that arrow array implementation is wired up and handles nulls correctly
@@ -1830,19 +1878,19 @@ mod tests {
18301878
#[test]
18311879
fn test_decimal_to_nanoseconds_negative_scale() {
18321880
// scale -2: internal value 5 represents 5 * 10^2 = 500 seconds
1833-
let nanos = decimal_to_nanoseconds(5, -2);
1881+
let nanos = decimal_to_nanoseconds(5, -2).unwrap();
18341882
assert_eq!(nanos, 500_000_000_000); // 500 seconds in nanoseconds
18351883

18361884
// scale -1: internal value 10 represents 10 * 10^1 = 100 seconds
1837-
let nanos = decimal_to_nanoseconds(10, -1);
1885+
let nanos = decimal_to_nanoseconds(10, -1).unwrap();
18381886
assert_eq!(nanos, 100_000_000_000);
18391887

18401888
// scale 0: internal value 5 represents 5 seconds
1841-
let nanos = decimal_to_nanoseconds(5, 0);
1889+
let nanos = decimal_to_nanoseconds(5, 0).unwrap();
18421890
assert_eq!(nanos, 5_000_000_000);
18431891

18441892
// scale 3: internal value 1500 represents 1.5 seconds
1845-
let nanos = decimal_to_nanoseconds(1500, 3);
1893+
let nanos = decimal_to_nanoseconds(1500, 3).unwrap();
18461894
assert_eq!(nanos, 1_500_000_000);
18471895
}
18481896
}

datafusion/sqllogictest/test_files/datetime/timestamps.slt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -595,6 +595,12 @@ SELECT to_timestamp(arrow_cast(123456789.123456789, 'Decimal128(18,9)')) as c1,
595595
----
596596
1973-11-29T21:33:09.123456784 1970-01-01T00:00:00.123456789 1970-01-01T00:00:00.123456789
597597

598+
# Regression test for https://github.com/apache/datafusion/issues/22213
599+
query error .*overflows timestamp nanoseconds
600+
SELECT to_timestamp(
601+
arrow_cast('99999999999999999999999999999999999999', 'Decimal128(38,0)')
602+
);
603+
598604

599605
# from_unixtime
600606

0 commit comments

Comments
 (0)