Skip to content

Commit cb2542c

Browse files
authored
fix: TRY_CAST returns NULL for timestamp/date overflow (#22897)
## Which issue does this PR close? <!-- We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. For example `Closes #123` indicates that this PR will close issue #123. --> - Closes #22896. ## Rationale for this change `TRY_CAST` should return NULL on cast failure, but overflowing date/timestamp casts returned errors. <!-- Why are you proposing this change? If this is already explained clearly in the issue then this section is not needed. Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes. --> ## What changes are included in this PR? - Make scalar temporal overflow checks respect CastOptions.safe. - Skip DataFusion’s array pre-check for safe casts so Arrow can return NULLs. - Add regression tests. <!-- There is no need to duplicate the description in the issue here but it is sometimes worth providing a summary of the individual changes in this PR. --> ## Are these changes tested? Yes: ```bash cargo test -p datafusion-common timestamp_overflow_returns cargo test -p datafusion-expr-common timestamp_array_to_timestamp_overflow cargo test --test sqllogictests -- datetime/timestamps.slt ``` <!-- We typically require tests for all PRs in order to: 1. Prevent the code from being accidentally broken by subsequent changes 2. Serve as another way to document the expected behavior of the code If tests are not included in your PR, please explain why (for example, are they covered by existing tests)? --> ## Are there any user-facing changes? Yes. TRY_CAST for overflowing date/timestamp casts now returns NULL; regular CAST still errors. <!-- If there are user-facing changes then we may require documentation to be updated before approving the PR. --> <!-- If there are any breaking changes to public APIs, please add the `api change` label. --> ## Known Limitation This PR does not add Date array-path coverage yet. For example: ```sql SELECT TRY_CAST(d AS TIMESTAMP(9)) FROM (VALUES (DATE '3000-01-01')) t(d); ``` This depends on the upstream Arrow fix in apache/arrow-rs#9825. Once DataFusion updates to an Arrow version containing that fix, we can add this regression test.
1 parent 78033fa commit cb2542c

3 files changed

Lines changed: 94 additions & 2 deletions

File tree

datafusion/common/src/scalar/mod.rs

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4292,7 +4292,14 @@ impl ScalarValue {
42924292
.or_else(|| timestamp_to_timestamp_multiplier(&source_type, target_type))
42934293
&& let Some(value) = self.temporal_scalar_value_as_i64()
42944294
{
4295-
ensure_timestamp_in_bounds(value, multiplier, &source_type, target_type)?;
4295+
match ensure_timestamp_in_bounds(value, multiplier, &source_type, target_type)
4296+
{
4297+
Ok(()) => {}
4298+
Err(_) if cast_options.safe => {
4299+
return ScalarValue::try_new_null(target_type);
4300+
}
4301+
Err(e) => return Err(e),
4302+
}
42964303
}
42974304

42984305
let scalar_array = self.to_array()?;
@@ -10190,6 +10197,24 @@ mod tests {
1019010197
);
1019110198
}
1019210199

10200+
#[test]
10201+
fn safe_cast_date_to_timestamp_overflow_returns_null() {
10202+
let scalar = ScalarValue::Date32(Some(i32::MAX));
10203+
let safe_options = CastOptions {
10204+
safe: true,
10205+
..DEFAULT_CAST_OPTIONS
10206+
};
10207+
10208+
let casted = scalar
10209+
.cast_to_with_options(
10210+
&DataType::Timestamp(TimeUnit::Nanosecond, None),
10211+
&safe_options,
10212+
)
10213+
.expect("expected safe cast to return null");
10214+
10215+
assert_eq!(casted, ScalarValue::TimestampNanosecond(None, None));
10216+
}
10217+
1019310218
#[test]
1019410219
fn cast_timestamp_to_timestamp_overflow_returns_error() {
1019510220
let scalar = ScalarValue::TimestampSecond(Some(i64::MAX), None);
@@ -10203,6 +10228,24 @@ mod tests {
1020310228
);
1020410229
}
1020510230

10231+
#[test]
10232+
fn safe_cast_timestamp_to_timestamp_overflow_returns_null() {
10233+
let scalar = ScalarValue::TimestampSecond(Some(i64::MAX), None);
10234+
let safe_options = CastOptions {
10235+
safe: true,
10236+
..DEFAULT_CAST_OPTIONS
10237+
};
10238+
10239+
let casted = scalar
10240+
.cast_to_with_options(
10241+
&DataType::Timestamp(TimeUnit::Nanosecond, None),
10242+
&safe_options,
10243+
)
10244+
.expect("expected safe cast to return null");
10245+
10246+
assert_eq!(casted, ScalarValue::TimestampNanosecond(None, None));
10247+
}
10248+
1020610249
#[test]
1020710250
fn null_dictionary_scalar_produces_null_dictionary_array() {
1020810251
let dictionary_scalar = ScalarValue::Dictionary(

datafusion/expr-common/src/columnar_value.rs

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,9 @@ fn cast_array_by_name(
325325
) {
326326
datafusion_common::nested_struct::cast_column(array, cast_type, cast_options)
327327
} else {
328-
ensure_temporal_array_timestamp_bounds(array, cast_type)?;
328+
if !cast_options.safe {
329+
ensure_temporal_array_timestamp_bounds(array, cast_type)?;
330+
}
329331
Ok(kernels::cast::cast_with_options(
330332
array,
331333
cast_type,
@@ -766,4 +768,32 @@ mod tests {
766768
"unexpected error: {err}"
767769
);
768770
}
771+
772+
#[test]
773+
fn safe_cast_timestamp_array_to_timestamp_overflow_returns_null() {
774+
let overflow_value = i64::MAX / 1_000_000_000 + 1;
775+
let array: ArrayRef =
776+
Arc::new(TimestampSecondArray::from(vec![Some(overflow_value)]));
777+
let value = ColumnarValue::Array(array);
778+
let safe_options = CastOptions {
779+
safe: true,
780+
..DEFAULT_CAST_OPTIONS
781+
};
782+
783+
let casted = value
784+
.cast_to(
785+
&DataType::Timestamp(TimeUnit::Nanosecond, None),
786+
Some(&safe_options),
787+
)
788+
.expect("expected safe cast to return null");
789+
790+
let ColumnarValue::Array(array) = casted else {
791+
panic!("expected array after cast");
792+
};
793+
let array = array
794+
.as_any()
795+
.downcast_ref::<TimestampNanosecondArray>()
796+
.expect("expected TimestampNanosecondArray");
797+
assert!(array.is_null(0));
798+
}
769799
}

datafusion/sqllogictest/test_files/datetime/timestamps.slt

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5379,6 +5379,25 @@ SELECT to_timestamp(arrow_cast(-9223372036, 'Int64'));
53795379
query error converted value exceeds the representable i64 range
53805380
SELECT to_timestamp(arrow_cast(9223372037, 'Int64'));
53815381

5382+
# TRY_CAST returns NULL for timestamp/date casts that overflow
5383+
query P
5384+
SELECT TRY_CAST(arrow_cast(9223372037, 'Timestamp(s)') AS TIMESTAMP(9));
5385+
----
5386+
NULL
5387+
5388+
query P
5389+
SELECT TRY_CAST(DATE '3000-01-01' AS TIMESTAMP(9));
5390+
----
5391+
NULL
5392+
5393+
query P
5394+
SELECT TRY_CAST(ts AS TIMESTAMP(9)) AS ts
5395+
FROM (
5396+
VALUES (arrow_cast(9223372037, 'Timestamp(s)'))
5397+
) t(ts);
5398+
----
5399+
NULL
5400+
53825401
# Float truncation behavior
53835402
query P
53845403
SELECT to_timestamp_seconds(arrow_cast(-1.9, 'Float64'));

0 commit comments

Comments
 (0)