Skip to content

Commit 528ca74

Browse files
authored
fix: avoid panic in date_bin compute_distance near i64::MIN (#22408)
## Which issue does this PR close? - Closes #22215. ## Rationale for this change `date_bin` panics with `attempt to subtract with overflow` when the source timestamp sits near `i64::MIN` because `compute_distance` does `time_diff - (time_diff % stride)` and then `time_delta - stride` on raw `i64`. The scalar pipeline already maps `Err` from `bin_fn` into `NULL`, so the fix is to surface the overflow as a normal error. ## What changes are included in this PR? - Convert `compute_distance` to return `Result<i64>` and use `checked_sub`. - Propagate the result through `date_bin_nanos_interval` and `date_bin_months_interval`, plus replace the trailing `origin + time_delta` with `checked_add`. ## Are these changes tested? Yes. Added `test_date_bin_compute_distance_i64_min` which previously panicked and now returns `NULL`. Ran `cargo test -p datafusion-functions --lib -- datetime::date_bin`, `cargo fmt --check`, and `cargo clippy -p datafusion-functions --lib --tests --no-deps`. ## Are there any user-facing changes? Queries that previously panicked on extreme timestamps now return `NULL` (consistent with the existing out-of-range behavior covered by `test_date_bin_out_of_range`).
1 parent 0469e5e commit 528ca74

2 files changed

Lines changed: 84 additions & 7 deletions

File tree

datafusion/functions/src/datetime/date_bin.rs

Lines changed: 74 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -328,20 +328,39 @@ fn date_bin_nanos_interval(stride_nanos: i64, source: i64, origin: i64) -> Resul
328328
})?;
329329

330330
// distance from origin to bin
331-
let time_delta = compute_distance(time_diff, stride_nanos);
331+
let time_delta = compute_distance(time_diff, stride_nanos)?;
332332

333-
Ok(origin + time_delta)
333+
origin.checked_add(time_delta).ok_or_else(|| {
334+
arrow::error::ArrowError::InvalidArgumentError(format!(
335+
"date_bin origin {origin} + delta {time_delta} overflows i64"
336+
))
337+
.into()
338+
})
334339
}
335340

336341
// distance from origin to bin
337-
fn compute_distance(time_diff: i64, stride: i64) -> i64 {
338-
let time_delta = time_diff - (time_diff % stride);
342+
fn compute_distance(time_diff: i64, stride: i64) -> Result<i64> {
343+
let remainder = time_diff.checked_rem(stride).ok_or_else(|| {
344+
arrow::error::ArrowError::InvalidArgumentError(format!(
345+
"date_bin compute_distance time_diff {time_diff} % stride {stride} overflows i64"
346+
))
347+
})?;
348+
let time_delta = time_diff.checked_sub(remainder).ok_or_else(|| {
349+
arrow::error::ArrowError::InvalidArgumentError(format!(
350+
"date_bin compute_distance time_diff {time_diff} - remainder {remainder} overflows i64"
351+
))
352+
})?;
339353

340354
if time_diff < 0 && stride > 1 && time_delta != time_diff {
341355
// The origin is later than the source timestamp, round down to the previous bin
342-
time_delta - stride
356+
time_delta.checked_sub(stride).ok_or_else(|| {
357+
arrow::error::ArrowError::InvalidArgumentError(format!(
358+
"date_bin compute_distance time_delta {time_delta} - stride {stride} overflows i64"
359+
))
360+
.into()
361+
})
343362
} else {
344-
time_delta
363+
Ok(time_delta)
345364
}
346365
}
347366

@@ -357,7 +376,7 @@ fn date_bin_months_interval(stride_months: i64, source: i64, origin: i64) -> Res
357376
- origin_date.month() as i32;
358377

359378
// distance from origin to bin
360-
let month_delta = compute_distance(month_diff as i64, stride_months);
379+
let month_delta = compute_distance(month_diff as i64, stride_months)?;
361380

362381
let mut bin_time = if month_delta < 0 {
363382
match origin_date
@@ -1341,4 +1360,52 @@ mod tests {
13411360
assert!(val.is_none(), "Expected None for out of range operation");
13421361
}
13431362
}
1363+
1364+
#[test]
1365+
fn test_date_bin_compute_distance_i64_min() {
1366+
// Regression for #22215: date_bin_nanos_interval on a source near i64::MIN
1367+
// previously panicked inside compute_distance with "attempt to subtract with overflow".
1368+
// Now it must return a normal Err that the scalar pipeline maps to NULL.
1369+
let result = date_bin_nanos_interval(3, i64::MIN, 0);
1370+
assert!(
1371+
result.is_err(),
1372+
"expected Err for source=i64::MIN, got {result:?}"
1373+
);
1374+
1375+
let return_field = &Arc::new(Field::new(
1376+
"f",
1377+
DataType::Timestamp(TimeUnit::Nanosecond, None),
1378+
true,
1379+
));
1380+
let args = vec![
1381+
ColumnarValue::Scalar(ScalarValue::new_interval_mdn(0, 0, 3)),
1382+
ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(i64::MIN), None)),
1383+
ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(0), None)),
1384+
];
1385+
let result = invoke_date_bin_with_args(args, 1, return_field);
1386+
assert!(result.is_ok(), "expected Ok with NULL, got {result:?}");
1387+
if let ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(val, _)) =
1388+
result.unwrap()
1389+
{
1390+
assert!(
1391+
val.is_none(),
1392+
"Expected None for compute_distance overflow, got {val:?}"
1393+
);
1394+
} else {
1395+
panic!("Expected TimestampNanosecond scalar");
1396+
}
1397+
}
1398+
1399+
#[test]
1400+
fn test_date_bin_compute_distance_rem_overflow() {
1401+
// Regression for #22215: `time_diff % stride` panics with "attempt to
1402+
// calculate the remainder with overflow" when `time_diff == i64::MIN`
1403+
// and `stride == -1`. Now it must return a normal Err that the scalar
1404+
// pipeline maps to NULL.
1405+
let result = date_bin_nanos_interval(-1, i64::MIN, 0);
1406+
assert!(
1407+
result.is_err(),
1408+
"expected Err for time_diff=i64::MIN, stride=-1, got {result:?}"
1409+
);
1410+
}
13441411
}

datafusion/sqllogictest/test_files/date_bin_errors.slt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,4 +67,14 @@ select date_bin(
6767
arrow_cast(-9223372036854775808, 'Timestamp(Nanosecond, None)')
6868
);
6969
----
70+
NULL
71+
72+
# compute_distance overflow: source at i64::MIN nanoseconds previously panicked
73+
# inside compute_distance; it must return NULL through the SQL execution path
74+
query P
75+
select date_bin(
76+
interval '3 nanoseconds',
77+
arrow_cast(-9223372036854775808, 'Timestamp(Nanosecond, None)')
78+
);
79+
----
7080
NULL

0 commit comments

Comments
 (0)