Skip to content

Commit 1fb44d2

Browse files
committed
stricter bounds check for year
1 parent 0511026 commit 1fb44d2

1 file changed

Lines changed: 17 additions & 2 deletions

File tree

  • native/spark-expr/src/conversion_funcs

native/spark-expr/src/conversion_funcs/string.rs

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -969,6 +969,12 @@ fn get_timestamp_values<T: TimeZone>(
969969
) -> SparkResult<Option<i64>> {
970970
let values: Vec<_> = value.split(['T', '-', ':', '.']).collect();
971971
let year = values[0].parse::<i32>().unwrap_or_default();
972+
973+
// NaiveDate (used internally by chrono's with_ymd_and_hms) only supports years in -262143..=262142.
974+
if !(-262143..=262142).contains(&year) {
975+
return Ok(None);
976+
}
977+
972978
let month = values.get(1).map_or(1, |m| m.parse::<u32>().unwrap_or(1));
973979
let day = values.get(2).map_or(1, |d| d.parse::<u32>().unwrap_or(1));
974980
let hour = values.get(3).map_or(0, |h| h.parse::<u32>().unwrap_or(0));
@@ -1210,7 +1216,10 @@ fn date_parser(date_str: &str, eval_mode: EvalMode) -> SparkResult<Option<i32>>
12101216
}
12111217

12121218
fn is_valid_digits(segment: i32, digits: usize) -> bool {
1213-
// An integer is able to represent a date within [+-]5 million years.
1219+
// NaiveDate is bounded to [-262143, 262142] (6 digits). We allow up to 7 digits to support
1220+
// leading-zero year strings like "0002020" (= year 2020), matching Spark's
1221+
// isValidDigits. Values outside the bounds are caught by an explicit bounds
1222+
// check below.
12141223
let max_digits_year = 7;
12151224
// year (segment 0) can be between 4 and 7 digits,
12161225
// month and day (segment 1 and 2) can be between 1 to 2 digits
@@ -1293,8 +1302,14 @@ fn date_parser(date_str: &str, eval_mode: EvalMode) -> SparkResult<Option<i32>>
12931302

12941303
date_segments[current_segment as usize] = current_segment_value.0;
12951304

1305+
// Reject out-of-range years explicitly
1306+
let year = sign * date_segments[0];
1307+
if !(-262143..=262142).contains(&year) {
1308+
return Ok(None);
1309+
}
1310+
12961311
match NaiveDate::from_ymd_opt(
1297-
sign * date_segments[0],
1312+
year,
12981313
date_segments[1] as u32,
12991314
date_segments[2] as u32,
13001315
) {

0 commit comments

Comments
 (0)