Skip to content

Commit b360af5

Browse files
committed
fix string parsing for year
1 parent d8ae361 commit b360af5

1 file changed

Lines changed: 9 additions & 9 deletions

File tree

  • native/spark-expr/src/conversion_funcs

native/spark-expr/src/conversion_funcs/string.rs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1103,31 +1103,31 @@ fn timestamp_parser<T: TimeZone>(
11031103
// Define regex patterns and corresponding parsing functions
11041104
let patterns = &[
11051105
(
1106-
Regex::new(r"^\d{1,5}$").unwrap(),
1106+
Regex::new(r"^\d{4,5}$").unwrap(),
11071107
parse_str_to_year_timestamp as fn(&str, &T) -> SparkResult<Option<i64>>,
11081108
),
11091109
(
1110-
Regex::new(r"^\d{1,5}-\d{2}$").unwrap(),
1110+
Regex::new(r"^\d{4,5}-\d{2}$").unwrap(),
11111111
parse_str_to_month_timestamp,
11121112
),
11131113
(
1114-
Regex::new(r"^\d{1,5}-\d{2}-\d{2}$").unwrap(),
1114+
Regex::new(r"^\d{4,5}-\d{2}-\d{2}$").unwrap(),
11151115
parse_str_to_day_timestamp,
11161116
),
11171117
(
1118-
Regex::new(r"^\d{1,5}-\d{2}-\d{2}T\d{1,2}$").unwrap(),
1118+
Regex::new(r"^\d{4,5}-\d{2}-\d{2}T\d{1,2}$").unwrap(),
11191119
parse_str_to_hour_timestamp,
11201120
),
11211121
(
1122-
Regex::new(r"^\d{1,5}-\d{2}-\d{2}T\d{2}:\d{2}$").unwrap(),
1122+
Regex::new(r"^\d{4,5}-\d{2}-\d{2}T\d{2}:\d{2}$").unwrap(),
11231123
parse_str_to_minute_timestamp,
11241124
),
11251125
(
1126-
Regex::new(r"^\d{1,5}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}$").unwrap(),
1126+
Regex::new(r"^\d{4,5}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}$").unwrap(),
11271127
parse_str_to_second_timestamp,
11281128
),
11291129
(
1130-
Regex::new(r"^\d{1,5}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{1,6}$").unwrap(),
1130+
Regex::new(r"^\d{4,5}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{1,6}$").unwrap(),
11311131
parse_str_to_microsecond_timestamp,
11321132
),
11331133
(
@@ -1212,9 +1212,9 @@ fn date_parser(date_str: &str, eval_mode: EvalMode) -> SparkResult<Option<i32>>
12121212
fn is_valid_digits(segment: i32, digits: usize) -> bool {
12131213
// An integer is able to represent a date within [+-]5 million years.
12141214
let max_digits_year = 7;
1215-
// year (segment 0) can be between 1 to 7 digits (Spark supports years like 100),
1215+
// year (segment 0) can be between 4 and 7 digits,
12161216
// month and day (segments 1 and 2) can be between 1 and 2 digits
1217-
(segment == 0 && digits >= 1 && digits <= max_digits_year)
1217+
(segment == 0 && digits >= 4 && digits <= max_digits_year)
12181218
|| (segment != 0 && digits > 0 && digits <= 2)
12191219
}
12201220

0 commit comments

Comments
 (0)