@@ -31,7 +31,7 @@ use num::{CheckedSub, Integer};
3131use regex:: Regex ;
3232use std:: num:: Wrapping ;
3333use std:: str:: FromStr ;
34- use std:: sync:: Arc ;
34+ use std:: sync:: { Arc , LazyLock } ;
3535
3636macro_rules! cast_utf8_to_timestamp {
3737 ( $array: expr, $eval_mode: expr, $array_type: ty, $cast_method: ident, $tz: expr) => { {
@@ -1096,6 +1096,20 @@ fn parse_str_to_microsecond_timestamp<T: TimeZone>(
10961096 get_timestamp_values ( value, "microsecond" , tz)
10971097}
10981098
// Pre-compiled, fully anchored (`^...$`) patterns used by `timestamp_parser` to decide
// which `parse_str_to_*_timestamp` function handles a given input string.
// Each pattern is wrapped in a `LazyLock`, so it is compiled once on first use rather
// than on every `timestamp_parser` call. The `unwrap()` can only panic if a literal
// pattern below is itself invalid.
// NOTE(review): in the `regex` crate `\d` is Unicode-aware by default, so non-ASCII
// decimal digits also satisfy these patterns; confirm the paired parsers reject them.

// Year only: 4 to 7 digits.
1099+ static RE_YEAR : LazyLock < Regex > = LazyLock :: new ( || Regex :: new ( r"^\d{4,7}$" ) . unwrap ( ) ) ;
// Year and a two-digit month.
1100+ static RE_MONTH : LazyLock < Regex > = LazyLock :: new ( || Regex :: new ( r"^\d{4,7}-\d{2}$" ) . unwrap ( ) ) ;
// Full date: year, two-digit month, two-digit day.
1101+ static RE_DAY : LazyLock < Regex > =
1102+ LazyLock :: new ( || Regex :: new ( r"^\d{4,7}-\d{2}-\d{2}$" ) . unwrap ( ) ) ;
// Date, a literal `T`, then a one- or two-digit hour. This is the only date+time
// pattern that accepts a single-digit hour; the finer-grained ones below require two.
1103+ static RE_HOUR : LazyLock < Regex > =
1104+ LazyLock :: new ( || Regex :: new ( r"^\d{4,7}-\d{2}-\d{2}T\d{1,2}$" ) . unwrap ( ) ) ;
// Date, `T`, two-digit hour and two-digit minute.
1105+ static RE_MINUTE : LazyLock < Regex > =
1106+ LazyLock :: new ( || Regex :: new ( r"^\d{4,7}-\d{2}-\d{2}T\d{2}:\d{2}$" ) . unwrap ( ) ) ;
// Date, `T`, two-digit hour, minute and second.
1107+ static RE_SECOND : LazyLock < Regex > =
1108+ LazyLock :: new ( || Regex :: new ( r"^\d{4,7}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}$" ) . unwrap ( ) ) ;
// As RE_SECOND, followed by `.` and 1 to 6 fractional-second digits.
1109+ static RE_MICROSECOND : LazyLock < Regex > =
1110+ LazyLock :: new ( || Regex :: new ( r"^\d{4,7}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{1,6}$" ) . unwrap ( ) ) ;
// Time-only form: a literal `T` followed by a one- or two-digit hour, with no date part.
1111+ static RE_TIME_ONLY : LazyLock < Regex > = LazyLock :: new ( || Regex :: new ( r"^T\d{1,2}$" ) . unwrap ( ) ) ;
1112+
10991113fn timestamp_parser < T : TimeZone > (
11001114 value : & str ,
11011115 eval_mode : EvalMode ,
@@ -1105,40 +1119,15 @@ fn timestamp_parser<T: TimeZone>(
11051119 if value. is_empty ( ) {
11061120 return Ok ( None ) ;
11071121 }
1108- // Define regex patterns and corresponding parsing functions
1109- let patterns = & [
1110- (
1111- Regex :: new ( r"^\d{4,7}$" ) . unwrap ( ) ,
1112- parse_str_to_year_timestamp as fn ( & str , & T ) -> SparkResult < Option < i64 > > ,
1113- ) ,
1114- (
1115- Regex :: new ( r"^\d{4,7}-\d{2}$" ) . unwrap ( ) ,
1116- parse_str_to_month_timestamp,
1117- ) ,
1118- (
1119- Regex :: new ( r"^\d{4,7}-\d{2}-\d{2}$" ) . unwrap ( ) ,
1120- parse_str_to_day_timestamp,
1121- ) ,
1122- (
1123- Regex :: new ( r"^\d{4,7}-\d{2}-\d{2}T\d{1,2}$" ) . unwrap ( ) ,
1124- parse_str_to_hour_timestamp,
1125- ) ,
1126- (
1127- Regex :: new ( r"^\d{4,7}-\d{2}-\d{2}T\d{2}:\d{2}$" ) . unwrap ( ) ,
1128- parse_str_to_minute_timestamp,
1129- ) ,
1130- (
1131- Regex :: new ( r"^\d{4,7}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}$" ) . unwrap ( ) ,
1132- parse_str_to_second_timestamp,
1133- ) ,
1134- (
1135- Regex :: new ( r"^\d{4,7}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{1,6}$" ) . unwrap ( ) ,
1136- parse_str_to_microsecond_timestamp,
1137- ) ,
1138- (
1139- Regex :: new ( r"^T\d{1,2}$" ) . unwrap ( ) ,
1140- parse_str_to_time_only_timestamp,
1141- ) ,
1122+ let patterns: & [ ( & Regex , fn ( & str , & T ) -> SparkResult < Option < i64 > > ) ] = & [
1123+ ( & RE_YEAR , parse_str_to_year_timestamp) ,
1124+ ( & RE_MONTH , parse_str_to_month_timestamp) ,
1125+ ( & RE_DAY , parse_str_to_day_timestamp) ,
1126+ ( & RE_HOUR , parse_str_to_hour_timestamp) ,
1127+ ( & RE_MINUTE , parse_str_to_minute_timestamp) ,
1128+ ( & RE_SECOND , parse_str_to_second_timestamp) ,
1129+ ( & RE_MICROSECOND , parse_str_to_microsecond_timestamp) ,
1130+ ( & RE_TIME_ONLY , parse_str_to_time_only_timestamp) ,
11421131 ] ;
11431132
11441133 let mut timestamp = None ;
0 commit comments