Skip to content

Commit ed0646d

Browse files
committed
Compile regexps only once
1 parent ff02516 commit ed0646d

1 file changed

Lines changed: 24 additions & 35 deletions

File tree

  • native/spark-expr/src/conversion_funcs

native/spark-expr/src/conversion_funcs/string.rs

Lines changed: 24 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ use num::{CheckedSub, Integer};
3131
use regex::Regex;
3232
use std::num::Wrapping;
3333
use std::str::FromStr;
34-
use std::sync::Arc;
34+
use std::sync::{Arc, LazyLock};
3535

3636
macro_rules! cast_utf8_to_timestamp {
3737
($array:expr, $eval_mode:expr, $array_type:ty, $cast_method:ident, $tz:expr) => {{
@@ -1096,6 +1096,20 @@ fn parse_str_to_microsecond_timestamp<T: TimeZone>(
10961096
get_timestamp_values(value, "microsecond", tz)
10971097
}
10981098

1099+
/// Pattern for a year-only value: the entire string must be 4 to 7 `\d` digits.
/// Compiled lazily on first access and reused afterwards; `timestamp_parser`
/// pairs it with `parse_str_to_year_timestamp`.
static RE_YEAR: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\d{4,7}$").unwrap());
1100+
/// Pattern for a year-month value: 4 to 7 digits, `-`, then exactly two digits,
/// anchored to the whole string. Compiled lazily on first access;
/// `timestamp_parser` pairs it with `parse_str_to_month_timestamp`.
static RE_MONTH: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\d{4,7}-\d{2}$").unwrap());
1101+
/// Pattern for a full date: 4 to 7 digit year, two-digit month, two-digit day,
/// separated by `-` and anchored to the whole string. Compiled lazily on first
/// access; `timestamp_parser` pairs it with `parse_str_to_day_timestamp`.
static RE_DAY: LazyLock<Regex> =
1102+
LazyLock::new(|| Regex::new(r"^\d{4,7}-\d{2}-\d{2}$").unwrap());
1103+
/// Pattern for a date followed by `T` and an hour of one or two digits,
/// anchored to the whole string. Compiled lazily on first access;
/// `timestamp_parser` pairs it with `parse_str_to_hour_timestamp`.
static RE_HOUR: LazyLock<Regex> =
1104+
LazyLock::new(|| Regex::new(r"^\d{4,7}-\d{2}-\d{2}T\d{1,2}$").unwrap());
1105+
/// Pattern for a date followed by `T` and `HH:MM` (both exactly two digits),
/// anchored to the whole string. Compiled lazily on first access;
/// `timestamp_parser` pairs it with `parse_str_to_minute_timestamp`.
static RE_MINUTE: LazyLock<Regex> =
1106+
LazyLock::new(|| Regex::new(r"^\d{4,7}-\d{2}-\d{2}T\d{2}:\d{2}$").unwrap());
1107+
/// Pattern for a date followed by `T` and `HH:MM:SS` (each exactly two digits),
/// anchored to the whole string. Compiled lazily on first access;
/// `timestamp_parser` pairs it with `parse_str_to_second_timestamp`.
static RE_SECOND: LazyLock<Regex> =
1108+
LazyLock::new(|| Regex::new(r"^\d{4,7}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}$").unwrap());
1109+
/// Pattern for a date followed by `T`, `HH:MM:SS`, a literal `.`, and a
/// fractional part of 1 to 6 digits, anchored to the whole string. Compiled
/// lazily on first access; `timestamp_parser` pairs it with
/// `parse_str_to_microsecond_timestamp`.
static RE_MICROSECOND: LazyLock<Regex> =
1110+
LazyLock::new(|| Regex::new(r"^\d{4,7}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{1,6}$").unwrap());
1111+
/// Pattern for a time-only value: a literal `T` followed by one or two digits,
/// anchored to the whole string. Compiled lazily on first access;
/// `timestamp_parser` pairs it with `parse_str_to_time_only_timestamp`.
static RE_TIME_ONLY: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^T\d{1,2}$").unwrap());
1112+
10991113
fn timestamp_parser<T: TimeZone>(
11001114
value: &str,
11011115
eval_mode: EvalMode,
@@ -1105,40 +1119,15 @@ fn timestamp_parser<T: TimeZone>(
11051119
if value.is_empty() {
11061120
return Ok(None);
11071121
}
1108-
// Define regex patterns and corresponding parsing functions
1109-
let patterns = &[
1110-
(
1111-
Regex::new(r"^\d{4,7}$").unwrap(),
1112-
parse_str_to_year_timestamp as fn(&str, &T) -> SparkResult<Option<i64>>,
1113-
),
1114-
(
1115-
Regex::new(r"^\d{4,7}-\d{2}$").unwrap(),
1116-
parse_str_to_month_timestamp,
1117-
),
1118-
(
1119-
Regex::new(r"^\d{4,7}-\d{2}-\d{2}$").unwrap(),
1120-
parse_str_to_day_timestamp,
1121-
),
1122-
(
1123-
Regex::new(r"^\d{4,7}-\d{2}-\d{2}T\d{1,2}$").unwrap(),
1124-
parse_str_to_hour_timestamp,
1125-
),
1126-
(
1127-
Regex::new(r"^\d{4,7}-\d{2}-\d{2}T\d{2}:\d{2}$").unwrap(),
1128-
parse_str_to_minute_timestamp,
1129-
),
1130-
(
1131-
Regex::new(r"^\d{4,7}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}$").unwrap(),
1132-
parse_str_to_second_timestamp,
1133-
),
1134-
(
1135-
Regex::new(r"^\d{4,7}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{1,6}$").unwrap(),
1136-
parse_str_to_microsecond_timestamp,
1137-
),
1138-
(
1139-
Regex::new(r"^T\d{1,2}$").unwrap(),
1140-
parse_str_to_time_only_timestamp,
1141-
),
1122+
let patterns: &[(&Regex, fn(&str, &T) -> SparkResult<Option<i64>>)] = &[
1123+
(&RE_YEAR, parse_str_to_year_timestamp),
1124+
(&RE_MONTH, parse_str_to_month_timestamp),
1125+
(&RE_DAY, parse_str_to_day_timestamp),
1126+
(&RE_HOUR, parse_str_to_hour_timestamp),
1127+
(&RE_MINUTE, parse_str_to_minute_timestamp),
1128+
(&RE_SECOND, parse_str_to_second_timestamp),
1129+
(&RE_MICROSECOND, parse_str_to_microsecond_timestamp),
1130+
(&RE_TIME_ONLY, parse_str_to_time_only_timestamp),
11421131
];
11431132

11441133
let mut timestamp = None;

0 commit comments

Comments
 (0)