From 82fdb49c2b797378bc77470f4acfd0c302f029f0 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 11 Apr 2026 12:58:02 +0200 Subject: [PATCH] date: replace regex with hand-written format spec parser --- Cargo.lock | 1 - fuzz/Cargo.lock | 33 -- src/uu/date/Cargo.toml | 1 - src/uu/date/src/format_modifiers.rs | 471 +++++++++++++++++++++------- 4 files changed, 350 insertions(+), 156 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d181f4d914e..3e81bfb18f0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3451,7 +3451,6 @@ dependencies = [ "jiff-icu", "libc", "parse_datetime", - "regex", "rustix", "tempfile", "uucore", diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 40dad413f52..1b7f7d88980 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -8,15 +8,6 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" -[[package]] -name = "aho-corasick" -version = "1.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" -dependencies = [ - "memchr", -] - [[package]] name = "android_system_properties" version = "0.1.5" @@ -1553,34 +1544,11 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "regex" -version = "1.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", -] - [[package]] name = "regex-automata" version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-syntax" -version = "0.8.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" [[package]] name = "rust-ini" @@ -1960,7 +1928,6 @@ dependencies = [ "jiff-icu", "libc", "parse_datetime", - "regex", "rustix", "uucore", "windows-sys", diff --git a/src/uu/date/Cargo.toml b/src/uu/date/Cargo.toml index 840b8602f15..c82dbd4882b 100644 --- a/src/uu/date/Cargo.toml +++ b/src/uu/date/Cargo.toml @@ -40,7 +40,6 @@ jiff = { workspace = true, features = [ "tzdb-concatenated", ] } parse_datetime = { workspace = true } -regex = { workspace = true } uucore = { workspace = true, features = ["parser", "i18n-datetime"] } [target.'cfg(unix)'.dependencies] diff --git a/src/uu/date/src/format_modifiers.rs b/src/uu/date/src/format_modifiers.rs index 7cdf32b174c..fd269379a42 100644 --- a/src/uu/date/src/format_modifiers.rs +++ b/src/uu/date/src/format_modifiers.rs @@ -2,7 +2,7 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore strtime +// spell-checker:ignore strtime Yhello //! GNU date format modifier support //! @@ -35,9 +35,7 @@ use jiff::Zoned; use jiff::fmt::strtime::{BrokenDownTime, Config, PosixCustom}; -use regex::Regex; use std::fmt; -use std::sync::OnceLock; use uucore::translate; /// Error type for format modifier operations @@ -72,14 +70,68 @@ impl From for FormatError { } } -/// Regex to match format specifiers with optional modifiers -/// Pattern: % \[flags\] \[width\] specifier -/// Flags: -, _, 0, ^, #, + -/// Width: one or more digits -/// Specifier: any letter or special sequence like :z, ::z, :::z -fn format_spec_regex() -> &'static Regex { - static RE: OnceLock = OnceLock::new(); - RE.get_or_init(|| Regex::new(r"%([_0^#+-]*)(\d*)(:*[a-zA-Z])").unwrap()) +/// A parsed `%`-format specifier: `%[flags][width][:colons]`. +struct ParsedSpec<'a> { + /// Flag characters from `[_0^#+-]`. + flags: &'a str, + /// Explicit width, if present. `None` means no width was specified. + /// A value that overflows `usize` is represented as `Some(usize::MAX)` so + /// the downstream allocation check surfaces it as `FieldWidthTooLarge`. + width: Option, + /// The specifier itself, including any leading colons (e.g. `Y`, `:z`, `::z`). + spec: &'a str, + /// Total byte length of the parsed sequence including the leading `%`. + len: usize, +} + +/// Try to parse a format spec at the start of `s`. +/// +/// Implements the grammar `%[_0^#+-]*[0-9]*:*[a-zA-Z]` anchored at the +/// beginning of `s`. Returns `None` if `s` does not start with `%` or no +/// valid specifier follows. +fn parse_format_spec(s: &str) -> Option> { + let bytes = s.as_bytes(); + if bytes.first() != Some(&b'%') { + return None; + } + + let mut pos = 1; + + // Flags: any of [_0^#+-], zero or more. + let flags_start = pos; + while pos < bytes.len() && matches!(bytes[pos], b'_' | b'0' | b'^' | b'#' | b'+' | b'-') { + pos += 1; + } + let flags = &s[flags_start..pos]; + + // Width: zero or more ASCII digits. + let width_start = pos; + while pos < bytes.len() && bytes[pos].is_ascii_digit() { + pos += 1; + } + let width = if pos > width_start { + Some(s[width_start..pos].parse::().unwrap_or(usize::MAX)) + } else { + None + }; + + // Specifier: zero or more `:` followed by a single ASCII letter. + let spec_start = pos; + while pos < bytes.len() && bytes[pos] == b':' { + pos += 1; + } + if pos >= bytes.len() || !bytes[pos].is_ascii_alphabetic() { + return None; + } + pos += 1; + let spec = &s[spec_start..pos]; + + Some(ParsedSpec { + flags, + width, + spec, + len: pos, + }) } /// Check if format string contains any GNU modifiers and format if present. @@ -93,16 +145,7 @@ pub fn format_with_modifiers_if_present( format_string: &str, config: &Config, ) -> Option> { - let re = format_spec_regex(); - - // Quick check: does the string contain any modifiers? - let has_modifiers = re.captures_iter(format_string).any(|cap| { - let flags = cap.get(1).map_or("", |m| m.as_str()); - let width_str = cap.get(2).map_or("", |m| m.as_str()); - !flags.is_empty() || !width_str.is_empty() - }); - - if !has_modifiers { + if !has_gnu_modifiers(format_string) { return None; } @@ -110,6 +153,35 @@ pub fn format_with_modifiers_if_present( Some(format_with_modifiers(date, format_string, config)) } +/// Quick check: does the format string contain any GNU modifier +/// (a flag or width) on a `%`-spec, ignoring `%%` literals? +/// +/// Note that colon-prefixed specifiers without flags or width (e.g. `%:z`, +/// `%::z`) are deliberately *not* considered modifiers: jiff's strftime can +/// format them directly, so the caller can take the standard fast path. +fn has_gnu_modifiers(format_string: &str) -> bool { + let bytes = format_string.as_bytes(); + let mut i = 0; + while i < bytes.len() { + if bytes[i] == b'%' { + // Skip %% literal + if bytes.get(i + 1) == Some(&b'%') { + i += 2; + continue; + } + if let Some(parsed) = parse_format_spec(&format_string[i..]) { + if !parsed.flags.is_empty() || parsed.width.is_some() { + return true; + } + i += parsed.len; + continue; + } + } + i += 1; + } + false +} + /// Process a format string with GNU modifiers. /// /// # Arguments @@ -127,49 +199,49 @@ fn format_with_modifiers( format_string: &str, config: &Config, ) -> Result { - // First, replace %% with a placeholder to avoid matching it - let placeholder = "\x00PERCENT\x00"; - let temp_format = format_string.replace("%%", placeholder); - - let re = format_spec_regex(); let mut result = String::new(); - let mut last_end = 0; - let broken_down = BrokenDownTime::from(date); - for cap in re.captures_iter(&temp_format) { - let whole_match = cap.get(0).unwrap(); - let flags = cap.get(1).map_or("", |m| m.as_str()); - let width_str = cap.get(2).map_or("", |m| m.as_str()); - let spec = cap.get(3).unwrap().as_str(); - - // Add text before this match - result.push_str(&temp_format[last_end..whole_match.start()]); - - // Format the base specifier first - let base_format = format!("%{spec}"); - let formatted = broken_down.to_string_with_config(config, &base_format)?; - - // Check if this specifier has modifiers - if !flags.is_empty() || !width_str.is_empty() { - // Apply modifiers to the formatted value - let width: usize = width_str.parse().unwrap_or(0); - let explicit_width = !width_str.is_empty(); - let modified = apply_modifiers(&formatted, flags, width, spec, explicit_width)?; - result.push_str(&modified); - } else { - // No modifiers, use formatted value as-is - result.push_str(&formatted); - } + // Reused across iterations to avoid allocating a fresh `String` per spec. + // Holds the leading `%` plus the specifier itself (e.g. `%Y`, `%::z`), + // which is at most a handful of bytes. + let mut base_format = String::with_capacity(8); + + let bytes = format_string.as_bytes(); + let mut i = 0; + while i < bytes.len() { + if bytes[i] == b'%' { + // Handle %% literal: emit a single '%' and continue. + if bytes.get(i + 1) == Some(&b'%') { + result.push('%'); + i += 2; + continue; + } - last_end = whole_match.end(); - } + if let Some(parsed) = parse_format_spec(&format_string[i..]) { + // Format the base specifier first, reusing `base_format`. + base_format.clear(); + base_format.push('%'); + base_format.push_str(parsed.spec); + let formatted = broken_down.to_string_with_config(config, &base_format)?; + + if !parsed.flags.is_empty() || parsed.width.is_some() { + let modified = apply_modifiers(&formatted, &parsed)?; + result.push_str(&modified); + } else { + result.push_str(&formatted); + } - // Add remaining text - result.push_str(&temp_format[last_end..]); + i += parsed.len; + continue; + } + } - // Restore %% by converting placeholder to % - let result = result.replace(placeholder, "%"); + // Pass-through: copy a single UTF-8 code point unchanged. + let ch_len = format_string[i..].chars().next().map_or(1, char::len_utf8); + result.push_str(&format_string[i..i + ch_len]); + i += ch_len; + } Ok(result) } @@ -261,20 +333,14 @@ fn strip_default_padding(value: &str) -> String { /// Apply width and flag modifiers to a formatted value. /// -/// The `specifier` parameter is the format specifier (e.g., "d", "B", "Y") -/// which determines the default padding character (space for text, zero for numeric). +/// The specifier inside `parsed` (e.g., "d", "B", "Y") determines the default +/// padding character (space for text, zero for numeric). /// Flags are processed in order so that when conflicting flags appear, /// the last one takes precedence (e.g., `_+` means `+` wins for padding). -/// -/// The `explicit_width` parameter indicates whether a width was explicitly -/// specified in the format string (true) or if width is 0 (false). -fn apply_modifiers( - value: &str, - flags: &str, - width: usize, - specifier: &str, - explicit_width: bool, -) -> Result { +fn apply_modifiers(value: &str, parsed: &ParsedSpec<'_>) -> Result { + let flags = parsed.flags; + let width = parsed.width; + let specifier = parsed.spec; let mut result = value.to_string(); // Determine default pad character based on specifier type @@ -350,13 +416,13 @@ fn apply_modifiers( return Ok(strip_default_padding(&result)); } - // Handle padding flag without explicit width: use default width + // Handle padding flag without explicit width: use default width. // This applies when _ or 0 flag overrides the default padding character - // and no explicit width is specified (e.g., %_m, %0e) - let effective_width = if !explicit_width && (underscore_flag || pad_char != default_pad) { - get_default_width(specifier) - } else { - width + // and no explicit width is specified (e.g., %_m, %0e). + let effective_width = match width { + Some(w) => w, + None if underscore_flag || pad_char != default_pad => get_default_width(specifier), + None => 0, }; // When the requested width is narrower than the default formatted width, GNU first removes default padding and then reapplies the requested width. @@ -383,7 +449,7 @@ fn apply_modifiers( if result.chars().next().is_some_and(|c| c.is_ascii_digit()) { let default_w = get_default_width(specifier); // Add sign only if explicit width provided OR result exceeds default width - if explicit_width || (default_w > 0 && result.len() > default_w) { + if width.is_some() || (default_w > 0 && result.len() > default_w) { result.insert(0, '+'); } } @@ -455,6 +521,17 @@ mod tests { Config::new().custom(PosixCustom::new()).lenient(true) } + /// Build a `ParsedSpec` for unit-testing `apply_modifiers` without a real + /// format string. `len` is set to 0 because these tests never use it. + fn spec<'a>(flags: &'a str, width: Option, spec: &'a str) -> ParsedSpec<'a> { + ParsedSpec { + flags, + width, + spec, + len: 0, + } + } + #[test] fn test_width_and_padding_modifiers() { let date = make_test_date(1999, 6, 1, 0); @@ -616,47 +693,77 @@ mod tests { #[test] fn test_apply_modifiers_basic() { // No modifiers (numeric specifier) - assert_eq!(apply_modifiers("1999", "", 0, "Y", false).unwrap(), "1999"); + assert_eq!( + apply_modifiers("1999", &spec("", None, "Y")).unwrap(), + "1999" + ); // Zero padding assert_eq!( - apply_modifiers("1999", "0", 10, "Y", true).unwrap(), + apply_modifiers("1999", &spec("0", Some(10), "Y")).unwrap(), "0000001999" ); // Space padding (strips leading zeros) - assert_eq!(apply_modifiers("06", "_", 5, "m", true).unwrap(), " 6"); + assert_eq!( + apply_modifiers("06", &spec("_", Some(5), "m")).unwrap(), + " 6" + ); // No-pad (strips leading zeros, width ignored) - assert_eq!(apply_modifiers("01", "-", 5, "d", true).unwrap(), "1"); + assert_eq!( + apply_modifiers("01", &spec("-", Some(5), "d")).unwrap(), + "1" + ); // Uppercase - assert_eq!(apply_modifiers("june", "^", 0, "B", false).unwrap(), "JUNE"); + assert_eq!( + apply_modifiers("june", &spec("^", None, "B")).unwrap(), + "JUNE" + ); // Swap case: all uppercase → lowercase - assert_eq!(apply_modifiers("UTC", "#", 0, "Z", false).unwrap(), "utc"); + assert_eq!( + apply_modifiers("UTC", &spec("#", None, "Z")).unwrap(), + "utc" + ); // Swap case: mixed case → uppercase - assert_eq!(apply_modifiers("June", "#", 0, "B", false).unwrap(), "JUNE"); + assert_eq!( + apply_modifiers("June", &spec("#", None, "B")).unwrap(), + "JUNE" + ); } #[test] fn test_apply_modifiers_signs() { // Force sign with explicit width assert_eq!( - apply_modifiers("1970", "+", 6, "Y", true).unwrap(), + apply_modifiers("1970", &spec("+", Some(6), "Y")).unwrap(), "+01970" ); // Force sign without explicit width: should NOT add sign for 4-digit year - assert_eq!(apply_modifiers("1999", "+", 0, "Y", false).unwrap(), "1999"); + assert_eq!( + apply_modifiers("1999", &spec("+", None, "Y")).unwrap(), + "1999" + ); // Force sign without explicit width: SHOULD add sign for year > 4 digits assert_eq!( - apply_modifiers("12345", "+", 0, "Y", false).unwrap(), + apply_modifiers("12345", &spec("+", None, "Y")).unwrap(), "+12345" ); // Negative with zero padding: sign first, then zeros - assert_eq!(apply_modifiers("-22", "0", 5, "s", true).unwrap(), "-0022"); + assert_eq!( + apply_modifiers("-22", &spec("0", Some(5), "s")).unwrap(), + "-0022" + ); // Negative with space padding: spaces first, then sign - assert_eq!(apply_modifiers("-22", "_", 5, "s", true).unwrap(), " -22"); + assert_eq!( + apply_modifiers("-22", &spec("_", Some(5), "s")).unwrap(), + " -22" + ); // Force sign (_+): + is last, overrides _ → zero pad with sign - assert_eq!(apply_modifiers("5", "_+", 5, "s", true).unwrap(), "+0005"); + assert_eq!( + apply_modifiers("5", &spec("_+", Some(5), "s")).unwrap(), + "+0005" + ); // No-pad + uppercase: no padding applied assert_eq!( - apply_modifiers("june", "-^", 10, "B", true).unwrap(), + apply_modifiers("june", &spec("-^", Some(10), "B")).unwrap(), "JUNE" ); } @@ -665,32 +772,38 @@ mod tests { fn test_case_flag_precedence() { // Test that ^ (uppercase) overrides # (swap case) assert_eq!( - apply_modifiers("June", "^#", 0, "B", false).unwrap(), + apply_modifiers("June", &spec("^#", None, "B")).unwrap(), "JUNE" ); assert_eq!( - apply_modifiers("June", "#^", 0, "B", false).unwrap(), + apply_modifiers("June", &spec("#^", None, "B")).unwrap(), "JUNE" ); // Test # alone (swap case) - assert_eq!(apply_modifiers("June", "#", 0, "B", false).unwrap(), "JUNE"); - assert_eq!(apply_modifiers("JUNE", "#", 0, "B", false).unwrap(), "june"); + assert_eq!( + apply_modifiers("June", &spec("#", None, "B")).unwrap(), + "JUNE" + ); + assert_eq!( + apply_modifiers("JUNE", &spec("#", None, "B")).unwrap(), + "june" + ); } #[test] fn test_apply_modifiers_text_specifiers() { // Text specifiers default to space padding assert_eq!( - apply_modifiers("June", "", 10, "B", true).unwrap(), + apply_modifiers("June", &spec("", Some(10), "B")).unwrap(), " June" ); assert_eq!( - apply_modifiers("Mon", "", 10, "a", true).unwrap(), + apply_modifiers("Mon", &spec("", Some(10), "a")).unwrap(), " Mon" ); // Numeric specifiers default to zero padding assert_eq!( - apply_modifiers("6", "", 10, "m", true).unwrap(), + apply_modifiers("6", &spec("", Some(10), "m")).unwrap(), "0000000006" ); } @@ -698,37 +811,38 @@ mod tests { #[test] fn test_apply_modifiers_width_smaller_than_result() { // Width smaller than result strips default padding - assert_eq!(apply_modifiers("01", "", 1, "d", true).unwrap(), "1"); - assert_eq!(apply_modifiers("06", "", 1, "m", true).unwrap(), "6"); + assert_eq!(apply_modifiers("01", &spec("", Some(1), "d")).unwrap(), "1"); + assert_eq!(apply_modifiers("06", &spec("", Some(1), "m")).unwrap(), "6"); } #[test] fn test_apply_modifiers_parametrized() { let test_cases = vec![ - ("1", "0", 3, "Y", true, "001"), - ("1", "_", 3, "d", true, " 1"), - ("1", "-", 3, "d", true, "1"), // no-pad: width ignored - ("abc", "^", 5, "B", true, " ABC"), // text specifier: space pad - ("5", "+", 4, "s", true, "+005"), - ("5", "_+", 4, "s", true, "+005"), // + is last: zero pad with sign - ("-3", "0", 5, "s", true, "-0003"), - ("05", "_", 3, "d", true, " 5"), - ("09", "-", 4, "d", true, "9"), // no-pad: width ignored - ("1970", "_+", 6, "Y", true, "+01970"), // + is last: zero pad with sign + ("1", "0", Some(3), "Y", "001"), + ("1", "_", Some(3), "d", " 1"), + ("1", "-", Some(3), "d", "1"), // no-pad: width ignored + ("abc", "^", Some(5), "B", " ABC"), // text specifier: space pad + ("5", "+", Some(4), "s", "+005"), + ("5", "_+", Some(4), "s", "+005"), // + is last: zero pad with sign + ("-3", "0", Some(5), "s", "-0003"), + ("05", "_", Some(3), "d", " 5"), + ("09", "-", Some(4), "d", "9"), // no-pad: width ignored + ("1970", "_+", Some(6), "Y", "+01970"), // + is last: zero pad with sign ]; - for (value, flags, width, spec, explicit_width, expected) in test_cases { + for (value, flags, width, s, expected) in test_cases { + let p = spec(flags, width, s); assert_eq!( - apply_modifiers(value, flags, width, spec, explicit_width).unwrap(), + apply_modifiers(value, &p).unwrap(), expected, - "value='{value}', flags='{flags}', width={width}, spec='{spec}', explicit_width={explicit_width}", + "value='{value}', flags='{flags}', width={width:?}, spec='{s}'", ); } } #[test] fn test_apply_modifiers_width_too_large() { - let err = apply_modifiers("x", "", usize::MAX, "c", true).unwrap_err(); + let err = apply_modifiers("x", &spec("", Some(usize::MAX), "c")).unwrap_err(); assert!(matches!( err, FormatError::FieldWidthTooLarge { width, specifier } @@ -736,31 +850,54 @@ mod tests { )); } + #[test] + fn test_format_with_modifiers_width_overflows_usize() { + // A width literal that overflows `usize` must surface as + // `FieldWidthTooLarge` (via the downstream allocation check), + // not silently fall back to width 0. + let date = make_test_date(1999, 6, 1, 0); + let config = get_config(); + let huge = "9".repeat(40); + let format = format!("%{huge}Y"); + let err = format_with_modifiers(&date, &format, &config).unwrap_err(); + assert!(matches!( + err, + FormatError::FieldWidthTooLarge { width, specifier } + if width == usize::MAX && specifier == "Y" + )); + } + #[test] fn test_underscore_flag_without_width() { // %_m should pad month to default width 2 with spaces - assert_eq!(apply_modifiers("6", "_", 0, "m", false).unwrap(), " 6"); + assert_eq!(apply_modifiers("6", &spec("_", None, "m")).unwrap(), " 6"); // %_d should pad day to default width 2 with spaces - assert_eq!(apply_modifiers("1", "_", 0, "d", false).unwrap(), " 1"); + assert_eq!(apply_modifiers("1", &spec("_", None, "d")).unwrap(), " 1"); // %_H should pad hour to default width 2 with spaces - assert_eq!(apply_modifiers("5", "_", 0, "H", false).unwrap(), " 5"); + assert_eq!(apply_modifiers("5", &spec("_", None, "H")).unwrap(), " 5"); // %_Y should pad year to default width 4 with spaces - assert_eq!(apply_modifiers("1999", "_", 0, "Y", false).unwrap(), "1999"); + assert_eq!( + apply_modifiers("1999", &spec("_", None, "Y")).unwrap(), + "1999" + ); // already at default width } #[test] fn test_plus_flag_without_width() { // %+Y without width should NOT add sign for 4-digit year - assert_eq!(apply_modifiers("1999", "+", 0, "Y", false).unwrap(), "1999"); + assert_eq!( + apply_modifiers("1999", &spec("+", None, "Y")).unwrap(), + "1999" + ); // %+Y without width SHOULD add sign for year > 4 digits assert_eq!( - apply_modifiers("12345", "+", 0, "Y", false).unwrap(), + apply_modifiers("12345", &spec("+", None, "Y")).unwrap(), "+12345" ); // %+Y with explicit width should add sign assert_eq!( - apply_modifiers("1999", "+", 6, "Y", true).unwrap(), + apply_modifiers("1999", &spec("+", Some(6), "Y")).unwrap(), "+01999" ); } @@ -796,4 +933,96 @@ mod tests { "GNU: %_C should produce '19', not ' 19' (default width is 2, not 4)" ); } + + #[test] + fn test_parse_format_spec() { + // (input, expected: Some((flags, width, spec, len)) or None) + type ParsedTuple = (&'static str, Option, &'static str, usize); + let cases: &[(&str, Option)] = &[ + // ---- plain single-letter specifiers ---- + ("%Y", Some(("", None, "Y", 2))), + ("%a", Some(("", None, "a", 2))), + ("%B", Some(("", None, "B", 2))), + // ---- single flag, no width ---- + ("%-d", Some(("-", None, "d", 3))), + ("%_m", Some(("_", None, "m", 3))), + ("%0e", Some(("0", None, "e", 3))), + ("%^B", Some(("^", None, "B", 3))), + ("%#Z", Some(("#", None, "Z", 3))), + ("%+Y", Some(("+", None, "Y", 3))), + // ---- combined flags ---- + ("%_+Y", Some(("_+", None, "Y", 4))), + ("%-^B", Some(("-^", None, "B", 4))), + // ---- width only ---- + ("%10Y", Some(("", Some(10), "Y", 4))), + ("%4C", Some(("", Some(4), "C", 3))), + // `0` is a flag, then `5` is the width. + ("%05d", Some(("0", Some(5), "d", 4))), + // ---- flags + width ---- + ("%_10m", Some(("_", Some(10), "m", 5))), + ("%+6Y", Some(("+", Some(6), "Y", 4))), + ("%-5d", Some(("-", Some(5), "d", 4))), + ("%+4C", Some(("+", Some(4), "C", 4))), + // ---- colon-prefixed specifiers (numeric timezones) ---- + ("%:z", Some(("", None, ":z", 3))), + ("%::z", Some(("", None, "::z", 4))), + ("%:::z", Some(("", None, ":::z", 5))), + ("%-3:z", Some(("-", Some(3), ":z", 5))), + // ---- only the spec is consumed; trailing text is ignored ---- + ("%Y-%m-%d", Some(("", None, "Y", 2))), + ("%10Yhello", Some(("", Some(10), "Y", 4))), + // ---- invalid: should return None ---- + ("Y", None), + ("", None), + ("%", None), + ("%-", None), + ("%10", None), + ("%_+", None), + ("%:", None), + ("%::", None), + ("%%", None), // %% is not a spec — caller handles it. + ("%é", None), // non-ASCII letter + ]; + + for (input, expected) in cases { + let actual = parse_format_spec(input).map(|p| (p.flags, p.width, p.spec, p.len)); + assert_eq!(actual, *expected, "input = {input:?}"); + } + } + + #[test] + fn test_has_gnu_modifiers() { + // (input, expected) + let cases: &[(&str, bool)] = &[ + // ---- modifier present (flag and/or width) ---- + ("%10Y", true), + ("%^B", true), + ("%-d", true), + ("%_m", true), + ("%+Y", true), + ("today is %-d of %B", true), + ("%5:z", true), + // %% mixed with a real modifier is still detected. + ("%%%-d", true), + ("%%%10Y", true), + // ---- no modifier: plain specs only ---- + ("%Y-%m-%d", false), + ("%H:%M:%S", false), + ("%a %b %e %T %Z %Y", false), + ("", false), + ("no percent here", false), + // ---- %% literals must never count as modifiers ---- + ("%%", false), + ("100%% done", false), + ("%%Y", false), + ("%%10", false), + // ---- colon specs without flags/width are not modifiers ---- + ("%:z", false), + ("%::z", false), + ]; + + for (input, expected) in cases { + assert_eq!(has_gnu_modifiers(input), *expected, "input = {input:?}"); + } + } }