|
3 | 3 | // For the full copyright and license information, please view the LICENSE |
4 | 4 | // file that was distributed with this source code. |
5 | 5 |
|
6 | | -// spell-checker:ignore strtime ; (format) DATEFILE MMDDhhmm ; (vars) datetime datetimes getres AWST ACST AEST |
| 6 | +// spell-checker:ignore strtime ; (format) DATEFILE MMDDhhmm ; (vars) datetime datetimes getres AWST ACST AEST foobarbaz |
7 | 7 |
|
8 | 8 | mod locale; |
9 | 9 |
|
10 | 10 | use clap::{Arg, ArgAction, Command}; |
11 | 11 | use jiff::fmt::strtime::{self, BrokenDownTime, Config, PosixCustom}; |
12 | 12 | use jiff::tz::{TimeZone, TimeZoneDatabase}; |
13 | 13 | use jiff::{Timestamp, Zoned}; |
| 14 | +use std::borrow::Cow; |
14 | 15 | use std::collections::HashMap; |
15 | 16 | use std::fs::File; |
16 | 17 | use std::io::{BufRead, BufReader, BufWriter, Write}; |
@@ -130,6 +131,42 @@ enum DayDelta { |
130 | 131 | Next, |
131 | 132 | } |
132 | 133 |
|
/// Remove parenthesized comments from a date string.
///
/// Mirrors GNU date, which treats balanced parentheses (including nested ones)
/// and everything inside them as a comment. When a '(' is never closed, the
/// rest of the input from that '(' onwards is discarded as well. A ')' that
/// appears outside of any comment is kept as-is.
///
/// Returns the input borrowed unchanged when it contains no '(' at all;
/// otherwise returns a newly allocated string with the comments removed.
///
/// Examples:
/// - "2026(comment)-01-05" -> "2026-01-05"
/// - "1(ignore comment to eol" -> "1"
/// - "(" -> ""
/// - "((foo)2026-01-05)" -> ""
fn strip_parenthesized_comments(input: &str) -> Cow<'_, str> {
    // Nothing to strip: hand the caller back its own slice without allocating.
    let first_open = match input.find('(') {
        Some(idx) => idx,
        None => return Cow::Borrowed(input),
    };

    // Everything before the first '(' is outside any comment, so copy it verbatim.
    let (prefix, rest) = input.split_at(first_open);
    let mut kept = String::with_capacity(input.len());
    kept.push_str(prefix);

    // Number of currently open, not yet closed, parentheses.
    let mut nesting = 0;
    for ch in rest.chars() {
        if ch == '(' {
            nesting += 1;
        } else if nesting == 0 {
            // Outside of any comment (this includes a stray ')').
            kept.push(ch);
        } else if ch == ')' {
            // Closes the innermost open comment.
            nesting -= 1;
        }
        // Any other character inside a comment is dropped.
    }

    Cow::Owned(kept)
}
| 169 | + |
133 | 170 | /// Parse military timezone with optional hour offset. |
134 | 171 | /// Pattern: single letter (a-z except j) optionally followed by 1-2 digits. |
135 | 172 | /// Returns Some(total_hours_in_utc) or None if pattern doesn't match. |
@@ -286,7 +323,10 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { |
286 | 323 | // Iterate over all dates - whether it's a single date or a file. |
287 | 324 | let dates: Box<dyn Iterator<Item = _>> = match settings.date_source { |
288 | 325 | DateSource::Human(ref input) => { |
| 326 | + // GNU compatibility (Comments in parentheses) |
| 327 | + let input = strip_parenthesized_comments(input); |
289 | 328 | let input = input.trim(); |
| 329 | + |
290 | 330 | // GNU compatibility (Empty string): |
291 | 331 | // An empty string (or whitespace-only) should be treated as midnight today. |
292 | 332 | let is_empty_or_whitespace = input.is_empty(); |
@@ -887,4 +927,38 @@ mod tests { |
887 | 927 | assert_eq!(parse_military_timezone_with_offset("m999"), None); // Too long |
888 | 928 | assert_eq!(parse_military_timezone_with_offset("9m"), None); // Starts with digit |
889 | 929 | } |
| 930 | + |
#[test]
fn test_strip_parenthesized_comments() {
    // Each entry is (input, expected output after comment stripping).
    let cases = [
        // No parentheses: input comes back unchanged.
        ("hello", "hello"),
        ("2026-01-05", "2026-01-05"),
        // An unmatched '(' discards everything from that point onwards.
        ("(", ""),
        ("1(comment", "1"),
        ("2026-01-05(this is a comment", "2026-01-05"),
        // Balanced comments are removed; text on both sides is joined.
        ("2026(comment)-01-05", "2026-01-05"),
        ("()", ""),
        ("((foo)2026-01-05)", ""),
        // One or more balanced groups mixed with ordinary text.
        ("a(b)c", "ac"),
        ("a(b)c(d)e", "ace"),
        ("(a)(b)", ""),
        // "(b)" is balanced and removed; the trailing "(d" is never closed,
        // so it and everything after it is dropped.
        ("a(b)c(d", "ac"),
        // Nested, balanced.
        ("a(b(c)d)e", "ae"),
        // Nested, with the outer '(' never closed.
        ("a(b(c)d", "a"),
        // Several balanced groups followed by an unclosed one.
        ("a(b)c(d)e(f", "ace"),
    ];

    for &(input, expected) in &cases {
        assert_eq!(strip_parenthesized_comments(input), expected);
    }
}
890 | 964 | } |
0 commit comments