Skip to content

Commit aa172ae

Browse files
authored
Merge pull request #8944 from naoNao89/fix/issue-1882-date-set-formats
fix(date): support timezone abbreviations in `date --set`
2 parents 82464a4 + 16b3865 commit aa172ae

2 files changed

Lines changed: 253 additions & 18 deletions

File tree

src/uu/date/src/date.rs

Lines changed: 128 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,17 @@
77

88
use clap::{Arg, ArgAction, Command};
99
use jiff::fmt::strtime;
10-
use jiff::tz::TimeZone;
10+
use jiff::tz::{TimeZone, TimeZoneDatabase};
1111
use jiff::{Timestamp, Zoned};
1212
#[cfg(all(unix, not(target_os = "macos"), not(target_os = "redox")))]
1313
use libc::clock_settime;
1414
#[cfg(all(unix, not(target_os = "redox")))]
1515
use libc::{CLOCK_REALTIME, clock_getres, timespec};
16+
use std::collections::HashMap;
1617
use std::fs::File;
1718
use std::io::{BufRead, BufReader};
1819
use std::path::PathBuf;
20+
use std::sync::OnceLock;
1921
use uucore::error::FromIo;
2022
use uucore::error::{UResult, USimpleError};
2123
use uucore::translate;
@@ -446,13 +448,137 @@ fn make_format_string(settings: &Settings) -> &str {
446448
}
447449
}
448450

/// Preferred abbreviation-to-IANA-zone mappings that seed the abbreviation cache.
///
/// `build_tz_abbrev_map` inserts these entries first and its later inserts never
/// overwrite an existing key, so every mapping listed here takes precedence.
///
/// The table holds two kinds of entries:
/// - `UTC` / `GMT`: not ambiguous, listed because they are so commonly used.
/// - Abbreviations shared by several regions, resolved the GNU-compatible way:
///   - CST: Central Standard Time (US) preferred over China/Cuba Standard Time
///   - EST: Eastern Standard Time (US) preferred over Australian Eastern Standard Time
///   - IST: India Standard Time preferred over Israel/Irish Standard Time
///   - MST: Mountain Standard Time (US) preferred over Malaysia Standard Time
///   - PST/PDT, MDT, CDT, EDT: the matching US zones
///
/// NOTE(review): each abbreviation maps to a full IANA zone, not to a fixed
/// offset, so an offset derived through this table follows that zone's DST rules
/// for the date in question (`PST` and `PDT` both go through
/// `America/Los_Angeles`). Confirm this is the intended GNU-compatible behavior.
///
/// Abbreviations missing from this table are only present in the cache when they
/// equal the upper-cased last path component of a zone name (see
/// `build_tz_abbrev_map`); names such as AWST or JST are not added there. // spell-checker:disable-line
static PREFERRED_TZ_MAPPINGS: &[(&str, &str)] = &[
    // Universal (no ambiguity, but commonly used)
    ("UTC", "UTC"),
    ("GMT", "UTC"),
    // Highly ambiguous US timezones (GNU compatible)
    ("PST", "America/Los_Angeles"),
    ("PDT", "America/Los_Angeles"),
    ("MST", "America/Denver"),
    ("MDT", "America/Denver"),
    ("CST", "America/Chicago"), // Ambiguous: US vs China vs Cuba
    ("CDT", "America/Chicago"),
    ("EST", "America/New_York"), // Ambiguous: US vs Australia
    ("EDT", "America/New_York"),
    // Other highly ambiguous cases
    ("IST", "Asia/Kolkata"), // Ambiguous: India vs Israel vs Ireland // spell-checker:disable-line
];

/// Abbreviation lookup map, built once on first use by `build_tz_abbrev_map`
/// and shared for the rest of the process.
static TZ_ABBREV_CACHE: OnceLock<HashMap<String, String>> = OnceLock::new();
483+
484+
/// Build timezone abbreviation lookup map from IANA database.
485+
/// Uses preferred mappings for disambiguation, then searches all timezones.
486+
fn build_tz_abbrev_map() -> HashMap<String, String> {
487+
let mut map = HashMap::new();
488+
489+
// First, add preferred mappings (these take precedence)
490+
for (abbrev, iana) in PREFERRED_TZ_MAPPINGS {
491+
map.insert((*abbrev).to_string(), (*iana).to_string());
492+
}
493+
494+
// Then, try to find additional abbreviations from IANA database
495+
// This gives us broader coverage while respecting disambiguation preferences
496+
let tzdb = TimeZoneDatabase::from_env(); // spell-checker:disable-line
497+
// spell-checker:disable-next-line
498+
for tz_name in tzdb.available() {
499+
let tz_str = tz_name.as_str();
500+
// Skip if we already have a preferred mapping for this zone
501+
if !map.values().any(|v| v == tz_str) {
502+
// For zones without preferred mappings, use last component as potential abbreviation
503+
// e.g., "Pacific/Fiji" could map to "FIJI"
504+
if let Some(last_part) = tz_str.split('/').next_back() {
505+
let potential_abbrev = last_part.to_uppercase();
506+
// Only add if it looks like an abbreviation (2-5 uppercase chars)
507+
if potential_abbrev.len() >= 2
508+
&& potential_abbrev.len() <= 5
509+
&& potential_abbrev.chars().all(|c| c.is_ascii_uppercase())
510+
{
511+
map.entry(potential_abbrev)
512+
.or_insert_with(|| tz_str.to_string());
513+
}
514+
}
515+
}
516+
}
517+
518+
map
519+
}
520+
521+
/// Get IANA timezone name for a given abbreviation.
522+
/// Uses lazy-loaded cache with preferred mappings for disambiguation.
523+
fn tz_abbrev_to_iana(abbrev: &str) -> Option<&str> {
524+
let cache = TZ_ABBREV_CACHE.get_or_init(build_tz_abbrev_map);
525+
cache.get(abbrev).map(|s| s.as_str())
526+
}
527+
528+
/// Resolve timezone abbreviation in date string and replace with numeric offset.
529+
/// Returns the modified string with offset, or original if no abbreviation found.
530+
fn resolve_tz_abbreviation<S: AsRef<str>>(date_str: S) -> String {
531+
let s = date_str.as_ref();
532+
533+
// Look for timezone abbreviation at the end of the string
534+
// Pattern: ends with uppercase letters (2-5 chars)
535+
if let Some(last_word) = s.split_whitespace().last() {
536+
// Check if it's a potential timezone abbreviation (all uppercase, 2-5 chars)
537+
if last_word.len() >= 2
538+
&& last_word.len() <= 5
539+
&& last_word.chars().all(|c| c.is_ascii_uppercase())
540+
{
541+
if let Some(iana_name) = tz_abbrev_to_iana(last_word) {
542+
// Try to get the timezone
543+
if let Ok(tz) = TimeZone::get(iana_name) {
544+
// Parse the date part (everything before the TZ abbreviation)
545+
let date_part = s.trim_end_matches(last_word).trim();
546+
547+
// Try to parse the date with UTC first to get timestamp
548+
let date_with_utc = format!("{date_part} +00:00");
549+
if let Ok(parsed) = parse_datetime::parse_datetime(&date_with_utc) {
550+
// Create timestamp from parsed date
551+
if let Ok(ts) = Timestamp::new(
552+
parsed.timestamp(),
553+
parsed.timestamp_subsec_nanos() as i32,
554+
) {
555+
// Get the offset for this specific timestamp in the target timezone
556+
let zoned = ts.to_zoned(tz);
557+
let offset_str = format!("{}", zoned.offset());
558+
559+
// Replace abbreviation with offset
560+
return format!("{date_part} {offset_str}");
561+
}
562+
}
563+
}
564+
}
565+
}
566+
}
567+
568+
// No abbreviation found or couldn't resolve, return original
569+
s.to_string()
570+
}
571+
449572
/// Parse a `String` into a `DateTime`.
450573
/// If it fails, return a tuple of the `String` along with its `ParseError`.
451574
// TODO: Convert `parse_datetime` to jiff and remove wrapper from chrono to jiff structures.
452575
fn parse_date<S: AsRef<str> + Clone>(
453576
s: S,
454577
) -> Result<Zoned, (String, parse_datetime::ParseDateTimeError)> {
455-
match parse_datetime::parse_datetime(s.as_ref()) {
578+
// First, try to resolve any timezone abbreviations
579+
let resolved = resolve_tz_abbreviation(s.as_ref());
580+
581+
match parse_datetime::parse_datetime(&resolved) {
456582
Ok(date) => {
457583
let timestamp =
458584
Timestamp::new(date.timestamp(), date.timestamp_subsec_nanos() as i32).unwrap();

tests/by-util/test_date.rs

Lines changed: 125 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -288,15 +288,14 @@ fn test_date_set_mac_unavailable() {
288288

289289
#[test]
290290
#[cfg(all(unix, not(target_os = "macos")))]
291-
/// TODO: expected to fail currently; change to `succeeds()` when required.
292291
fn test_date_set_valid_2() {
293292
if geteuid() == 0 {
294-
let result = new_ucmd!()
293+
new_ucmd!()
295294
.arg("--set")
296295
.arg("Sat 20 Mar 2021 14:53:01 AWST") // spell-checker:disable-line
297-
.fails();
298-
result.no_stdout();
299-
assert!(result.stderr_str().starts_with("date: invalid date "));
296+
.succeeds()
297+
.no_stdout()
298+
.no_stderr();
300299
}
301300
}
302301

@@ -370,29 +369,27 @@ fn test_date_for_file_mtime() {
370369

371370
#[test]
372371
#[cfg(all(unix, not(target_os = "macos")))]
373-
/// TODO: expected to fail currently; change to `succeeds()` when required.
374372
fn test_date_set_valid_3() {
375373
if geteuid() == 0 {
376-
let result = new_ucmd!()
374+
new_ucmd!()
377375
.arg("--set")
378376
.arg("Sat 20 Mar 2021 14:53:01") // Local timezone
379-
.fails();
380-
result.no_stdout();
381-
assert!(result.stderr_str().starts_with("date: invalid date "));
377+
.succeeds()
378+
.no_stdout()
379+
.no_stderr();
382380
}
383381
}
384382

385383
#[test]
386384
#[cfg(all(unix, not(target_os = "macos")))]
387-
/// TODO: expected to fail currently; change to `succeeds()` when required.
388385
fn test_date_set_valid_4() {
389386
if geteuid() == 0 {
390-
let result = new_ucmd!()
387+
new_ucmd!()
391388
.arg("--set")
392389
.arg("2020-03-11 21:45:00") // Local timezone
393-
.fails();
394-
result.no_stdout();
395-
assert!(result.stderr_str().starts_with("date: invalid date "));
390+
.succeeds()
391+
.no_stdout()
392+
.no_stderr();
396393
}
397394
}
398395

@@ -663,7 +660,7 @@ fn test_date_tz_various_formats() {
663660
"-0800 -08:00 -08:00:00 -08 PST\n",
664661
);
665662
// Half-hour timezone
666-
test_tz("Asia/Kolkata", JAN2, "+0530 +05:30 +05:30:00 +05:30 IST\n");
663+
test_tz("Asia/Kolkata", JAN2, "+0530 +05:30 +05:30:00 +05:30 IST\n"); // spell-checker:disable-line
667664
test_tz("Europe/Berlin", JAN2, "+0100 +01:00 +01:00:00 +01 CET\n");
668665
test_tz(
669666
"Australia/Sydney",
@@ -835,3 +832,115 @@ fn test_date_numeric_d_invalid_numbers() {
835832
.fails()
836833
.stderr_contains("invalid date");
837834
}
835+
#[test]
fn test_date_tz_abbreviation_utc_gmt() {
    // Both spellings of universal time must be accepted as a trailing zone.
    for date_str in ["2021-03-20 14:53:01 UTC", "2021-03-20 14:53:01 GMT"] {
        new_ucmd!()
            .arg("-d")
            .arg(date_str)
            .arg("+%Y-%m-%d %H:%M:%S")
            .succeeds();
    }
}
851+
#[test]
fn test_date_tz_abbreviation_us_timezones() {
    // US timezone abbreviations (supported by uutils and by GNU).
    const US_ZONES: [&str; 8] = ["PST", "PDT", "MST", "MDT", "CST", "CDT", "EST", "EDT"];

    for zone in US_ZONES {
        // Same timestamp for every zone; only the trailing abbreviation varies.
        let date_str = format!("2021-03-20 14:53:01 {zone}");
        new_ucmd!()
            .arg("-d")
            .arg(&date_str)
            .arg("+%Y-%m-%d %H:%M:%S")
            .succeeds()
            .no_stderr();
    }
}
875+
#[test]
fn test_date_tz_abbreviation_australian_timezones() {
    // Australian timezone abbreviations: accepted by uutils, not by GNU.
    let zones = [
        "AWST", // Western Australia // spell-checker:disable-line
        "ACST", // Central Australia (Standard) // spell-checker:disable-line
        "ACDT", // Central Australia (Daylight) // spell-checker:disable-line
        "AEST", // Eastern Australia (Standard)
        "AEDT", // Eastern Australia (Daylight)
    ];

    for zone in zones {
        // Same timestamp for every zone; only the trailing abbreviation varies.
        let date_str = format!("2021-03-20 14:53:01 {zone}");
        new_ucmd!()
            .arg("-d")
            .arg(&date_str)
            .arg("+%Y-%m-%d %H:%M:%S")
            .succeeds()
            .no_stderr();
    }
}
897+
#[test]
fn test_date_tz_abbreviation_dst_handling() {
    // PST is UTC-8 and PDT is UTC-7; March 20, 2021 fell in the PDT period of
    // the Pacific timezone. Both spellings must parse cleanly when the offset
    // is requested. Only success and an empty stderr are checked here, not the
    // printed offset.
    for date_str in ["2021-03-20 14:53:01 PST", "2021-03-20 14:53:01 PDT"] {
        new_ucmd!()
            .arg("-d")
            .arg(date_str)
            .arg("+%z")
            .succeeds()
            .no_stderr();
    }
}
918+
#[test]
fn test_date_tz_abbreviation_with_day_of_week() {
    // Full date format with a leading day of week and a trailing abbreviation.
    let inputs = [
        "Sat 20 Mar 2021 14:53:01 AWST", // spell-checker:disable-line
        "Sat 20 Mar 2021 14:53:01 EST",
    ];

    for date_str in inputs {
        new_ucmd!()
            .arg("-d")
            .arg(date_str)
            .arg("+%Y-%m-%d %H:%M:%S")
            .succeeds()
            .no_stderr();
    }
}
936+
#[test]
fn test_date_tz_abbreviation_unknown() {
    // XYZ is not a valid timezone abbreviation, so the input must be rejected
    // with the usual "invalid date" diagnostic.
    let result = new_ucmd!()
        .arg("-d")
        .arg("2021-03-20 14:53:01 XYZ")
        .fails();
    result.stderr_contains("invalid date");
}

0 commit comments

Comments
 (0)