Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions fuzz/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions src/uu/date/src/date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use uucore::display::Quotable;
use uucore::error::FromIo;
use uucore::error::{UResult, USimpleError};
#[cfg(feature = "i18n-datetime")]
use uucore::i18n::datetime::{localize_format_string, should_use_icu_locale};
use uucore::i18n::datetime::{NamePadding, localize_format_string, should_use_icu_locale};
use uucore::translate;
use uucore::{format_usage, show};
#[cfg(windows)]
Expand Down Expand Up @@ -715,7 +715,7 @@ fn format_date_with_locale_aware_months(
// rest of the function without a dangling reference.
#[cfg(feature = "i18n-datetime")]
let localized: Option<String> = (!skip_localization && should_use_icu_locale())
.then(|| localize_format_string(format_string, date.date()));
.then(|| localize_format_string(format_string, date.date(), NamePadding::Raw));
#[cfg(feature = "i18n-datetime")]
let fmt: &str = localized.as_deref().unwrap_or(format_string);
#[cfg(not(feature = "i18n-datetime"))]
Expand Down
1 change: 1 addition & 0 deletions src/uu/ls/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ uucore = { workspace = true, features = [
"fs",
"fsext",
"fsxattr",
"i18n-datetime",
"parser-size",
"parser-glob",
"quoting-style",
Expand Down
10 changes: 8 additions & 2 deletions src/uu/ls/src/display.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ use uucore::{
os_str_as_bytes_lossy,
quoting_style::{QuotingStyle, locale_aware_escape_dir_name, locale_aware_escape_name},
show,
time::{FormatSystemTimeFallback, format_system_time},
time::{FormatSystemTimeFallback, NamePadding, format_system_time_locale_aware},
};

use crate::colors::{StyleManager, color_name};
Expand Down Expand Up @@ -620,7 +620,13 @@ fn display_date(
_ => &config.time_format_recent,
};

format_system_time(out, time, fmt, FormatSystemTimeFallback::Integer)
format_system_time_locale_aware(
out,
time,
fmt,
FormatSystemTimeFallback::Integer,
NamePadding::Padded,
)
}

fn display_len_or_rdev(metadata: &Metadata, config: &Config) -> SizeOrDeviceId {
Expand Down
2 changes: 2 additions & 0 deletions src/uucore/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ icu_decimal = { workspace = true, optional = true, features = [
icu_locale = { workspace = true, optional = true, features = ["compiled_data"] }
icu_provider = { workspace = true, optional = true }
jiff-icu = { workspace = true, optional = true }
unicode-width = { workspace = true, optional = true }

# Fluent dependencies (always available for localization)
fluent = { workspace = true }
Expand Down Expand Up @@ -164,6 +165,7 @@ i18n-datetime = [
"icu_datetime",
"jiff-icu",
"jiff",
"unicode-width",
]
mode = ["libc"]
perms = ["entries", "libc", "walkdir"]
Expand Down
193 changes: 172 additions & 21 deletions src/uucore/src/lib/features/i18n/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

// spell-checker:ignore fieldsets prefs febr abmon langinfo uppercased
// spell-checker:ignore fieldsets prefs febr abmon langinfo uppercased wcswidth alef

//! Locale-aware datetime formatting utilities using ICU and jiff-icu

Expand All @@ -17,6 +17,14 @@ use jiff_icu::ConvertFrom;
use std::sync::OnceLock;

use crate::i18n::get_locale_from_env;
/// Controls whether locale name lookups return raw or padded names.
///
/// The mode is a plain, copyable flag. It also implements `Debug`,
/// `PartialEq` and `Eq` so callers can compare, assert on, and log it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NamePadding {
    /// Raw names with no trailing padding — for `date` and similar utilities.
    Raw,
    /// Names padded to uniform display width — for columnar output like `ls`.
    Padded,
}

/// Get the locale for time/date formatting from LC_TIME environment variable
pub fn get_time_locale() -> &'static (Locale, super::UEncoding) {
Expand Down Expand Up @@ -67,8 +75,135 @@ pub enum CalendarType {
Ethiopian,
}

/// Transform a strftime format string to use locale-specific calendar values
pub fn localize_format_string(format: &str, date: JiffDate) -> String {
/// Sum per-character Unicode display widths.
///
/// We intentionally avoid `UnicodeWidthStr::width` because its string-level
/// API applies Arabic lam-alef ligature detection (ل+أ → 1 cell) which
/// glibc's `wcswidth` does not. GNU ls pads via `wcswidth`, so we must
/// match that behavior.
fn display_width(s: &str) -> usize {
use unicode_width::UnicodeWidthChar;
s.chars()
.map(|c| UnicodeWidthChar::width(c).unwrap_or(0))
.sum()
}

/// Right-pad the entries of `names` with spaces until every entry has the
/// display width of the widest one, in the spirit of GNU ls's `abmon_len` /
/// weekday alignment logic.
///
/// The array is returned untouched when it is empty, when the widest entry
/// has zero width, or when all entries already share the same width.
fn pad_names<const N: usize>(names: [String; N]) -> [String; N] {
    let widths = names.each_ref().map(|name| display_width(name));

    // Nothing to align against if there is no entry or the widest one is empty.
    let Some(target) = widths.iter().copied().max().filter(|&widest| widest > 0) else {
        return names;
    };

    let mut padded = names;
    for (name, &width) in padded.iter_mut().zip(widths.iter()) {
        if width < target {
            // Append exactly the number of missing cells as spaces.
            name.push_str(&" ".repeat(target - width));
        }
    }
    padded
}

/// Cached locale name arrays, computed once per process. Each field is
/// `None` when the ICU formatter for that field width cannot be created
/// (should only happen for truly broken locale data).
///
/// Both raw and padded variants are stored: `date` needs raw names (no
/// trailing spaces) while `ls` needs padded names for column alignment.
///
/// Month arrays hold 12 entries and weekday arrays hold 7 entries; the
/// `*_padded` field of each pair is the raw array right-padded with spaces
/// to a uniform display width.
struct CachedLocaleNames {
/// `%B` — full month names, raw
month_long: Option<[String; 12]>,
/// `%B` — full month names, padded to uniform display width
month_long_padded: Option<[String; 12]>,
/// `%b` / `%h` — abbreviated month names (trailing dots stripped), raw
month_abbrev: Option<[String; 12]>,
/// `%b` / `%h` — abbreviated month names (trailing dots stripped), padded
/// to uniform display width
month_abbrev_padded: Option<[String; 12]>,
/// `%A` — full weekday names, raw
weekday_long: Option<[String; 7]>,
/// `%A` — full weekday names, padded to uniform display width
weekday_long_padded: Option<[String; 7]>,
/// `%a` — abbreviated weekday names, raw
weekday_short: Option<[String; 7]>,
/// `%a` — abbreviated weekday names, padded to uniform display width
weekday_short_padded: Option<[String; 7]>,
}

/// Return the process-wide table of localized month and weekday names.
///
/// The table carries both the raw and the width-padded form of every name
/// list. It is built on the first call and reused afterwards, so — like
/// [`get_time_locale`] — it is frozen at first access: a later change to
/// `LC_TIME` leaves the cached names stale. That is acceptable because each
/// coreutils invocation is a fresh process.
fn get_cached_locale_names() -> &'static CachedLocaleNames {
    /// Clone an optional raw name list and pad the copy to uniform width.
    fn padded_copy<const N: usize>(raw: &Option<[String; N]>) -> Option<[String; N]> {
        raw.clone().map(pad_names)
    }

    static CACHE: OnceLock<CachedLocaleNames> = OnceLock::new();

    CACHE.get_or_init(|| {
        let (locale, _) = get_time_locale();
        let prefs: icu_datetime::DateTimeFormatterPreferences = locale.clone().into();

        // Fixed sample dates that cannot be invalid: the first day of each
        // month of 2001, and the first seven days of January 2001. A failure
        // here would be a programming error, hence `expect`.
        let first_of_month: [Date<Iso>; 12] = std::array::from_fn(|month| {
            Date::<Iso>::try_new_iso(2001, (month + 1) as u8, 1)
                .expect("month 1..=12 day 1 is always valid")
        });
        // 2001-01-01 is a Monday, so slot 0 is Monday and slot 6 is Sunday,
        // matching indices produced by `to_monday_zero_offset()`.
        let first_week: [Date<Iso>; 7] = std::array::from_fn(|day| {
            Date::<Iso>::try_new_iso(2001, 1, (day + 1) as u8).expect("Jan 1..=7 is always valid")
        });

        let month_long = match DateTimeFormatter::try_new(prefs, fieldsets::M::long()) {
            Ok(formatter) => Some(
                first_of_month
                    .each_ref()
                    .map(|date| formatter.format(date).to_string()),
            ),
            Err(_) => None,
        };
        let month_long_padded = padded_copy(&month_long);

        // ICU's medium month format can end in a period (e.g. "febr." for
        // Hungarian), while the standard C/POSIX locale via nl_langinfo
        // yields abbreviations WITHOUT one, so trailing periods are removed.
        let month_abbrev = match DateTimeFormatter::try_new(prefs, fieldsets::M::medium()) {
            Ok(formatter) => Some(first_of_month.each_ref().map(|date| {
                let rendered = formatter.format(date).to_string();
                rendered.trim_end_matches('.').to_owned()
            })),
            Err(_) => None,
        };
        let month_abbrev_padded = padded_copy(&month_abbrev);

        let weekday_long = match DateTimeFormatter::try_new(prefs, fieldsets::E::long()) {
            Ok(formatter) => Some(
                first_week
                    .each_ref()
                    .map(|date| formatter.format(date).to_string()),
            ),
            Err(_) => None,
        };
        let weekday_long_padded = padded_copy(&weekday_long);

        let weekday_short = match DateTimeFormatter::try_new(prefs, fieldsets::E::short()) {
            Ok(formatter) => Some(
                first_week
                    .each_ref()
                    .map(|date| formatter.format(date).to_string()),
            ),
            Err(_) => None,
        };
        let weekday_short_padded = padded_copy(&weekday_short);

        CachedLocaleNames {
            month_long,
            month_long_padded,
            month_abbrev,
            month_abbrev_padded,
            weekday_long,
            weekday_long_padded,
            weekday_short,
            weekday_short_padded,
        }
    })
}

/// Transform a strftime format string to use locale-specific calendar values.
///
/// When `padding` is [`NamePadding::Padded`], month and weekday names are
/// padded to uniform display width (for columnar output like `ls`). When
/// [`NamePadding::Raw`], raw names are used (for `date` and similar utilities).
pub fn localize_format_string(format: &str, date: JiffDate, padding: NamePadding) -> String {
const PERCENT_PLACEHOLDER: &str = "\x00\x00";

let (locale, _) = get_time_locale();
Expand Down Expand Up @@ -113,36 +248,52 @@ pub fn localize_format_string(format: &str, date: JiffDate) -> String {
.replace("%e", &format!("{cal_day:2}"));
}

// Format localized names using ICU DateTimeFormatter
let locale_prefs = locale.clone().into();
// Look up locale names from the once-per-process cache.
let pad = matches!(padding, NamePadding::Padded);
let cached = get_cached_locale_names();
let month_idx = date.month() as usize - 1;
let weekday_idx = date.weekday().to_monday_zero_offset() as usize;

if fmt.contains("%B") {
if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::M::long()) {
fmt = fmt.replace("%B", &f.format(&iso_date).to_string());
let src = if pad {
&cached.month_long_padded
} else {
&cached.month_long
};
if let Some(names) = src {
fmt = fmt.replace("%B", &names[month_idx]);
}
}
if fmt.contains("%b") || fmt.contains("%h") {
if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::M::medium()) {
// ICU's medium format may include trailing periods (e.g., "febr." for Hungarian),
// which when combined with locale format strings that also add periods after
// %b (e.g., "%Y. %b. %d") results in double periods ("febr..").
// The standard C/POSIX locale via nl_langinfo returns abbreviations
// WITHOUT trailing periods, so we strip them here for consistency.
let month_abbrev = f.format(&iso_date).to_string();
let month_abbrev = month_abbrev.trim_end_matches('.').to_string();
let src = if pad {
&cached.month_abbrev_padded
} else {
&cached.month_abbrev
};
if let Some(names) = src {
fmt = fmt
.replace("%b", &month_abbrev)
.replace("%h", &month_abbrev);
.replace("%b", &names[month_idx])
.replace("%h", &names[month_idx]);
}
}
if fmt.contains("%A") {
if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::E::long()) {
fmt = fmt.replace("%A", &f.format(&iso_date).to_string());
let src = if pad {
&cached.weekday_long_padded
} else {
&cached.weekday_long
};
if let Some(names) = src {
fmt = fmt.replace("%A", &names[weekday_idx]);
}
}
if fmt.contains("%a") {
if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::E::short()) {
fmt = fmt.replace("%a", &f.format(&iso_date).to_string());
let src = if pad {
&cached.weekday_short_padded
} else {
&cached.weekday_short
};
if let Some(names) = src {
fmt = fmt.replace("%a", &names[weekday_idx]);
}
}

Expand Down
Loading
Loading