diff --git a/Cargo.lock b/Cargo.lock index d181f4d914e..802f227da41 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4540,6 +4540,7 @@ dependencies = [ "thiserror 2.0.18", "time", "unic-langid", + "unicode-width 0.2.2", "unit-prefix", "utmp-classic", "uucore_procs", diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 40dad413f52..f8a58914de2 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -1422,9 +1422,9 @@ dependencies = [ [[package]] name = "pkg-config" -version = "0.3.32" +version = "0.3.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" [[package]] name = "portable-atomic" @@ -2143,6 +2143,7 @@ dependencies = [ "sm3", "thiserror", "unic-langid", + "unicode-width", "unit-prefix", "uucore_procs", "wild", diff --git a/src/uu/date/src/date.rs b/src/uu/date/src/date.rs index ab7cbf680c6..8d3c46a2bd4 100644 --- a/src/uu/date/src/date.rs +++ b/src/uu/date/src/date.rs @@ -22,7 +22,7 @@ use uucore::display::Quotable; use uucore::error::FromIo; use uucore::error::{UResult, USimpleError}; #[cfg(feature = "i18n-datetime")] -use uucore::i18n::datetime::{localize_format_string, should_use_icu_locale}; +use uucore::i18n::datetime::{NamePadding, localize_format_string, should_use_icu_locale}; use uucore::translate; use uucore::{format_usage, show}; #[cfg(windows)] @@ -715,7 +715,7 @@ fn format_date_with_locale_aware_months( // rest of the function without a dangling reference. 
#[cfg(feature = "i18n-datetime")] let localized: Option = (!skip_localization && should_use_icu_locale()) - .then(|| localize_format_string(format_string, date.date())); + .then(|| localize_format_string(format_string, date.date(), NamePadding::Raw)); #[cfg(feature = "i18n-datetime")] let fmt: &str = localized.as_deref().unwrap_or(format_string); #[cfg(not(feature = "i18n-datetime"))] diff --git a/src/uu/ls/Cargo.toml b/src/uu/ls/Cargo.toml index b8688819ef8..6bbdc31f74f 100644 --- a/src/uu/ls/Cargo.toml +++ b/src/uu/ls/Cargo.toml @@ -36,6 +36,7 @@ uucore = { workspace = true, features = [ "fs", "fsext", "fsxattr", + "i18n-datetime", "parser-size", "parser-glob", "quoting-style", diff --git a/src/uu/ls/src/display.rs b/src/uu/ls/src/display.rs index d1c35e45c7a..a59b2e2a38a 100644 --- a/src/uu/ls/src/display.rs +++ b/src/uu/ls/src/display.rs @@ -56,7 +56,7 @@ use uucore::{ os_str_as_bytes_lossy, quoting_style::{QuotingStyle, locale_aware_escape_dir_name, locale_aware_escape_name}, show, - time::{FormatSystemTimeFallback, format_system_time}, + time::{FormatSystemTimeFallback, NamePadding, format_system_time_locale_aware}, }; use crate::colors::{StyleManager, color_name}; @@ -620,7 +620,13 @@ fn display_date( _ => &config.time_format_recent, }; - format_system_time(out, time, fmt, FormatSystemTimeFallback::Integer) + format_system_time_locale_aware( + out, + time, + fmt, + FormatSystemTimeFallback::Integer, + NamePadding::Padded, + ) } fn display_len_or_rdev(metadata: &Metadata, config: &Config) -> SizeOrDeviceId { diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index bd4acb8b2fa..73367e4a0d4 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -86,6 +86,7 @@ icu_decimal = { workspace = true, optional = true, features = [ icu_locale = { workspace = true, optional = true, features = ["compiled_data"] } icu_provider = { workspace = true, optional = true } jiff-icu = { workspace = true, optional = true } +unicode-width = { workspace = true, 
optional = true } # Fluent dependencies (always available for localization) fluent = { workspace = true } @@ -164,6 +165,7 @@ i18n-datetime = [ "icu_datetime", "jiff-icu", "jiff", + "unicode-width", ] mode = ["libc"] perms = ["entries", "libc", "walkdir"] diff --git a/src/uucore/src/lib/features/i18n/datetime.rs b/src/uucore/src/lib/features/i18n/datetime.rs index ce52013605f..af524ea9e50 100644 --- a/src/uucore/src/lib/features/i18n/datetime.rs +++ b/src/uucore/src/lib/features/i18n/datetime.rs @@ -3,7 +3,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore fieldsets prefs febr abmon langinfo uppercased +// spell-checker:ignore fieldsets prefs febr abmon langinfo uppercased wcswidth alef //! Locale-aware datetime formatting utilities using ICU and jiff-icu @@ -17,6 +17,14 @@ use jiff_icu::ConvertFrom; use std::sync::OnceLock; use crate::i18n::get_locale_from_env; +/// Controls whether locale name lookups return raw or padded names. +#[derive(Clone, Copy)] +pub enum NamePadding { + /// Raw names with no trailing padding — for `date` and similar utilities. + Raw, + /// Names padded to uniform display width — for columnar output like `ls`. + Padded, +} /// Get the locale for time/date formatting from LC_TIME environment variable pub fn get_time_locale() -> &'static (Locale, super::UEncoding) { @@ -67,8 +75,135 @@ pub enum CalendarType { Ethiopian, } -/// Transform a strftime format string to use locale-specific calendar values -pub fn localize_format_string(format: &str, date: JiffDate) -> String { +/// Sum per-character Unicode display widths. +/// +/// We intentionally avoid `UnicodeWidthStr::width` because its string-level +/// API applies Arabic lam-alef ligature detection (ل+أ → 1 cell) which +/// glibc's `wcswidth` does not. GNU ls pads via `wcswidth`, so we must +/// match that behavior. 
+fn display_width(s: &str) -> usize {
+    use unicode_width::UnicodeWidthChar;
+    // `width` yields `None` for control characters; count them as zero columns.
+    s.chars()
+        .map(|c| UnicodeWidthChar::width(c).unwrap_or(0))
+        .sum()
+}
+
+/// Pad every entry in `names` with trailing spaces so all entries share the
+/// same display width (the maximum across the array), as measured by
+/// `display_width`. This mirrors GNU ls's `abmon_len` / weekday alignment
+/// logic.
+///
+/// Entries that already have the maximum width are returned unchanged, so an
+/// array whose names are all equally wide (or all zero-width) comes back
+/// exactly as it was passed in.
+fn pad_names<const N: usize>(mut names: [String; N]) -> [String; N] {
+    let widths: [usize; N] = std::array::from_fn(|i| display_width(&names[i]));
+    let max = widths.iter().copied().max().unwrap_or(0);
+    // Pair each name with its own width instead of tracking a separate index.
+    for (name, width) in names.iter_mut().zip(widths) {
+        // `width..max` is empty for the widest entries, leaving them untouched.
+        for _ in width..max {
+            name.push(' ');
+        }
+    }
+    names
+}
+
+/// Cached locale name arrays, computed once per process. Each field is
+/// `None` when the ICU formatter for that field width cannot be created
+/// (should only happen for truly broken locale data).
+///
+/// Both raw and padded variants are stored: `date` needs raw names (no
+/// trailing spaces) while `ls` needs padded names for column alignment.
+///
+/// Month arrays are indexed by `month - 1` and weekday arrays by the
+/// Monday-based weekday offset (Monday = 0), matching the lookups done in
+/// `localize_format_string`.
+///
+/// NOTE(review): the month arrays are built by formatting the first day of
+/// each Gregorian month and are looked up by Gregorian month number. If ICU
+/// resolves a non-Gregorian calendar for the locale, that calendar's months
+/// do not start on Gregorian month boundaries, so a date late in a Gregorian
+/// month may be given the previous calendar month's name — confirm against
+/// the per-date formatting this cache replaced.
+struct CachedLocaleNames {
+    /// `%B` — full month names, raw
+    month_long: Option<[String; 12]>,
+    /// `%B` — full month names, padded to uniform display width
+    month_long_padded: Option<[String; 12]>,
+    /// `%b` / `%h` — abbreviated month names (trailing dots stripped), raw
+    month_abbrev: Option<[String; 12]>,
+    /// `%b` / `%h` — abbreviated month names, padded
+    month_abbrev_padded: Option<[String; 12]>,
+    /// `%A` — full weekday names, raw
+    weekday_long: Option<[String; 7]>,
+    /// `%A` — full weekday names, padded
+    weekday_long_padded: Option<[String; 7]>,
+    /// `%a` — abbreviated weekday names, raw
+    weekday_short: Option<[String; 7]>,
+    /// `%a` — abbreviated weekday names, padded
+    weekday_short_padded: Option<[String; 7]>,
+}
+
+/// Return the cached, pre-padded locale names (computed once per process).
+/// +/// Like [`get_time_locale`], the result is frozen at first access. +/// If `LC_TIME` changes after that point the cached names will be stale. +/// This is acceptable: each coreutils invocation is a fresh process. +fn get_cached_locale_names() -> &'static CachedLocaleNames { + static CACHE: OnceLock = OnceLock::new(); + CACHE.get_or_init(|| { + let (locale, _) = get_time_locale(); + let locale_prefs: icu_datetime::DateTimeFormatterPreferences = locale.clone().into(); + + // Hardcoded dates that are guaranteed valid — month 1..=12 day 1, + // and day 1..=7 of January 2001. Any failure is a bug, not a + // recoverable condition. + let month_dates: [Date; 12] = std::array::from_fn(|i| { + Date::::try_new_iso(2001, (i + 1) as u8, 1) + .expect("month 1..=12 day 1 is always valid") + }); + // Jan 1 2001 is a Monday, so Jan 1..=7 yields Mon(0)..Sun(6) + // when indexed via `to_monday_zero_offset()`. + let weekday_dates: [Date; 7] = std::array::from_fn(|i| { + Date::::try_new_iso(2001, 1, (i + 1) as u8).expect("Jan 1..=7 is always valid") + }); + + let month_long = DateTimeFormatter::try_new(locale_prefs, fieldsets::M::long()) + .ok() + .map(|f| month_dates.each_ref().map(|d| f.format(d).to_string())); + let month_long_padded = month_long.clone().map(pad_names); + + // ICU's medium format may include trailing periods (e.g., "febr." + // for Hungarian). The standard C/POSIX locale via nl_langinfo + // returns abbreviations WITHOUT trailing periods, so we strip them. 
+ let month_abbrev = DateTimeFormatter::try_new(locale_prefs, fieldsets::M::medium()) + .ok() + .map(|f| { + month_dates + .each_ref() + .map(|d| f.format(d).to_string().trim_end_matches('.').to_string()) + }); + let month_abbrev_padded = month_abbrev.clone().map(pad_names); + + let weekday_long = DateTimeFormatter::try_new(locale_prefs, fieldsets::E::long()) + .ok() + .map(|f| weekday_dates.each_ref().map(|d| f.format(d).to_string())); + let weekday_long_padded = weekday_long.clone().map(pad_names); + + let weekday_short = DateTimeFormatter::try_new(locale_prefs, fieldsets::E::short()) + .ok() + .map(|f| weekday_dates.each_ref().map(|d| f.format(d).to_string())); + let weekday_short_padded = weekday_short.clone().map(pad_names); + + CachedLocaleNames { + month_long, + month_long_padded, + month_abbrev, + month_abbrev_padded, + weekday_long, + weekday_long_padded, + weekday_short, + weekday_short_padded, + } + }) +} + +/// Transform a strftime format string to use locale-specific calendar values. +/// +/// When `padding` is [`NamePadding::Padded`], month and weekday names are +/// padded to uniform display width (for columnar output like `ls`). When +/// [`NamePadding::Raw`], raw names are used (for `date` and similar utilities). +pub fn localize_format_string(format: &str, date: JiffDate, padding: NamePadding) -> String { const PERCENT_PLACEHOLDER: &str = "\x00\x00"; let (locale, _) = get_time_locale(); @@ -113,36 +248,52 @@ pub fn localize_format_string(format: &str, date: JiffDate) -> String { .replace("%e", &format!("{cal_day:2}")); } - // Format localized names using ICU DateTimeFormatter - let locale_prefs = locale.clone().into(); + // Look up locale names from the once-per-process cache. 
+ let pad = matches!(padding, NamePadding::Padded); + let cached = get_cached_locale_names(); + let month_idx = date.month() as usize - 1; + let weekday_idx = date.weekday().to_monday_zero_offset() as usize; if fmt.contains("%B") { - if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::M::long()) { - fmt = fmt.replace("%B", &f.format(&iso_date).to_string()); + let src = if pad { + &cached.month_long_padded + } else { + &cached.month_long + }; + if let Some(names) = src { + fmt = fmt.replace("%B", &names[month_idx]); } } if fmt.contains("%b") || fmt.contains("%h") { - if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::M::medium()) { - // ICU's medium format may include trailing periods (e.g., "febr." for Hungarian), - // which when combined with locale format strings that also add periods after - // %b (e.g., "%Y. %b. %d") results in double periods ("febr.."). - // The standard C/POSIX locale via nl_langinfo returns abbreviations - // WITHOUT trailing periods, so we strip them here for consistency. 
- let month_abbrev = f.format(&iso_date).to_string(); - let month_abbrev = month_abbrev.trim_end_matches('.').to_string(); + let src = if pad { + &cached.month_abbrev_padded + } else { + &cached.month_abbrev + }; + if let Some(names) = src { fmt = fmt - .replace("%b", &month_abbrev) - .replace("%h", &month_abbrev); + .replace("%b", &names[month_idx]) + .replace("%h", &names[month_idx]); } } if fmt.contains("%A") { - if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::E::long()) { - fmt = fmt.replace("%A", &f.format(&iso_date).to_string()); + let src = if pad { + &cached.weekday_long_padded + } else { + &cached.weekday_long + }; + if let Some(names) = src { + fmt = fmt.replace("%A", &names[weekday_idx]); } } if fmt.contains("%a") { - if let Ok(f) = DateTimeFormatter::try_new(locale_prefs, fieldsets::E::short()) { - fmt = fmt.replace("%a", &f.format(&iso_date).to_string()); + let src = if pad { + &cached.weekday_short_padded + } else { + &cached.weekday_short + }; + if let Some(names) = src { + fmt = fmt.replace("%a", &names[weekday_idx]); } } diff --git a/src/uucore/src/lib/features/time.rs b/src/uucore/src/lib/features/time.rs index bc5d9ec665d..5059e39c82a 100644 --- a/src/uucore/src/lib/features/time.rs +++ b/src/uucore/src/lib/features/time.rs @@ -16,6 +16,21 @@ use std::time::{SystemTime, UNIX_EPOCH}; use crate::error::{UResult, USimpleError}; use crate::show_error; +#[cfg(feature = "i18n-datetime")] +pub use crate::i18n::datetime::NamePadding; + +/// Controls whether locale name lookups return raw or padded names. +/// +/// Without the `i18n-datetime` feature the parameter is accepted but ignored. +#[cfg(not(feature = "i18n-datetime"))] +#[derive(Clone, Copy)] +pub enum NamePadding { + /// Raw names with no trailing padding — for `date` and similar utilities. + Raw, + /// Names padded to uniform display width — for columnar output like `ls`. + Padded, +} + /// Format the given date according to this time format style. 
fn format_zoned(out: &mut W, zoned: Zoned, fmt: &str) -> UResult<()> { let tm = BrokenDownTime::from(&zoned); @@ -49,6 +64,36 @@ pub enum FormatSystemTimeFallback { Float, // Just print seconds+nanoseconds since epoch (`stat`) } +/// Write the seconds-since-epoch fallback used when a `SystemTime` is out of +/// the range representable by `jiff::Zoned`. +fn write_fallback_seconds( + out: &mut W, + time: SystemTime, + mode: FormatSystemTimeFallback, +) -> UResult<()> { + // TODO: The range allowed by jiff is different from what GNU accepts, + // but it still far enough in the future/past to be unlikely to matter: + // jiff: Year between -9999 to 9999 (UTC) [-377705023201..=253402207200] + // GNU: Year fits in signed 32 bits (timezone dependent) + let (mut secs, mut nsecs) = system_time_to_sec(time); + match mode { + FormatSystemTimeFallback::Integer => out.write_all(secs.to_string().as_bytes())?, + FormatSystemTimeFallback::IntegerError => { + let str = secs.to_string(); + show_error!("time '{str}' is out of range"); + out.write_all(str.as_bytes())?; + } + FormatSystemTimeFallback::Float => { + if secs < 0 && nsecs != 0 { + secs -= 1; + nsecs = 1_000_000_000 - nsecs; + } + out.write_fmt(format_args!("{secs}.{nsecs:09}"))?; + } + } + Ok(()) +} + /// Format a `SystemTime` according to given fmt, and append to vector out. pub fn format_system_time( out: &mut W, @@ -56,34 +101,42 @@ pub fn format_system_time( fmt: &str, mode: FormatSystemTimeFallback, ) -> UResult<()> { - let zoned: Result = time.try_into(); - if let Ok(zoned) = zoned { - format_zoned(out, zoned, fmt) - } else { + match time.try_into() { + Ok(zoned) => format_zoned(out, zoned, fmt), // Assume that if we cannot build a Zoned element, the timestamp is // out of reasonable range, just print it then. 
- // TODO: The range allowed by jiff is different from what GNU accepts, - // but it still far enough in the future/past to be unlikely to matter: - // jiff: Year between -9999 to 9999 (UTC) [-377705023201..=253402207200] - // GNU: Year fits in signed 32 bits (timezone dependent) - let (mut secs, mut nsecs) = system_time_to_sec(time); - match mode { - FormatSystemTimeFallback::Integer => out.write_all(secs.to_string().as_bytes())?, - FormatSystemTimeFallback::IntegerError => { - let str = secs.to_string(); - show_error!("time '{str}' is out of range"); - out.write_all(str.as_bytes())?; - } - FormatSystemTimeFallback::Float => { - if secs < 0 && nsecs != 0 { - secs -= 1; - nsecs = 1_000_000_000 - nsecs; - } - out.write_fmt(format_args!("{secs}.{nsecs:09}"))?; + Err(_) => write_fallback_seconds(out, time, mode), + } +} + +/// Like [`format_system_time`], but when built with the `i18n-datetime` +/// feature and a non-C `LC_TIME` locale is active, rewrites locale-dependent +/// strftime directives (`%b`, `%B`, `%a`, `%A`, and `%Y`/`%m`/`%d`/`%e` for +/// non-Gregorian calendars) to their localized values before formatting. +/// For Gregorian locales, `%Y`/`%m`/`%d`/`%e` are unaffected (e.g. `en_US` +/// still renders 2025 as `2025`). +/// +/// With the feature disabled or a C/POSIX locale, this is identical to +/// `format_system_time`. +pub fn format_system_time_locale_aware( + out: &mut W, + time: SystemTime, + fmt: &str, + mode: FormatSystemTimeFallback, + padding: NamePadding, +) -> UResult<()> { + #[cfg(feature = "i18n-datetime")] + { + use crate::i18n::datetime::{localize_format_string, should_use_icu_locale}; + if should_use_icu_locale() { + if let Ok(zoned) = >::try_into(time) { + let localized = localize_format_string(fmt, zoned.date(), padding); + return format_zoned(out, zoned, &localized); } + // Out-of-range: fall through to the plain fallback below. 
} - Ok(()) } + format_system_time(out, time, fmt, mode) } #[cfg(test)] diff --git a/tests/by-util/test_date.rs b/tests/by-util/test_date.rs index 7096e2040ee..c9bde75c452 100644 --- a/tests/by-util/test_date.rs +++ b/tests/by-util/test_date.rs @@ -2224,6 +2224,67 @@ fn test_date_thai_locale_solar_calendar() { assert!(rfc_output.starts_with(¤t_year.to_string())); } +/// Regression test: `date +%B` / `+%b` / `+%A` / `+%a` must not have trailing +/// padding spaces. The ls-specific column alignment padding must not leak into +/// date output. +#[cfg(unix)] +#[test] +fn test_date_month_weekday_names_no_trailing_spaces() { + let current_year: i32 = new_ucmd!() + .env("LC_ALL", "C") + .arg("+%Y") + .succeeds() + .stdout_str() + .trim() + .parse() + .unwrap(); + + for locale in ["fr_FR.UTF-8", "th_TH.UTF-8", "fi_FI.UTF-8"] { + if !is_locale_available(locale) { + continue; + } + // Check month names (%B, %b) for all 12 months + for month in 1..=12 { + for fmt in ["+%B", "+%b"] { + let output = new_ucmd!() + .env("LC_ALL", locale) + .arg("--date") + .arg(format!("{current_year}-{month:02}-01")) + .arg(fmt) + .succeeds() + .stdout_str() + .to_string(); + let name = output.trim_end_matches('\n'); + assert_eq!( + name, + name.trim_end(), + "[{locale}] {fmt} month {month:02} has trailing spaces: {name:?}" + ); + } + } + // Check weekday names (%A, %a) for 7 consecutive days (Apr 6–12) + for day_offset in 0..7 { + let day = 6 + day_offset; + for fmt in ["+%A", "+%a"] { + let output = new_ucmd!() + .env("LC_ALL", locale) + .arg("--date") + .arg(format!("{current_year}-04-{day:02}")) + .arg(fmt) + .succeeds() + .stdout_str() + .to_string(); + let name = output.trim_end_matches('\n'); + assert_eq!( + name, + name.trim_end(), + "[{locale}] {fmt} day offset {day_offset} has trailing spaces: {name:?}" + ); + } + } + } +} + #[cfg(unix)] fn check_date(locale: &str, date: &str, fmt: &str, expected: &str) { let actual = new_ucmd!() diff --git a/tests/by-util/test_ls.rs 
b/tests/by-util/test_ls.rs index fb9a00fa2df..01cfb8064bb 100644 --- a/tests/by-util/test_ls.rs +++ b/tests/by-util/test_ls.rs @@ -3,7 +3,8 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore (words) READMECAREFULLY birthtime doesntexist oneline somebackup lrwx somefile somegroup somehiddenbackup somehiddenfile tabsize aaaaaaaa bbbb cccc dddddddd ncccc neee naaaaa nbcdef nfffff dired subdired tmpfs mdir COLORTERM mexe bcdef mfoo timefile -// spell-checker:ignore (words) fakeroot setcap drwxr bcdlps mdangling mentry awith acolons NOFILE +// spell-checker:ignore (words) fakeroot setcap drwxr bcdlps mdangling mentry awith acolons NOFILE abmon alef wcswidth + #![allow( clippy::similar_names, clippy::too_many_lines, @@ -32,6 +33,8 @@ use uutests::unwrap_or_return; use uutests::util::TestScenario; #[cfg(any(unix, feature = "feat_selinux"))] use uutests::util::expected_result; +#[cfg(unix)] +use uutests::util::is_locale_available; use uutests::{at_and_ucmd, util_name}; const LONG_ARGS: &[&str] = &[ @@ -2444,6 +2447,254 @@ fn test_ls_time_recent_future() { .stdout_contains("RECENT"); } +/// A single non-C `ls -l --time-style=locale` test case. +/// +/// Each case sets `LC_ALL` to `locale` and lists a file whose mtime is fixed +/// at 2025-03-12 (so the month field is March). The assertions describe the +/// output shape GNU `ls` produces in that locale. +/// +/// The C locale is tested separately as a simple sanity check; these cases +/// cover locales that produce genuinely localized output. +#[cfg(unix)] +struct LocaleTimeStyleCase { + /// The locale to set via `LC_ALL`. + locale: &'static str, + /// If set, stdout must contain at least one `char` inside this inclusive + /// Unicode range — used to pin the localized script (Arabic, Thai, …). + script_range: Option<(char, char)>, + /// If true, stdout must contain at least one byte `>= 0x80`. 
This is the + /// weakest localization signal and is used for non-UTF-8 locales where + /// we can't assume a specific Unicode range (ICU emits UTF-8 regardless + /// of the nominal encoding, so a high byte is still present). + require_high_byte: bool, + /// If true, the Gregorian year `2025` is allowed in stdout. When false + /// (default), the presence of `2025` is treated as a failure — this pins + /// the alternate-calendar year conversion (Persian 1403, Buddhist 2568, + /// Ethiopian 2017) and guards against regressions where only the month + /// name is localized but the year still reads `2025`. + allow_gregorian_year: bool, +} + +/// Tests for `ls -l --time-style=locale` with various locales. +/// +/// GNU `ls --time-style=locale` uses `nl_langinfo` to look up the locale's +/// month names and date format. Different locales produce substantially +/// different output (different month names, different calendars, different +/// byte encodings). This test mirrors the approach used in `test_date.rs`: +/// each locale is probed with `locale charmap`; if unavailable the case is +/// skipped (so CI without extra locales still passes). +/// +/// Locales exercised: +/// * `C` — sanity: English month, no localization +/// * `ru_RU.KOI8-R` — non-UTF-8 single-byte encoding, Russian month +/// * `fa_IR.UTF-8` — Persian calendar year (e.g. 1403) +/// * `am_ET.UTF-8` — Ethiopian calendar year (e.g. 2017) +/// * `th_TH.UTF-8` — Buddhist calendar year (e.g. 2568) +/// * `zh_CN.GB18030` — non-UTF-8 multi-byte encoding, year-first format +#[test] +#[cfg(unix)] +fn test_ls_time_style_locale() { + // 2025-03-12 00:00:00 UTC. + const MTIME_SECS: u64 = 1_741_774_800; + const C_LOCALE_OUTPUT: &str = "Mar 12 2025"; + + // Sanity: C locale produces the English month abbreviation. 
+ { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + let f = at.make_file("probe"); + f.set_modified(SystemTime::UNIX_EPOCH + Duration::from_secs(MTIME_SECS)) + .unwrap(); + + let result = scene + .ucmd() + .env("LC_ALL", "C") + .env("TZ", "UTC") + .arg("-l") + .arg("--time-style=locale") + .arg("probe") + .succeeds(); + + let stdout_lossy = String::from_utf8_lossy(result.stdout()); + assert!( + stdout_lossy.contains(C_LOCALE_OUTPUT), + "[C] expected stdout to contain {C_LOCALE_OUTPUT:?}, got: {stdout_lossy}" + ); + } + + let cases: &[LocaleTimeStyleCase] = &[ + LocaleTimeStyleCase { + locale: "ru_RU.KOI8-R", + script_range: None, + require_high_byte: true, + allow_gregorian_year: true, + }, + LocaleTimeStyleCase { + locale: "fa_IR.UTF-8", + // Persian/Arabic script: U+0600..=U+06FF + script_range: Some(('\u{0600}', '\u{06FF}')), + require_high_byte: false, + // Persian calendar: March 12 2025 → 1403, never 2025. + allow_gregorian_year: false, + }, + LocaleTimeStyleCase { + locale: "am_ET.UTF-8", + // Ethiopic script: U+1200..=U+137F + script_range: Some(('\u{1200}', '\u{137F}')), + require_high_byte: false, + // Ethiopian calendar: March 12 2025 → 2017. + allow_gregorian_year: false, + }, + LocaleTimeStyleCase { + locale: "th_TH.UTF-8", + // Thai script: U+0E00..=U+0E7F + script_range: Some(('\u{0E00}', '\u{0E7F}')), + require_high_byte: false, + // Buddhist calendar: 2025 + 543 = 2568. 
+ allow_gregorian_year: false, + }, + LocaleTimeStyleCase { + locale: "zh_CN.GB18030", + script_range: None, + require_high_byte: true, + allow_gregorian_year: true, + }, + ]; + + for case in cases { + if !is_locale_available(case.locale) { + println!("Skipping: {} locale not available", case.locale); + continue; + } + + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + let f = at.make_file("probe"); + f.set_modified(SystemTime::UNIX_EPOCH + Duration::from_secs(MTIME_SECS)) + .unwrap(); + + let result = scene + .ucmd() + .env("LC_ALL", case.locale) + .env("TZ", "UTC") + .arg("-l") + .arg("--time-style=locale") + .arg("probe") + .succeeds(); + + let bytes = result.stdout(); + let stdout_lossy = String::from_utf8_lossy(bytes); + let locale = case.locale; + + assert!( + !stdout_lossy.contains(C_LOCALE_OUTPUT), + "[{locale}] stdout should not contain {C_LOCALE_OUTPUT:?} (C-locale fallback), got: {stdout_lossy}" + ); + if let Some((lo, hi)) = case.script_range { + assert!( + stdout_lossy.chars().any(|c| (lo..=hi).contains(&c)), + "[{locale}] stdout should contain a char in U+{lo_u:04X}..=U+{hi_u:04X}, got: {stdout_lossy}", + lo_u = lo as u32, + hi_u = hi as u32 + ); + } + if case.require_high_byte { + assert!( + bytes.iter().any(|&b| b >= 0x80), + "[{locale}] stdout should contain a non-ASCII byte, got: {stdout_lossy}" + ); + } + if !case.allow_gregorian_year { + assert!( + !stdout_lossy.contains("2025"), + "[{locale}] stdout should not contain Gregorian year 2025 \ + (alternate calendar expected), got: {stdout_lossy}" + ); + } + } +} + +/// Regression test for GNU `tests/ls/abmon-align.sh`: abbreviated month +/// names must be padded to uniform display width and all be distinct. 
+#[cfg(unix)] +#[test] +fn test_ls_abmon_align() { + use std::collections::HashSet; + use unicode_width::UnicodeWidthChar; + + let filenames: Vec = (1..=12).map(|i| format!("{i:02}.ts")).collect(); + let timestamps: Vec = (1..=12) + .map(|mon| { + jiff::civil::date(2025, mon, 15) + .to_zoned(jiff::tz::TimeZone::UTC) + .unwrap() + .timestamp() + .as_second() as u64 + }) + .collect(); + + for locale in ["C", "fi_FI.UTF-8", "fr_FR.UTF-8", "ar_SY.UTF-8"] { + if !is_locale_available(locale) { + continue; + } + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + for (name, &ts) in filenames.iter().zip(timestamps.iter()) { + at.make_file(name) + .set_modified(SystemTime::UNIX_EPOCH + Duration::from_secs(ts)) + .unwrap(); + } + + let mut cmd = scene.ucmd(); + cmd.env("LC_ALL", locale) + .env("TZ", "UTC") + .args(&["-lgG", "--time-style=+%b"]); + for f in &filenames { + cmd.arg(f); + } + let stdout = String::from_utf8_lossy(cmd.succeeds().stdout()).to_string(); + + // Extract the month field from each line of `-lgG --time-style=+%b` + // output. The format is: permissions links size month filename + // We strip the filename suffix first, then split on whitespace. + let months: Vec = stdout + .lines() + .filter_map(|l| { + // Strip the " NN.ts" suffix (6 bytes) to isolate the month. + let l = &l[..l.len() - 6]; + let fields: Vec<&str> = l.splitn(4, char::is_whitespace).collect(); + + if fields.len() < 4 { + return None; + } + + Some(fields[3].to_string()) + }) + .collect(); + + // Use per-character width (not UnicodeWidthStr::width) to match + // the display_width() function used by pad_names() — the string-level + // API applies Arabic lam-alef ligature detection that glibc's wcswidth + // (and our padding code) does not. 
+ let widths: Vec = months + .iter() + .map(|m| { + m.chars() + .map(|c| UnicodeWidthChar::width(c).unwrap_or(0)) + .sum::() + }) + .collect(); + let unique: HashSet<&str> = months.iter().map(|m| m.trim_end()).collect(); + + assert_eq!(months.len(), 12, "[{locale}] expected 12 lines"); + assert!( + widths.iter().all(|&w| w == widths[0]), + "[{locale}] widths not uniform: {widths:?}\n{stdout}" + ); + assert_eq!(unique.len(), 12, "[{locale}] duplicate months\n{stdout}"); + } +} + #[test] fn test_ls_order_time() { let scene = TestScenario::new(util_name!()); diff --git a/tests/uutests/src/lib/util.rs b/tests/uutests/src/lib/util.rs index 834023e4ca7..64234abcb6c 100644 --- a/tests/uutests/src/lib/util.rs +++ b/tests/uutests/src/lib/util.rs @@ -100,16 +100,29 @@ pub fn is_ci() -> bool { env::var("CI").is_ok_and(|s| s.eq_ignore_ascii_case("true")) } -/// Check if a locale is available on the system by verifying that -/// `locale charmap` returns `"UTF-8"` when `LC_ALL` is set to the given locale. +/// Check if a locale is available on the system by verifying that `locale +/// charmap` returns the charmap implied by the locale name when `LC_ALL` is +/// set to it. +/// +/// The expected charmap is derived from the portion of the locale name after +/// the `.` (e.g. `"fa_IR.UTF-8"` → `"UTF-8"`, `"ru_RU.KOI8-R"` → `"KOI8-R"`, +/// `"zh_CN.GB18030"` → `"GB18030"`). A locale name with no suffix defaults to +/// `"UTF-8"`. This avoids false positives when the requested locale is not +/// installed and `locale` silently falls back to C (which would otherwise +/// report `"ANSI_X3.4-1968"`). #[cfg(unix)] pub fn is_locale_available(locale: &str) -> bool { use std::process::Command; + // C / POSIX are always available. 
+ if locale == "C" || locale == "POSIX" { + return true; + } + let expected = locale.split_once('.').map_or("UTF-8", |(_, enc)| enc); Command::new("locale") .env("LC_ALL", locale) .arg("charmap") .output() - .map(|o| String::from_utf8_lossy(&o.stdout).trim() == "UTF-8") + .map(|o| String::from_utf8_lossy(&o.stdout).trim() == expected) .unwrap_or(false) }