Skip to content

Commit 0ea2617

Browse files
committed
ls: add tests for --time-style=locale across locales
1 parent c409373 commit 0ea2617

File tree

1 file changed

+241
-0
lines changed

1 file changed

+241
-0
lines changed

tests/by-util/test_ls.rs

Lines changed: 241 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2444,6 +2444,247 @@ fn test_ls_time_recent_future() {
24442444
.stdout_contains("RECENT");
24452445
}
24462446

2447+
/// Check whether a locale (possibly non-UTF-8) is available by asking `locale charmap`.
///
/// The probe runs `locale charmap` with `LC_ALL` set to `locale` and returns
/// `true` only when the locale appears to have actually been activated:
///
/// * if `locale` names a codeset (`language[_territory].codeset[@modifier]`),
///   the reported charmap must match that codeset (compared case-insensitively
///   and ignoring punctuation, so `utf8` matches `UTF-8`);
/// * otherwise the command must not have written anything to stderr.
///
/// Exit status and non-empty output alone are not sufficient: glibc's `locale`
/// still exits successfully and prints the fallback charmap when the requested
/// locale is not installed, only emitting a warning on stderr.
///
/// Returns `false` when the `locale` binary cannot be spawned.
#[cfg(unix)]
fn is_any_locale_available(locale: &str) -> bool {
    use std::process::Command;

    /// Reduce a codeset name to upper-case ASCII alphanumerics so that
    /// spellings such as `UTF-8`, `utf8` and `UTF_8` compare equal.
    fn normalize_codeset(name: &str) -> String {
        name.chars()
            .filter(|c| c.is_ascii_alphanumeric())
            .map(|c| c.to_ascii_uppercase())
            .collect()
    }

    let output = match Command::new("locale")
        .env("LC_ALL", locale)
        .arg("charmap")
        .output()
    {
        Ok(output) => output,
        // The `locale` tool is missing or could not be started.
        Err(_) => return false,
    };

    let charmap = String::from_utf8_lossy(&output.stdout);
    let charmap = charmap.trim();
    if !output.status.success() || charmap.is_empty() {
        return false;
    }

    // Extract the `codeset` part of `language[_territory][.codeset][@modifier]`.
    let requested_codeset = locale
        .split_once('.')
        .map(|(_, rest)| rest.split('@').next().unwrap_or(rest));

    match requested_codeset {
        // A fallback to the C locale reports a different charmap than requested.
        Some(codeset) => normalize_codeset(charmap) == normalize_codeset(codeset),
        // No codeset to compare against: rely on the absence of warnings.
        None => output.stderr.is_empty(),
    }
}
2458+
2459+
/// Tests for `ls -l --time-style=locale` with various locales.
///
/// GNU `ls --time-style=locale` uses `nl_langinfo` to look up the locale's
/// month names and date format. Different locales produce substantially
/// different output (different month names, different calendars, different
/// byte encodings). These tests mirror the approach used in `test_date.rs`:
/// each locale is probed with `locale charmap`; if unavailable the test is
/// skipped (so CI without extra locales still passes).
///
/// Locales exercised:
/// * `ru_RU.KOI8-R` — non-UTF-8 single-byte encoding
/// * `fa_IR.UTF-8` — Persian calendar year (e.g. 1403)
/// * `am_ET.UTF-8` — Ethiopian calendar year
/// * `th_TH.UTF-8` — Buddhist calendar year (e.g. 2568)
/// * `zh_CN.GB18030` — non-UTF-8 multi-byte encoding, year-first format
///
/// This first case covers `ru_RU.KOI8-R`: the listing must contain non-ASCII
/// bytes and must not contain the English month abbreviation.
#[test]
#[ignore = "uutils ls does not yet honor LC_TIME for --time-style=locale"]
#[cfg(unix)]
fn test_ls_time_style_locale_ru_koi8r() {
    if !is_any_locale_available("ru_RU.KOI8-R") {
        println!("Skipping: ru_RU.KOI8-R locale not available");
        return;
    }
    let scene = TestScenario::new(util_name!());
    let at = &scene.fixtures;
    let f = at.make_file("ru_test");
    // Fixed mtime: 2025-03-12 10:20:00 UTC (so the month field is March)
    f.set_modified(SystemTime::UNIX_EPOCH + Duration::from_secs(1_741_774_800))
        .unwrap();

    let result = scene
        .ucmd()
        .env("LC_ALL", "ru_RU.KOI8-R")
        .env("TZ", "UTC")
        .arg("-l")
        .arg("--time-style=locale")
        .arg("ru_test")
        .succeeds();

    // The Russian abbreviation for March ("мар") is encoded in KOI8-R as the
    // bytes 0xCD 0xC1 0xD2, all outside the ASCII range. The file name is
    // plain ASCII, so any high byte in the listing comes from the date field.
    let bytes = result.stdout();
    let stdout_lossy = String::from_utf8_lossy(bytes);
    assert!(
        bytes.iter().any(|&b| b >= 0x80),
        "ru_RU.KOI8-R should produce a localized (non-ASCII) month, got: {stdout_lossy}"
    );
    // The output must not be the plain "Mar" that the C locale would emit.
    // Both conditions are required, matching the GB18030 test.
    assert!(
        !stdout_lossy.split_whitespace().any(|field| field == "Mar"),
        "ru_RU.KOI8-R should not produce the C-locale month, got: {stdout_lossy}"
    );
}
2509+
2510+
/// `ls -l --time-style=locale` under `fa_IR.UTF-8`: the timestamp must be
/// rendered in Arabic/Persian script and must not be the C-locale date.
///
/// Skipped (with a message) when the locale is not installed.
#[test]
#[ignore = "uutils ls does not yet honor LC_TIME for --time-style=locale"]
#[cfg(unix)]
fn test_ls_time_style_locale_fa_ir() {
    if !is_any_locale_available("fa_IR.UTF-8") {
        println!("Skipping: fa_IR.UTF-8 locale not available");
        return;
    }
    let scene = TestScenario::new(util_name!());
    let at = &scene.fixtures;
    let f = at.make_file("fa_test");
    // 2025-03-12 -> Persian year 1403 (until ~March 20 2025).
    f.set_modified(SystemTime::UNIX_EPOCH + Duration::from_secs(1_741_774_800))
        .unwrap();

    let result = scene
        .ucmd()
        .env("LC_ALL", "fa_IR.UTF-8")
        .env("TZ", "UTC")
        .arg("-l")
        .arg("--time-style=locale")
        .arg("fa_test")
        .succeeds();

    let stdout = result.stdout_str();
    // GNU outputs Arabic/Persian script and the Persian calendar year 1403.
    // The plain C-locale date (Mar 12 2025) must not appear. Compare
    // whitespace-separated fields rather than one literal, because the long
    // format pads the day and year columns with a varying number of spaces.
    let fields: Vec<&str> = stdout.split_whitespace().collect();
    assert!(
        !fields.windows(3).any(|w| w == ["Mar", "12", "2025"]),
        "fa_IR.UTF-8 should not produce C-locale output, got: {stdout}"
    );
    // Persian/Arabic script codepoints are in U+0600..=U+06FF
    let has_arabic_script = stdout
        .chars()
        .any(|c| ('\u{0600}'..='\u{06FF}').contains(&c));
    assert!(
        has_arabic_script,
        "fa_IR.UTF-8 output should contain Arabic/Persian script, got: {stdout}"
    );
}
2550+
2551+
/// `ls -l --time-style=locale` under `am_ET.UTF-8`: the timestamp must be
/// rendered in Ethiopic script and must not be the C-locale date.
///
/// Skipped (with a message) when the locale is not installed.
#[test]
#[ignore = "uutils ls does not yet honor LC_TIME for --time-style=locale"]
#[cfg(unix)]
fn test_ls_time_style_locale_am_et() {
    if !is_any_locale_available("am_ET.UTF-8") {
        println!("Skipping: am_ET.UTF-8 locale not available");
        return;
    }
    let scene = TestScenario::new(util_name!());
    let at = &scene.fixtures;
    let f = at.make_file("am_test");
    // Fixed mtime: 2025-03-12 10:20:00 UTC.
    f.set_modified(SystemTime::UNIX_EPOCH + Duration::from_secs(1_741_774_800))
        .unwrap();

    let result = scene
        .ucmd()
        .env("LC_ALL", "am_ET.UTF-8")
        .env("TZ", "UTC")
        .arg("-l")
        .arg("--time-style=locale")
        .arg("am_test")
        .succeeds();

    let stdout = result.stdout_str();
    // The plain C-locale date (Mar 12 2025) must not appear. Compare
    // whitespace-separated fields rather than one literal, because the long
    // format pads the day and year columns with a varying number of spaces.
    let fields: Vec<&str> = stdout.split_whitespace().collect();
    assert!(
        !fields.windows(3).any(|w| w == ["Mar", "12", "2025"]),
        "am_ET.UTF-8 should not produce C-locale output, got: {stdout}"
    );
    // Ethiopic script codepoints are in U+1200..=U+137F
    let has_ethiopic = stdout
        .chars()
        .any(|c| ('\u{1200}'..='\u{137F}').contains(&c));
    assert!(
        has_ethiopic,
        "am_ET.UTF-8 output should contain Ethiopic script, got: {stdout}"
    );
}
2588+
2589+
/// `ls -l --time-style=locale` under `th_TH.UTF-8`: the timestamp must be
/// rendered in Thai script and must not be the C-locale date.
///
/// Skipped (with a message) when the locale is not installed.
#[test]
#[ignore = "uutils ls does not yet honor LC_TIME for --time-style=locale"]
#[cfg(unix)]
fn test_ls_time_style_locale_th_th() {
    if !is_any_locale_available("th_TH.UTF-8") {
        println!("Skipping: th_TH.UTF-8 locale not available");
        return;
    }
    let scene = TestScenario::new(util_name!());
    let at = &scene.fixtures;
    let f = at.make_file("th_test");
    // Fixed mtime: 2025-03-12 10:20:00 UTC.
    f.set_modified(SystemTime::UNIX_EPOCH + Duration::from_secs(1_741_774_800))
        .unwrap();

    let result = scene
        .ucmd()
        .env("LC_ALL", "th_TH.UTF-8")
        .env("TZ", "UTC")
        .arg("-l")
        .arg("--time-style=locale")
        .arg("th_test")
        .succeeds();

    let stdout = result.stdout_str();
    // The plain C-locale date (Mar 12 2025) must not appear. Compare
    // whitespace-separated fields rather than one literal, because the long
    // format pads the day and year columns with a varying number of spaces.
    let fields: Vec<&str> = stdout.split_whitespace().collect();
    assert!(
        !fields.windows(3).any(|w| w == ["Mar", "12", "2025"]),
        "th_TH.UTF-8 should not produce C-locale output, got: {stdout}"
    );
    // Thai script codepoints are in U+0E00..=U+0E7F
    let has_thai = stdout
        .chars()
        .any(|c| ('\u{0E00}'..='\u{0E7F}').contains(&c));
    assert!(
        has_thai,
        "th_TH.UTF-8 output should contain Thai script, got: {stdout}"
    );
}
2626+
2627+
/// `ls -l --time-style=locale` under `zh_CN.GB18030`: the listing must contain
/// non-ASCII bytes and must not be the C-locale date.
///
/// The output is inspected as raw bytes because GB18030 is not UTF-8.
/// Skipped (with a message) when the locale is not installed.
#[test]
#[ignore = "uutils ls does not yet honor LC_TIME for --time-style=locale"]
#[cfg(unix)]
fn test_ls_time_style_locale_zh_cn_gb18030() {
    if !is_any_locale_available("zh_CN.GB18030") {
        println!("Skipping: zh_CN.GB18030 locale not available");
        return;
    }
    let scene = TestScenario::new(util_name!());
    let at = &scene.fixtures;
    let f = at.make_file("zh_test");
    // Fixed mtime: 2025-03-12 10:20:00 UTC.
    f.set_modified(SystemTime::UNIX_EPOCH + Duration::from_secs(1_741_774_800))
        .unwrap();

    let result = scene
        .ucmd()
        .env("LC_ALL", "zh_CN.GB18030")
        .env("TZ", "UTC")
        .arg("-l")
        .arg("--time-style=locale")
        .arg("zh_test")
        .succeeds();

    // GB18030 is not UTF-8; output is expected to contain bytes >= 0x80.
    let bytes = result.stdout();
    let has_high_byte = bytes.iter().any(|&b| b >= 0x80);
    let stdout_lossy = String::from_utf8_lossy(bytes);
    assert!(
        has_high_byte,
        "zh_CN.GB18030 output should contain non-ASCII bytes, got: {stdout_lossy}"
    );
    // Must not fall back to the plain C-locale date (Mar 12 2025). Compare
    // whitespace-separated fields rather than one literal, because the long
    // format pads the day and year columns with a varying number of spaces.
    let fields: Vec<&str> = stdout_lossy.split_whitespace().collect();
    assert!(
        !fields.windows(3).any(|w| w == ["Mar", "12", "2025"]),
        "zh_CN.GB18030 should not produce C-locale output, got: {stdout_lossy}"
    );
}
2664+
2665+
/// Sanity check: the C locale still produces the English month abbreviation
/// with `--time-style=locale`, regardless of whether any of the other locales
/// exercised by the `--time-style=locale` tests are installed.
///
/// The date is matched as three consecutive whitespace-separated fields
/// (`Mar`, `12`, `2025`) so the check does not depend on how many spaces the
/// long format uses to pad the day and year columns.
#[test]
#[cfg(unix)]
fn test_ls_time_style_locale_c() {
    let scene = TestScenario::new(util_name!());
    let at = &scene.fixtures;
    let f = at.make_file("c_test");
    // Fixed mtime: 2025-03-12 10:20:00 UTC.
    f.set_modified(SystemTime::UNIX_EPOCH + Duration::from_secs(1_741_774_800))
        .unwrap();

    let result = scene
        .ucmd()
        .env("LC_ALL", "C")
        .env("TZ", "UTC")
        .arg("-l")
        .arg("--time-style=locale")
        .arg("c_test")
        .succeeds();

    let stdout = result.stdout_str();
    let fields: Vec<&str> = stdout.split_whitespace().collect();
    assert!(
        fields.windows(3).any(|w| w == ["Mar", "12", "2025"]),
        "C locale should produce the English date `Mar 12 2025`, got: {stdout}"
    );
}
2687+
24472688
#[test]
24482689
fn test_ls_order_time() {
24492690
let scene = TestScenario::new(util_name!());

0 commit comments

Comments
 (0)