Skip to content

Commit 9c755e6

Browse files
committed
fix(ls): use GetACP to detect UTF-8 encoding on Windows
On Windows, locale environment variables (LC_ALL, LC_COLLATE, LANG) are typically unset, causing get_locale_from_env() to default to UEncoding::Ascii. This makes non-ASCII filenames display as octal escape sequences or `?` characters in ls output. Fix this by querying the system ANSI code page via GetACP() when no locale variables are set. If the active code page is 65001 (UTF-8), use UEncoding::Utf8. This aligns with GNU coreutils' approach, which calls locale_charset() -> GetACP() on Windows. Fixes: #11103
1 parent 796cd68 commit 9c755e6

1 file changed

Lines changed: 109 additions & 0 deletions

File tree

  • src/uucore/src/lib/features/i18n

src/uucore/src/lib/features/i18n/mod.rs

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,3 +120,112 @@ pub fn get_numeric_locale() -> &'static (Locale, UEncoding) {
120120
pub fn get_locale_encoding() -> UEncoding {
121121
get_collating_locale().1
122122
}
123+
124+
#[cfg(test)]
mod tests {
    use super::*;
    use std::ffi::OsString;
    use std::sync::{Mutex, MutexGuard};

    /// Serializes environment access between the tests in this module: the
    /// process environment is global state shared by all test threads.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    /// Acquires [`ENV_LOCK`].
    ///
    /// A poisoned lock (an earlier test panicked while holding it) is recovered
    /// instead of propagated, so one failing test does not cascade into every
    /// other environment-dependent test.
    fn lock_env() -> MutexGuard<'static, ()> {
        ENV_LOCK.lock().unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    /// RAII guard that overrides (or removes) one environment variable and puts
    /// the previous state back when dropped.
    ///
    /// Restoring in `Drop` means the environment is also repaired while a
    /// failed assertion unwinds, so a failing test cannot leak its locale
    /// settings into the tests that run after it.
    struct EnvVarGuard<'a> {
        key: &'a str,
        /// Value before the override; `None` if the variable was unset.
        /// Stored as `OsString` so non-Unicode values survive the round trip.
        original: Option<OsString>,
    }

    impl<'a> EnvVarGuard<'a> {
        /// Sets `key` to `value`, remembering the previous state.
        ///
        /// The caller is expected to hold the lock returned by [`lock_env`].
        fn set(key: &'a str, value: &str) -> Self {
            let original = std::env::var_os(key);
            // SAFETY: callers hold `ENV_LOCK`, which serializes the environment
            // mutations performed by the tests in this module.
            unsafe { std::env::set_var(key, value) };
            Self { key, original }
        }

        /// Removes `key` from the environment, remembering the previous state.
        ///
        /// The caller is expected to hold the lock returned by [`lock_env`].
        fn unset(key: &'a str) -> Self {
            let original = std::env::var_os(key);
            // SAFETY: see `EnvVarGuard::set`.
            unsafe { std::env::remove_var(key) };
            Self { key, original }
        }
    }

    impl Drop for EnvVarGuard<'_> {
        fn drop(&mut self) {
            // SAFETY: guards are declared after the `ENV_LOCK` guard in every
            // caller, so they are dropped (in reverse declaration order) while
            // the lock is still held.
            match self.original.take() {
                Some(previous) => unsafe { std::env::set_var(self.key, previous) },
                None => unsafe { std::env::remove_var(self.key) },
            }
        }
    }

    /// Runs `f` with the environment variable `key` set to `value`.
    ///
    /// The previous state of `key` is restored afterwards, including when `f`
    /// panics.
    fn with_env_var(key: &str, value: &str, f: impl FnOnce()) {
        let _lock = lock_env();
        let _restore = EnvVarGuard::set(key, value);
        f();
    }

    /// Asserts that, with `LC_ALL` set to `value`, the encoding resolved for
    /// `LC_COLLATE` equals `expected`.
    fn assert_encoding_for_lc_all(value: &str, expected: UEncoding) {
        with_env_var("LC_ALL", value, || {
            let (_, encoding) = get_locale_from_env("LC_COLLATE");
            assert_eq!(encoding, expected);
        });
    }

    #[test]
    fn test_locale_c_returns_ascii() {
        assert_encoding_for_lc_all("C", UEncoding::Ascii);
    }

    #[test]
    fn test_locale_posix_returns_ascii() {
        assert_encoding_for_lc_all("POSIX", UEncoding::Ascii);
    }

    #[test]
    fn test_locale_utf8_suffix_returns_utf8() {
        assert_encoding_for_lc_all("en_US.UTF-8", UEncoding::Utf8);
    }

    #[test]
    fn test_locale_utf8_lowercase_suffix() {
        assert_encoding_for_lc_all("en_US.utf8", UEncoding::Utf8);
    }

    #[test]
    fn test_locale_non_utf8_encoding_returns_ascii() {
        assert_encoding_for_lc_all("en_US.ISO-8859-1", UEncoding::Ascii);
    }

    #[test]
    fn test_locale_no_encoding_suffix_returns_ascii() {
        assert_encoding_for_lc_all("en_US", UEncoding::Ascii);
    }

    /// With `LC_ALL` and `LC_COLLATE` both unset, the encoding must be taken
    /// from `LANG`.
    #[test]
    fn test_lang_fallback_when_lc_all_unset() {
        let _lock = lock_env();
        // Each guard restores its variable on drop, even if the assertion
        // below fails.
        let _lc_all = EnvVarGuard::unset("LC_ALL");
        let _lc_collate = EnvVarGuard::unset("LC_COLLATE");
        let _lang = EnvVarGuard::set("LANG", "en_US.UTF-8");

        let (_, encoding) = get_locale_from_env("LC_COLLATE");
        assert_eq!(encoding, UEncoding::Utf8);
    }

    /// Smoke test: the result depends on the machine's active code page, so
    /// only the set of acceptable variants is checked.
    #[test]
    #[cfg(target_os = "windows")]
    fn test_windows_encoding_returns_valid_variant() {
        let encoding = get_windows_encoding();
        assert!(
            encoding == UEncoding::Ascii || encoding == UEncoding::Utf8,
            "get_windows_encoding should return either Ascii or Utf8"
        );
    }
}

0 commit comments

Comments
 (0)