Skip to content

Commit 3effba3

Browse files
committed
fix(wc): keep WASI default locale UTF-8 compatible
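WASI does not provide an ambient locale, so treating a missing `LC_CTYPE`/`LANG` as C/POSIX makes `wc -m` count bytes by default. Preserve the existing UTF-8 behavior for WASI while keeping the POSIX fallback on platforms with native locale environments. Note that on WASI the C/POSIX check now always returns false, even when a `C` or `POSIX` locale value is forwarded.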
1 parent 5402711 commit 3effba3

2 files changed

Lines changed: 20 additions & 4 deletions

File tree

src/uucore/src/lib/features/i18n/charmap.rs

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,24 @@ fn get_effective_ctype_locale() -> Option<String> {
4141

4242
/// Return whether the effective `LC_CTYPE` locale is the byte-oriented C/POSIX locale.
4343
///
44-
/// A missing effective locale defaults to POSIX behavior. Only exact `C` and
45-
/// `POSIX` locale values are treated as explicit C/POSIX locales; locales such
46-
/// as `C.UTF-8` are not.
44+
/// WASI has no native locale environment, so it keeps the existing
45+
/// UTF-8-compatible behavior regardless of forwarded locale variables.
46+
#[cfg(target_os = "wasi")]
4747
pub fn is_effective_ctype_c_or_posix() -> bool {
48-
get_effective_ctype_locale().is_none_or(|locale| locale == "C" || locale == "POSIX")
48+
false
49+
}
50+
51+
/// Return whether the effective `LC_CTYPE` locale is the byte-oriented C/POSIX locale.
52+
///
53+
/// A missing effective locale defaults to POSIX behavior on platforms with a
54+
/// native locale environment; only exact `C`/`POSIX` match, not e.g. `C.UTF-8`.
55+
#[cfg(not(target_os = "wasi"))]
56+
pub fn is_effective_ctype_c_or_posix() -> bool {
57+
match get_effective_ctype_locale().as_deref() {
58+
Some("C" | "POSIX") => true,
59+
Some(_) => false,
60+
None => true,
61+
}
4962
}
5063

5164
fn get_encoding() -> &'static MbEncoding {

tests/by-util/test_wc.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -983,6 +983,7 @@ fn test_posixly_correct_whitespace() {
983983
}
984984

985985
#[test]
986+
#[cfg_attr(wasi_runner, ignore = "WASI has no native C/POSIX locale")]
986987
fn test_wc_chars_c_locale() {
987988
// In C/POSIX locale, wc -m should count bytes, not UTF-8 characters
988989
// Vietnamese "Tiếng Việt" uses diacritics (2 bytes per char in UTF-8)
@@ -1057,6 +1058,7 @@ fn test_wc_chars_utf8_locale() {
10571058
}
10581059

10591060
#[test]
1061+
#[cfg_attr(wasi_runner, ignore = "WASI has no native C/POSIX locale")]
10601062
fn test_wc_chars_default_locale() {
10611063
// When no locale is set (empty LC_ALL), it defaults to POSIX (chars == bytes)
10621064
// This ensures backward compatibility
@@ -1084,6 +1086,7 @@ fn test_wc_chars_default_locale() {
10841086
}
10851087

10861088
#[test]
1089+
#[cfg_attr(wasi_runner, ignore = "WASI has no native C/POSIX locale")]
10871090
fn test_wc_multibyte_c_locale() {
10881091
// Issue #9712 and #5831: Test various multibyte characters in C locale
10891092
// All should be counted as bytes

0 commit comments

Comments
 (0)