Skip to content

Commit 8b92113

Browse files
committed
uucore/human: cache locale and use ICU for thousands separator (#9090)
1 parent f729c74 commit 8b92113

1 file changed

Lines changed: 77 additions & 25 deletions

File tree

  • src/uucore/src/lib/features/format

src/uucore/src/lib/features/format/human.rs

Lines changed: 77 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,16 @@
99
//!
1010
//! Format sizes like gnulib's human_readable() would
1111
12+
use std::sync::OnceLock;
1213
use unit_prefix::NumberPrefix;
1314

15+
#[cfg(feature = "i18n-decimal")]
16+
use icu_decimal::provider::DecimalSymbolsV1;
17+
#[cfg(feature = "i18n-decimal")]
18+
use icu_locale::Locale;
19+
#[cfg(feature = "i18n-decimal")]
20+
use icu_provider::prelude::*;
21+
1422
#[derive(Copy, Clone, PartialEq)]
1523
pub enum SizeFormat {
1624
Bytes,
@@ -59,36 +67,80 @@ pub fn human_readable(size: u64, sfmt: SizeFormat) -> String {
5967
/// - `'\0'` for C/POSIX locale (no separator)
6068
/// - `'.'` for European locales (de_DE, fr_FR, it_IT, es_ES, etc.)
6169
/// - `','` for other locales (default, en_US style)
70+
/// Get the thousands/grouping separator for the given locale using ICU.
///
/// Requests the decimal symbols for `loc` from `icu_decimal`'s `Baked`
/// provider and returns the first character of its grouping separator.
///
/// Returns `','` (en_US style) when the provider cannot supply data for the
/// locale or when the grouping separator is an empty string, so a locale that
/// ICU does not know about degrades gracefully instead of panicking.
#[cfg(feature = "i18n-decimal")]
fn get_grouping_separator_from_icu(loc: Locale) -> char {
    // Separator used whenever ICU yields nothing usable for the locale.
    const DEFAULT_SEPARATOR: char = ',';

    let data_locale = DataLocale::from(loc);

    let request = DataRequest {
        id: DataIdentifierBorrowed::for_locale(&data_locale),
        metadata: DataRequestMetadata::default(),
    };

    // A failed lookup must not abort the calling utility: this value is only
    // used for cosmetic digit grouping, so fall back to the default instead.
    let response: DataResponse<DecimalSymbolsV1> =
        match icu_decimal::provider::Baked.load(request) {
            Ok(response) => response,
            Err(_) => return DEFAULT_SEPARATOR,
        };

    // ICU returns a string, but callers need a single char: take the first
    // one, or the default if the string is empty.
    response
        .payload
        .get()
        .grouping_separator()
        .chars()
        .next()
        .unwrap_or(DEFAULT_SEPARATOR)
}
90+
91+
/// Get the cached thousands separator based on the current locale.
///
/// The separator is computed on the first call and stored in a `OnceLock`;
/// every later call returns the stored value, so changes to the environment
/// after the first call are not observed.
///
/// Returns:
/// - `'\0'` for C/POSIX locales (no separator)
/// - with the `i18n-decimal` feature: the grouping separator ICU reports for
///   the numeric locale
/// - without it: `'.'` for a fixed list of European language prefixes
///   (de_, fr_, it_, es_, nl_, pt_, da_, sv_, no_, fi_) and `','` otherwise
///   (default, en_US style)
fn get_thousands_separator() -> char {
    static THOUSANDS_SEP: OnceLock<char> = OnceLock::new();

    *THOUSANDS_SEP.get_or_init(|| {
        #[cfg(feature = "i18n-decimal")]
        {
            use crate::i18n::get_numeric_locale;
            let (locale, _encoding) = get_numeric_locale();

            // C and POSIX locales have no thousands separator
            // The default locale from i18n is en-US-posix for C/POSIX
            if locale.to_string().contains("posix") {
                return '\0';
            }

            get_grouping_separator_from_icu(locale.clone())
        }

        #[cfg(not(feature = "i18n-decimal"))]
        {
            // Fallback implementation when i18n-decimal feature is disabled.

            // Variables in POSIX precedence order: LC_ALL overrides the
            // category-specific LC_NUMERIC, which overrides LANG.
            const LOCALE_VARS: [&str; 3] = ["LC_ALL", "LC_NUMERIC", "LANG"];

            // Simple heuristic: these language prefixes are treated as using a
            // period for grouping (de_DE, fr_FR, it_IT, es_ES, nl_NL, etc.).
            const PERIOD_PREFIXES: [&str; 10] = [
                "de_", "fr_", "it_", "es_", "nl_", "pt_", "da_", "sv_", "no_", "fi_",
            ];

            // First variable that is set to a non-empty value; an empty value
            // is treated the same as an unset variable.
            let locale = LOCALE_VARS
                .iter()
                .find_map(|name| std::env::var(name).ok().filter(|value| !value.is_empty()));

            match locale.as_deref() {
                // C and POSIX locales have no thousands separator
                Some(name) if name == "C" || name == "POSIX" || name.starts_with("C.") => '\0',
                Some(name) if PERIOD_PREFIXES.iter().any(|prefix| name.starts_with(prefix)) => '.',
                // Default to comma (en_US style), also when no locale is set
                _ => ',',
            }
        }
    })
}
93145

94146
/// Format a number with thousands separators based on LC_NUMERIC locale.

0 commit comments

Comments
 (0)