|
9 | 9 | //! |
10 | 10 | //! Format sizes like gnulib's human_readable() would |
11 | 11 |
|
| 12 | +use std::sync::OnceLock; |
12 | 13 | use unit_prefix::NumberPrefix; |
13 | 14 |
|
| 15 | +#[cfg(feature = "i18n-decimal")] |
| 16 | +use icu_decimal::provider::DecimalSymbolsV1; |
| 17 | +#[cfg(feature = "i18n-decimal")] |
| 18 | +use icu_locale::Locale; |
| 19 | +#[cfg(feature = "i18n-decimal")] |
| 20 | +use icu_provider::prelude::*; |
| 21 | + |
14 | 22 | #[derive(Copy, Clone, PartialEq)] |
15 | 23 | pub enum SizeFormat { |
16 | 24 | Bytes, |
@@ -59,36 +67,80 @@ pub fn human_readable(size: u64, sfmt: SizeFormat) -> String { |
59 | 67 | /// - `'\0'` for C/POSIX locale (no separator) |
60 | 68 | /// - `'.'` for European locales (de_DE, fr_FR, it_IT, es_ES, etc.) |
61 | 69 | /// - `','` for other locales (default, en_US style) |
| 70 | +/// Get the thousands/grouping separator for the given locale using ICU |
| 71 | +#[cfg(feature = "i18n-decimal")] |
| 72 | +fn get_grouping_separator_from_icu(loc: Locale) -> char { |
| 73 | + let data_locale = DataLocale::from(loc); |
| 74 | + |
| 75 | + let request = DataRequest { |
| 76 | + id: DataIdentifierBorrowed::for_locale(&data_locale), |
| 77 | + metadata: DataRequestMetadata::default(), |
| 78 | + }; |
| 79 | + |
| 80 | + let response: DataResponse<DecimalSymbolsV1> = |
| 81 | + icu_decimal::provider::Baked.load(request).unwrap(); |
| 82 | + |
| 83 | + let symbols = response.payload.get(); |
| 84 | + let grouping_str = symbols.grouping_separator(); |
| 85 | + |
| 86 | + // Convert the grouping separator string to a char |
| 87 | + // ICU returns a string, but we need a char. Take the first char or default to comma. |
| 88 | + grouping_str.chars().next().unwrap_or(',') |
| 89 | +} |
| 90 | + |
| 91 | +/// Get the cached thousands separator based on the current locale |
62 | 92 | fn get_thousands_separator() -> char { |
63 | | - // Try to read LC_NUMERIC or LANG environment variable |
64 | | - if let Ok(locale) = std::env::var("LC_NUMERIC") |
65 | | - .or_else(|_| std::env::var("LC_ALL")) |
66 | | - .or_else(|_| std::env::var("LANG")) |
67 | | - { |
68 | | - // C and POSIX locales have no thousands separator |
69 | | - if locale == "C" || locale == "POSIX" || locale.starts_with("C.") { |
70 | | - return '\0'; |
71 | | - } |
| 93 | + static THOUSANDS_SEP: OnceLock<char> = OnceLock::new(); |
72 | 94 |
|
73 | | - // Simple heuristic: European locales use period, others use comma |
74 | | - // This covers common cases like de_DE, fr_FR, it_IT, es_ES, nl_NL, etc. |
75 | | - if locale.starts_with("de_") |
76 | | - || locale.starts_with("fr_") |
77 | | - || locale.starts_with("it_") |
78 | | - || locale.starts_with("es_") |
79 | | - || locale.starts_with("nl_") |
80 | | - || locale.starts_with("pt_") |
81 | | - || locale.starts_with("da_") |
82 | | - || locale.starts_with("sv_") |
83 | | - || locale.starts_with("no_") |
84 | | - || locale.starts_with("fi_") |
| 95 | + *THOUSANDS_SEP.get_or_init(|| { |
| 96 | + #[cfg(feature = "i18n-decimal")] |
85 | 97 | { |
86 | | - return '.'; |
| 98 | + use crate::i18n::get_numeric_locale; |
| 99 | + let (locale, _encoding) = get_numeric_locale(); |
| 100 | + |
| 101 | + // C and POSIX locales have no thousands separator |
| 102 | + // The default locale from i18n is en-US-posix for C/POSIX |
| 103 | + if locale.to_string().contains("posix") { |
| 104 | + return '\0'; |
| 105 | + } |
| 106 | + |
| 107 | + get_grouping_separator_from_icu(locale.clone()) |
87 | 108 | } |
88 | | - } |
| 109 | + |
| 110 | + #[cfg(not(feature = "i18n-decimal"))] |
| 111 | + { |
| 112 | + // Fallback implementation when i18n-decimal feature is disabled |
| 113 | + // Try to read the locale from LC_NUMERIC, then LC_ALL, then LANG (first one that is set wins) |
| 114 | + if let Ok(locale) = std::env::var("LC_NUMERIC") |
| 115 | + .or_else(|_| std::env::var("LC_ALL")) |
| 116 | + .or_else(|_| std::env::var("LANG")) |
| 117 | + { |
| 118 | + // C and POSIX locales have no thousands separator |
| 119 | + if locale == "C" || locale == "POSIX" || locale.starts_with("C.") { |
| 120 | + return '\0'; |
| 121 | + } |
| 122 | + |
| 123 | + // Simple heuristic: European locales use period, others use comma |
| 124 | + // This covers common cases like de_DE, fr_FR, it_IT, es_ES, nl_NL, etc. |
| 125 | + if locale.starts_with("de_") |
| 126 | + || locale.starts_with("fr_") |
| 127 | + || locale.starts_with("it_") |
| 128 | + || locale.starts_with("es_") |
| 129 | + || locale.starts_with("nl_") |
| 130 | + || locale.starts_with("pt_") |
| 131 | + || locale.starts_with("da_") |
| 132 | + || locale.starts_with("sv_") |
| 133 | + || locale.starts_with("no_") |
| 134 | + || locale.starts_with("fi_") |
| 135 | + { |
| 136 | + return '.'; |
| 137 | + } |
| 138 | + } |
89 | 139 |
|
90 | | - // Default to comma (en_US style) |
91 | | - ',' |
| 140 | + // Default to comma (en_US style) |
| 141 | + ',' |
| 142 | + } |
| 143 | + }) |
92 | 144 | } |
93 | 145 |
|
94 | 146 | /// Format a number with thousands separators based on LC_NUMERIC locale. |
|
0 commit comments