Skip to content

Commit 52b811d

Browse files
committed
sort: support l10n sorting
1 parent 32eef06 commit 52b811d

4 files changed

Lines changed: 28 additions & 3 deletions

File tree

src/uu/sort/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ self_cell = { workspace = true }
3434
tempfile = { workspace = true }
3535
thiserror = { workspace = true }
3636
unicode-width = { workspace = true }
37-
uucore = { workspace = true, features = ["fs", "parser", "version-cmp"] }
37+
uucore = { workspace = true, features = ["fs", "parser", "version-cmp", "i18n-collator"] }
3838
fluent = { workspace = true }
3939

4040
[target.'cfg(target_os = "linux")'.dependencies]

src/uu/sort/src/custom_str_cmp.rs

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
//! The goal is to compare strings without transforming them first (i.e. not allocating new strings)
99
1010
use std::cmp::Ordering;
11+
use uucore::i18n::collator::locale_cmp;
1112

1213
fn filter_char(c: u8, ignore_non_printing: bool, ignore_non_dictionary: bool) -> bool {
1314
if ignore_non_dictionary && !(c.is_ascii_alphanumeric() || c.is_ascii_whitespace()) {
@@ -35,8 +36,8 @@ pub fn custom_str_cmp(
3536
ignore_case: bool,
3637
) -> Ordering {
3738
if !(ignore_case || ignore_non_dictionary || ignore_non_printing) {
38-
// There are no custom settings. Fall back to the default strcmp, which is faster.
39-
return a.cmp(b);
39+
// There are no custom settings. Fall back to locale-aware comparison.
40+
return locale_cmp(a, b);
4041
}
4142
let mut a_chars = a
4243
.iter()

src/uu/sort/src/sort.rs

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,7 @@ use uucore::error::{FromIo, strip_errno};
4646
use uucore::error::{UError, UResult, USimpleError, UUsageError};
4747
use uucore::extendedbigdecimal::ExtendedBigDecimal;
4848
use uucore::format_usage;
49+
use uucore::i18n::collator::CollatorOptions;
4950
use uucore::line_ending::LineEnding;
5051
use uucore::parser::num_parser::{ExtendedParser, ExtendedParserError};
5152
use uucore::parser::parse_size::{ParseSizeError, Parser};
@@ -1040,6 +1041,9 @@ const STDIN_FILE: &str = "-";
10401041
#[uucore::main]
10411042
#[allow(clippy::cognitive_complexity)]
10421043
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
1044+
// Initialize locale-aware collator for string comparisons
1045+
uucore::i18n::collator::try_init_collator(CollatorOptions::default());
1046+
10431047
let mut settings = GlobalSettings::default();
10441048

10451049
let matches = uucore::clap_localization::handle_clap_result_with_exit_code(uu_app(), args, 2)?;

tests/by-util/test_sort.rs

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1906,4 +1906,24 @@ fn test_color_environment_variables() {
19061906
}
19071907
}
19081908

1909+
#[test]
1910+
fn test_locale_sorting_c() {
1911+
// Test LC_ALL=C locale sorting (should sort by byte values)
1912+
new_ucmd!()
1913+
.env("LC_ALL", "C")
1914+
.pipe_in("a\no\nu\nä\nö\nü\n")
1915+
.succeeds()
1916+
.stdout_is("a\no\nu\nä\nö\nü\n");
1917+
}
1918+
1919+
#[test]
1920+
fn test_locale_sorting_german() {
1921+
// Test LC_ALL=de_DE.utf-8 locale sorting (should respect German collation)
1922+
new_ucmd!()
1923+
.env("LC_ALL", "de_DE.utf-8")
1924+
.pipe_in("a\no\nu\nä\nö\nü\n")
1925+
.succeeds()
1926+
.stdout_is("a\nä\no\nö\nu\nü\n");
1927+
}
1928+
19091929
/* spell-checker: enable */

0 commit comments

Comments
 (0)