Skip to content

Commit 6b49ff9

Browse files
authored
sort: gnu coreutils compatibility (sort float.sh) (#9839)
* feat(sort): support international decimal separators in numeric sorting
1 parent d806231 commit 6b49ff9

4 files changed

Lines changed: 93 additions & 8 deletions

File tree

fuzz/Cargo.lock

Lines changed: 34 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/uu/sort/Cargo.toml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,12 @@ self_cell = { workspace = true }
3434
tempfile = { workspace = true }
3535
thiserror = { workspace = true }
3636
unicode-width = { workspace = true }
37-
uucore = { workspace = true, features = ["fs", "parser-size", "version-cmp"] }
37+
uucore = { workspace = true, features = [
38+
"fs",
39+
"parser-size",
40+
"version-cmp",
41+
"i18n-decimal",
42+
] }
3843
fluent = { workspace = true }
3944

4045
[target.'cfg(unix)'.dependencies]

src/uu/sort/src/sort.rs

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ use uucore::error::{FromIo, strip_errno};
4747
use uucore::error::{UError, UResult, USimpleError, UUsageError};
4848
use uucore::extendedbigdecimal::ExtendedBigDecimal;
4949
use uucore::format_usage;
50+
use uucore::i18n::decimal::locale_decimal_separator;
5051
use uucore::line_ending::LineEnding;
5152
use uucore::parser::num_parser::{ExtendedParser, ExtendedParserError};
5253
use uucore::parser::parse_size::{ParseSizeError, Parser};
@@ -106,6 +107,14 @@ mod options {
106107

107108
const DECIMAL_PT: u8 = b'.';
108109

110+
/// Returns the single byte to treat as the decimal point for general-numeric parsing.
///
/// Looks at the first byte of the string returned by `locale_decimal_separator()`:
/// `.` and `,` are returned as-is; anything else (including an empty separator)
/// falls back to `DECIMAL_PT`.
fn locale_decimal_pt() -> u8 {
111+
match locale_decimal_separator().as_bytes().first().copied() {
112+
Some(b'.') => b'.',
113+
Some(b',') => b',',
114+
// Any other leading byte, or no byte at all, uses the default decimal point.
// NOTE(review): presumably this also covers multi-byte separators, which
// cannot be represented by a single `u8` — confirm against the i18n helper.
_ => DECIMAL_PT,
115+
}
116+
}
117+
109118
const NEGATIVE: &u8 = &b'-';
110119
const POSITIVE: &u8 = &b'+';
111120

@@ -683,8 +692,8 @@ impl<'a> Line<'a> {
683692
}
684693
SortMode::GeneralNumeric => {
685694
let initial_selection = &self.line[selection.clone()];
686-
687-
let leading = get_leading_gen(initial_selection);
695+
let decimal_pt = locale_decimal_pt();
696+
let leading = get_leading_gen(initial_selection, decimal_pt);
688697

689698
// Shorten selection to leading.
690699
selection.start += leading.start;
@@ -1072,7 +1081,11 @@ impl FieldSelector {
10721081
Selection::WithNumInfo(range_str, info)
10731082
} else if self.settings.mode == SortMode::GeneralNumeric {
10741083
// Parse this number as BigDecimal, as this is the requirement for general numeric sorting.
1075-
Selection::AsBigDecimal(general_bd_parse(&range_str[get_leading_gen(range_str)]))
1084+
let decimal_pt = locale_decimal_pt();
1085+
Selection::AsBigDecimal(general_bd_parse(
1086+
&range_str[get_leading_gen(range_str, decimal_pt)],
1087+
decimal_pt,
1088+
))
10761089
} else {
10771090
// This is not a numeric sort, so we don't need a NumCache.
10781091
Selection::Str(range_str)
@@ -2491,7 +2504,7 @@ fn ascii_case_insensitive_cmp(a: &[u8], b: &[u8]) -> Ordering {
24912504
// scientific notation, so we strip those lines only after the end of the following numeric string.
24922505
// For example, 5e10KFD would be 5e10 or 5x10^10 and +10000HFKJFK would become 10000.
24932506
#[allow(clippy::cognitive_complexity)]
2494-
fn get_leading_gen(inp: &[u8]) -> Range<usize> {
2507+
fn get_leading_gen(inp: &[u8], decimal_pt: u8) -> Range<usize> {
24952508
let trimmed = inp.trim_ascii_start();
24962509
let leading_whitespace_len = inp.len() - trimmed.len();
24972510

@@ -2529,7 +2542,7 @@ fn get_leading_gen(inp: &[u8]) -> Range<usize> {
25292542
continue;
25302543
}
25312544

2532-
if c == DECIMAL_PT && !had_decimal_pt && !had_e_notation {
2545+
if c == decimal_pt && !had_decimal_pt && !had_e_notation {
25332546
had_decimal_pt = true;
25342547
continue;
25352548
}
@@ -2572,9 +2585,16 @@ pub enum GeneralBigDecimalParseResult {
25722585
/// Parse the beginning string into a [`GeneralBigDecimalParseResult`].
25732586
/// Using a [`GeneralBigDecimalParseResult`] instead of [`ExtendedBigDecimal`] is necessary to correctly order floats.
25742587
#[inline(always)]
2575-
fn general_bd_parse(a: &[u8]) -> GeneralBigDecimalParseResult {
2588+
fn general_bd_parse(a: &[u8], decimal_pt: u8) -> GeneralBigDecimalParseResult {
2589+
let parsed_bytes = (decimal_pt != DECIMAL_PT).then(|| {
2590+
a.iter()
2591+
.map(|&b| if b == decimal_pt { DECIMAL_PT } else { b })
2592+
.collect::<Vec<_>>()
2593+
});
2594+
let input = parsed_bytes.as_deref().unwrap_or(a);
2595+
25762596
// The input must be valid UTF-8 to be parsed.
2577-
let Ok(a) = std::str::from_utf8(a) else {
2597+
let Ok(a) = std::str::from_utf8(input) else {
25782598
return GeneralBigDecimalParseResult::Invalid;
25792599
};
25802600

tests/by-util/test_sort.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1631,6 +1631,32 @@ fn test_g_float() {
16311631
.stdout_is(output);
16321632
}
16331633

1634+
// Checks the output of `sort -g --stable` when `LC_ALL` is set to the locale
// named by the `LOCALE_FR_UTF8` environment variable.
#[test]
1635+
fn test_g_float_locale_decimal_separator() {
1636+
// Return early (the test passes vacuously) when LOCALE_FR_UTF8 cannot be read.
let Ok(locale_fr_utf8) = env::var("LOCALE_FR_UTF8") else {
1637+
return;
1638+
};
1639+
// Also return early when it is set to the literal "none".
// NOTE(review): presumably the harness's marker for "no French UTF-8 locale
// installed" — confirm against the CI setup.
if locale_fr_utf8 == "none" {
1640+
return;
1641+
}
1642+
1643+
let ts = TestScenario::new("sort");
1644+
1645+
// Comma-separated input: the expected output lists "1,10" before "1,9",
// i.e. the reverse of the input order.
// NOTE(review): consistent with ',' being read as the decimal separator
// (1.10 < 1.9) under this locale — confirm.
ts.ucmd()
1646+
.env("LC_ALL", &locale_fr_utf8)
1647+
.args(&["-g", "--stable"])
1648+
.pipe_in("1,9\n1,10\n")
1649+
.succeeds()
1650+
.stdout_is("1,10\n1,9\n");
1651+
1652+
// Dot-separated input under the same locale: the expected output lists
// "1.10" before "1.9".
ts.ucmd()
1653+
.env("LC_ALL", &locale_fr_utf8)
1654+
.args(&["-g", "--stable"])
1655+
.pipe_in("1.9\n1.10\n")
1656+
.succeeds()
1657+
.stdout_is("1.10\n1.9\n");
1658+
}
1659+
16341660
#[test]
16351661
// Test misc numbers ("'a" is not interpreted as literal, trailing text is ignored...)
16361662
fn test_g_misc() {

0 commit comments

Comments
 (0)