Skip to content

Commit 30541f6

Browse files
authored
Merge branch 'main' into mv-enotsup-xattr
2 parents 767ac46 + b2feb82 commit 30541f6

4 files changed

Lines changed: 192 additions & 53 deletions

File tree

src/uu/od/src/prn_float.rs

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,61 @@ pub static FORMAT_ITEM_BF16: FormatterItemInfo = FormatterItemInfo {
3737
formatter: FormatWriter::BFloatWriter(format_item_bf16),
3838
};
3939

40+
/// Reduce a formatted float to its shortest equivalent textual form.
///
/// - Leading whitespace (e.g. width padding produced by `format!`) is dropped.
/// - `nan`, `inf` and `-inf` (in any ASCII case) are returned untouched.
/// - When the mantissa contains a `.`, its trailing zeros are removed, and so
///   is a `.` that ends up last.
/// - Everything from the first `e`/`E` onwards is copied through verbatim.
/// - A mantissa reduced to nothing (or to a bare sign) gets a single `0`.
fn trim_float_repr(raw: &str) -> String {
    let unpadded = raw.trim_start();

    // Special values carry no digits that could be trimmed
    let is_special = ["nan", "inf", "-inf"]
        .iter()
        .any(|name| unpadded.eq_ignore_ascii_case(name));
    if is_special {
        return unpadded.to_string();
    }

    // Split into mantissa and exponent; the exponent keeps its `e`/`E` marker
    let (mantissa, exponent) = match unpadded.find(['e', 'E']) {
        Some(split) => unpadded.split_at(split),
        None => (unpadded, ""),
    };

    // Only a fractional mantissa may lose zeros: "100" must stay "100"
    let mantissa = if mantissa.contains('.') {
        let digits = mantissa.trim_end_matches('0');
        digits.strip_suffix('.').unwrap_or(digits)
    } else {
        mantissa
    };

    let mut cleaned = String::with_capacity(mantissa.len() + exponent.len() + 1);
    cleaned.push_str(mantissa);
    // Nothing (or only a sign) survived the trimming: restore one zero
    if matches!(mantissa, "" | "-" | "+") {
        cleaned.push('0');
    }
    cleaned.push_str(exponent);
    cleaned
}
79+
80+
/// Right-align `raw` in a field of `width` characters by prepending spaces.
///
/// The text itself is never altered or truncated: an input that is already
/// `width` characters or longer is returned unchanged.
fn pad_float_repr(raw: &str, width: usize) -> String {
    // Field width is measured in characters, not bytes
    let missing = width.saturating_sub(raw.chars().count());
    let mut padded = " ".repeat(missing);
    padded.push_str(raw);
    padded
}
87+
4088
pub fn format_item_f16(f: f64) -> String {
41-
format!(" {}", format_f16(f16::from_f64(f)))
89+
let value = f16::from_f64(f);
90+
let width = FORMAT_ITEM_F16.print_width - 1;
91+
// Format once, trim redundant zeros, then re-pad to the canonical width
92+
let raw = format_f16(value);
93+
let trimmed = trim_float_repr(&raw);
94+
format!(" {}", pad_float_repr(&trimmed, width))
4295
}
4396

4497
pub fn format_item_f32(f: f64) -> String {
@@ -82,7 +135,10 @@ fn format_f64_exp_precision(f: f64, width: usize, precision: usize) -> String {
82135

83136
pub fn format_item_bf16(f: f64) -> String {
84137
let bf = bf16::from_f32(f as f32);
85-
format!(" {}", format_binary16_like(f, 15, 8, is_subnormal_bf16(bf)))
138+
let width = FORMAT_ITEM_BF16.print_width - 1;
139+
let raw = format_binary16_like(f64::from(bf), width, 8, is_subnormal_bf16(bf));
140+
let trimmed = trim_float_repr(&raw);
141+
format!(" {}", pad_float_repr(&trimmed, width))
86142
}
87143

88144
fn format_f16(f: f16) -> String {

src/uucore/src/lib/features/checksum/validate.rs

Lines changed: 39 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,14 @@
33
// For the full copyright and license information, please view the LICENSE
44
// file that was distributed with this source code.
55

6-
// spell-checker:ignore rsplit hexdigit bitlen bytelen invalidchecksum inva idchecksum xffname
6+
// spell-checker:ignore rsplit hexdigit bitlen invalidchecksum inva idchecksum xffname
77

88
use std::borrow::Cow;
99
use std::ffi::OsStr;
1010
use std::fmt::Display;
1111
use std::fs::File;
1212
use std::io::{self, BufReader, Read, Write, stdin};
1313

14-
use data_encoding::BASE64;
1514
use os_display::Quotable;
1615

1716
use crate::checksum::{AlgoKind, ChecksumError, SizedAlgoKind, digest_reader, unescape_filename};
@@ -467,35 +466,45 @@ fn get_filename_for_output(filename: &OsStr, input_is_stdin: bool) -> String {
467466

468467
/// Extract the expected digest from the checksum string
469468
fn get_expected_digest_as_hex_string(
470-
line_info: &LineInfo,
471-
len_hint: Option<usize>,
469+
checksum: &String,
470+
byte_len_hint: Option<usize>,
472471
) -> Option<Cow<'_, str>> {
473-
let ck = &line_info.checksum;
474-
475-
let against_hint = |len| len_hint.is_none_or(|l| l == len);
476-
477-
if ck.len() % 2 != 0 {
472+
if checksum.len() % 2 != 0 {
478473
// If the length of the digest is not a multiple of 2, then it
479474
// must be improperly formatted (1 hex digit is 2 characters)
480475
return None;
481476
}
482477

483-
// If the digest can be decoded as hexadecimal AND its length matches the
484-
// one expected (in case it's given), just go with it.
485-
if ck.as_bytes().iter().all(u8::is_ascii_hexdigit) && against_hint(ck.len()) {
486-
return Some(Cow::Borrowed(ck));
478+
let checks_hint = |len| byte_len_hint.is_none_or(|hint| hint == len);
479+
480+
// If the digest can be decoded as hexadecimal AND its byte length matches
481+
// the one expected (in case it's given), just go with it.
482+
if checksum.as_bytes().iter().all(u8::is_ascii_hexdigit) && checks_hint(checksum.len() / 2) {
483+
return Some(checksum.as_str().into());
487484
}
488485

489-
// If hexadecimal digest fails for any reason, interpret the digest as base 64.
490-
BASE64
491-
.decode(ck.as_bytes()) // Decode the string as encoded base64
492-
.map(hex::encode) // Encode it back as hexadecimal
493-
.map(Cow::<str>::Owned)
494-
.ok()
495-
.and_then(|s| {
496-
// Check the digest length
497-
if against_hint(s.len()) { Some(s) } else { None }
498-
})
486+
// If hexadecimal digest fails for any reason, interpret the digest as base
487+
// 64.
488+
489+
// But first, verify the encoded checksum length, which should be a
490+
// multiple of 4.
491+
if checksum.len() % 4 != 0 {
492+
return None;
493+
}
494+
495+
// Perform the decoding and be FORGIVING about it, to allow for checksums
496+
// with invalid padding to still be decoded. This is enforced by
497+
// `test_untagged_base64_matching_tag` in `test_cksum.rs`
498+
//
499+
// TODO: Ideally, we should not re-encode the result in hexadecimal, to avoid
500+
// un-necessary computation.
501+
502+
match base64_simd::forgiving_decode_to_vec(checksum.as_bytes()) {
503+
Ok(buffer) if checks_hint(buffer.len()) => Some(hex::encode(buffer).into()),
504+
// The resulting length is not as expected
505+
Ok(_) => None,
506+
Err(_) => None,
507+
}
499508
}
500509

501510
/// Returns a reader that reads from the specified file, or from stdin if `filename_to_check` is "-".
@@ -691,12 +700,13 @@ fn process_algo_based_line(
691700
// If the digest bitlen is known, we can check the format of the expected
692701
// checksum with it.
693702
let digest_char_length_hint = match (algo_kind, algo_byte_len) {
694-
(AlgoKind::Blake2b, Some(bytelen)) => Some(bytelen * 2),
703+
(AlgoKind::Blake2b, Some(byte_len)) => Some(byte_len),
695704
_ => None,
696705
};
697706

698-
let expected_checksum = get_expected_digest_as_hex_string(line_info, digest_char_length_hint)
699-
.ok_or(LineCheckError::ImproperlyFormatted)?;
707+
let expected_checksum =
708+
get_expected_digest_as_hex_string(&line_info.checksum, digest_char_length_hint)
709+
.ok_or(LineCheckError::ImproperlyFormatted)?;
700710

701711
let algo = SizedAlgoKind::from_unsized(algo_kind, algo_byte_len)?;
702712

@@ -719,7 +729,7 @@ fn process_non_algo_based_line(
719729
// Remove the leading asterisk if present - only for the first line
720730
filename_to_check = &filename_to_check[1..];
721731
}
722-
let expected_checksum = get_expected_digest_as_hex_string(line_info, None)
732+
let expected_checksum = get_expected_digest_as_hex_string(&line_info.checksum, None)
723733
.ok_or(LineCheckError::ImproperlyFormatted)?;
724734

725735
// When a specific algorithm name is input, use it and use the provided
@@ -1173,7 +1183,7 @@ mod tests {
11731183
let mut cached_line_format = None;
11741184
let line_info = LineInfo::parse(&line, &mut cached_line_format).unwrap();
11751185

1176-
let result = get_expected_digest_as_hex_string(&line_info, None);
1186+
let result = get_expected_digest_as_hex_string(&line_info.checksum, None);
11771187

11781188
assert_eq!(
11791189
result.unwrap(),
@@ -1188,7 +1198,7 @@ mod tests {
11881198
let mut cached_line_format = None;
11891199
let line_info = LineInfo::parse(&line, &mut cached_line_format).unwrap();
11901200

1191-
let result = get_expected_digest_as_hex_string(&line_info, None);
1201+
let result = get_expected_digest_as_hex_string(&line_info.checksum, None);
11921202

11931203
assert!(result.is_none());
11941204
}

tests/by-util/test_cksum.rs

Lines changed: 65 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2458,6 +2458,71 @@ mod gnu_cksum_c {
24582458
scene
24592459
}
24602460

2461+
/// Build the scene from `make_scene` and append a comment line to its
/// `CHECKSUMS` fixture.
fn make_scene_with_comment() -> TestScenario {
    let scenario = make_scene();
    let comment_line = "# Very important comment\n";

    scenario.fixtures.append("CHECKSUMS", comment_line);

    scenario
}
2470+
2471+
/// Build the scene from `make_scene_with_comment` and append a line that is
/// not a checksum entry to its `CHECKSUMS` fixture.
fn make_scene_with_invalid_line() -> TestScenario {
    let scenario = make_scene_with_comment();
    let bogus_line = "invalid_line\n";

    scenario.fixtures.append("CHECKSUMS", bogus_line);

    scenario
}
2478+
2479+
/// A checksum file whose only line carries the `SHA2-128` tag must make
/// `--check` fail and report that no properly formatted line was found.
#[test]
fn test_tagged_invalid_length() {
    let (at, mut ucmd) = at_and_ucmd!();
    let sum_file = "sha2-bad-length.sum";

    at.write(
        sum_file,
        "SHA2-128 (/dev/null) = 38b060a751ac96384cd9327eb1b1e36a",
    );

    let outcome = ucmd.arg("--check").arg(sum_file).fails();
    outcome.stderr_contains("sha2-bad-length.sum: no properly formatted checksum lines found");
}
2493+
2494+
/// An untagged line whose digest starts with the letters `SHA1` must still be
/// processed as a checksum line: the check runs and reports a mismatch.
#[test]
#[cfg_attr(not(unix), ignore = "/dev/null is only available on UNIX")]
fn test_untagged_base64_matching_tag() {
    let (at, mut ucmd) = at_and_ucmd!();
    let sum_file = "tag-prefix.sum";

    at.write(sum_file, "SHA1+++++++++++++++++++++++= /dev/null");

    let outcome = ucmd
        .arg("--check")
        .arg("-a")
        .arg("sha1")
        .arg(sum_file)
        .fails();
    outcome.stderr_contains("WARNING: 1 computed checksum did NOT match");
}
2508+
2509+
/// Round trip on a file whose name looks like part of a tagged checksum
/// line: the output produced for it must be accepted again by `-c`.
#[test]
#[cfg_attr(windows, ignore = "Awkward filename is not supported on windows")]
fn test_awkward_filename() {
    let scenario = TestScenario::new(util_name!());
    let fixtures = &scenario.fixtures;

    let tricky_name = "abc (f) = abc";
    let sum_file = "tag-awkward.sum";

    fixtures.touch(tricky_name);

    // Generate the checksum line for the awkwardly named file...
    let generated = scenario
        .ucmd()
        .arg("-a")
        .arg("sha1")
        .arg(tricky_name)
        .succeeds();

    // ...store it verbatim, and feed it back to the checker
    fixtures.write_bytes(sum_file, generated.stdout());

    scenario.ucmd().arg("-c").arg(sum_file).succeeds();
}
2525+
24612526
#[test]
24622527
#[ignore = "todo"]
24632528
fn test_signed_checksums() {
@@ -2509,16 +2574,6 @@ mod gnu_cksum_c {
25092574
.no_output();
25102575
}
25112576

2512-
fn make_scene_with_comment() -> TestScenario {
2513-
let scene = make_scene();
2514-
2515-
scene
2516-
.fixtures
2517-
.append("CHECKSUMS", "# Very important comment\n");
2518-
2519-
scene
2520-
}
2521-
25222577
#[test]
25232578
fn test_status_with_comment() {
25242579
let scene = make_scene_with_comment();
@@ -2532,14 +2587,6 @@ mod gnu_cksum_c {
25322587
.no_output();
25332588
}
25342589

2535-
fn make_scene_with_invalid_line() -> TestScenario {
2536-
let scene = make_scene_with_comment();
2537-
2538-
scene.fixtures.append("CHECKSUMS", "invalid_line\n");
2539-
2540-
scene
2541-
}
2542-
25432590
#[test]
25442591
fn test_check_strict() {
25452592
let scene = make_scene_with_invalid_line();

tests/by-util/test_od.rs

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
// For the full copyright and license information, please view the LICENSE
44
// file that was distributed with this source code.
55

6-
// spell-checker:ignore abcdefghijklmnopqrstuvwxyz Anone fdbb littl
6+
// spell-checker:ignore abcdefghijklmnopqrstuvwxyz Anone fdbb littl bfloat
77

88
#[cfg(unix)]
99
use std::io::Read;
@@ -197,6 +197,32 @@ fn test_hex32() {
197197
.stdout_only(expected_output);
198198
}
199199

200+
// Regression: a 16-bit IEEE half must be printed in its compact form, i.e.
// without spurious trailing digits
#[test]
fn test_float16_compact() {
    // 1.0 encoded twice as a big-endian half
    let two_ones: [u8; 4] = [0x3c, 0x00, 0x3c, 0x00];

    let outcome = new_ucmd!()
        .arg("--endian=big")
        .arg("-An")
        .arg("-tfH")
        .run_piped_stdin(&two_ones[..]);

    outcome.success().stdout_only(" 1 1\n");
}
212+
213+
// Regression: a 16-bit bfloat must be printed in its compact form, i.e.
// without spurious trailing digits
#[test]
fn test_bfloat16_compact() {
    // 1.0 encoded twice as a big-endian bfloat16
    let two_ones: [u8; 4] = [0x3f, 0x80, 0x3f, 0x80];

    let outcome = new_ucmd!()
        .arg("--endian=big")
        .arg("-An")
        .arg("-tfB")
        .run_piped_stdin(&two_ones[..]);

    outcome.success().stdout_only(" 1 1\n");
}
225+
200226
#[test]
201227
fn test_f16() {
202228
let input: [u8; 14] = [
@@ -210,7 +236,7 @@ fn test_f16() {
210236
]; // 0x8400 -6.104e-5
211237
let expected_output = unindent(
212238
"
213-
0000000 1.0000000 0 -0 inf
239+
0000000 1 0 -0 inf
214240
0000010 -inf NaN -6.1035156e-5
215241
0000016
216242
",
@@ -237,7 +263,7 @@ fn test_fh() {
237263
]; // 0x8400 -6.1035156e-5
238264
let expected_output = unindent(
239265
"
240-
0000000 1.0000000 0 -0 inf
266+
0000000 1 0 -0 inf
241267
0000010 -inf NaN -6.1035156e-5
242268
0000016
243269
",
@@ -264,7 +290,7 @@ fn test_fb() {
264290
]; // -6.1035156e-5
265291
let expected_output = unindent(
266292
"
267-
0000000 1.0000000 0 -0 inf
293+
0000000 1 0 -0 inf
268294
0000010 -inf NaN -6.1035156e-5
269295
0000016
270296
",

0 commit comments

Comments
 (0)