Merge branch 'main' into mv-enotsup-xattr

oech3 · web-flow · commit 30541f6e49c9 · 2025-12-02T10:39:23.000+09:00
diff --git a/src/uu/od/src/prn_float.rs b/src/uu/od/src/prn_float.rs
@@ -37,8 +37,61 @@ pub static FORMAT_ITEM_BF16: FormatterItemInfo = FormatterItemInfo {
     formatter: FormatWriter::BFloatWriter(format_item_bf16),
 };
 
+/// Clean up a normalized float string by removing unnecessary padding and digits.
+/// - Strip leading spaces.
+/// - Trim trailing zeros after the decimal point (and the dot itself if nothing follows it).
+/// - Leave the exponent part (e/E...) untouched.
+fn trim_float_repr(raw: &str) -> String {
+    // Drop padding added by `format!` width specification
+    let mut s = raw.trim_start().to_string();
+
+    // Keep NaN/Inf representations as-is
+    let lower = s.to_ascii_lowercase();
+    if lower == "nan" || lower == "inf" || lower == "-inf" {
+        return s;
+    }
+
+    // Separate exponent from mantissa
+    let mut exp_part = String::new();
+    if let Some(idx) = s.find(['e', 'E']) {
+        exp_part = s[idx..].to_string();
+        s.truncate(idx);
+    }
+
+    // Trim trailing zeros in mantissa, then remove trailing dot if left alone
+    if s.contains('.') {
+        while s.ends_with('0') {
+            s.pop();
+        }
+        if s.ends_with('.') {
+            s.pop();
+        }
+    }
+
+    // If everything was trimmed, leave a single zero
+    if s.is_empty() || s == "-" || s == "+" {
+        s.push('0');
+    }
+
+    s.push_str(&exp_part);
+    s
+}
+
+/// Right-align an already-formatted float to a fixed width for column
+/// alignment. The text is never altered or truncated, so the precision of
+/// `raw` (including trailing zeros) is kept as given. This mirrors the other
+/// float formatters (`f32`, `f64`) and keeps the output stable across platforms.
+fn pad_float_repr(raw: &str, width: usize) -> String {
+    format!("{raw:>width$}")
+}
+
 pub fn format_item_f16(f: f64) -> String {
-    format!(" {}", format_f16(f16::from_f64(f)))
+    let value = f16::from_f64(f);
+    let width = FORMAT_ITEM_F16.print_width - 1;
+    // Format once, trim redundant zeros, then re-pad to the canonical width
+    let raw = format_f16(value);
+    let trimmed = trim_float_repr(&raw);
+    format!(" {}", pad_float_repr(&trimmed, width))
 }
 
 pub fn format_item_f32(f: f64) -> String {
@@ -82,7 +135,10 @@ fn format_f64_exp_precision(f: f64, width: usize, precision: usize) -> String {
 
 pub fn format_item_bf16(f: f64) -> String {
     let bf = bf16::from_f32(f as f32);
-    format!(" {}", format_binary16_like(f, 15, 8, is_subnormal_bf16(bf)))
+    let width = FORMAT_ITEM_BF16.print_width - 1;
+    let raw = format_binary16_like(f64::from(bf), width, 8, is_subnormal_bf16(bf));
+    let trimmed = trim_float_repr(&raw);
+    format!(" {}", pad_float_repr(&trimmed, width))
 }
 
 fn format_f16(f: f16) -> String {
diff --git a/src/uucore/src/lib/features/checksum/validate.rs b/src/uucore/src/lib/features/checksum/validate.rs
@@ -3,15 +3,14 @@
 // For the full copyright and license information, please view the LICENSE
 // file that was distributed with this source code.
 
-// spell-checker:ignore rsplit hexdigit bitlen bytelen invalidchecksum inva idchecksum xffname
+// spell-checker:ignore rsplit hexdigit bitlen invalidchecksum inva idchecksum xffname
 
 use std::borrow::Cow;
 use std::ffi::OsStr;
 use std::fmt::Display;
 use std::fs::File;
 use std::io::{self, BufReader, Read, Write, stdin};
 
-use data_encoding::BASE64;
 use os_display::Quotable;
 
 use crate::checksum::{AlgoKind, ChecksumError, SizedAlgoKind, digest_reader, unescape_filename};
@@ -467,35 +466,45 @@ fn get_filename_for_output(filename: &OsStr, input_is_stdin: bool) -> String {
 
 /// Extract the expected digest from the checksum string
 fn get_expected_digest_as_hex_string(
-    line_info: &LineInfo,
-    len_hint: Option<usize>,
+    checksum: &String,
+    byte_len_hint: Option<usize>,
 ) -> Option<Cow<'_, str>> {
-    let ck = &line_info.checksum;
-
-    let against_hint = |len| len_hint.is_none_or(|l| l == len);
-
-    if ck.len() % 2 != 0 {
+    if checksum.len() % 2 != 0 {
         // If the length of the digest is not a multiple of 2, then it
         // must be improperly formatted (1 hex digit is 2 characters)
         return None;
     }
 
-    // If the digest can be decoded as hexadecimal AND its length matches the
-    // one expected (in case it's given), just go with it.
-    if ck.as_bytes().iter().all(u8::is_ascii_hexdigit) && against_hint(ck.len()) {
-        return Some(Cow::Borrowed(ck));
+    let checks_hint = |len| byte_len_hint.is_none_or(|hint| hint == len);
+
+    // If the digest can be decoded as hexadecimal AND its byte length matches
+    // the one expected (in case it's given), just go with it.
+    if checksum.as_bytes().iter().all(u8::is_ascii_hexdigit) && checks_hint(checksum.len() / 2) {
+        return Some(checksum.as_str().into());
     }
 
-    // If hexadecimal digest fails for any reason, interpret the digest as base 64.
-    BASE64
-        .decode(ck.as_bytes()) // Decode the string as encoded base64
-        .map(hex::encode) // Encode it back as hexadecimal
-        .map(Cow::<str>::Owned)
-        .ok()
-        .and_then(|s| {
-            // Check the digest length
-            if against_hint(s.len()) { Some(s) } else { None }
-        })
+    // If the digest cannot be used as hexadecimal for any reason, interpret it
+    // as base64.
+
+    // But first, verify the encoded checksum length, which should be a
+    // multiple of 4.
+    if checksum.len() % 4 != 0 {
+        return None;
+    }
+
+    // Perform the decoding and be FORGIVING about it, to allow for checksums
+    // with invalid padding to still be decoded. This is enforced by
+    // `test_untagged_base64_matching_tag` in `test_cksum.rs`
+    //
+    // TODO: Ideally, we should not re-encode the result in hexadecimal, to avoid
+    // unnecessary computation.
+
+    match base64_simd::forgiving_decode_to_vec(checksum.as_bytes()) {
+        Ok(buffer) if checks_hint(buffer.len()) => Some(hex::encode(buffer).into()),
+        // The resulting length is not as expected
+        Ok(_) => None,
+        Err(_) => None,
+    }
 }
 
 /// Returns a reader that reads from the specified file, or from stdin if `filename_to_check` is "-".
@@ -691,12 +700,13 @@ fn process_algo_based_line(
     // If the digest bitlen is known, we can check the format of the expected
     // checksum with it.
     let digest_char_length_hint = match (algo_kind, algo_byte_len) {
-        (AlgoKind::Blake2b, Some(bytelen)) => Some(bytelen * 2),
+        (AlgoKind::Blake2b, Some(byte_len)) => Some(byte_len),
         _ => None,
     };
 
-    let expected_checksum = get_expected_digest_as_hex_string(line_info, digest_char_length_hint)
-        .ok_or(LineCheckError::ImproperlyFormatted)?;
+    let expected_checksum =
+        get_expected_digest_as_hex_string(&line_info.checksum, digest_char_length_hint)
+            .ok_or(LineCheckError::ImproperlyFormatted)?;
 
     let algo = SizedAlgoKind::from_unsized(algo_kind, algo_byte_len)?;
 
@@ -719,7 +729,7 @@ fn process_non_algo_based_line(
         // Remove the leading asterisk if present - only for the first line
         filename_to_check = &filename_to_check[1..];
     }
-    let expected_checksum = get_expected_digest_as_hex_string(line_info, None)
+    let expected_checksum = get_expected_digest_as_hex_string(&line_info.checksum, None)
         .ok_or(LineCheckError::ImproperlyFormatted)?;
 
     // When a specific algorithm name is input, use it and use the provided
@@ -1173,7 +1183,7 @@ mod tests {
         let mut cached_line_format = None;
         let line_info = LineInfo::parse(&line, &mut cached_line_format).unwrap();
 
-        let result = get_expected_digest_as_hex_string(&line_info, None);
+        let result = get_expected_digest_as_hex_string(&line_info.checksum, None);
 
         assert_eq!(
             result.unwrap(),
@@ -1188,7 +1198,7 @@ mod tests {
         let mut cached_line_format = None;
         let line_info = LineInfo::parse(&line, &mut cached_line_format).unwrap();
 
-        let result = get_expected_digest_as_hex_string(&line_info, None);
+        let result = get_expected_digest_as_hex_string(&line_info.checksum, None);
 
         assert!(result.is_none());
     }
diff --git a/tests/by-util/test_cksum.rs b/tests/by-util/test_cksum.rs
@@ -2458,6 +2458,71 @@ mod gnu_cksum_c {
         scene
     }
 
+    fn make_scene_with_comment() -> TestScenario {
+        let scene = make_scene();
+
+        scene
+            .fixtures
+            .append("CHECKSUMS", "# Very important comment\n");
+
+        scene
+    }
+
+    fn make_scene_with_invalid_line() -> TestScenario {
+        let scene = make_scene_with_comment();
+
+        scene.fixtures.append("CHECKSUMS", "invalid_line\n");
+
+        scene
+    }
+
+    #[test]
+    fn test_tagged_invalid_length() {
+        let (at, mut ucmd) = at_and_ucmd!();
+
+        at.write(
+            "sha2-bad-length.sum",
+            "SHA2-128 (/dev/null) = 38b060a751ac96384cd9327eb1b1e36a",
+        );
+
+        ucmd.arg("--check")
+            .arg("sha2-bad-length.sum")
+            .fails()
+            .stderr_contains("sha2-bad-length.sum: no properly formatted checksum lines found");
+    }
+
+    #[test]
+    #[cfg_attr(not(unix), ignore = "/dev/null is only available on UNIX")]
+    fn test_untagged_base64_matching_tag() {
+        let (at, mut ucmd) = at_and_ucmd!();
+
+        at.write("tag-prefix.sum", "SHA1+++++++++++++++++++++++=  /dev/null");
+
+        ucmd.arg("--check")
+            .arg("-a")
+            .arg("sha1")
+            .arg("tag-prefix.sum")
+            .fails()
+            .stderr_contains("WARNING: 1 computed checksum did NOT match");
+    }
+
+    #[test]
+    #[cfg_attr(windows, ignore = "Awkward filename is not supported on windows")]
+    fn test_awkward_filename() {
+        let ts = TestScenario::new(util_name!());
+        let at = &ts.fixtures;
+
+        let awkward_file = "abc (f) = abc";
+
+        at.touch(awkward_file);
+
+        let result = ts.ucmd().arg("-a").arg("sha1").arg(awkward_file).succeeds();
+
+        at.write_bytes("tag-awkward.sum", result.stdout());
+
+        ts.ucmd().arg("-c").arg("tag-awkward.sum").succeeds();
+    }
+
     #[test]
     #[ignore = "todo"]
     fn test_signed_checksums() {
@@ -2509,16 +2574,6 @@ mod gnu_cksum_c {
             .no_output();
     }
 
-    fn make_scene_with_comment() -> TestScenario {
-        let scene = make_scene();
-
-        scene
-            .fixtures
-            .append("CHECKSUMS", "# Very important comment\n");
-
-        scene
-    }
-
     #[test]
     fn test_status_with_comment() {
         let scene = make_scene_with_comment();
@@ -2532,14 +2587,6 @@ mod gnu_cksum_c {
             .no_output();
     }
 
-    fn make_scene_with_invalid_line() -> TestScenario {
-        let scene = make_scene_with_comment();
-
-        scene.fixtures.append("CHECKSUMS", "invalid_line\n");
-
-        scene
-    }
-
     #[test]
     fn test_check_strict() {
         let scene = make_scene_with_invalid_line();
diff --git a/tests/by-util/test_od.rs b/tests/by-util/test_od.rs
@@ -3,7 +3,7 @@
 // For the full copyright and license information, please view the LICENSE
 // file that was distributed with this source code.
 
-// spell-checker:ignore abcdefghijklmnopqrstuvwxyz Anone fdbb littl
+// spell-checker:ignore abcdefghijklmnopqrstuvwxyz Anone fdbb littl bfloat
 
 #[cfg(unix)]
 use std::io::Read;
@@ -197,6 +197,32 @@ fn test_hex32() {
         .stdout_only(expected_output);
 }
 
+// Regression: 16-bit IEEE half should print with canonical precision (no spurious digits)
+#[test]
+fn test_float16_compact() {
+    let input: [u8; 4] = [0x3c, 0x00, 0x3c, 0x00]; // two times 1.0 in big-endian half
+    new_ucmd!()
+        .arg("--endian=big")
+        .arg("-An")
+        .arg("-tfH")
+        .run_piped_stdin(&input[..])
+        .success()
+        .stdout_only("               1               1\n");
+}
+
+// Regression: 16-bit bfloat should print with canonical precision (no spurious digits)
+#[test]
+fn test_bfloat16_compact() {
+    let input: [u8; 4] = [0x3f, 0x80, 0x3f, 0x80]; // two times 1.0 in big-endian bfloat16
+    new_ucmd!()
+        .arg("--endian=big")
+        .arg("-An")
+        .arg("-tfB")
+        .run_piped_stdin(&input[..])
+        .success()
+        .stdout_only("               1               1\n");
+}
+
 #[test]
 fn test_f16() {
     let input: [u8; 14] = [
@@ -210,7 +236,7 @@ fn test_f16() {
     ]; // 0x8400 -6.104e-5
     let expected_output = unindent(
         "
-            0000000       1.0000000               0              -0             inf
+            0000000               1               0              -0             inf
             0000010            -inf             NaN   -6.1035156e-5
             0000016
             ",
@@ -237,7 +263,7 @@ fn test_fh() {
     ]; // 0x8400 -6.1035156e-5
     let expected_output = unindent(
         "
-            0000000       1.0000000               0              -0             inf
+            0000000               1               0              -0             inf
             0000010            -inf             NaN   -6.1035156e-5
             0000016
         ",
@@ -264,7 +290,7 @@ fn test_fb() {
     ]; // -6.1035156e-5
     let expected_output = unindent(
         "
-            0000000       1.0000000               0              -0             inf
+            0000000               1               0              -0             inf
             0000010            -inf             NaN   -6.1035156e-5
             0000016
         ",