Skip to content

Commit 67480dd

Browse files
authored
Merge branch 'main' into fold
2 parents d59d98e + bc58d3d commit 67480dd

9 files changed

Lines changed: 128 additions & 57 deletions

File tree

.vscode/cspell.dictionaries/shell.wordlist.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ flamegraph
101101
flamegraphs
102102
gcov
103103
gmake
104+
gnuls
104105
grcov
105106
grep
106107
markdownlint

docs/src/performance.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ hyperfine \
4141
"{ls} -R ."
4242
```
4343

44+
For Ubuntu 25.10 and other distributions that use uutils by default, replace `bin/ls` with `bin/gnuls`. Also:
45+
4446
```
4547
# to improve the reproducibility of the results:
4648
taskset -c 0

src/uu/cut/locales/en-US.ftl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,6 @@ cut-error-delimiter-and-whitespace-conflict = invalid input: Only one of --delim
109109
cut-error-delimiter-must-be-single-character = the delimiter must be a single character
110110
cut-error-multiple-mode-args = invalid usage: expects no more than one of --fields (-f), --chars (-c) or --bytes (-b)
111111
cut-error-missing-mode-arg = invalid usage: expects one of --fields (-f), --chars (-c) or --bytes (-b)
112-
cut-error-delimiter-only-with-fields = invalid input: The '--delimiter' ('-d') option only usable if printing a sequence of fields
113-
cut-error-whitespace-only-with-fields = invalid input: The '-w' option only usable if printing a sequence of fields
114-
cut-error-only-delimited-only-with-fields = invalid input: The '--only-delimited' ('-s') option only usable if printing a sequence of fields
112+
cut-error-delimiter-only-with-fields = invalid input: The '--delimiter' ('-d') option can only be used when printing a sequence of fields
113+
cut-error-whitespace-only-with-fields = invalid input: The '-w' option can only be used when printing a sequence of fields
114+
cut-error-only-delimited-only-with-fields = invalid input: The '--only-delimited' ('-s') option can only be used when printing a sequence of fields

src/uu/pr/src/pr.rs

Lines changed: 24 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ struct FileLine {
9191
file_id: usize,
9292
page_number: usize,
9393
line_number: usize,
94-
line_content: String,
94+
line_content: Vec<u8>,
9595
}
9696

9797
impl FileLine {
@@ -101,29 +101,24 @@ impl FileLine {
101101
line_number: usize,
102102
buf: &[u8],
103103
options: &OutputOptions,
104-
) -> Result<Self, PrError> {
105-
// TODO Don't read bytes to String just to directly write them
106-
// out again anyway.
104+
) -> Self {
107105
let line_content = if let Some(expand_tabs) = &options.expand_tabs {
108-
// Anticipate a few expandable chars to reduce reallocations
109-
let mut line_content =
110-
String::with_capacity(buf.len() + buf.len() / 20 * expand_tabs.width as usize);
111-
// validate utf correctness
112-
let s = std::str::from_utf8(buf)?;
113-
for b in s.as_bytes() {
114-
apply_expand_tab(&mut line_content, *b, expand_tabs);
106+
let mut result =
107+
Vec::with_capacity(buf.len() + buf.len() / 20 * expand_tabs.width as usize);
108+
for b in buf {
109+
apply_expand_tab(&mut result, *b, expand_tabs);
115110
}
116-
line_content
111+
result
117112
} else {
118-
String::from_utf8(buf.to_vec())?
113+
buf.to_vec()
119114
};
120115

121-
Ok(Self {
116+
Self {
122117
file_id,
123118
page_number,
124119
line_number,
125120
line_content,
126-
})
121+
}
127122
}
128123
}
129124

@@ -858,24 +853,24 @@ fn read_to_end(path: &str) -> Result<Vec<u8>, std::io::Error> {
858853
}
859854
}
860855

861-
fn apply_expand_tab(chunk: &mut String, byte: u8, expand_options: &ExpandTabsOptions) {
856+
fn apply_expand_tab(chunk: &mut Vec<u8>, byte: u8, expand_options: &ExpandTabsOptions) {
862857
if byte == expand_options.input_char as u8 {
863858
// If the byte encountered is the input char we use width to calculate
864859
// the amount of spaces needed (if no input char given we stored '\t'
865860
// in our struct)
866861
let spaces_needed =
867862
expand_options.width as usize - (chunk.len() % expand_options.width as usize);
868-
chunk.extend(std::iter::repeat_n(' ', spaces_needed));
863+
chunk.extend(std::iter::repeat_n(b' ', spaces_needed));
869864
} else if byte == TAB as u8 {
870865
// If a byte got passed to the -e flag (e.g. -ea1) which is not '\t', GNU
871866
// still expands it but does not use an optionally given width parameter
872867
// but does the '\t' expansion with the default value (8)
873868
let spaces_needed = 8 - (chunk.len() % 8);
874-
chunk.extend(std::iter::repeat_n(' ', spaces_needed));
869+
chunk.extend(std::iter::repeat_n(b' ', spaces_needed));
875870
} else {
876871
// This arm means the byte is neither '\t' nor the bytes to be
877872
// expanded
878-
chunk.push(byte as char);
873+
chunk.push(byte);
879874
}
880875
}
881876

@@ -885,7 +880,7 @@ fn pr(path: &str, options: &OutputOptions) -> Result<i32, PrError> {
885880
// TODO Read incrementally.
886881
let buf = read_to_end(path)?;
887882

888-
let pages = get_pages(options, 0, &buf)?;
883+
let pages = get_pages(options, 0, &buf);
889884

890885
// Split the text into pages, and then print each line in each page.
891886
for page_with_page_number in pages {
@@ -901,14 +896,7 @@ fn pr(path: &str, options: &OutputOptions) -> Result<i32, PrError> {
901896
///
902897
/// Returns a list of the form `(page_num, lines)`.
903898
///
904-
/// # Errors
905-
///
906-
/// Returns an error if the bytes are not a valid UTF-8 string.
907-
fn get_pages(
908-
options: &OutputOptions,
909-
file_id: usize,
910-
buf: &[u8],
911-
) -> Result<Vec<(usize, Vec<FileLine>)>, PrError> {
899+
fn get_pages(options: &OutputOptions, file_id: usize, buf: &[u8]) -> Vec<(usize, Vec<FileLine>)> {
912900
let start_page = options.start_page;
913901
let end_page = options.end_page;
914902
let lines_needed_per_page = lines_to_read_for_page(options);
@@ -944,7 +932,7 @@ fn get_pages(
944932
// `\f` as its own line; instead ignore the empty line.
945933
} else {
946934
let file_line =
947-
FileLine::from_buf(file_id, page_num, line_num, &buf[prev..i], options)?;
935+
FileLine::from_buf(file_id, page_num, line_num, &buf[prev..i], options);
948936
page.push(file_line);
949937
}
950938

@@ -970,7 +958,7 @@ fn get_pages(
970958
// `\n` as its own line; instead ignore the empty line.
971959
} else {
972960
let file_line =
973-
FileLine::from_buf(file_id, page_num, line_num, &buf[prev..i], options)?;
961+
FileLine::from_buf(file_id, page_num, line_num, &buf[prev..i], options);
974962
page.push(file_line);
975963
line_num += 1;
976964
}
@@ -992,7 +980,7 @@ fn get_pages(
992980

993981
// Consider all trailing bytes as the last line.
994982
if prev < buf.len() {
995-
let file_line = FileLine::from_buf(file_id, page_num, line_num, &buf[prev..], options)?;
983+
let file_line = FileLine::from_buf(file_id, page_num, line_num, &buf[prev..], options);
996984
page.push(file_line);
997985
}
998986

@@ -1001,7 +989,7 @@ fn get_pages(
1001989
pages.push((page_num, page.clone()));
1002990
}
1003991

1004-
Ok(pages)
992+
pages
1005993
}
1006994

1007995
/// Key used to group lines together according to their file and page number.
@@ -1056,7 +1044,7 @@ fn get_file_line_groups(
10561044

10571045
// Split the text into pages and collect each line for
10581046
// subsequent grouping.
1059-
for (_, mut page) in get_pages(options, file_id, &buf)? {
1047+
for (_, mut page) in get_pages(options, file_id, &buf) {
10601048
all_lines.append(&mut page);
10611049
}
10621050
}
@@ -1291,7 +1279,9 @@ fn get_line_for_printing(
12911279
let blank_line = String::new();
12921280
let formatted_line_number = get_formatted_line_number(options, file_line.line_number, index);
12931281

1294-
let mut complete_line = format!("{formatted_line_number}{}", file_line.line_content);
1282+
// TODO: support non-UTF-8 bytes (currently replaced with U+FFFD)
1283+
let content = String::from_utf8_lossy(&file_line.line_content);
1284+
let mut complete_line = format!("{formatted_line_number}{content}");
12951285

12961286
let offset_spaces = &options.offset_spaces;
12971287

src/uucore/src/lib/features/checksum/mod.rs

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -250,14 +250,15 @@ pub enum SizedAlgoKind {
250250
Sha3(ShaLength),
251251
// Note: we store Blake2b's length as BYTES.
252252
Blake2b(Option<usize>),
253+
// Shake* lengths are stored in bits.
253254
Shake128(Option<usize>),
254255
Shake256(Option<usize>),
255256
}
256257

257258
impl SizedAlgoKind {
258-
pub fn from_unsized(kind: AlgoKind, byte_length: Option<usize>) -> UResult<Self> {
259+
pub fn from_unsized(kind: AlgoKind, output_length: Option<usize>) -> UResult<Self> {
259260
use AlgoKind as ak;
260-
match (kind, byte_length) {
261+
match (kind, output_length) {
261262
(
262263
ak::Sysv
263264
| ak::Bsd
@@ -305,19 +306,26 @@ impl SizedAlgoKind {
305306
}
306307

307308
pub fn to_tag(self) -> String {
308-
use SizedAlgoKind::*;
309309
match self {
310-
Md5 => "MD5".into(),
311-
Sm3 => "SM3".into(),
312-
Sha1 => "SHA1".into(),
313-
Blake3 => "BLAKE3".into(),
314-
Sha2(len) => format!("SHA{}", len.as_usize()),
315-
Sha3(len) => format!("SHA3-{}", len.as_usize()),
316-
Blake2b(Some(byte_len)) => format!("BLAKE2b-{}", byte_len * 8),
317-
Blake2b(None) => "BLAKE2b".into(),
318-
Shake128(_) => "SHAKE128".into(),
319-
Shake256(_) => "SHAKE256".into(),
320-
Sysv | Bsd | Crc | Crc32b => panic!("Should not be used for tagging"),
310+
Self::Md5 => "MD5".into(),
311+
Self::Sm3 => "SM3".into(),
312+
Self::Sha1 => "SHA1".into(),
313+
Self::Blake3 => "BLAKE3".into(),
314+
Self::Sha2(len) => format!("SHA{}", len.as_usize()),
315+
Self::Sha3(len) => format!("SHA3-{}", len.as_usize()),
316+
Self::Blake2b(Some(byte_len)) => format!("BLAKE2b-{}", byte_len * 8),
317+
Self::Blake2b(None) => "BLAKE2b".into(),
318+
Self::Shake128(opt_bit_len) => format!(
319+
"SHAKE128-{}",
320+
opt_bit_len.unwrap_or(Shake128::DEFAULT_BIT_SIZE)
321+
),
322+
Self::Shake256(opt_bit_len) => format!(
323+
"SHAKE256-{}",
324+
opt_bit_len.unwrap_or(Shake256::DEFAULT_BIT_SIZE)
325+
),
326+
Self::Sysv | Self::Bsd | Self::Crc | Self::Crc32b => {
327+
panic!("Should not be used for tagging")
328+
}
321329
}
322330
}
323331

src/uucore/src/lib/features/checksum/validate.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use crate::checksum::{
1919
};
2020
use crate::error::{FromIo, UError, UIoError, UResult, USimpleError};
2121
use crate::quoting_style::{QuotingStyle, locale_aware_escape_name};
22-
use crate::sum::DigestOutput;
22+
use crate::sum::{self, DigestOutput};
2323
use crate::{
2424
os_str_as_bytes, os_str_from_bytes, read_os_string_lines, show, show_warning_caps, translate,
2525
};
@@ -643,6 +643,7 @@ fn identify_algo_name_and_length(
643643
AlgoKind::Sha2 | AlgoKind::Sha3 if [224, 256, 384, 512].contains(&bitlen) => {
644644
Some(bitlen)
645645
}
646+
AlgoKind::Shake128 | AlgoKind::Shake256 => Some(bitlen),
646647
// Either
647648
// the algo based line is provided with a bit length
648649
// with an algorithm that does not support it (only Blake2b, SHA2, SHA3 and SHAKE do).
@@ -741,6 +742,9 @@ fn process_algo_based_line(
741742
// checksum with it.
742743
let digest_char_length_hint = match (algo_kind, algo_byte_len) {
743744
(AlgoKind::Blake2b, Some(byte_len)) => Some(byte_len),
745+
(AlgoKind::Shake128 | AlgoKind::Shake256, Some(bit_len)) => Some(bit_len.div_ceil(8)),
746+
(AlgoKind::Shake128, None) => Some(sum::Shake128::DEFAULT_BIT_SIZE.div_ceil(8)),
747+
(AlgoKind::Shake256, None) => Some(sum::Shake256::DEFAULT_BIT_SIZE.div_ceil(8)),
744748
_ => None,
745749
};
746750

tests/by-util/test_cksum.rs

Lines changed: 68 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3150,13 +3150,19 @@ fn test_check_checkfile_with_io_error() {
31503150
"ac"
31513151
)]
31523152
fn test_shake128(#[case] args: &[&str], #[case] expected: &str) {
3153+
let bit_len = if args.is_empty() || args[1] == "0" {
3154+
"256"
3155+
} else {
3156+
args[1]
3157+
};
3158+
31533159
new_ucmd!()
31543160
.arg("-a")
31553161
.arg("shake128")
31563162
.args(args)
31573163
.pipe_in("xxx")
31583164
.succeeds()
3159-
.stdout_only(format!("SHAKE128 (-) = {expected}\n"));
3165+
.stdout_only(format!("SHAKE128-{bit_len} (-) = {expected}\n"));
31603166
}
31613167

31623168
#[rstest]
@@ -3213,11 +3219,71 @@ fn test_shake128(#[case] args: &[&str], #[case] expected: &str) {
32133219
"2f"
32143220
)]
32153221
fn test_shake256(#[case] args: &[&str], #[case] expected: &str) {
3222+
let bit_len = if args.is_empty() || args[1] == "0" {
3223+
"512"
3224+
} else {
3225+
args[1]
3226+
};
3227+
32163228
new_ucmd!()
32173229
.arg("-a")
32183230
.arg("shake256")
32193231
.args(args)
32203232
.pipe_in("xxx")
32213233
.succeeds()
3222-
.stdout_only(format!("SHAKE256 (-) = {expected}\n"));
3234+
.stdout_only(format!("SHAKE256-{bit_len} (-) = {expected}\n"));
3235+
}
3236+
3237+
#[test]
3238+
fn test_check_shake128_no_length() {
3239+
const INPUT_SHAKE128_CORRECT_LEN: &str =
3240+
"SHAKE128 (bar) = ac8549b2861a151896ab721bd29d7a20c1a3d1f75b31266f786f20d963fb0fdf";
3241+
const INPUT_SHAKE128_WRONG_LEN: &str = "SHAKE128 (bar) = ac8549b2861a151896ab721bd29d7a20";
3242+
3243+
let scene = TestScenario::new(util_name!());
3244+
let at = &scene.fixtures;
3245+
3246+
at.write("bar", "xxx");
3247+
3248+
scene
3249+
.ucmd()
3250+
.arg("-a")
3251+
.arg("shake128")
3252+
.arg("-c")
3253+
.pipe_in(INPUT_SHAKE128_CORRECT_LEN)
3254+
.succeeds();
3255+
3256+
scene
3257+
.ucmd()
3258+
.arg("-a")
3259+
.arg("shake128")
3260+
.arg("-c")
3261+
.pipe_in(INPUT_SHAKE128_WRONG_LEN)
3262+
.fails()
3263+
.stderr_only("cksum: 'standard input': no properly formatted checksum lines found\n");
3264+
}
3265+
3266+
#[test]
3267+
fn test_check_shake256_no_length() {
3268+
const INPUT_SHAKE256_CORRECT_LEN: &str = "SHAKE256 (bar) = 2fa631503c3ea5fe85131dbfa24805185474740e6dcb5f2a64f69d932bcb55f7b24958f3e3c4cc0e71f1fe6f054cd3fb28b9efb62b4f8f3fbe6d50d90f5c6eba";
3269+
const INPUT_SHAKE256_WRONG_LEN: &str =
3270+
"SHAKE256 (bar) = 2fa631503c3ea5fe85131dbfa24805185474740e6dcb5f2a64f69d932bcb55f7";
3271+
3272+
let scene = TestScenario::new(util_name!());
3273+
let at = &scene.fixtures;
3274+
3275+
at.write("bar", "xxx");
3276+
3277+
scene
3278+
.ucmd()
3279+
.arg("-c")
3280+
.pipe_in(INPUT_SHAKE256_CORRECT_LEN)
3281+
.succeeds();
3282+
3283+
scene
3284+
.ucmd()
3285+
.arg("-c")
3286+
.pipe_in(INPUT_SHAKE256_WRONG_LEN)
3287+
.fails()
3288+
.stderr_only("cksum: 'standard input': no properly formatted checksum lines found\n");
32233289
}

tests/by-util/test_cut.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ fn test_delimiter_with_byte_and_char() {
131131
new_ucmd!()
132132
.args(&[conflicting_arg, COMPLEX_SEQUENCE.sequence, "-d="])
133133
.fails_with_code(1)
134-
.stderr_is("cut: invalid input: The '--delimiter' ('-d') option only usable if printing a sequence of fields\n")
134+
.stderr_is("cut: invalid input: The '--delimiter' ('-d') option can only be used when printing a sequence of fields\n")
135135
;
136136
}
137137
}

util/gnu-patches/tests_cut_error_msg.patch

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ Index: gnu/tests/cut/cut.pl
2121
+my $inval_fld = "$prog: range '--' was invalid: failed to parse range\n";
2222
+my $inval_pos = "$prog: range '--' was invalid: failed to parse range\n";
2323
+my $no_endpoint = "$prog: range '-' was invalid: invalid range with no endpoint\n";
24-
+my $nofield = "$prog: invalid input: The '--delimiter' ('-d') option only usable if printing a sequence of fields\n";
24+
+my $nofield = "$prog: invalid input: The '--delimiter' ('-d') option can only be used when printing a sequence of fields\n";
2525

2626
my @Tests =
2727
(
@@ -53,7 +53,7 @@ Index: gnu/tests/cut/cut.pl
5353
['y', qw(-s -b4), {IN=>":\n"}, {OUT=>""}, {EXIT=>1},
5454
- {ERR=>"$prog: suppressing non-delimited lines makes sense\n"
5555
- . "\tonly when operating on fields\n$try"}],
56-
+ {ERR=>"$prog: invalid input: The '--only-delimited' ('-s') option only usable if printing a sequence of fields\n"}],
56+
+ {ERR=>"$prog: invalid input: The '--only-delimited' ('-s') option can only be used when printing a sequence of fields\n"}],
5757
# You must specify bytes or fields (or chars)
5858
['z', '', {IN=>":\n"}, {OUT=>""}, {EXIT=>1},
5959
- {ERR=>"$prog: you must specify a list of bytes, characters, or fields\n$try"}

0 commit comments

Comments
 (0)