Skip to content

Commit f5bdf7b

Browse files
committed
chore(factor): refactor & optimize parsing
Now UTF-8 checks are performed strictly once, the worst-case of BigUints needs not be parsed & allocated every time, and error handling has been tightened.
1 parent 9898e6f commit f5bdf7b

1 file changed

Lines changed: 65 additions & 63 deletions

File tree

src/uu/factor/src/factor.rs

Lines changed: 65 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,12 @@ use std::fmt::Display;
1010
use std::io::BufRead;
1111
use std::io::{self, Write, stdin, stdout};
1212
use std::iter::once;
13-
use std::str::FromStr;
13+
use std::num::IntErrorKind;
1414

1515
use clap::{Arg, ArgAction, Command};
1616
use memchr::memchr3_iter;
1717
use num_bigint::BigUint;
18-
use num_traits::FromPrimitive;
18+
use num_prime::nt_funcs::{factorize64, factorize128, factors};
1919
use uucore::display::Quotable;
2020
use uucore::error::{FromIo, UResult, USimpleError, set_exit_code};
2121
use uucore::translate;
@@ -33,72 +33,77 @@ const DELIM_SPACE: u8 = b' ';
3333
const DELIM_TAB: u8 = b'\t';
3434
const DELIM_NULL: u8 = b'\0';
3535

36+
#[derive(Debug, PartialEq, Eq)]
37+
enum Number {
38+
U64(u64),
39+
U128(u128),
40+
BigUint(BigUint),
41+
}
42+
3643
fn write_factors_str(
3744
num_str: &[u8],
3845
w: &mut io::BufWriter<impl Write>,
3946
print_exponents: bool,
4047
) -> UResult<()> {
41-
let parsed = parse_num::<BigUint>(num_str);
48+
let parsed = parse_num(num_str);
4249
show_if_err!(&parsed);
4350
let Ok(x) = parsed else {
51+
// return Ok(). it's non-fatal and we should try the next number.
4452
return Ok(());
4553
};
4654

47-
if x > BigUint::from_u32(1).unwrap() {
55+
match x {
4856
// use num_prime's factorize64 algorithm for u64 integers
49-
if x <= BigUint::from_u64(u64::MAX).unwrap() {
50-
let prime_factors = num_prime::nt_funcs::factorize64(x.clone().to_u64_digits()[0]);
51-
write_result(w, &x, prime_factors, print_exponents)
52-
.map_err_context(|| translate!("factor-error-write-error"))?;
53-
}
57+
Number::U64(x) if x > 1 => write_result(w, &x, factorize64(x), print_exponents),
58+
Number::U64(x) => write_result(w, &x, BTreeMap::<u64, usize>::new(), print_exponents),
5459
// use num_prime's factorize128 algorithm for u128 integers
55-
else if x <= BigUint::from_u128(u128::MAX).unwrap() {
56-
let parsed = parse_num::<u128>(num_str);
57-
show_if_err!(&parsed);
58-
let Ok(x) = parsed else {
59-
return Ok(());
60-
};
61-
let prime_factors = num_prime::nt_funcs::factorize128(x);
62-
write_result(w, &x, prime_factors, print_exponents)
63-
.map_err_context(|| translate!("factor-error-write-error"))?;
64-
}
60+
Number::U128(x) => write_result(w, &x, factorize128(x), print_exponents),
6561
// use num_prime's fallible factorization for anything greater than u128::MAX
66-
else {
67-
let (prime_factors, remaining) = num_prime::nt_funcs::factors(x.clone(), None);
68-
if let Some(_remaining) = remaining {
62+
Number::BigUint(x) => {
63+
let (prime_factors, remaining) = factors(x.clone(), None);
64+
if remaining.is_some() {
6965
return Err(USimpleError::new(
7066
1,
7167
translate!("factor-error-factorization-incomplete"),
7268
));
7369
}
7470
write_result(w, &x, prime_factors, print_exponents)
75-
.map_err_context(|| translate!("factor-error-write-error"))?;
7671
}
77-
} else {
78-
let empty_primes: BTreeMap<BigUint, usize> = BTreeMap::new();
79-
write_result(w, &x, empty_primes, print_exponents)
80-
.map_err_context(|| translate!("factor-error-write-error"))?;
8172
}
82-
83-
Ok(())
73+
.map_err_context(|| translate!("factor-error-write-error"))
8474
}
8575

86-
fn parse_num<T: FromStr>(slice: &[u8]) -> UResult<T> {
87-
str::from_utf8(slice)
88-
.ok()
89-
.and_then(|str| str.trim_matches(DELIM_SPACE as char).parse().ok())
90-
.ok_or_else(|| {
91-
let num = NumError(slice).to_string();
92-
let num = if num.len() > slice.len() {
93-
num.quote() // Force quoting if there are invalid characters.
94-
} else {
95-
num.maybe_quote()
96-
};
97-
USimpleError::new(
98-
1,
99-
format!("warning: {num}: {}", translate!("factor-error-invalid-int")),
100-
)
101-
})
76+
fn parse_num(slice: &[u8]) -> UResult<Number> {
77+
let err_invalid = |s: &str, force_quoting| {
78+
let num = if force_quoting {
79+
s.quote() // Force quoting if there are invalid characters.
80+
} else {
81+
s.maybe_quote()
82+
};
83+
USimpleError::new(
84+
1,
85+
format!("warning: {num}: {}", translate!("factor-error-invalid-int")),
86+
)
87+
};
88+
let num = str::from_utf8(slice).map_err(|_| err_invalid(&NumError(slice).to_string(), true))?;
89+
90+
match num.parse::<u64>() {
91+
Ok(x) => return Ok(Number::U64(x)),
92+
// If overflown, attempt a greater width
93+
Err(e) if matches!(e.kind(), IntErrorKind::PosOverflow) => {}
94+
Err(_) => return Err(err_invalid(num, false)),
95+
}
96+
97+
match num.parse::<u128>() {
98+
Ok(x) => return Ok(Number::U128(x)),
99+
// If overflown, attempt a greater width
100+
Err(e) if matches!(e.kind(), IntErrorKind::PosOverflow) => {}
101+
Err(_) => return Err(err_invalid(num, false)),
102+
}
103+
104+
num.parse::<BigUint>()
105+
.map(Number::BigUint)
106+
.map_err(|_| err_invalid(num, false))
102107
}
103108

104109
/// This is a newtype wrapper over a potentially malformed UTF-8
@@ -168,7 +173,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
168173

169174
if let Some(values) = matches.get_many::<String>(options::NUMBER) {
170175
for number in values {
171-
write_factors_str(number.as_bytes(), &mut w, print_exponents)?;
176+
write_factors_str(number.trim().as_bytes(), &mut w, print_exponents)?;
172177
}
173178
} else {
174179
let stdin = stdin();
@@ -239,23 +244,24 @@ pub fn uu_app() -> Command {
239244

240245
#[cfg(test)]
241246
mod tests {
242-
use crate::BigUint;
247+
use crate::Number;
243248
use crate::parse_num;
244249

245250
#[test]
246251
fn test_correct_parsing() {
247-
let zero = parse_num::<u64>(b"0").unwrap();
248-
let u64_max = parse_num::<u64>(u64::MAX.to_string().as_bytes()).unwrap();
249-
let u128_one = parse_num::<u128>((u64::MAX as u128 + 1).to_string().as_bytes()).unwrap();
250-
let u128_max = parse_num::<u128>(u128::MAX.to_string().as_bytes()).unwrap();
251-
let bigint = parse_num::<BigUint>(u128::MAX.to_string().as_bytes()).unwrap() + 1u64;
252+
const U128_MAX_ONE: &str = "340282366920938463463374607431768211456";
253+
let zero = parse_num(b"0").unwrap();
254+
let u64_max = parse_num(u64::MAX.to_string().as_bytes()).unwrap();
255+
let u128_one = parse_num((u64::MAX as u128 + 1).to_string().as_bytes()).unwrap();
256+
let u128_max = parse_num(u128::MAX.to_string().as_bytes()).unwrap();
257+
let bigint = parse_num(U128_MAX_ONE.as_bytes()).unwrap();
252258
assert_eq!(
253259
(
254-
0,
255-
u64::MAX,
256-
(u64::MAX as u128 + 1),
257-
u128::MAX,
258-
BigUint::from(u128::MAX) + 1u64
260+
Number::U64(0),
261+
Number::U64(u64::MAX),
262+
Number::U128(u64::MAX as u128 + 1),
263+
Number::U128(u128::MAX),
264+
Number::BigUint(U128_MAX_ONE.parse().unwrap())
259265
),
260266
(zero, u64_max, u128_one, u128_max, bigint)
261267
);
@@ -264,11 +270,7 @@ mod tests {
264270
#[test]
265271
#[should_panic]
266272
fn test_incorrect_parsing() {
267-
parse_num::<u64>(b"abcd").unwrap();
268-
parse_num::<u64>(b"12\x00\xFF\x1D").unwrap();
269-
parse_num::<u128>(b"abcd").unwrap();
270-
parse_num::<u128>(b"12\x00\xFF\x1D").unwrap();
271-
parse_num::<BigUint>(b"abcd").unwrap();
272-
parse_num::<BigUint>(b"12\x00\xFF\x1D").unwrap();
273+
parse_num(b"abcd").unwrap();
274+
parse_num(b"12\x00\xFF\x1D").unwrap();
273275
}
274276
}

0 commit comments

Comments
 (0)