Skip to content

Commit 60b4d1b

Browse files
authored
shuf: try Vec first and fall back to HashMap if it causes OOM (#11169)
1 parent 3136627 commit 60b4d1b

3 files changed

Lines changed: 25 additions & 12 deletions

File tree

src/uu/shuf/src/nonrepeating_iterator.rs

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,26 @@ enum Values {
4949
}
5050

5151
impl<'a> NonrepeatingIterator<'a> {
52-
pub(crate) fn new(range: RangeInclusive<u64>, rng: &'a mut WrappedRng) -> Self {
53-
const MAX_CAPACITY: usize = 128; // todo: optimize this
54-
let capacity = (range.size_hint().0).min(MAX_CAPACITY);
55-
let values = Values::Sparse(
56-
range,
57-
FxHashMap::with_capacity_and_hasher(capacity, rustc_hash::FxBuildHasher),
58-
);
52+
pub(crate) fn new(
53+
range: RangeInclusive<u64>,
54+
rng: &'a mut WrappedRng,
55+
head_count: Option<usize>,
56+
) -> Self {
57+
// Reduce RAM usage for `shuf -i 1-huge_number -n small_number`: ranges this long or longer use the sparse map instead of a full Vec
58+
const TOO_LARGE_VEC_SIZE: usize = 16_777_216;
59+
let range_len = range.size_hint().0;
60+
let mut items = Vec::new();
61+
let values = if range_len < TOO_LARGE_VEC_SIZE && items.try_reserve(range_len).is_ok() {
62+
items.extend(range.rev());
63+
Values::Full(items)
64+
} else {
65+
const MAX_CAPACITY: usize = 128; // todo: optimize this
66+
let capacity = head_count.unwrap_or(MAX_CAPACITY).min(range_len);
67+
Values::Sparse(
68+
range,
69+
FxHashMap::with_capacity_and_hasher(capacity, rustc_hash::FxBuildHasher),
70+
)
71+
};
5972
NonrepeatingIterator { rng, values }
6073
}
6174

src/uu/shuf/src/shuf.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -357,7 +357,7 @@ impl Shufable for RangeInclusive<u64> {
357357
amount: u64,
358358
) -> UResult<impl Iterator<Item = UResult<Self::Item>>> {
359359
let amount = usize::try_from(amount).unwrap_or(usize::MAX);
360-
Ok(NonrepeatingIterator::new(self.clone(), rng).take(amount))
360+
Ok(NonrepeatingIterator::new(self.clone(), rng, Some(amount)).take(amount))
361361
}
362362
}
363363

tests/by-util/test_shuf.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -93,23 +93,23 @@ fn test_zero_termination_multi() {
9393

9494
#[test]
9595
fn test_very_large_range() {
96-
let num_samples = 10;
96+
let num_samples = 256;
9797
let result = new_ucmd!()
9898
.arg("-n")
9999
.arg(num_samples.to_string())
100-
.arg("-i0-1234567890")
100+
.arg("-i1-100000000000")
101101
.succeeds();
102102
result.no_stderr();
103103

104-
let result_seq: Vec<isize> = result
104+
let result_seq: Vec<u64> = result
105105
.stdout_str()
106106
.split('\n')
107107
.filter(|x| !x.is_empty())
108108
.map(|x| x.parse().unwrap())
109109
.collect();
110110
assert_eq!(result_seq.len(), num_samples, "Miscounted output length!");
111111
assert!(
112-
result_seq.iter().all(|x| (0..=1_234_567_890).contains(x)),
112+
result_seq.iter().all(|x| (0..=100_000_000_000).contains(x)),
113113
"Output includes element not from range: {}",
114114
result.stdout_str()
115115
);

0 commit comments

Comments
 (0)