Skip to content

Commit 06293ff

Browse files
committed
Auto merge of #155607 - KowalskiThomas:kowalski/perf-use-get_unchecked-for-pattern, r=Mark-Simulacrum
perf: use `get_unchecked` for `TwoWaySearcher` ## What is this PR? *This is related to #27721.* This PR is a proposal for a performance improvement in `std::pattern`. Profiling of [https://github.com/quickwit-oss/quickwit](https://github.com/quickwit-oss/quickwit) in production shows that `TwoWaySearcher::next` is one of the most CPU-time-consuming functions, so I thought I would give it a look. I read the [contribution guide](https://std-dev-guide.rust-lang.org/development/perf-benchmarking.html) and this seems to be a fitting proposal. It seems like `TwoWaySearcher::next` and `TwoWaySearcher::next_back` could be made faster by using `get_unchecked` in the inner loop comparisons instead of regular indexing, which is safe in the conditions where it would be done (indices are within bounds by construction). I added some `SAFETY` comments in the code to explain why this is safe, as I believe is customary in those cases (and according to [this page as well](https://std-dev-guide.rust-lang.org/policy/safety-comments.html)). ### Benchmarks I ran the existing bencharmks before/after the changes (only on my laptop, I can run them in other places if that's necessary). ``` ./x.py bench library/coretests -- pattern:: ``` We seem to be getting a ~7.5-12% performance improvement at a very low cost, which sounds worthwhile to me. But this is the first time I'm proposing a change in Rust, so I'm looking forward to feedback on this. ``` BEFORE CHANGES pattern::ends_with_char 3398.91ns/iter +/- 526.28 pattern::ends_with_str 3545.04ns/iter +/- 1108.76 pattern::starts_with_char 3348.31ns/iter +/- 352.38 pattern::starts_with_str 3710.59ns/iter +/- 435.57 AFTER CHANGES pattern::ends_with_char 3125.99ns/iter +/- 567.09 (-8.03%) pattern::ends_with_str 3106.43ns/iter +/- 258.33 (-12.38%) pattern::starts_with_char 3094.55ns/iter +/- 595.42 (-7.59%) pattern::starts_with_str 3365.75ns/iter +/- 268.88 (-9.29%) ``` System info for the benchmarks run <details> ``` Based on commit 8317fef rustc 1.96.0-dev binary: rustc commit-hash: unknown commit-date: unknown host: aarch64-apple-darwin release: 1.96.0-dev LLVM version: 22.1.2 Apple M4 Max 16 64 GB ProductName: macOS ProductVersion: 26.3 BuildVersion: 25D125 (this was run on AC and without any heavy load from other apps or whatnot) ``` </details>
2 parents 029c9e1 + 3a29341 commit 06293ff

3 files changed

Lines changed: 77 additions & 4 deletions

File tree

library/core/src/str/pattern.rs

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1496,7 +1496,13 @@ impl TwoWaySearcher {
14961496
let start =
14971497
if long_period { self.crit_pos } else { cmp::max(self.crit_pos, self.memory) };
14981498
for i in start..needle.len() {
1499-
if needle[i] != haystack[self.position + i] {
1499+
// SAFETY: on every iteration of `'search`, the `haystack.get(self.position + needle_last)`
1500+
// check returned `Some`, so `self.position + needle_last < haystack.len()`.
1501+
// Since `i < needle.len()` implies `i <= needle_last`, we have
1502+
// `self.position + i < haystack.len()`.
1503+
// Every path that mutates `self.position` below either returns or re-enters `'search`,
1504+
// which re-runs the check before reaching the loop again.
1505+
if needle[i] != unsafe { *haystack.get_unchecked(self.position + i) } {
15001506
self.position += i - self.crit_pos + 1;
15011507
if !long_period {
15021508
self.memory = 0;
@@ -1508,7 +1514,13 @@ impl TwoWaySearcher {
15081514
// See if the left part of the needle matches
15091515
let start = if long_period { 0 } else { self.memory };
15101516
for i in (start..self.crit_pos).rev() {
1511-
if needle[i] != haystack[self.position + i] {
1517+
// SAFETY: on every iteration of `'search`, the `haystack.get(self.position + needle_last)`
1518+
// check returned `Some`, so `self.position + needle_last < haystack.len()`.
1519+
// Since `i < self.crit_pos <= needle.len()`, we have `i <= needle_last`, and thus
1520+
// `self.position + i <= self.position + needle_last < haystack.len()`.
1521+
// Every path that mutates `self.position` below either returns or re-enters `'search`,
1522+
// which re-runs the check before reaching the loop again.
1523+
if needle[i] != unsafe { *haystack.get_unchecked(self.position + i) } {
15121524
self.position += self.period;
15131525
if !long_period {
15141526
self.memory = needle.len() - self.period;
@@ -1583,7 +1595,14 @@ impl TwoWaySearcher {
15831595
cmp::min(self.crit_pos_back, self.memory_back)
15841596
};
15851597
for i in (0..crit).rev() {
1586-
if needle[i] != haystack[self.end - needle.len() + i] {
1598+
// SAFETY: On every iteration of `'search`, `haystack.get(self.end.wrapping_sub(needle.len()))`
1599+
// returned `Some`, so `self.end >= needle.len()` and `self.end - needle.len() < haystack.len()`.
1600+
// Since `self.end <= haystack.len()` and `i < needle.len()`, we have
1601+
// `self.end - needle.len() + i < self.end <= haystack.len()`, so
1602+
// `haystack.get_unchecked(self.end - needle.len() + i)` is safe.
1603+
// - The path that mutates `self.end` either re-enters `'search`, which re-runs the checks
1604+
// before reaching this loop again, or returns on match, so the invariant holds.
1605+
if needle[i] != unsafe { *haystack.get_unchecked(self.end - needle.len() + i) } {
15871606
self.end -= self.crit_pos_back - i;
15881607
if !long_period {
15891608
self.memory_back = needle.len();
@@ -1595,7 +1614,12 @@ impl TwoWaySearcher {
15951614
// See if the right part of the needle matches
15961615
let needle_end = if long_period { needle.len() } else { self.memory_back };
15971616
for i in self.crit_pos_back..needle_end {
1598-
if needle[i] != haystack[self.end - needle.len() + i] {
1617+
// SAFETY: The same `self.end - needle.len() + i < haystack.len()` argument as the
1618+
// left-part loop applies: the `haystack.get(self.end.wrapping_sub(needle.len()))`
1619+
// check at the top of `'search` established the bound for this iteration, and
1620+
// every mutation of `self.end` is followed by `continue 'search` (which re-runs
1621+
// the check) or a `return` (which exits before any further unsafe access).
1622+
if needle[i] != unsafe { *haystack.get_unchecked(self.end - needle.len() + i) } {
15991623
self.end -= self.period;
16001624
if !long_period {
16011625
self.memory_back = self.period;

library/coretests/benches/pattern.rs

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,51 @@ fn ends_with_str(b: &mut Bencher) {
3939
}
4040
})
4141
}
42+
43+
fn make_haystack() -> String {
44+
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem \
45+
sit amet dolor ultricies condimentum. Praesent iaculis purus elit, ac malesuada \
46+
quam malesuada in. Duis sed orci eros. Suspendisse sit amet magna mollis, mollis \
47+
nunc luctus, imperdiet mi. Integer fringilla non sem ut lacinia. Fusce varius \
48+
tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec tempus vel, \
49+
gravida nec quam. In est dui, tincidunt sed tempus interdum, adipiscing laoreet \
50+
ante. Etiam tempor, tellus quis sagittis interdum, nulla purus mattis sem, quis \
51+
auctor erat odio ac tellus. In nec nunc sit amet diam volutpat molestie at sed \
52+
ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan lorem ac dignissim \
53+
placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat."
54+
.repeat(50)
55+
}
56+
57+
#[bench]
58+
fn find_str(b: &mut Bencher) {
59+
let s = make_haystack();
60+
let haystack = black_box(s.as_str());
61+
b.bytes = haystack.len() as u64;
62+
b.iter(|| black_box(haystack.find("the english language")))
63+
}
64+
65+
#[bench]
66+
fn rfind_str(b: &mut Bencher) {
67+
let s = make_haystack();
68+
let haystack = black_box(s.as_str());
69+
b.bytes = haystack.len() as u64;
70+
b.iter(|| black_box(haystack.rfind("the english language")))
71+
}
72+
73+
#[bench]
74+
fn find_str_worst_case(b: &mut Bencher) {
75+
let near_miss = "the english languagX";
76+
let haystack_str = near_miss.repeat(2000);
77+
let haystack = black_box(haystack_str.as_str());
78+
b.bytes = haystack.len() as u64;
79+
b.iter(|| black_box(haystack.find("the english language")))
80+
}
81+
82+
#[bench]
83+
fn rfind_str_worst_case(b: &mut Bencher) {
84+
let near_miss = "the english languagX";
85+
let haystack_str = near_miss.repeat(2000);
86+
let haystack = black_box(haystack_str.as_str());
87+
b.bytes = haystack.len() as u64;
88+
b.iter(|| black_box(haystack.rfind("the english language")))
89+
}

typos.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ extend-exclude = [
1313
# generated lorem ipsum texts
1414
"library/alloctests/benches/str.rs",
1515
"library/alloctests/tests/str.rs",
16+
"library/coretests/benches/pattern.rs",
1617
]
1718

1819
[default.extend-words]

0 commit comments

Comments
 (0)