Skip to content

Commit 71b35b8

Browse files
authored
Rollup merge of #157224 - LiosK:round_char_boundary, r=Mark-Simulacrum
Manually unroll loop in `str::floor_char_boundary`

This commit manually unrolls the while loops to at most three iterations, exploiting the UTF-8 invariant that a character is at most four bytes long.

Benefits:
- Prevents LLVM from generating unbounded unrolled code when `index` is statically known - e.g., `s.floor_char_boundary(20)` previously could emit up to 20 repeated loop bodies.
- Eliminates the check at `index - 3`: the UTF-8 invariant guarantees that if `index - 2` is not a character boundary, `index - 3` must be, so the third iteration needs no conditional.
- Allows out-of-order CPUs to issue all three byte loads in parallel, since their offsets are statically known.

This commit will close #149466 with simpler code. The optimizer appears to be able to eliminate bounds checks and panic paths.
2 parents 0efcd77 + cd6e34f commit 71b35b8

2 files changed

Lines changed: 51 additions & 16 deletions

File tree

library/alloctests/tests/str.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2431,6 +2431,16 @@ fn floor_char_boundary() {
24312431
check_many("🇯🇵", 0..4, 0);
24322432
check_many("🇯🇵", 4..8, 4);
24332433
check_many("🇯🇵", 8..10, 8);
2434+
2435+
// anticipate length- and index-based specializations
2436+
let s = "jpĵƥ日本🇯🇵jpĵƥ日本🇯🇵";
2437+
let expected = [
2438+
0, 1, 2, 2, 4, 4, 6, 6, 6, 9, 9, 9, 12, 12, 12, 12, 16, 16, 16, 16, 20, 21, 22, 22, 24, 24,
2439+
26, 26, 26, 29, 29, 29, 32, 32, 32, 32, 36, 36, 36, 36, 40, 40, 40, 40,
2440+
];
2441+
for (idx, &ret) in expected.iter().enumerate() {
2442+
check_many(s, [idx], ret);
2443+
}
24342444
}
24352445

24362446
#[test]
@@ -2477,4 +2487,14 @@ fn ceil_char_boundary() {
24772487

24782488
// above len
24792489
check_many("hello", 5..=10, 5);
2490+
2491+
// anticipate length- and index-based specializations
2492+
let s = "jpĵƥ日本🇯🇵jpĵƥ日本🇯🇵";
2493+
let expected = [
2494+
0, 1, 2, 4, 4, 6, 6, 9, 9, 9, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 20, 21, 22, 24, 24,
2495+
26, 26, 29, 29, 29, 32, 32, 32, 36, 36, 36, 36, 40, 40, 40, 40, 40, 40, 40,
2496+
];
2497+
for (idx, &ret) in expected.iter().enumerate() {
2498+
check_many(s, [idx], ret);
2499+
}
24802500
}

library/core/src/str/mod.rs

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ mod validations;
1515

1616
use self::pattern::{DoubleEndedSearcher, Pattern, ReverseSearcher, Searcher};
1717
use crate::char::{self, EscapeDebugExtArgs};
18+
use crate::hint::assert_unchecked;
1819
use crate::range::Range;
1920
use crate::slice::{self, SliceIndex};
2021
use crate::ub_checks::assert_unsafe_precondition;
@@ -421,21 +422,35 @@ impl str {
421422
#[inline]
422423
pub const fn floor_char_boundary(&self, index: usize) -> usize {
423424
if index >= self.len() {
424-
self.len()
425-
} else {
426-
let mut i = index;
427-
while i > 0 {
428-
if self.as_bytes()[i].is_utf8_char_boundary() {
429-
break;
430-
}
431-
i -= 1;
432-
}
425+
return self.len();
426+
}
427+
if self.as_bytes()[index].is_utf8_char_boundary() {
428+
return index;
429+
}
430+
// Unlike `ceil_char_boundary`, the loop is unrolled manually to prevent the compiler from
431+
// generating excessive unrolled loop bodies when `index` is statically known.
433432

434-
// The character boundary will be within four bytes of the index
435-
debug_assert!(i >= index.saturating_sub(3));
433+
// The first byte of `&str` must always be a char boundary, so we can assume `i > 0` below
434+
// for any `i` where `self.as_bytes()[i]` is not a char boundary.
435+
debug_assert!(self.as_bytes()[0].is_utf8_char_boundary());
436436

437-
i
437+
// SAFETY: `self.as_bytes()[0]` is always a char boundary with valid `&str`
438+
unsafe { assert_unchecked(index >= 1) };
439+
if self.as_bytes()[index - 1].is_utf8_char_boundary() {
440+
return index - 1;
441+
}
442+
443+
// SAFETY: `self.as_bytes()[0]` is always a char boundary with valid `&str`
444+
unsafe { assert_unchecked(index >= 2) };
445+
if self.as_bytes()[index - 2].is_utf8_char_boundary() {
446+
return index - 2;
438447
}
448+
449+
// `self.as_bytes()[0]` is always a char boundary with valid `&str`
450+
debug_assert!(index >= 3);
451+
// The character boundary will be within four bytes of the index
452+
debug_assert!(self.as_bytes()[index - 3].is_utf8_char_boundary());
453+
index - 3
439454
}
440455

441456
/// Finds the closest `x` not below `index` where [`is_char_boundary(x)`] is `true`.
@@ -467,14 +482,14 @@ impl str {
467482
self.len()
468483
} else {
469484
let mut i = index;
470-
while i < self.len() {
471-
if self.as_bytes()[i].is_utf8_char_boundary() {
485+
while !self.as_bytes()[i].is_utf8_char_boundary() {
486+
i += 1;
487+
if i >= self.len() {
472488
break;
473489
}
474-
i += 1;
475490
}
476491

477-
// The character boundary will be within four bytes of the index
492+
// The character boundary will be within four bytes of the index
478493
debug_assert!(i <= index + 3);
479494

480495
i

0 commit comments

Comments
 (0)