Skip to content

Commit 262cd76

Browse files
committed
Optimize SliceIndex<str> for RangeInclusive
Replace `self.end() == usize::MAX` and `self.end() + 1 > slice.len()` with `self.end() >= slice.len()`. Same reasoning as previous commit. Also consolidate the str panicking functions into function.
1 parent 625b180 commit 262cd76

6 files changed

Lines changed: 100 additions & 84 deletions

File tree

library/alloctests/tests/str.rs

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -630,13 +630,13 @@ mod slice_index {
630630
// note: using 0 specifically ensures that the result of overflowing is 0..0,
631631
// so that `get` doesn't simply return None for the wrong reason.
632632
bad: data[0..=usize::MAX];
633-
message: "maximum usize";
633+
message: "out of bounds";
634634
}
635635

636636
in mod rangetoinclusive {
637637
data: "hello";
638638
bad: data[..=usize::MAX];
639-
message: "maximum usize";
639+
message: "out of bounds";
640640
}
641641
}
642642
}
@@ -659,49 +659,49 @@ mod slice_index {
659659
data: super::DATA;
660660
bad: data[super::BAD_START..super::GOOD_END];
661661
message:
662-
"byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
662+
"start byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
663663
}
664664

665665
in mod range_2 {
666666
data: super::DATA;
667667
bad: data[super::GOOD_START..super::BAD_END];
668668
message:
669-
"byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
669+
"end byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
670670
}
671671

672672
in mod rangefrom {
673673
data: super::DATA;
674674
bad: data[super::BAD_START..];
675675
message:
676-
"byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
676+
"start byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
677677
}
678678

679679
in mod rangeto {
680680
data: super::DATA;
681681
bad: data[..super::BAD_END];
682682
message:
683-
"byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
683+
"end byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
684684
}
685685

686686
in mod rangeinclusive_1 {
687687
data: super::DATA;
688688
bad: data[super::BAD_START..=super::GOOD_END_INCL];
689689
message:
690-
"byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
690+
"start byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
691691
}
692692

693693
in mod rangeinclusive_2 {
694694
data: super::DATA;
695695
bad: data[super::GOOD_START..=super::BAD_END_INCL];
696696
message:
697-
"byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
697+
"end byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
698698
}
699699

700700
in mod rangetoinclusive {
701701
data: super::DATA;
702702
bad: data[..=super::BAD_END_INCL];
703703
message:
704-
"byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
704+
"end byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
705705
}
706706
}
707707
}
@@ -716,7 +716,9 @@ mod slice_index {
716716

717717
// check the panic includes the prefix of the sliced string
718718
#[test]
719-
#[should_panic(expected = "byte index 1024 is out of bounds of `Lorem ipsum dolor sit amet")]
719+
#[should_panic(
720+
expected = "end byte index 1024 is out of bounds of `Lorem ipsum dolor sit amet"
721+
)]
720722
fn test_slice_fail_truncated_1() {
721723
let _ = &LOREM_PARAGRAPH[..1024];
722724
}

library/core/src/range.rs

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -352,15 +352,6 @@ impl<Idx: Step> RangeInclusive<Idx> {
352352
}
353353
}
354354

355-
impl RangeInclusive<usize> {
356-
/// Converts to an exclusive `Range` for `SliceIndex` implementations.
357-
/// The caller is responsible for dealing with `last == usize::MAX`.
358-
#[inline]
359-
pub(crate) const fn into_slice_range(self) -> Range<usize> {
360-
Range { start: self.start, end: self.last + 1 }
361-
}
362-
}
363-
364355
#[stable(feature = "new_range_inclusive_api", since = "CURRENT_RUSTC_VERSION")]
365356
#[rustc_const_unstable(feature = "const_range", issue = "none")]
366357
impl<T> const RangeBounds<T> for RangeInclusive<T> {

library/core/src/str/mod.rs

Lines changed: 43 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -85,34 +85,50 @@ fn slice_error_fail_rt(s: &str, begin: usize, end: usize) -> ! {
8585
let trunc_len = s.floor_char_boundary(MAX_DISPLAY_LENGTH);
8686
let s_trunc = &s[..trunc_len];
8787
let ellipsis = if trunc_len < s.len() { "[...]" } else { "" };
88+
let len = s.len();
8889

89-
// 1. out of bounds
90-
if begin > s.len() || end > s.len() {
91-
let oob_index = if begin > s.len() { begin } else { end };
92-
panic!("byte index {oob_index} is out of bounds of `{s_trunc}`{ellipsis}");
93-
}
94-
95-
// 2. begin <= end
96-
assert!(
97-
begin <= end,
98-
"begin <= end ({} <= {}) when slicing `{}`{}",
99-
begin,
100-
end,
101-
s_trunc,
102-
ellipsis
103-
);
104-
105-
// 3. character boundary
106-
let index = if !s.is_char_boundary(begin) { begin } else { end };
107-
// find the character
108-
let char_start = s.floor_char_boundary(index);
109-
// `char_start` must be less than len and a char boundary
110-
let ch = s[char_start..].chars().next().unwrap();
111-
let char_range = char_start..char_start + ch.len_utf8();
112-
panic!(
113-
"byte index {} is not a char boundary; it is inside {:?} (bytes {:?}) of `{}`{}",
114-
index, ch, char_range, s_trunc, ellipsis
115-
);
90+
// 1. begin is OOB.
91+
if begin > len {
92+
panic!("start byte index {begin} is out of bounds of `{s_trunc}`{ellipsis}");
93+
}
94+
95+
// 2. end is OOB.
96+
if end > len {
97+
panic!("end byte index {end} is out of bounds of `{s_trunc}`{ellipsis}");
98+
}
99+
100+
// 3. range is backwards.
101+
if begin > end {
102+
panic!("begin <= end ({begin} <= {end}) when slicing `{s_trunc}`{ellipsis}")
103+
}
104+
105+
// 4. begin is inside a character.
106+
if !s.is_char_boundary(begin) {
107+
let floor = s.floor_char_boundary(begin);
108+
let ceil = s.ceil_char_boundary(begin);
109+
let range = floor..ceil;
110+
let ch = s[floor..ceil].chars().next().unwrap();
111+
panic!(
112+
"start byte index {begin} is not a char boundary; it is inside {ch:?} (bytes {range:?}) of `{s_trunc}`{ellipsis}"
113+
)
114+
}
115+
116+
// 5. end is inside a character.
117+
if !s.is_char_boundary(end) {
118+
let floor = s.floor_char_boundary(end);
119+
let ceil = s.ceil_char_boundary(end);
120+
let range = floor..ceil;
121+
let ch = s[floor..ceil].chars().next().unwrap();
122+
panic!(
123+
"end byte index {end} is not a char boundary; it is inside {ch:?} (bytes {range:?}) of `{s_trunc}`{ellipsis}"
124+
)
125+
}
126+
127+
// 6. end is OOB and range is inclusive (end == len).
128+
// This test cannot be combined with 2. above because for cases like
129+
// `"abcαβγ"[4..9]` the error is that 4 is inside 'α', not that 9 is OOB.
130+
debug_assert_eq!(end, len);
131+
panic!("end byte index {end} is out of bounds of `{s_trunc}`{ellipsis}");
116132
}
117133

118134
impl str {

library/core/src/str/traits.rs

Lines changed: 34 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -76,13 +76,6 @@ where
7676
}
7777
}
7878

79-
#[inline(never)]
80-
#[cold]
81-
#[track_caller]
82-
const fn str_index_overflow_fail() -> ! {
83-
panic!("attempted to index str up to maximum usize");
84-
}
85-
8679
/// Implements substring slicing with syntax `&self[..]` or `&mut self[..]`.
8780
///
8881
/// Returns a slice of the whole string, i.e., returns `&self` or `&mut
@@ -640,11 +633,11 @@ unsafe impl const SliceIndex<str> for ops::RangeInclusive<usize> {
640633
type Output = str;
641634
#[inline]
642635
fn get(self, slice: &str) -> Option<&Self::Output> {
643-
if *self.end() == usize::MAX { None } else { self.into_slice_range().get(slice) }
636+
if *self.end() >= slice.len() { None } else { self.into_slice_range().get(slice) }
644637
}
645638
#[inline]
646639
fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
647-
if *self.end() == usize::MAX { None } else { self.into_slice_range().get_mut(slice) }
640+
if *self.end() >= slice.len() { None } else { self.into_slice_range().get_mut(slice) }
648641
}
649642
#[inline]
650643
unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output {
@@ -658,17 +651,37 @@ unsafe impl const SliceIndex<str> for ops::RangeInclusive<usize> {
658651
}
659652
#[inline]
660653
fn index(self, slice: &str) -> &Self::Output {
661-
if *self.end() == usize::MAX {
662-
str_index_overflow_fail();
654+
let Self { mut start, mut end, exhausted } = self;
655+
let len = slice.len();
656+
if end < len {
657+
end = end + 1;
658+
start = if exhausted { end } else { start };
659+
if start <= end && slice.is_char_boundary(start) && slice.is_char_boundary(end) {
660+
// SAFETY: just checked that `start` and `end` are on a char boundary,
661+
// and we are passing in a safe reference, so the return value will also be one.
662+
// We also checked char boundaries, so this is valid UTF-8.
663+
unsafe { return &*(start..end).get_unchecked(slice) }
664+
}
663665
}
664-
self.into_slice_range().index(slice)
666+
667+
super::slice_error_fail(slice, start, end)
665668
}
666669
#[inline]
667670
fn index_mut(self, slice: &mut str) -> &mut Self::Output {
668-
if *self.end() == usize::MAX {
669-
str_index_overflow_fail();
671+
let Self { mut start, mut end, exhausted } = self;
672+
let len = slice.len();
673+
if end < len {
674+
end = end + 1;
675+
start = if exhausted { end } else { start };
676+
if start <= end && slice.is_char_boundary(start) && slice.is_char_boundary(end) {
677+
// SAFETY: just checked that `start` and `end` are on a char boundary,
678+
// and we are passing in a safe reference, so the return value will also be one.
679+
// We also checked char boundaries, so this is valid UTF-8.
680+
unsafe { return &mut *(start..end).get_unchecked_mut(slice) }
681+
}
670682
}
671-
self.into_slice_range().index_mut(slice)
683+
684+
super::slice_error_fail(slice, start, end)
672685
}
673686
}
674687

@@ -678,35 +691,29 @@ unsafe impl const SliceIndex<str> for range::RangeInclusive<usize> {
678691
type Output = str;
679692
#[inline]
680693
fn get(self, slice: &str) -> Option<&Self::Output> {
681-
if self.last == usize::MAX { None } else { self.into_slice_range().get(slice) }
694+
ops::RangeInclusive::from(self).get(slice)
682695
}
683696
#[inline]
684697
fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
685-
if self.last == usize::MAX { None } else { self.into_slice_range().get_mut(slice) }
698+
ops::RangeInclusive::from(self).get_mut(slice)
686699
}
687700
#[inline]
688701
unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output {
689702
// SAFETY: the caller must uphold the safety contract for `get_unchecked`.
690-
unsafe { self.into_slice_range().get_unchecked(slice) }
703+
unsafe { ops::RangeInclusive::from(self).get_unchecked(slice) }
691704
}
692705
#[inline]
693706
unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output {
694707
// SAFETY: the caller must uphold the safety contract for `get_unchecked_mut`.
695-
unsafe { self.into_slice_range().get_unchecked_mut(slice) }
708+
unsafe { ops::RangeInclusive::from(self).get_unchecked_mut(slice) }
696709
}
697710
#[inline]
698711
fn index(self, slice: &str) -> &Self::Output {
699-
if self.last == usize::MAX {
700-
str_index_overflow_fail();
701-
}
702-
self.into_slice_range().index(slice)
712+
ops::RangeInclusive::from(self).index(slice)
703713
}
704714
#[inline]
705715
fn index_mut(self, slice: &mut str) -> &mut Self::Output {
706-
if self.last == usize::MAX {
707-
str_index_overflow_fail();
708-
}
709-
self.into_slice_range().index_mut(slice)
716+
ops::RangeInclusive::from(self).index_mut(slice)
710717
}
711718
}
712719

tests/codegen-llvm/str-range-indexing.rs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,18 +35,18 @@ macro_rules! tests {
3535
// CHECK: ret
3636
tests!(Range<usize>, get_range, index_range);
3737

38-
// 9 comparisons required:
39-
// end != usize::MAX && start <= end + 1
40-
// && (start == 0 || (start >= len && start == len) || bytes[start] >= -0x40)
41-
// && ( (end + 1 >= len && end + 1 == len) || bytes[end + 1] >= -0x40)
38+
// 7 comparisons required:
39+
// end < len && start <= end + 1
40+
// && (start == 0 || start >= len || bytes[start] >= -0x40)
41+
// && ( end + 1 >= len || bytes[end + 1] >= -0x40)
4242

4343
// CHECK-LABEL: @get_range_inclusive
44-
// CHECK-COUNT-9: %{{.+}} = icmp
44+
// CHECK-COUNT-7: %{{.+}} = icmp
4545
// CHECK-NOT: %{{.+}} = icmp
4646
// CHECK: ret
4747

4848
// CHECK-LABEL: @index_range_inclusive
49-
// CHECK-COUNT-9: %{{.+}} = icmp
49+
// CHECK-COUNT-7: %{{.+}} = icmp
5050
// CHECK-NOT: %{{.+}} = icmp
5151
// CHECK: ret
5252
tests!(RangeInclusive<usize>, get_range_inclusive, index_range_inclusive);
@@ -65,16 +65,16 @@ tests!(RangeInclusive<usize>, get_range_inclusive, index_range_inclusive);
6565
// CHECK: ret
6666
tests!(RangeTo<usize>, get_range_to, index_range_to);
6767

68-
// 4 comparisons required:
69-
// end != usize::MAX && (end + 1 >= len && end + 1 == len) || bytes[end + 1] >= -0x40)
68+
// 3 comparisons required:
69+
// end < len && (end + 1 >= len || bytes[end + 1] >= -0x40)
7070

7171
// CHECK-LABEL: @get_range_to_inclusive
72-
// CHECK-COUNT-4: %{{.+}} = icmp
72+
// CHECK-COUNT-3: %{{.+}} = icmp
7373
// CHECK-NOT: %{{.+}} = icmp
7474
// CHECK: ret
7575

7676
// CHECK-LABEL: @index_range_to_inclusive
77-
// CHECK-COUNT-4: %{{.+}} = icmp
77+
// CHECK-COUNT-3: %{{.+}} = icmp
7878
// CHECK-NOT: %{{.+}} = icmp
7979
// CHECK: ret
8080
tests!(RangeToInclusive<usize>, get_range_to_inclusive, index_range_to_inclusive);
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11

22
thread 'main' ($TID) panicked at $DIR/const-eval-select-backtrace-std.rs:6:8:
3-
byte index 1 is out of bounds of ``
3+
start byte index 1 is out of bounds of ``
44
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace

0 commit comments

Comments
 (0)