Skip to content

Commit 3693b41

Browse files
committed
Further optimize SliceIndex<str> impl for Range<usize>
We can shave a further 2 `icmp`s by inlining `is_char_boundary` and rearranging the checks.
1 parent 818811b commit 3693b41

2 files changed

Lines changed: 40 additions & 56 deletions

File tree

library/core/src/str/traits.rs

Lines changed: 33 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,32 @@ unsafe impl const SliceIndex<str> for ops::RangeFull {
118118
}
119119
}
120120

121+
#[inline(always)]
122+
const fn check_range(slice: &str, range: crate::range::Range<usize>) -> bool {
123+
let crate::range::Range { start, end } = range;
124+
let bytes = slice.as_bytes();
125+
126+
if start > end || end > slice.len() {
127+
return false;
128+
}
129+
130+
if start == slice.len() {
131+
// If `start == slice.len()`, then `end == slice.len()` must also be true.
132+
return true;
133+
}
134+
135+
// SAFETY:
136+
// `start > end || end > slice.len()` is false, so `start <= end <= slice.len()` is true.
137+
// `start == slice.len()` is false, so `start < slice.len()` is also true.
138+
//
139+
// No need to check for `end == 0`, becuase if `end == 0` is true then `start == slice.len()`
140+
// would also be true, which is already handled above.
141+
unsafe {
142+
(start == 0 || bytes.as_ptr().add(start).read().is_utf8_char_boundary())
143+
&& (end == slice.len() || bytes.as_ptr().add(end).read().is_utf8_char_boundary())
144+
}
145+
}
146+
121147
/// Implements substring slicing with syntax `&self[begin .. end]` or `&mut
122148
/// self[begin .. end]`.
123149
///
@@ -159,30 +185,11 @@ unsafe impl const SliceIndex<str> for ops::Range<usize> {
159185
type Output = str;
160186
#[inline]
161187
fn get(self, slice: &str) -> Option<&Self::Output> {
162-
if self.start <= self.end
163-
&& slice.is_char_boundary(self.start)
164-
&& slice.is_char_boundary(self.end)
165-
{
166-
// SAFETY: just checked that `start` and `end` are on a char boundary,
167-
// and we are passing in a safe reference, so the return value will also be one.
168-
// We also checked char boundaries, so this is valid UTF-8.
169-
Some(unsafe { &*self.get_unchecked(slice) })
170-
} else {
171-
None
172-
}
188+
range::Range::from(self).get(slice)
173189
}
174190
#[inline]
175191
fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
176-
if self.start <= self.end
177-
&& slice.is_char_boundary(self.start)
178-
&& slice.is_char_boundary(self.end)
179-
{
180-
// SAFETY: just checked that `start` and `end` are on a char boundary.
181-
// We know the pointer is unique because we got it from `slice`.
182-
Some(unsafe { &mut *self.get_unchecked_mut(slice) })
183-
} else {
184-
None
185-
}
192+
range::Range::from(self).get_mut(slice)
186193
}
187194
#[inline]
188195
#[track_caller]
@@ -235,26 +242,11 @@ unsafe impl const SliceIndex<str> for ops::Range<usize> {
235242
}
236243
#[inline]
237244
fn index(self, slice: &str) -> &Self::Output {
238-
let (start, end) = (self.start, self.end);
239-
match self.get(slice) {
240-
Some(s) => s,
241-
None => super::slice_error_fail(slice, start, end),
242-
}
245+
range::Range::from(self).index(slice)
243246
}
244247
#[inline]
245248
fn index_mut(self, slice: &mut str) -> &mut Self::Output {
246-
// is_char_boundary checks that the index is in [0, .len()]
247-
// cannot reuse `get` as above, because of NLL trouble
248-
if self.start <= self.end
249-
&& slice.is_char_boundary(self.start)
250-
&& slice.is_char_boundary(self.end)
251-
{
252-
// SAFETY: just checked that `start` and `end` are on a char boundary,
253-
// and we are passing in a safe reference, so the return value will also be one.
254-
unsafe { &mut *self.get_unchecked_mut(slice) }
255-
} else {
256-
super::slice_error_fail(slice, self.start, self.end)
257-
}
249+
range::Range::from(self).index_mut(slice)
258250
}
259251
}
260252

@@ -264,10 +256,7 @@ unsafe impl const SliceIndex<str> for range::Range<usize> {
264256
type Output = str;
265257
#[inline]
266258
fn get(self, slice: &str) -> Option<&Self::Output> {
267-
if self.start <= self.end
268-
&& slice.is_char_boundary(self.start)
269-
&& slice.is_char_boundary(self.end)
270-
{
259+
if check_range(slice, self) {
271260
// SAFETY: just checked that `start` and `end` are on a char boundary,
272261
// and we are passing in a safe reference, so the return value will also be one.
273262
// We also checked char boundaries, so this is valid UTF-8.
@@ -278,10 +267,7 @@ unsafe impl const SliceIndex<str> for range::Range<usize> {
278267
}
279268
#[inline]
280269
fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
281-
if self.start <= self.end
282-
&& slice.is_char_boundary(self.start)
283-
&& slice.is_char_boundary(self.end)
284-
{
270+
if check_range(slice, self) {
285271
// SAFETY: just checked that `start` and `end` are on a char boundary.
286272
// We know the pointer is unique because we got it from `slice`.
287273
Some(unsafe { &mut *self.get_unchecked_mut(slice) })
@@ -350,10 +336,7 @@ unsafe impl const SliceIndex<str> for range::Range<usize> {
350336
fn index_mut(self, slice: &mut str) -> &mut Self::Output {
351337
// is_char_boundary checks that the index is in [0, .len()]
352338
// cannot reuse `get` as above, because of NLL trouble
353-
if self.start <= self.end
354-
&& slice.is_char_boundary(self.start)
355-
&& slice.is_char_boundary(self.end)
356-
{
339+
if check_range(slice, self) {
357340
// SAFETY: just checked that `start` and `end` are on a char boundary,
358341
// and we are passing in a safe reference, so the return value will also be one.
359342
unsafe { &mut *self.get_unchecked_mut(slice) }

tests/codegen-llvm/str-range-indexing.rs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,19 @@ macro_rules! tests {
1818
};
1919
}
2020

21-
// 9 comparisons required:
22-
// start <= end
23-
// && (start == 0 || (start >= len && start == len) || bytes[start] >= -0x40)
24-
// && (end == 0 || (end >= len && end == len) || bytes[end] >= -0x40)
21+
// 7 comparisons required:
22+
// start <= end && end <= len
23+
// && (start == len ||
24+
// ( (start == 0 || bytes[start] >= -0x40)
25+
// && (end == len || bytes[end] >= -0x40)))
2526

2627
// CHECK-LABEL: @get_range
27-
// CHECK-COUNT-9: %{{.+}} = icmp
28+
// CHECK-COUNT-7: %{{.+}} = icmp
2829
// CHECK-NOT: %{{.+}} = icmp
2930
// CHECK: ret
3031

3132
// CHECK-LABEL: @index_range
32-
// CHECK-COUNT-9: %{{.+}} = icmp
33+
// CHECK-COUNT-7: %{{.+}} = icmp
3334
// CHECK-NOT: %{{.+}} = icmp
3435
// CHECK: ret
3536
tests!(Range<usize>, get_range, index_range);

0 commit comments

Comments
 (0)