Skip to content

Commit 492232c

Browse files
committed
Further optimize SliceIndex<str> impl for Range<usize>
We can shave a further 2 `icmp`s by inlining `is_char_boundary` and rearranging the checks.
1 parent 818811b commit 492232c

2 files changed

Lines changed: 37 additions & 56 deletions

File tree

library/core/src/str/traits.rs

Lines changed: 30 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,29 @@ unsafe impl const SliceIndex<str> for ops::RangeFull {
118118
}
119119
}
120120

121+
#[inline(always)]
122+
const fn check_range(slice: &str, range: crate::range::Range<usize>) -> bool {
123+
let crate::range::Range { start, end } = range;
124+
let bytes = slice.as_bytes();
125+
126+
if start > end || end > slice.len() {
127+
return false;
128+
}
129+
130+
if start == slice.len() {
131+
// If `start == slice.len()`, then `end == slice.len()` must also be true.
132+
return true;
133+
}
134+
135+
// SAFETY:
136+
// `start > end || end > slice.len()` is false, so `start <= end <= slice.len()` is true.
137+
// `start == slice.len()` is false, so `start < slice.len()` is also true.
138+
unsafe {
139+
(start == 0 || bytes.as_ptr().add(start).read().is_utf8_char_boundary())
140+
&& (end == slice.len() || bytes.as_ptr().add(end).read().is_utf8_char_boundary())
141+
}
142+
}
143+
121144
/// Implements substring slicing with syntax `&self[begin .. end]` or `&mut
122145
/// self[begin .. end]`.
123146
///
@@ -159,30 +182,11 @@ unsafe impl const SliceIndex<str> for ops::Range<usize> {
159182
type Output = str;
160183
#[inline]
161184
fn get(self, slice: &str) -> Option<&Self::Output> {
162-
if self.start <= self.end
163-
&& slice.is_char_boundary(self.start)
164-
&& slice.is_char_boundary(self.end)
165-
{
166-
// SAFETY: just checked that `start` and `end` are on a char boundary,
167-
// and we are passing in a safe reference, so the return value will also be one.
168-
// We also checked char boundaries, so this is valid UTF-8.
169-
Some(unsafe { &*self.get_unchecked(slice) })
170-
} else {
171-
None
172-
}
185+
range::Range::from(self).get(slice)
173186
}
174187
#[inline]
175188
fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
176-
if self.start <= self.end
177-
&& slice.is_char_boundary(self.start)
178-
&& slice.is_char_boundary(self.end)
179-
{
180-
// SAFETY: just checked that `start` and `end` are on a char boundary.
181-
// We know the pointer is unique because we got it from `slice`.
182-
Some(unsafe { &mut *self.get_unchecked_mut(slice) })
183-
} else {
184-
None
185-
}
189+
range::Range::from(self).get_mut(slice)
186190
}
187191
#[inline]
188192
#[track_caller]
@@ -235,26 +239,11 @@ unsafe impl const SliceIndex<str> for ops::Range<usize> {
235239
}
236240
#[inline]
237241
fn index(self, slice: &str) -> &Self::Output {
238-
let (start, end) = (self.start, self.end);
239-
match self.get(slice) {
240-
Some(s) => s,
241-
None => super::slice_error_fail(slice, start, end),
242-
}
242+
range::Range::from(self).index(slice)
243243
}
244244
#[inline]
245245
fn index_mut(self, slice: &mut str) -> &mut Self::Output {
246-
// is_char_boundary checks that the index is in [0, .len()]
247-
// cannot reuse `get` as above, because of NLL trouble
248-
if self.start <= self.end
249-
&& slice.is_char_boundary(self.start)
250-
&& slice.is_char_boundary(self.end)
251-
{
252-
// SAFETY: just checked that `start` and `end` are on a char boundary,
253-
// and we are passing in a safe reference, so the return value will also be one.
254-
unsafe { &mut *self.get_unchecked_mut(slice) }
255-
} else {
256-
super::slice_error_fail(slice, self.start, self.end)
257-
}
246+
range::Range::from(self).index_mut(slice)
258247
}
259248
}
260249

@@ -264,10 +253,7 @@ unsafe impl const SliceIndex<str> for range::Range<usize> {
264253
type Output = str;
265254
#[inline]
266255
fn get(self, slice: &str) -> Option<&Self::Output> {
267-
if self.start <= self.end
268-
&& slice.is_char_boundary(self.start)
269-
&& slice.is_char_boundary(self.end)
270-
{
256+
if check_range(slice, self) {
271257
// SAFETY: just checked that `start` and `end` are on a char boundary,
272258
// and we are passing in a safe reference, so the return value will also be one.
273259
// We also checked char boundaries, so this is valid UTF-8.
@@ -278,10 +264,7 @@ unsafe impl const SliceIndex<str> for range::Range<usize> {
278264
}
279265
#[inline]
280266
fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
281-
if self.start <= self.end
282-
&& slice.is_char_boundary(self.start)
283-
&& slice.is_char_boundary(self.end)
284-
{
267+
if check_range(slice, self) {
285268
// SAFETY: just checked that `start` and `end` are on a char boundary.
286269
// We know the pointer is unique because we got it from `slice`.
287270
Some(unsafe { &mut *self.get_unchecked_mut(slice) })
@@ -350,10 +333,7 @@ unsafe impl const SliceIndex<str> for range::Range<usize> {
350333
fn index_mut(self, slice: &mut str) -> &mut Self::Output {
351334
// is_char_boundary checks that the index is in [0, .len()]
352335
// cannot reuse `get` as above, because of NLL trouble
353-
if self.start <= self.end
354-
&& slice.is_char_boundary(self.start)
355-
&& slice.is_char_boundary(self.end)
356-
{
336+
if check_range(slice, self) {
357337
// SAFETY: just checked that `start` and `end` are on a char boundary,
358338
// and we are passing in a safe reference, so the return value will also be one.
359339
unsafe { &mut *self.get_unchecked_mut(slice) }

tests/codegen-llvm/str-range-indexing.rs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,19 @@ macro_rules! tests {
1818
};
1919
}
2020

21-
// 9 comparisons required:
22-
// start <= end
23-
// && (start == 0 || (start >= len && start == len) || bytes[start] >= -0x40)
24-
// && (end == 0 || (end >= len && end == len) || bytes[end] >= -0x40)
21+
// 7 comparisons required:
22+
// start <= end && end <= len
23+
// && (start == len ||
24+
// ( (start == 0 || bytes[start] >= -0x40)
25+
// && (end == len || bytes[end] >= -0x40)))
2526

2627
// CHECK-LABEL: @get_range
27-
// CHECK-COUNT-9: %{{.+}} = icmp
28+
// CHECK-COUNT-7: %{{.+}} = icmp
2829
// CHECK-NOT: %{{.+}} = icmp
2930
// CHECK: ret
3031

3132
// CHECK-LABEL: @index_range
32-
// CHECK-COUNT-9: %{{.+}} = icmp
33+
// CHECK-COUNT-7: %{{.+}} = icmp
3334
// CHECK-NOT: %{{.+}} = icmp
3435
// CHECK: ret
3536
tests!(Range<usize>, get_range, index_range);

0 commit comments

Comments
 (0)