Skip to content

Commit d35668f

Browse files
Rollup merge of rust-lang#156119 - Kmeakin:km/optimize-str-index, r=Mark-Simulacrum
Further optimize `SliceIndex<str>` impl for `Range<usize>` We can shave a further 2 `icmp`s by inlining `is_char_boundary` and rearranging the checks. Follow up to rust-lang#145024
2 parents 1b47481 + ef9eff9 commit d35668f

2 files changed

Lines changed: 46 additions & 60 deletions

File tree

library/core/src/str/traits.rs

Lines changed: 39 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,35 @@ const unsafe impl SliceIndex<str> for ops::RangeFull {
118118
}
119119
}
120120

121+
/// Check that a range is in bounds for slicing a string.
122+
/// If this returns true, it is safe to call `slice.get_unchecked(range)` or
123+
/// `slice.get_unchecked_mut(range)`.
124+
#[inline(always)]
125+
const fn check_range(slice: &str, range: crate::range::Range<usize>) -> bool {
126+
let crate::range::Range { start, end } = range;
127+
let bytes = slice.as_bytes();
128+
129+
if start > end || end > slice.len() {
130+
return false;
131+
}
132+
133+
if start == slice.len() {
134+
// If `start == slice.len()`, then `end == slice.len()` must also be true.
135+
return true;
136+
}
137+
138+
// SAFETY:
139+
// `start > end || end > slice.len()` is false, so `start <= end <= slice.len()` is true.
140+
// `start == slice.len()` is false, so `start < slice.len()` is also true.
141+
//
142+
// No need to check for `end == 0`, because if `end == 0` is true then `start == slice.len()`
143+
// would also be true, which is already handled above.
144+
unsafe {
145+
(start == 0 || bytes.as_ptr().add(start).read().is_utf8_char_boundary())
146+
&& (end == slice.len() || bytes.as_ptr().add(end).read().is_utf8_char_boundary())
147+
}
148+
}
149+
121150
/// Implements substring slicing with syntax `&self[begin .. end]` or `&mut
122151
/// self[begin .. end]`.
123152
///
@@ -159,30 +188,11 @@ const unsafe impl SliceIndex<str> for ops::Range<usize> {
159188
type Output = str;
160189
#[inline]
161190
fn get(self, slice: &str) -> Option<&Self::Output> {
162-
if self.start <= self.end
163-
&& slice.is_char_boundary(self.start)
164-
&& slice.is_char_boundary(self.end)
165-
{
166-
// SAFETY: just checked that `start` and `end` are on a char boundary,
167-
// and we are passing in a safe reference, so the return value will also be one.
168-
// We also checked char boundaries, so this is valid UTF-8.
169-
Some(unsafe { &*self.get_unchecked(slice) })
170-
} else {
171-
None
172-
}
191+
range::Range::from(self).get(slice)
173192
}
174193
#[inline]
175194
fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
176-
if self.start <= self.end
177-
&& slice.is_char_boundary(self.start)
178-
&& slice.is_char_boundary(self.end)
179-
{
180-
// SAFETY: just checked that `start` and `end` are on a char boundary.
181-
// We know the pointer is unique because we got it from `slice`.
182-
Some(unsafe { &mut *self.get_unchecked_mut(slice) })
183-
} else {
184-
None
185-
}
195+
range::Range::from(self).get_mut(slice)
186196
}
187197
#[inline]
188198
#[track_caller]
@@ -235,26 +245,11 @@ const unsafe impl SliceIndex<str> for ops::Range<usize> {
235245
}
236246
#[inline]
237247
fn index(self, slice: &str) -> &Self::Output {
238-
let (start, end) = (self.start, self.end);
239-
match self.get(slice) {
240-
Some(s) => s,
241-
None => super::slice_error_fail(slice, start, end),
242-
}
248+
range::Range::from(self).index(slice)
243249
}
244250
#[inline]
245251
fn index_mut(self, slice: &mut str) -> &mut Self::Output {
246-
// is_char_boundary checks that the index is in [0, .len()]
247-
// cannot reuse `get` as above, because of NLL trouble
248-
if self.start <= self.end
249-
&& slice.is_char_boundary(self.start)
250-
&& slice.is_char_boundary(self.end)
251-
{
252-
// SAFETY: just checked that `start` and `end` are on a char boundary,
253-
// and we are passing in a safe reference, so the return value will also be one.
254-
unsafe { &mut *self.get_unchecked_mut(slice) }
255-
} else {
256-
super::slice_error_fail(slice, self.start, self.end)
257-
}
252+
range::Range::from(self).index_mut(slice)
258253
}
259254
}
260255

@@ -264,11 +259,8 @@ const unsafe impl SliceIndex<str> for range::Range<usize> {
264259
type Output = str;
265260
#[inline]
266261
fn get(self, slice: &str) -> Option<&Self::Output> {
267-
if self.start <= self.end
268-
&& slice.is_char_boundary(self.start)
269-
&& slice.is_char_boundary(self.end)
270-
{
271-
// SAFETY: just checked that `start` and `end` are on a char boundary,
262+
if check_range(slice, self) {
263+
// SAFETY: just checked that `self` is in bounds,
272264
// and we are passing in a safe reference, so the return value will also be one.
273265
// We also checked char boundaries, so this is valid UTF-8.
274266
Some(unsafe { &*self.get_unchecked(slice) })
@@ -278,11 +270,8 @@ const unsafe impl SliceIndex<str> for range::Range<usize> {
278270
}
279271
#[inline]
280272
fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
281-
if self.start <= self.end
282-
&& slice.is_char_boundary(self.start)
283-
&& slice.is_char_boundary(self.end)
284-
{
285-
// SAFETY: just checked that `start` and `end` are on a char boundary.
273+
if check_range(slice, self) {
274+
// SAFETY: just checked that `self` is in bounds.
286275
// We know the pointer is unique because we got it from `slice`.
287276
Some(unsafe { &mut *self.get_unchecked_mut(slice) })
288277
} else {
@@ -348,13 +337,9 @@ const unsafe impl SliceIndex<str> for range::Range<usize> {
348337
}
349338
#[inline]
350339
fn index_mut(self, slice: &mut str) -> &mut Self::Output {
351-
// is_char_boundary checks that the index is in [0, .len()]
352340
// cannot reuse `get` as above, because of NLL trouble
353-
if self.start <= self.end
354-
&& slice.is_char_boundary(self.start)
355-
&& slice.is_char_boundary(self.end)
356-
{
357-
// SAFETY: just checked that `start` and `end` are on a char boundary,
341+
if check_range(slice, self) {
342+
// SAFETY: just checked that `self` is in bounds,
358343
// and we are passing in a safe reference, so the return value will also be one.
359344
unsafe { &mut *self.get_unchecked_mut(slice) }
360345
} else {

tests/codegen-llvm/str-range-indexing.rs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,19 @@ macro_rules! tests {
1818
};
1919
}
2020

21-
// 9 comparisons required:
22-
// start <= end
23-
// && (start == 0 || (start >= len && start == len) || bytes[start] >= -0x40)
24-
// && (end == 0 || (end >= len && end == len) || bytes[end] >= -0x40)
21+
// 7 comparisons required:
22+
// start <= end && end <= len
23+
// && (start == len ||
24+
// ( (start == 0 || bytes[start] >= -0x40)
25+
// && (end == len || bytes[end] >= -0x40)))
2526

2627
// CHECK-LABEL: @get_range
27-
// CHECK-COUNT-9: %{{.+}} = icmp
28+
// CHECK-COUNT-7: %{{.+}} = icmp
2829
// CHECK-NOT: %{{.+}} = icmp
2930
// CHECK: ret
3031

3132
// CHECK-LABEL: @index_range
32-
// CHECK-COUNT-9: %{{.+}} = icmp
33+
// CHECK-COUNT-7: %{{.+}} = icmp
3334
// CHECK-NOT: %{{.+}} = icmp
3435
// CHECK: ret
3536
tests!(Range<usize>, get_range, index_range);

0 commit comments

Comments
 (0)