Skip to content

Commit 2f37e39

Browse files
committed
Add UTF-8 boundary constraints, fix TwoWay period, and improve docs
Address review feedback: - Add is_char_boundary constraints to CharSearcher/MCES abstractions - Fix overflow in kani::assume using subtraction form - Relax TwoWaySearcher period constraint to allow needle_len + 1 - Clarify safety comment about unsafe code under cfg(kani) - Add char boundary constraint to Chars::advance_by abstraction - Document harness scope - Remove extra blank lines
1 parent ad3b155 commit 2f37e39

2 files changed

Lines changed: 26 additions & 6 deletions

File tree

library/core/src/str/iter.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,9 @@ impl<'a> Iterator for Chars<'a> {
112112
let bytes_len = self.iter.len();
113113
let bytes_consumed: usize = kani::any();
114114
kani::assume(bytes_consumed <= bytes_len);
115+
// Ensure the new position is a valid char boundary.
116+
let rem = unsafe { crate::str::from_utf8_unchecked(self.iter.as_slice()) };
117+
kani::assume(rem.is_char_boundary(bytes_consumed));
115118
if bytes_consumed > 0 {
116119
// SAFETY: bytes_consumed <= bytes_len = self.iter.len()
117120
unsafe { self.iter.advance_by(bytes_consumed).unwrap_unchecked() };
@@ -1645,6 +1648,9 @@ pub mod verify {
16451648
use super::*;
16461649

16471650
// ========== Chars ==========
1651+
// Note: harnesses use single-char strings (1-4 bytes). The verification
1652+
// generalizes to arbitrary-length strings via the nondeterministic searcher
1653+
// abstractions in pattern.rs, which are haystack-content-independent.
16481654

16491655
/// Verify safety of Chars::next.
16501656
/// Unsafe ops: next_code_point + char::from_u32_unchecked.

library/core/src/str/pattern.rs

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,8 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
490490
kani::assume(a >= self.finger);
491491
kani::assume(a <= self.finger_back); // avoid overflow in a + w
492492
kani::assume(w <= self.finger_back - a);
493+
kani::assume(self.haystack.is_char_boundary(a));
494+
kani::assume(self.haystack.is_char_boundary(a + w));
493495
self.finger = a + w;
494496
Some((a, self.finger))
495497
} else {
@@ -527,8 +529,9 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
527529
let old_finger = self.finger;
528530
let w: usize = kani::any();
529531
kani::assume(w >= 1 && w <= 4);
530-
kani::assume(old_finger + w <= self.finger_back);
532+
kani::assume(w <= self.finger_back - old_finger);
531533
self.finger = old_finger + w;
534+
kani::assume(self.haystack.is_char_boundary(self.finger));
532535
Some((old_finger, self.finger))
533536
} else {
534537
self.finger = self.finger_back;
@@ -629,6 +632,8 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
629632
kani::assume(a >= self.finger);
630633
kani::assume(a <= self.finger_back); // avoid overflow in a + w
631634
kani::assume(w <= self.finger_back - a);
635+
kani::assume(self.haystack.is_char_boundary(a));
636+
kani::assume(self.haystack.is_char_boundary(a + w));
632637
self.finger_back = a;
633638
Some((a, a + w))
634639
} else {
@@ -661,8 +666,9 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
661666
let old_finger_back = self.finger_back;
662667
let w: usize = kani::any();
663668
kani::assume(w >= 1 && w <= 4);
664-
kani::assume(self.finger + w <= old_finger_back);
669+
kani::assume(w <= old_finger_back - self.finger);
665670
self.finger_back = old_finger_back - w;
671+
kani::assume(self.haystack.is_char_boundary(self.finger_back));
666672
Some((self.finger_back, old_finger_back))
667673
} else {
668674
self.finger_back = self.finger;
@@ -851,6 +857,8 @@ unsafe impl<'a, C: MultiCharEq> Searcher<'a> for MultiCharEqSearcher<'a, C> {
851857
kani::assume(char_len >= 1 && char_len <= 4);
852858
kani::assume(i <= self.haystack.len());
853859
kani::assume(char_len <= self.haystack.len() - i);
860+
kani::assume(self.haystack.is_char_boundary(i));
861+
kani::assume(self.haystack.is_char_boundary(i + char_len));
854862
Some((i, i + char_len))
855863
} else {
856864
None
@@ -876,6 +884,8 @@ unsafe impl<'a, C: MultiCharEq> Searcher<'a> for MultiCharEqSearcher<'a, C> {
876884
kani::assume(char_len >= 1 && char_len <= 4);
877885
kani::assume(i <= self.haystack.len());
878886
kani::assume(char_len <= self.haystack.len() - i);
887+
kani::assume(self.haystack.is_char_boundary(i));
888+
kani::assume(self.haystack.is_char_boundary(i + char_len));
879889
Some((i, i + char_len))
880890
} else {
881891
None
@@ -922,6 +932,8 @@ unsafe impl<'a, C: MultiCharEq> ReverseSearcher<'a> for MultiCharEqSearcher<'a,
922932
kani::assume(char_len >= 1 && char_len <= 4);
923933
kani::assume(i <= self.haystack.len());
924934
kani::assume(char_len <= self.haystack.len() - i);
935+
kani::assume(self.haystack.is_char_boundary(i));
936+
kani::assume(self.haystack.is_char_boundary(i + char_len));
925937
Some((i, i + char_len))
926938
} else {
927939
None
@@ -947,6 +959,8 @@ unsafe impl<'a, C: MultiCharEq> ReverseSearcher<'a> for MultiCharEqSearcher<'a,
947959
kani::assume(char_len >= 1 && char_len <= 4);
948960
kani::assume(i <= self.haystack.len());
949961
kani::assume(char_len <= self.haystack.len() - i);
962+
kani::assume(self.haystack.is_char_boundary(i));
963+
kani::assume(self.haystack.is_char_boundary(i + char_len));
950964
Some((i, i + char_len))
951965
} else {
952966
None
@@ -1831,7 +1845,9 @@ impl TwoWaySearcher {
18311845
// Under Kani, abstract away maximal_suffix computation which has deeply
18321846
// nested loops intractable for CBMC. Instead, produce a nondeterministic
18331847
// TwoWaySearcher satisfying the type invariant. This is sound because:
1834-
// - All TwoWaySearcher code is safe Rust (no UB possible regardless of field values)
1848+
// - All TwoWaySearcher code is safe Rust (no UB possible regardless of field values);
1849+
// under #[cfg(kani)], the unsafe helper routines (e.g., raw pointer reads in
1850+
// maximal_suffix) are not exercised by the Kani abstractions
18351851
// - The StrSearcher wrapper's UTF-8 boundary correction is what we actually verify
18361852
// - The real new() is tested by Rust's own test suite for correctness
18371853
#[cfg(kani)]
@@ -1843,7 +1859,7 @@ impl TwoWaySearcher {
18431859
let crit_pos_back: usize = kani::any();
18441860
kani::assume(crit_pos_back < needle_len);
18451861
let period: usize = kani::any();
1846-
kani::assume(period >= 1 && period <= needle_len);
1862+
kani::assume(period >= 1 && period <= needle_len + 1);
18471863
let is_long: bool = kani::any();
18481864
TwoWaySearcher {
18491865
crit_pos,
@@ -2538,8 +2554,6 @@ unsafe fn small_slice_eq(x: &[u8], y: &[u8]) -> bool {
25382554
}
25392555
}
25402556

2541-
2542-
25432557
#[cfg(kani)]
25442558
#[unstable(feature = "kani", issue = "none")]
25452559
pub mod verify {

0 commit comments

Comments
 (0)