Skip to content

Commit 4a9c0fc

Browse files
committed
Add UTF-8 boundary constraints, fix overflow, and improve docs
Address review feedback: - Add is_char_boundary constraints to CharSearcher and MCES abstractions - Fix potential overflow in kani::assume using subtraction form - Document stubs as deliberate overapproximations - Document ASCII-only test_haystack rationale - Remove duplicate doc line
1 parent d763699 commit 4a9c0fc

1 file changed

Lines changed: 29 additions & 8 deletions

File tree

library/core/src/str/pattern.rs

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,8 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
490490
kani::assume(a >= self.finger);
491491
kani::assume(a <= self.finger_back); // avoid overflow in a + w
492492
kani::assume(w <= self.finger_back - a);
493+
kani::assume(self.haystack.is_char_boundary(a));
494+
kani::assume(self.haystack.is_char_boundary(a + w));
493495
self.finger = a + w;
494496
Some((a, self.finger))
495497
} else {
@@ -527,8 +529,9 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
527529
let old_finger = self.finger;
528530
let w: usize = kani::any();
529531
kani::assume(w >= 1 && w <= 4);
530-
kani::assume(old_finger + w <= self.finger_back);
532+
kani::assume(w <= self.finger_back - old_finger);
531533
self.finger = old_finger + w;
534+
kani::assume(self.haystack.is_char_boundary(self.finger));
532535
Some((old_finger, self.finger))
533536
} else {
534537
self.finger = self.finger_back;
@@ -629,6 +632,8 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
629632
kani::assume(a >= self.finger);
630633
kani::assume(a <= self.finger_back); // avoid overflow in a + w
631634
kani::assume(w <= self.finger_back - a);
635+
kani::assume(self.haystack.is_char_boundary(a));
636+
kani::assume(self.haystack.is_char_boundary(a + w));
632637
self.finger_back = a;
633638
Some((a, a + w))
634639
} else {
@@ -661,8 +666,9 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
661666
let old_finger_back = self.finger_back;
662667
let w: usize = kani::any();
663668
kani::assume(w >= 1 && w <= 4);
664-
kani::assume(self.finger + w <= old_finger_back);
669+
kani::assume(w <= old_finger_back - self.finger);
665670
self.finger_back = old_finger_back - w;
671+
kani::assume(self.haystack.is_char_boundary(self.finger_back));
666672
Some((self.finger_back, old_finger_back))
667673
} else {
668674
self.finger_back = self.finger;
@@ -851,6 +857,8 @@ unsafe impl<'a, C: MultiCharEq> Searcher<'a> for MultiCharEqSearcher<'a, C> {
851857
kani::assume(char_len >= 1 && char_len <= 4);
852858
kani::assume(i <= self.haystack.len());
853859
kani::assume(char_len <= self.haystack.len() - i);
860+
kani::assume(self.haystack.is_char_boundary(i));
861+
kani::assume(self.haystack.is_char_boundary(i + char_len));
854862
Some((i, i + char_len))
855863
} else {
856864
None
@@ -876,6 +884,8 @@ unsafe impl<'a, C: MultiCharEq> Searcher<'a> for MultiCharEqSearcher<'a, C> {
876884
kani::assume(char_len >= 1 && char_len <= 4);
877885
kani::assume(i <= self.haystack.len());
878886
kani::assume(char_len <= self.haystack.len() - i);
887+
kani::assume(self.haystack.is_char_boundary(i));
888+
kani::assume(self.haystack.is_char_boundary(i + char_len));
879889
Some((i, i + char_len))
880890
} else {
881891
None
@@ -922,6 +932,8 @@ unsafe impl<'a, C: MultiCharEq> ReverseSearcher<'a> for MultiCharEqSearcher<'a,
922932
kani::assume(char_len >= 1 && char_len <= 4);
923933
kani::assume(i <= self.haystack.len());
924934
kani::assume(char_len <= self.haystack.len() - i);
935+
kani::assume(self.haystack.is_char_boundary(i));
936+
kani::assume(self.haystack.is_char_boundary(i + char_len));
925937
Some((i, i + char_len))
926938
} else {
927939
None
@@ -947,6 +959,8 @@ unsafe impl<'a, C: MultiCharEq> ReverseSearcher<'a> for MultiCharEqSearcher<'a,
947959
kani::assume(char_len >= 1 && char_len <= 4);
948960
kani::assume(i <= self.haystack.len());
949961
kani::assume(char_len <= self.haystack.len() - i);
962+
kani::assume(self.haystack.is_char_boundary(i));
963+
kani::assume(self.haystack.is_char_boundary(i + char_len));
950964
Some((i, i + char_len))
951965
} else {
952966
None
@@ -2347,10 +2361,15 @@ pub mod verify_searchers {
23472361
}
23482362

23492363
/// Generate a haystack covering structural cases.
2350-
/// These concrete strings cover the key structural cases:
2364+
/// These concrete ASCII strings cover the key structural cases:
23512365
/// - Empty (finger == finger_back)
23522366
/// - Single char (one iteration)
23532367
/// - Multi-char (iteration logic)
2368+
///
2369+
/// ASCII-only is sufficient because the #[cfg(kani)] abstractions constrain
2370+
/// returned indices to `is_char_boundary` positions, and the harnesses verify
2371+
/// boundary-preservation in postconditions. The abstractions themselves are
2372+
/// haystack-content-independent overapproximations.
23542373
fn test_haystack() -> &'static str {
23552374
let choice: u8 = kani::any();
23562375
match choice % 3 {
@@ -2371,8 +2390,10 @@ pub mod verify_searchers {
23712390
// complex memchr implementation.
23722391
//=========================================================================
23732392

2374-
/// Abstract stub for memchr: returns the first index of byte `x` in `text`,
2375-
/// or None if not found.
2393+
/// Abstract stub for memchr: overapproximation that returns *some* index
2394+
/// where `text[index] == x`, or None. Does not enforce "first occurrence"
2395+
/// semantics — this is sound because our proofs verify safety properties
2396+
/// that hold for ANY valid matching index, not just the first.
23762397
fn stub_memchr(x: u8, text: &[u8]) -> Option<usize> {
23772398
if kani::any() {
23782399
let index: usize = kani::any();
@@ -2384,8 +2405,9 @@ pub mod verify_searchers {
23842405
}
23852406
}
23862407

2387-
/// Abstract stub for memrchr: returns the last index of byte `x` in `text`,
2388-
/// or None if not found.
2408+
/// Abstract stub for memrchr: overapproximation that returns *some* index
2409+
/// where `text[index] == x`, or None. Does not enforce "last occurrence"
2410+
/// semantics — sound for the same reason as stub_memchr above.
23892411
fn stub_memrchr(x: u8, text: &[u8]) -> Option<usize> {
23902412
if kani::any() {
23912413
let index: usize = kani::any();
@@ -2563,7 +2585,6 @@ pub mod verify_searchers {
25632585
// MultiCharEqSearcher Verification (Group B -- all safe code)
25642586
//=========================================================================
25652587

2566-
/// Verify into_searcher establishes MultiCharEqSearcher invariant.
25672588
/// Verify into_searcher establishes the MultiCharEqSearcher type invariant.
25682589
/// Uses empty haystack because MCES is entirely safe code (no unsafe blocks),
25692590
/// and CharIndices over non-empty strings creates an intractably large CBMC model.

0 commit comments

Comments
 (0)