fix(symbolic-proofs): preserve list invariants and harden stats

Stevengre · Stevengre · commit c1b27dacd9b5 · 2026-04-13T03:15:12.000Z
diff --git a/kmir/src/kmir/_prove.py b/kmir/src/kmir/_prove.py
@@ -30,7 +30,6 @@
 
 _LOGGER: Final = logging.getLogger(__name__)
 
-
 def prove(opts: ProveOpts) -> APRProof:
     if not opts.rs_file.is_file():
         raise ValueError(f'Input file does not exist: {opts.rs_file}')
@@ -128,7 +127,6 @@ def _prove(opts: ProveOpts, target_path: Path, label: str, *, allow_rpc_recovery
         break_every_step=opts.break_every_step,
         break_on_function=opts.break_on_function,
     )
-
     try:
         if opts.max_workers and opts.max_workers > 1:
             _prove_parallel(kmir, proof, opts=opts, label=label, cut_point_rules=cut_point_rules)
diff --git a/kmir/src/kmir/kast.py b/kmir/src/kmir/kast.py
@@ -380,7 +380,11 @@ def _symbolic_value(self, ty: Ty, mutable: bool) -> tuple[KInner, Iterable[KInne
                     mlEqualsTrue(leInt(variant_var, token(max_variant))),
                 ]
                 args = self._fresh_var('ENUM_ARGS')
-                return KApply('Value::Aggregate', (KApply('variantIdx', (variant_var,)), args)), idx_range, None
+                return (
+                    KApply('Value::Aggregate', (KApply('variantIdx', (variant_var,)), args)),
+                    idx_range + [mlEqualsTrue(KApply('allValues', (args,)))],
+                    None,
+                )
 
             case StructT(_, _, fields):
                 field_vars: list[KInner] = []
@@ -397,14 +401,18 @@ def _symbolic_value(self, ty: Ty, mutable: bool) -> tuple[KInner, Iterable[KInne
 
             case UnionT():
                 args = self._fresh_var('ARG_UNION')
-                return KApply('Value::Aggregate', (KApply('variantIdx', (token(0),)), args)), [], None
+                return (
+                    KApply('Value::Aggregate', (KApply('variantIdx', (token(0),)), args)),
+                    [mlEqualsTrue(KApply('allValues', (args,)))],
+                    None,
+                )
 
             case ArrayT(_, None):
                 elems = self._fresh_var('ARG_ARRAY')
                 l = self._fresh_var('ARG_ARRAY_LEN')
                 return (
                     KApply('Value::Range', (elems,)),
-                    [mlEqualsTrue(eqInt(KApply('sizeList', (elems,)), l))],
+                    [mlEqualsTrue(eqInt(KApply('sizeList', (elems,)), l)), mlEqualsTrue(KApply('allValues', (elems,)))],
                     KApply(
                         'Metadata',
                         (
diff --git a/kmir/src/kmir/kdist/mir-semantics/kmir-ast.md b/kmir/src/kmir/kdist/mir-semantics/kmir-ast.md
@@ -33,5 +33,7 @@ module KMIR-AST
 
   syntax TypeMappings ::= List{TypeMapping, ""} [group(mir-list), symbol(TypeMappings::append), terminator-symbol(TypeMappings::empty)]
 
+  syntax Bool ::= allValues ( List ) [function, total, symbol(allValues)]
+
 endmodule
 ```
diff --git a/kmir/src/kmir/kdist/mir-semantics/lemmas/kmir-lemmas.md b/kmir/src/kmir/kdist/mir-semantics/lemmas/kmir-lemmas.md
@@ -16,6 +16,7 @@ module KMIR-LEMMAS
   imports INT-SYMBOLIC
   imports BOOL
 
+  imports KMIR-AST
   imports RT-DATA
 ```
 ## Simplifications for lists to avoid spurious branching on error cases in control flow
@@ -33,6 +34,39 @@ The lists used in the semantics are cons-lists, so only rules with a head elemen
     [simplification, symbolic(REST)]
 
   rule 0 <=Int size(_LIST:List) => true [simplification]
+
+  // `#reserveSlots` grows `ownedSlots` and `<slotStore>` in lockstep. These simplifications
+  // let `frameLocal` peel away irrelevant tail updates when reading an older local, and
+  // directly return the newly-added local when the read reaches the matching tail slot.
+  rule frameLocal(_STORE[SLOT <- LOCAL], SLOTS ListItem(SLOT), size(SLOTS)) => LOCAL
+    requires isTypedLocal(LOCAL)
+    [simplification]
+
+  rule frameLocal(STORE[SLOT <- _], SLOTS ListItem(SLOT), IDX) => frameLocal(STORE, SLOTS, IDX)
+    requires 0 <=Int IDX andBool IDX <Int size(SLOTS)
+    [simplification]
+
+  // allValues(L) holds iff every element of L is a `Value`; the empty list qualifies.
+  rule allValues(.List) => true
+  rule allValues(ListItem(_:Value) REST) => allValues(REST)
+  rule allValues(ListItem(_) _REST) => false [owise]
+
+  // Symbolic prove-rs inputs use fresh `List` variables to stand for arrays, slices,
+  // and aggregate argument lists whose elements are still runtime `Value`s. Carrying
+  // that invariant explicitly lets reads and writes avoid spurious branches on the
+  // underlying builtin `List:get` / `List:set` definedness checks.
+  rule isValue(ELEMS[IDX])
+    => true
+    requires allValues(ELEMS)
+     andBool 0 <=Int IDX
+     andBool IDX <Int size(ELEMS)
+    [simplification, symbolic(ELEMS)]
+
+  rule #Ceil(ELEMS[IDX <- _VAL:Value])
+    => #Ceil(ELEMS)
+     #And {true #Equals allValues(ELEMS)}
+     #And {true #Equals 0 <=Int IDX andBool IDX <Int size(ELEMS)}
+    [simplification, symbolic(ELEMS)]
 ```
 
 The hooked `range` function selects a segment from a list, by removing elements from front and back.
diff --git a/kmir/src/kmir/kdist/mir-semantics/rt/data.md b/kmir/src/kmir/kdist/mir-semantics/rt/data.md
@@ -60,17 +60,6 @@ More often than not, a slot or list element must be selected by index and is req
     requires 0 <=Int IDX andBool IDX <Int size(SLOTS)
      [preserves-definedness]
 
-  // Fresh callee slots are appended to the frame list and written into the store in lockstep.
-  // These simplifications let current-frame reads reduce through the most recent updates even
-  // when the slot ids themselves are symbolic fresh values.
-  rule frameLocal(_STORE[SLOT <- LOCAL], SLOTS ListItem(SLOT), size(SLOTS)) => LOCAL
-    requires isTypedLocal(LOCAL)
-    [simplification]
-
-  rule frameLocal(STORE[SLOT <- _], SLOTS ListItem(SLOT), IDX) => frameLocal(STORE, SLOTS, IDX)
-    requires 0 <=Int IDX andBool IDX <Int size(SLOTS)
-    [simplification]
-
   // indexing values out of TypedValue, runtime slots, and Value lists
   syntax Value ::= getSlotValue ( Map, Int ) [function]
                  | frameValue   ( Map, List, Int ) [function]
@@ -241,13 +230,23 @@ If we are setting a value at a `Place` which has `Projection`s in it, then we mu
      andBool isNewLocal(getSlot(STORE, SLOT))
     [preserves-definedness] // valid lookup checked
 
-  rule <k> #setLocalValue(place(local(I), .ProjectionElems), VAL:Value)
-        => #setSlotValue(#frameSlotId(SLOTS, I), VAL)
-        ...
-       </k>
+  rule <k> #setLocalValue(place(local(I), .ProjectionElems), VAL:Value) => .K ... </k>
+       <currentFrame> <ownedSlots> SLOTS </ownedSlots> ... </currentFrame>
+       <slotStore>
+          STORE => STORE[#frameSlotId(SLOTS, I) <- typedValue(VAL, tyOfLocal(frameLocal(STORE, SLOTS, I)), mutabilityOf(frameLocal(STORE, SLOTS, I)))]
+       </slotStore>
+    requires 0 <=Int I andBool I <Int size(SLOTS)
+     andBool isTypedValue(frameLocal(STORE, SLOTS, I))
+    [preserves-definedness] // valid slot indexing and lookup checked
+
+  rule <k> #setLocalValue(place(local(I), .ProjectionElems), VAL:Value) => .K ... </k>
        <currentFrame> <ownedSlots> SLOTS </ownedSlots> ... </currentFrame>
+       <slotStore>
+          STORE => STORE[#frameSlotId(SLOTS, I) <- typedValue(VAL, tyOfLocal(frameLocal(STORE, SLOTS, I)), mutabilityOf(frameLocal(STORE, SLOTS, I)))]
+       </slotStore>
     requires 0 <=Int I andBool I <Int size(SLOTS)
-    [preserves-definedness] // valid slot indexing checked
+     andBool isNewLocal(frameLocal(STORE, SLOTS, I))
+    [preserves-definedness] // valid slot indexing and lookup checked
 
   rule <k> #setLocalValue(place(local(I), PROJ), VAL:Value)
         => #traverseProjection(toSlot(#frameSlotId(SLOTS, I)), frameValue(STORE, SLOTS, I), PROJ, .Contexts)
diff --git a/kmir/src/kmir/utils.py b/kmir/src/kmir/utils.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import heapq
 import re
 from pathlib import Path
 from typing import TYPE_CHECKING, Sequence
@@ -177,53 +178,62 @@ def classify(node_id: int) -> str:
     reachable_leaf_count = 0
     leaf_lines: list[str] = []
 
-    def _path_nodes(source_id: int, path: Sequence[KCFG.Successor]) -> list[int]:
+    def _successor_edges(source_id: int) -> list[tuple[int, int]]:  # (target id, step weight) pairs
         from pyk.kcfg.kcfg import KCFG as _KCFG
 
-        node_ids = [source_id]
-        current = source_id
-        for succ in path:
-            target_id: int | None = None
-            if isinstance(succ, _KCFG.EdgeLike):
-                target_id = succ.target.id
-            elif isinstance(succ, _KCFG.MultiEdge):
-                targets = list(succ.targets)
-                if len(targets) == 1:
-                    target_id = targets[0].id
-            if target_id is not None and target_id != current:
-                node_ids.append(target_id)
-                current = target_id
-        return node_ids
-
-    for leaf in sorted(leaves, key=lambda n: n.id):
-        paths = kcfg.paths_between(proof.init, leaf.id)
-        if not paths:
-            leaf_lines.append(f'  leaf {leaf.id}: unreachable from init')
-            continue
+        edges: list[tuple[int, int]] = []
+        for succ in kcfg.successors(source_id):  # NOTE(review): weights appear to mirror KCFG.path_length - confirm
+            match succ:
+                case _KCFG.Edge(target=target, depth=depth):
+                    edges.append((target.id, depth))  # weight is the edge's own depth
+                case _KCFG.MergedEdge(target=target, edges=merged_edges):
+                    edges.append((target.id, min(edge.depth for edge in merged_edges)))  # cheapest merged edge
+                case _KCFG.Cover(target=target):
+                    edges.append((target.id, 0))  # covers add no steps
+                case _KCFG.Split(targets=targets):
+                    edges.extend((target.id, 0) for target in targets)  # one zero-weight entry per split target
+                case _KCFG.NDBranch(targets=targets):
+                    edges.extend((target.id, 1) for target in targets)  # each branch target costs one step
+                case _:
+                    raise ValueError(f'Cannot handle Successor type: {type(succ)}')  # fail loudly on unknown kinds
+        return edges
 
-        path_infos: list[tuple[int, tuple[int, ...]]] = []
-        seen_sequences: set[tuple[int, ...]] = set()
+    shortest_steps: dict[int, int] = {proof.init: 0}
+    shortest_prev: dict[int, int] = {}
+    worklist: list[tuple[int, int]] = [(0, proof.init)]
 
-        for path in paths:
-            steps = kcfg.path_length(path)
-            node_seq = tuple(_path_nodes(proof.init, path))
-            if node_seq in seen_sequences:
-                continue
-            seen_sequences.add(node_seq)
-            path_infos.append((steps, node_seq))
+    while worklist:
+        curr_steps, node_id = heapq.heappop(worklist)
+        if curr_steps != shortest_steps.get(node_id):
+            continue
+        for target_id, weight in sorted(_successor_edges(node_id)):
+            next_steps = curr_steps + weight
+            prev_steps = shortest_steps.get(target_id)
+            # Update only on a strictly better cost, keeping the first predecessor found
+            # at that cost. Overwriting on ties would let zero-weight Cover/Split edges
+            # close a cycle in `shortest_prev`, and path reconstruction would never end.
+            if prev_steps is None or next_steps < prev_steps:
+                shortest_steps[target_id] = next_steps
+                shortest_prev[target_id] = node_id
+                heapq.heappush(worklist, (next_steps, target_id))
+
+    def _shortest_path_nodes(target_id: int) -> list[int]:  # node ids from proof.init to target_id, inclusive
+        node_ids = [target_id]
+        while node_ids[-1] != proof.init:  # follow predecessor links until the init node is reached
+            node_ids.append(shortest_prev[node_ids[-1]])  # KeyError if the node has no recorded predecessor
+        node_ids.reverse()  # collected target-to-init; flip to init-to-target order
+        return node_ids
 
-        if not path_infos:
+    for leaf in sorted(leaves, key=lambda n: n.id):
+        min_steps = shortest_steps.get(leaf.id)
+        if min_steps is None:
             leaf_lines.append(f'  leaf {leaf.id}: unreachable from init')
             continue
 
-        total_steps += min(steps for steps, _ in path_infos)
+        total_steps += min_steps
         reachable_leaf_count += 1
-        path_infos.sort(key=lambda info: (info[0], info[1]))
-
-        for idx, (steps, node_seq) in enumerate(path_infos, start=1):
-            suffix = '' if len(path_infos) == 1 else f' (path {idx}/{len(path_infos)})'
-            seq_str = ' -> '.join(str(nid) for nid in node_seq)
-            leaf_lines.append(f'  leaf {leaf.id}{suffix}: steps {steps}, path {seq_str}')
+        seq_str = ' -> '.join(str(nid) for nid in _shortest_path_nodes(leaf.id))
+        leaf_lines.append(f'  leaf {leaf.id}: shortest steps {min_steps}, path {seq_str}')
 
     lines.append(f'  total leaves (non-root): {len(leaves)}')
     lines.append(f'  reachable leaves       : {reachable_leaf_count}')
diff --git a/kmir/src/tests/integration/test_cli.py b/kmir/src/tests/integration/test_cli.py
@@ -22,6 +22,7 @@
 # they don't differ between local checkouts and CI (e.g. symbolic-args-fail.main.cli-stats-leaves).
 _REPO_ROOT = str(Path(__file__).resolve().parents[4])
 _PATH_REPLACEMENTS: dict[str, str] = {_REPO_ROOT + '/': '<REPO>/'}
+_SNAPSHOT_PROVE_MAX_DEPTH = 50
 
 
 def _prove_and_store(
@@ -32,7 +33,8 @@ def _prove_and_store(
     is_smir: bool = False,
     max_depth: int | None = None,
 ) -> APRProof:
-    opts = ProveOpts(rs_or_json, proof_dir=tmp_path, smir=is_smir, start_symbol=start_symbol, max_depth=max_depth)
+    proof_max_depth = _SNAPSHOT_PROVE_MAX_DEPTH if max_depth is None else max_depth
+    opts = ProveOpts(rs_or_json, proof_dir=tmp_path, smir=is_smir, start_symbol=start_symbol, max_depth=proof_max_depth)
     apr_proof = kmir.prove_program(opts)
     apr_proof.write_proof_data()
     return apr_proof
diff --git a/kmir/src/tests/integration/test_integration.py b/kmir/src/tests/integration/test_integration.py
@@ -73,6 +73,11 @@
     'ptr-cast-array-to-singleton-wrapped-array-fail',
 ]
 
+PROVE_EXPECTED_FAILURES = {
+    ('symbolic-args-fail', 'eats_all_args'): False,
+}
+SNAPSHOT_PROVE_MAX_DEPTH = 50
+
 
 @pytest.mark.parametrize(
     'rs_file',
@@ -87,7 +92,7 @@ def test_prove(rs_file: Path, kmir: KMIR, update_expected_output: bool) -> None:
     if update_expected_output and not should_show:
         pytest.skip()
 
-    prove_opts = ProveOpts(rs_file, smir=is_smir, terminate_on_thunk=True)
+    prove_opts = ProveOpts(rs_file, smir=is_smir, terminate_on_thunk=True, max_depth=SNAPSHOT_PROVE_MAX_DEPTH)
     printer = PrettyPrinter(kmir.definition)
     cterm_show = CTermShow(printer.print)
 
@@ -98,6 +103,7 @@ def test_prove(rs_file: Path, kmir: KMIR, update_expected_output: bool) -> None:
     for start_symbol in start_symbols:
         prove_opts.start_symbol = start_symbol
         apr_proof = kmir.prove_program(prove_opts)
+        should_fail = PROVE_EXPECTED_FAILURES.get((rs_file.stem, start_symbol), rs_file.stem.endswith('fail'))
 
         if should_show:
             display_opts = ShowOpts(
diff --git a/kmir/src/tests/unit/test_utils.py b/kmir/src/tests/unit/test_utils.py
@@ -0,0 +1,82 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from pyk.cterm import CSubst, CTerm
+from pyk.kast.inner import KApply
+from pyk.kcfg.kcfg import KCFG
+
+from kmir.utils import render_statistics
+
+
+@dataclass
+class _FakeKCFG:  # Test double for a KCFG: fixed nodes/leaves plus a hand-written successor table.
+    nodes: tuple[KCFG.Node, ...]
+    leaves: tuple[KCFG.Node, ...]
+    root_ids: frozenset[int]  # ids for which is_root() answers True
+    successor_map: dict[int, tuple[object, ...]]  # node id -> outgoing successors
+
+    def is_root(self, node_id: int) -> bool:
+        return node_id in self.root_ids
+
+    def successors(self, node_id: int) -> tuple[object, ...]:
+        return self.successor_map.get(node_id, ())  # ids missing from the table have no successors
+
+    def is_split(self, _node_id: int) -> bool:  # always False in this fake
+        return False
+
+    def is_ndbranch(self, _node_id: int) -> bool:  # always False in this fake
+        return False
+
+    def is_stuck(self, _node_id: int) -> bool:  # always False in this fake
+        return False
+
+
+@dataclass
+class _FakeProof:  # Test double for a proof: wraps a _FakeKCFG and answers node-status queries.
+    kcfg: _FakeKCFG
+    init: int  # id of the node treated as the proof's initial node
+    pending_ids: frozenset[int]  # ids for which is_pending() answers True
+
+    def is_target(self, _node_id: int) -> bool:  # always False in this fake
+        return False
+
+    def is_terminal(self, _node_id: int) -> bool:  # always False in this fake
+        return False
+
+    def is_refuted(self, _node_id: int) -> bool:  # always False in this fake
+        return False
+
+    def is_bounded(self, _node_id: int) -> bool:  # always False in this fake
+        return False
+
+    def is_pending(self, node_id: int) -> bool:  # the only status that depends on the node id
+        return node_id in self.pending_ids
+
+    def is_failing(self, _node_id: int) -> bool:  # always False in this fake
+        return False
+
+
+def test_render_statistics_handles_zero_cost_predecessor_cycles() -> None:  # init <-> loop_target cover cycle
+    kcfg = KCFG()  # real KCFG used only to mint nodes; the graph shape comes from _FakeKCFG below
+    loop_target = kcfg.create_node(CTerm(KApply('<loopTarget>')))  # created before init - TODO confirm order matters
+    init = kcfg.create_node(CTerm(KApply('<init>')))
+    leaf = kcfg.create_node(CTerm(KApply('<leaf>')))
+
+    fake_kcfg = _FakeKCFG(
+        nodes=(loop_target, init, leaf),
+        leaves=(leaf,),
+        root_ids=frozenset({init.id}),
+        successor_map={
+            init.id: (KCFG.Cover(init, loop_target, CSubst()),),  # cover from init into the loop node
+            loop_target.id: (
+                KCFG.Cover(loop_target, init, CSubst()),  # cover back to init, closing the cycle
+                KCFG.Edge(loop_target, leaf, 1, ()),  # sole way to reach leaf; expected step count below is 1
+            ),
+        },
+    )
+    proof = _FakeProof(fake_kcfg, init=init.id, pending_ids=frozenset({leaf.id}))  # leaf reported as pending
+
+    lines = render_statistics(proof)
+
+    assert f'  leaf {leaf.id}: shortest steps 1, path {init.id} -> {loop_target.id} -> {leaf.id}' in lines