Add cut-point rules for specific functions / intrinsics (via definition) (#960)

dkcumming · web-flow · commit 2c84b7f3517e · 2026-03-03T12:23:12.000+10:00
This PR builds upon #931, modifying the approach in response to the comments on that PR. For full context read #931 _first_. The `kmir prove-rs` flag `--break-on-function` is implemented in this PR as a compiled definition with a hooked function to retrieve the function names to match on. This is similar to the already existing pattern that compiles the static data of a KMIR configuration into the definition. This allows for functions to be provided both when creating the initial proof, and when reading from disk (triggers a recompile of llvm if different flags are provided). I added a test to demonstrate this working on functions and intrinsics, only matching those provided. I do not have a test for reading a partial proof and adding different function names - I did test it but it seemed a bit overboard for a test just now. I did try the method with [K shell access impure function](https://github.com/runtimeverification/k/blob/master/k-distribution/include/kframework/builtin/domains.md#shell-access), however this created branching for every function call since the result was stored in a symbolic value. I couldn't figure out how to get that working concretely (I don't think it is possible but might be wrong).
diff --git a/kmir/src/kmir/__main__.py b/kmir/src/kmir/__main__.py
@@ -410,6 +410,13 @@ def _arg_parser() -> ArgumentParser:
         action='store_true',
         help='Break on every MIR step (statements and terminators)',
     )
+    prove_args.add_argument(
+        '--break-on-function',
+        dest='break_on_function',
+        action='append',
+        default=None,
+        help='Break when calling functions / intrinsics matching this name (repeatable)',
+    )
 
     proof_args = ArgumentParser(add_help=False)
     proof_args.add_argument('id', metavar='PROOF_ID', help='The id of the proof to view')
@@ -638,6 +645,7 @@ def _parse_args(ns: Namespace) -> KMirOpts:
                 break_every_step=ns.break_every_step,
                 terminate_on_thunk=ns.terminate_on_thunk,
                 add_module=ns.add_module,
+                break_on_function=ns.break_on_function or [],
             )
         case 'link':
             return LinkOpts(
diff --git a/kmir/src/kmir/_prove.py b/kmir/src/kmir/_prove.py
@@ -63,6 +63,7 @@ def _prove_rs(opts: ProveRSOpts, target_path: Path, label: str) -> APRProof:
             symbolic=True,
             haskell_target=opts.haskell_target,
             llvm_lib_target=opts.llvm_lib_target,
+            break_on_function=opts.break_on_function or None,
         )
     else:
         _LOGGER.info(f'Constructing initial proof: {label}')
@@ -92,6 +93,7 @@ def _prove_rs(opts: ProveRSOpts, target_path: Path, label: str) -> APRProof:
             symbolic=True,
             haskell_target=opts.haskell_target,
             llvm_lib_target=opts.llvm_lib_target,
+            break_on_function=opts.break_on_function or None,
         )
 
         proof = apr_proof_from_smir(
@@ -122,6 +124,7 @@ def _prove_rs(opts: ProveRSOpts, target_path: Path, label: str) -> APRProof:
         break_on_terminator_unreachable=opts.break_on_terminator_unreachable,
         break_every_terminator=opts.break_every_terminator,
         break_every_step=opts.break_every_step,
+        break_on_function=opts.break_on_function,
     )
 
     if opts.max_workers and opts.max_workers > 1:
@@ -251,6 +254,7 @@ def _cut_point_rules(
     break_on_terminator_unreachable: bool,
     break_every_terminator: bool,
     break_every_step: bool,
+    break_on_function: list[str] | None = None,
 ) -> list[str]:
     cut_point_rules = []
     if break_on_thunk:
@@ -291,6 +295,9 @@ def _cut_point_rules(
         or break_every_step
     ):
         cut_point_rules.append('KMIR-CONTROL-FLOW.termCallFunction')
+    if break_on_function:
+        cut_point_rules.append('KMIR-CONTROL-FLOW.termCallFunctionFilter')
+        cut_point_rules.append('KMIR-CONTROL-FLOW.termCallIntrinsicFilter')
     if break_on_terminator_assert or break_every_terminator or break_every_step:
         cut_point_rules.append('KMIR-CONTROL-FLOW.termAssert')
     if break_on_terminator_drop or break_every_terminator or break_every_step:
diff --git a/kmir/src/kmir/kdist/mir-semantics/kmir.md b/kmir/src/kmir/kdist/mir-semantics/kmir.md
@@ -28,8 +28,10 @@ See [`rt/configuration.md`](./rt/configuration.md) for a detailed description of
 ```k
 module KMIR-CONTROL-FLOW
   imports BOOL
+  imports COLLECTIONS
   imports LIST
   imports MAP
+  imports STRING
   imports K-EQUAL
 
   imports MONO
@@ -325,6 +327,15 @@ where the returned result should go.
          => #execIntrinsic(FUNC, ARGS, DEST, SPAN) ~> #continueAt(TARGET)
         </k>
     requires isIntrinsicFunction(FUNC)
+     andBool notBool #functionNameMatchesEnv(getFunctionName(FUNC))
+
+  // Intrinsic call whose name matches a break-on-function filter: same rewrite as termCallIntrinsic, but under its own rule label so it can be selected as a cut-point.
+  rule [termCallIntrinsicFilter]:
+        <k> #execTerminatorCall(_, FUNC, ARGS, DEST, TARGET, _UNWIND, SPAN) ~> _
+         => #execIntrinsic(FUNC, ARGS, DEST, SPAN) ~> #continueAt(TARGET)
+        </k>
+    requires isIntrinsicFunction(FUNC)
+     andBool #functionNameMatchesEnv(getFunctionName(FUNC))
 
   // Regular function call - full state switching and stack setup
   rule [termCallFunction]:
@@ -342,11 +353,72 @@ where the returned result should go.
        </currentFrame>
        <stack> STACK => ListItem(StackFrame(OLDCALLER, OLDDEST, OLDTARGET, OLDUNWIND, LOCALS)) STACK </stack>
     requires notBool isIntrinsicFunction(FUNC)
+     andBool notBool #functionNameMatchesEnv(getFunctionName(FUNC))
+
+  // Non-intrinsic call whose name matches a break-on-function filter: same frame switch and stack push as termCallFunction, but under its own rule label so it can be selected as a cut-point.
+  rule [termCallFunctionFilter]:
+       <k> #execTerminatorCall(FTY, FUNC, ARGS, DEST, TARGET, UNWIND, SPAN) ~> _
+        => #setUpCalleeData(FUNC, ARGS, SPAN)
+       </k>
+       <currentFunc> CALLER => FTY </currentFunc>
+       <currentFrame>
+         <currentBody> _ </currentBody>
+         <caller> OLDCALLER => CALLER </caller>
+         <dest> OLDDEST => DEST </dest>
+         <target> OLDTARGET => TARGET </target>
+         <unwind> OLDUNWIND => UNWIND </unwind>
+         <locals> LOCALS </locals>
+       </currentFrame>
+       <stack> STACK => ListItem(StackFrame(OLDCALLER, OLDDEST, OLDTARGET, OLDUNWIND, LOCALS)) STACK </stack>
+    requires notBool isIntrinsicFunction(FUNC)
+     andBool #functionNameMatchesEnv(getFunctionName(FUNC))
 
   syntax Bool ::= isIntrinsicFunction(MonoItemKind) [function]
   rule isIntrinsicFunction(IntrinsicFunction(_)) => true
   rule isIntrinsicFunction(_) => false [owise]
 
+  syntax String ::= getFunctionName(MonoItemKind) [function, total] // name string that the break-on-function filters are compared against
+  //---------------------------------------------------------------
+  rule getFunctionName(monoItemFn(symbol(NAME), _, _)) => NAME
+  rule getFunctionName(monoItemStatic(symbol(NAME), _, _)) => NAME
+  rule getFunctionName(monoItemGlobalAsm(_)) => "" // no name is extracted for global asm items
+  rule getFunctionName(IntrinsicFunction(symbol(NAME))) => NAME
+
+  // Check whether a function name matches any break-on-function filter; the filters are read from #breakOnFunctionsString(0) as one ';'-separated string.
+  syntax Bool ::= #functionNameMatchesEnv(String) [function, total]
+  //----------------------------------------------------------------
+  rule #functionNameMatchesEnv(NAME) => #functionNameMatchesEnvStr(NAME, #breakOnFunctionsString(0))
+
+  // Yields the ';'-separated filter list. The Int argument carries no information; it exists only so the Haskell
+  // backend can pattern-match on it and not error, since zero-argument functions cannot use [owise].
+  syntax String ::= #breakOnFunctionsString(Int) [function, total, symbol(breakOnFunctionsString)]
+  //-----------------------------------------------------------------------------------------------
+  rule #breakOnFunctionsString(_) => "" [owise] // Default (no filters); overridden by the rule generated in kompile.py (_mk_break_on_functions_rule)
+
+  syntax Bool ::= #functionNameMatchesEnvStr(String, String) [function, total] // (function name, ';'-separated filter string)
+  //--------------------------------------------------------------------------
+  rule #functionNameMatchesEnvStr(_, "") => false // empty filter string: nothing matches
+  rule #functionNameMatchesEnvStr(NAME, ENV) => #functionNameMatchesAnyList(NAME, #splitSemicolon(ENV)) // split ENV, then test each filter
+    requires ENV =/=String ""
+
+  syntax List ::= #splitSemicolon(String) [function, total] // split a string on ";" into a List of String pieces
+  //--------------------------------------------------------
+  rule #splitSemicolon(S) => #splitSemicolonAux(S, findString(S, ";", 0)) // locate the first ";" and let the helper cut there
+
+  syntax List ::= #splitSemicolonAux(String, Int) [function, total] // second argument: index of the first ";" in S, or -1
+  //-----------------------------------------------------------------
+  rule #splitSemicolonAux(S, -1) => ListItem(S) // no ";" left: S is the last piece
+  rule #splitSemicolonAux(S, I) => // piece before the ";" at I, then split everything after it
+      ListItem(substrString(S, 0, I)) #splitSemicolon(substrString(S, I +Int 1, lengthString(S)))
+    requires I >=Int 0
+
+  // True iff some non-empty FILTER in the list occurs as a substring of NAME. Empty filters (e.g. from "a;;b" or a trailing ";") are skipped, because an empty filter would otherwise match every name.
+  syntax Bool ::= #functionNameMatchesAnyList(String, List) [function, total]
+  rule #functionNameMatchesAnyList(_, .List) => false
+  rule #functionNameMatchesAnyList(NAME, ListItem(FILTER:String) REST) =>
+      (FILTER =/=String "" andBool 0 <=Int findString(NAME, FILTER, 0)) orBool #functionNameMatchesAnyList(NAME, REST)
+  rule #functionNameMatchesAnyList(_, _) => false [owise] // remaining case: the list head is not a String
+
   syntax KItem ::= #continueAt(MaybeBasicBlockIdx)
   rule <k> #continueAt(someBasicBlockIdx(TARGET)) => #execBlockIdx(TARGET) ... </k>
   rule <k> #continueAt(noBasicBlockIdx) => .K ... </k>
diff --git a/kmir/src/kmir/kmir.py b/kmir/src/kmir/kmir.py
@@ -63,6 +63,7 @@ def from_kompiled_kore(
         llvm_target: str | None = None,
         llvm_lib_target: str | None = None,
         haskell_target: str | None = None,
+        break_on_function: list[str] | None = None,
     ) -> KMIR:
         from .kompile import kompile_smir
 
@@ -75,6 +76,7 @@ def from_kompiled_kore(
             llvm_target=llvm_target,
             llvm_lib_target=llvm_lib_target,
             haskell_target=haskell_target,
+            break_on_function=break_on_function,
         )
         return kompiled_smir.create_kmir(bug_report_file=bug_report)
 
diff --git a/kmir/src/kmir/kompile.py b/kmir/src/kmir/kompile.py
@@ -65,6 +65,7 @@ class KompileDigest:
     llvm_target: str
     llvm_lib_target: str
     haskell_target: str
+    break_on_function: str
 
     @staticmethod
     def load(target_dir: Path) -> KompileDigest:
@@ -80,6 +81,7 @@ def load(target_dir: Path) -> KompileDigest:
             llvm_target=data['llvm-target'],
             llvm_lib_target=data['llvm-lib-target'],
             haskell_target=data['haskell-target'],
+            break_on_function=data.get('break-on-function', ''),
         )
 
     def write(self, target_dir: Path) -> None:
@@ -91,6 +93,7 @@ def write(self, target_dir: Path) -> None:
                     'llvm-target': self.llvm_target,
                     'llvm-lib-target': self.llvm_lib_target,
                     'haskell-target': self.haskell_target,
+                    'break-on-function': self.break_on_function,
                 },
             ),
         )
@@ -205,6 +208,7 @@ def kompile_smir(
     llvm_target: str | None = None,
     llvm_lib_target: str | None = None,
     haskell_target: str | None = None,
+    break_on_function: list[str] | None = None,
 ) -> KompiledSMIR:
     kompile_digest: KompileDigest | None = None
     try:
@@ -222,6 +226,7 @@ def kompile_smir(
         llvm_target=llvm_target,
         llvm_lib_target=llvm_lib_target,
         haskell_target=haskell_target,
+        break_on_function=';'.join(break_on_function) if break_on_function else '',
     )
 
     target_hs_path = target_dir / 'haskell'
@@ -242,7 +247,7 @@ def kompile_smir(
 
     haskell_def_dir = kdist.which(haskell_target)
     kmir = KMIR(haskell_def_dir)
-    smir_rules: list[Sentence] = list(make_kore_rules(kmir, smir_info))
+    smir_rules: list[Sentence] = list(make_kore_rules(kmir, smir_info, break_on_function=break_on_function))
     _LOGGER.info(f'Generated {len(smir_rules)} function equations to add to `definition.kore')
 
     # Load and convert extra module rules if provided
@@ -437,7 +442,9 @@ def _make_stratified_rules(
     return [*declarations, *dispatch, *defaults, *equations]
 
 
-def make_kore_rules(kmir: KMIR, smir_info: SMIRInfo) -> Sequence[Sentence]:
+def make_kore_rules(
+    kmir: KMIR, smir_info: SMIRInfo, *, break_on_function: list[str] | None = None
+) -> Sequence[Sentence]:
     # kprint tool is too chatty
     kprint_logger = logging.getLogger('pyk.ktool.kprint')
     kprint_logger.setLevel(logging.WARNING)
@@ -489,7 +496,12 @@ def get_int_arg(app: KInner) -> int:
         kmir, 'lookupAlloc', 'AllocId', 'Evaluation', 'allocId', allocs, invalid_alloc_n
     )
 
-    return [*equations, *type_equations, *alloc_equations]
+    # Generate break-on-function filter rule if filters are provided
+    break_on_rules: list[Axiom] = []
+    if break_on_function:
+        break_on_rules.append(_mk_break_on_functions_rule(kmir, break_on_function))
+
+    return [*equations, *type_equations, *alloc_equations, *break_on_rules]
 
 
 def _functions(kmir: KMIR, smir_info: SMIRInfo) -> dict[int, KInner]:
@@ -544,6 +556,30 @@ def _mk_equation(kmir: KMIR, fun: str, arg: KInner, arg_sort: str, result: KInne
     return rule.to_axiom()
 
 
+def _mk_break_on_functions_rule(kmir: KMIR, break_on_function: list[str]) -> Axiom:
+    """Build the Kore axiom `#breakOnFunctionsString(0) => "f1;f2;..."` carrying the ';'-joined filter names."""
+    from pyk.kore.prelude import int_dv
+    from pyk.kore.rule import FunctionRule
+
+    filter_string = ';'.join(break_on_function)  # same ';' separator that #splitSemicolon in kmir.md splits on
+    fun_app = App('LblbreakOnFunctionsString', (), (int_dv(0),))  # LHS: the symbol applied to the literal 0
+    result_kore = kmir.kast_to_kore(stringToken(filter_string), KSort('String'))
+
+    rule = FunctionRule(
+        lhs=fun_app,
+        rhs=result_kore,
+        req=None,
+        ens=None,
+        sort=SortApp('SortString'),
+        arg_sorts=(SortApp('SortInt'),),
+        anti_left=None,
+        priority=50,  # ordinary rule priority, so it is tried before the [owise] default rule in kmir.md
+        uid='breakOnFunctionsString-generated',
+        label='breakOnFunctionsString-generated',
+    )
+    return rule.to_axiom()
+
+
 def _decode_alloc(smir_info: SMIRInfo, raw_alloc: Any) -> tuple[KInner, KInner]:
     from .decoding import UnableToDecodeValue, decode_alloc_or_unable
 
diff --git a/kmir/src/kmir/options.py b/kmir/src/kmir/options.py
@@ -85,6 +85,7 @@ class ProveOpts(KMirOpts):
     break_every_terminator: bool
     break_every_step: bool
     terminate_on_thunk: bool
+    break_on_function: list[str]
 
     def __init__(
         self,
@@ -113,6 +114,7 @@ def __init__(
         break_every_terminator: bool = False,
         break_every_step: bool = False,
         terminate_on_thunk: bool = False,
+        break_on_function: list[str] | None = None,
     ) -> None:
         self.proof_dir = Path(proof_dir).resolve() if proof_dir is not None else None
         self.haskell_target = haskell_target
@@ -138,6 +140,7 @@ def __init__(
         self.break_every_terminator = break_every_terminator
         self.break_every_step = break_every_step
         self.terminate_on_thunk = terminate_on_thunk
+        self.break_on_function = break_on_function if break_on_function is not None else []
 
 
 @dataclass
@@ -182,6 +185,7 @@ def __init__(
         break_every_step: bool = False,
         terminate_on_thunk: bool = False,
         add_module: Path | None = None,
+        break_on_function: list[str] | None = None,
     ) -> None:
         self.rs_file = rs_file
         self.proof_dir = Path(proof_dir).resolve() if proof_dir is not None else None
@@ -213,6 +217,7 @@ def __init__(
         self.break_every_step = break_every_step
         self.terminate_on_thunk = terminate_on_thunk
         self.add_module = add_module
+        self.break_on_function = break_on_function if break_on_function is not None else []
 
 
 @dataclass
diff --git a/kmir/src/tests/integration/data/prove-rs/break-on-function.rs b/kmir/src/tests/integration/data/prove-rs/break-on-function.rs
@@ -0,0 +1,15 @@
+#![feature(core_intrinsics)]
+
+fn foo() {
+    let x = std::hint::black_box(42); // appears in the expected proof output as a function call and then a `black_box` intrinsic call
+    bar();
+    assert!(x == 42);
+}
+
+fn bar() {
+    std::intrinsics::assert_inhabited::<i32>(); // intrinsic call; this path is why the file enables `core_intrinsics`
+}
+
+fn main() {
+    foo(); // the only call made from the entry point
+}
diff --git a/kmir/src/tests/integration/data/prove-rs/show/break-on-function.main.cli-break-on-function.expected b/kmir/src/tests/integration/data/prove-rs/show/break-on-function.main.cli-break-on-function.expected
@@ -0,0 +1,50 @@
+
+┌─ 1 (root, init)
+│   #execTerminator ( terminator ( ... kind: terminatorKindCall ( ... func: operandC
+│   span: src/rust/library/std/src/rt.rs:194
+│
+│  (7 steps)
+├─ 3
+│   #execTerminatorCall ( ty ( 31 ) , monoItemFn ( ... name: symbol ( "foo" ) , id:
+│   function: main
+│   span: /prove-rs/break-on-function.rs:4
+│
+│  (1 step)
+├─ 4
+│   #setUpCalleeData ( monoItemFn ( ... name: symbol ( "foo" ) , id: defId ( 7 ) , b
+│   function: foo
+│   span: /prove-rs/break-on-function.rs:4
+│
+│  (5 steps)
+├─ 5
+│   #execTerminatorCall ( ty ( 26 ) , monoItemFn ( ... name: symbol ( "std::hint::bl
+│   function: foo
+│   span: /rust/library/core/src/hint.rs:389
+│
+│  (1 step)
+├─ 6
+│   #setUpCalleeData ( monoItemFn ( ... name: symbol ( "std::hint::black_box::<i32>"
+│   function: std::hint::black_box::<i32>
+│   span: /rust/library/core/src/hint.rs:389
+│
+│  (12 steps)
+├─ 7
+│   #execTerminatorCall ( ty ( 25 ) , IntrinsicFunction ( symbol ( "black_box" ) ) ,
+│   function: std::hint::black_box::<i32>
+│   span: /rust/library/core/src/hint.rs:389
+│
+│  (1 step)
+├─ 8
+│   #execIntrinsic ( IntrinsicFunction ( symbol ( "black_box" ) ) , operandMove ( pl
+│   function: std::hint::black_box::<i32>
+│   span: /rust/library/core/src/hint.rs:389
+│
+│  (41 steps)
+├─ 9 (terminal)
+│   #EndProgram ~> .K
+│   function: main
+│
+┊  constraint: true
+┊  subst: ...
+└─ 2 (leaf, target, terminal)
+    #EndProgram ~> .K
diff --git a/kmir/src/tests/integration/test_cli.py b/kmir/src/tests/integration/test_cli.py