feat(quantifiers): Add pure expression inliner infrastructure

feliperodri · feliperodri · commit b6da0e1618f3 · 2026-03-29T22:31:03.000-04:00
Add Expr::substitute_symbol() and inline_as_pure_expr() for inlining
function calls as side-effect-free expression trees. This infrastructure
enables the quantifier-pure-expressions branch to generate CBMC
quantifier bodies without StatementExpression nodes.

Changes:
- cprover_bindings: Expr::substitute_symbol() — recursive symbol replacement
  through every ExprValue variant except StatementExpression, with 6 unit tests
- goto_ctx.rs: inline_as_pure_expr() — inlines function calls by
  extracting return expressions, resolving intermediate variables,
  and substituting parameters. Handles StatementExpression flattening
  for checked arithmetic (drops Assert/Assume runtime checks).
- docs/dev/pure-expression-inliner.md — developer documentation
  including soundness implications

Soundness note: The pure inliner drops overflow and division-by-zero
checks when flattening StatementExpression nodes from checked arithmetic.
This is documented as a known trade-off — CBMC requires pure expressions
in quantifier bodies, and runtime checks are side effects. A future
improvement could hoist these checks outside the quantifier.

The existing handle_quantifiers post-pass is NOT modified — this is
purely additive infrastructure. All existing tests pass unchanged.

Full regression: 1289 tests passed, 0 failed.
diff --git a/cprover_bindings/src/goto_program/expr.rs b/cprover_bindings/src/goto_program/expr.rs
@@ -353,6 +353,74 @@ impl Expr {
 
 /// Predicates
 impl Expr {
+    /// Replace every free occurrence of the symbol named `old_id` with `replacement`.
+    ///
+    /// Consumes `self` and returns a rebuilt expression tree. Each rebuilt node keeps the
+    /// type, location and `size_of_annotation` of the node it replaces. A matching symbol
+    /// becomes a clone of `replacement`, re-tagged through `with_location` with the
+    /// location of the symbol it stands in for.
+    /// Used for quantifier codegen to inline function bodies as pure expressions.
+    ///
+    /// Caveats:
+    /// - A `Forall`/`Exists` whose bound variable is itself named `old_id` is left
+    ///   untouched: the binder shadows the outer symbol, so occurrences in the body refer
+    ///   to the bound variable, and the binder itself must stay a symbol.
+    /// - `StatementExpression` nodes are returned unchanged, so occurrences of `old_id`
+    ///   inside their statements are NOT replaced.
+    /// - No renaming is performed: free symbols inside `replacement` can be captured when
+    ///   it is substituted under a quantifier that binds the same name.
+    pub fn substitute_symbol(self, old_id: &InternedString, replacement: &Expr) -> Expr {
+        let loc = self.location;
+        let typ = self.typ.clone();
+        let ann = self.size_of_annotation.clone();
+        // Rebuild a node from new children while keeping the visited node's metadata.
+        let mk = |value: ExprValue| Expr {
+            value: Box::new(value),
+            typ: typ.clone(),
+            location: loc,
+            size_of_annotation: ann.clone(),
+        };
+        let sub = |e: Expr| e.substitute_symbol(old_id, replacement);
+        let sub_vec = |v: Vec<Expr>| -> Vec<Expr> { v.into_iter().map(&sub).collect() };
+        // Rewrite a quantifier body only when its binder does not shadow `old_id`.
+        let sub_under = |binder: &Expr, body: Expr| {
+            let shadowed = matches!(
+                binder.value(),
+                ExprValue::Symbol { identifier } if *identifier == *old_id
+            );
+            if shadowed { body } else { sub(body) }
+        };
+
+        match *self.value {
+            ExprValue::Symbol { identifier } if identifier == *old_id => {
+                replacement.clone().with_location(loc)
+            }
+            ExprValue::AddressOf(e) => mk(AddressOf(sub(e))),
+            ExprValue::Dereference(e) => mk(Dereference(sub(e))),
+            ExprValue::Typecast(e) => mk(Typecast(sub(e))),
+            ExprValue::UnOp { op, e } => mk(UnOp { op, e: sub(e) }),
+            ExprValue::BinOp { op, lhs, rhs } => mk(BinOp { op, lhs: sub(lhs), rhs: sub(rhs) }),
+            ExprValue::If { c, t, e } => mk(If { c: sub(c), t: sub(t), e: sub(e) }),
+            ExprValue::Index { array, index } => mk(Index { array: sub(array), index: sub(index) }),
+            ExprValue::Member { lhs, field } => mk(Member { lhs: sub(lhs), field }),
+            ExprValue::FunctionCall { function, arguments } => {
+                mk(FunctionCall { function: sub(function), arguments: sub_vec(arguments) })
+            }
+            ExprValue::Array { elems } => mk(Array { elems: sub_vec(elems) }),
+            ExprValue::Struct { values } => mk(Struct { values: sub_vec(values) }),
+            ExprValue::Assign { left, right } => mk(Assign { left: sub(left), right: sub(right) }),
+            ExprValue::ReadOk { ptr, size } => mk(ReadOk { ptr: sub(ptr), size: sub(size) }),
+            ExprValue::ArrayOf { elem } => mk(ArrayOf { elem: sub(elem) }),
+            ExprValue::ByteExtract { e, offset } => mk(ByteExtract { e: sub(e), offset }),
+            ExprValue::SelfOp { op, e } => mk(SelfOp { op, e: sub(e) }),
+            ExprValue::Union { value, field } => mk(Union { value: sub(value), field }),
+            // The binder is never rewritten; the body is rewritten unless it is shadowed.
+            ExprValue::Forall { variable, domain } => {
+                let domain = sub_under(&variable, domain);
+                mk(Forall { variable, domain })
+            }
+            ExprValue::Exists { variable, domain } => {
+                let domain = sub_under(&variable, domain);
+                mk(Exists { variable, domain })
+            }
+            ExprValue::Vector { elems } => mk(Vector { elems: sub_vec(elems) }),
+            ExprValue::ShuffleVector { vector1, vector2, indexes } => mk(ShuffleVector {
+                vector1: sub(vector1),
+                vector2: sub(vector2),
+                indexes: sub_vec(indexes),
+            }),
+            // Leaf nodes have no sub-expressions. Statement expressions are returned as-is:
+            // symbols inside their statements are not substituted (see the caveats above).
+            ExprValue::Symbol { .. }
+            | ExprValue::IntConstant(_)
+            | ExprValue::BoolConstant(_)
+            | ExprValue::CBoolConstant(_)
+            | ExprValue::DoubleConstant(_)
+            | ExprValue::FloatConstant(_)
+            | ExprValue::Float16Constant(_)
+            | ExprValue::Float128Constant(_)
+            | ExprValue::PointerConstant(_)
+            | ExprValue::StringConstant { .. }
+            | ExprValue::Nondet
+            | ExprValue::EmptyUnion
+            | ExprValue::StatementExpression { .. } => self,
+        }
+    }
+
     pub fn is_int_constant(&self) -> bool {
         match *self.value {
             IntConstant(_) => true,
@@ -1762,3 +1830,98 @@ impl Expr {
         exprs
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Builds a 32-bit signed symbol expression named `name`.
+    fn sym(name: &str) -> Expr {
+        Expr::symbol_expression(name, Type::signed_int(32))
+    }
+
+    /// Builds a 32-bit signed integer constant holding `val`.
+    fn int(val: i64) -> Expr {
+        Expr::int_constant(val, Type::signed_int(32))
+    }
+
+    /// Substitutes `replacement` for the symbol `x` throughout `expr`.
+    fn replace_x(expr: Expr, replacement: Expr) -> Expr {
+        let target: InternedString = "x".into();
+        expr.substitute_symbol(&target, &replacement)
+    }
+
+    /// True when `expr` is an integer constant equal to `expected`.
+    fn is_int(expr: &Expr, expected: i64) -> bool {
+        matches!(expr.value(), ExprValue::IntConstant(v) if *v == expected.into())
+    }
+
+    /// True when `expr` is a symbol whose identifier prints as `name`.
+    fn is_symbol(expr: &Expr, name: &str) -> bool {
+        matches!(expr.value(), ExprValue::Symbol { identifier } if identifier.to_string() == name)
+    }
+
+    #[test]
+    fn substitute_symbol_leaf_match() {
+        let result = replace_x(sym("x"), int(42));
+        assert!(is_int(&result, 42));
+    }
+
+    #[test]
+    fn substitute_symbol_leaf_no_match() {
+        let result = replace_x(sym("y"), int(42));
+        assert!(is_symbol(&result, "y"));
+    }
+
+    #[test]
+    fn substitute_symbol_in_binop() {
+        // x + 1 → 10 + 1
+        let result = replace_x(sym("x").plus(int(1)), int(10));
+        match result.value() {
+            ExprValue::BinOp { lhs, rhs, .. } => {
+                assert!(is_int(lhs, 10));
+                assert!(is_int(rhs, 1));
+            }
+            _ => panic!("Expected BinOp"),
+        }
+    }
+
+    #[test]
+    fn substitute_symbol_nested() {
+        // (x + x) * 2 → (5 + 5) * 2
+        let result = replace_x(sym("x").plus(sym("x")).mul(int(2)), int(5));
+        let sum = match result.value() {
+            ExprValue::BinOp { lhs, .. } => lhs,
+            _ => panic!("Expected outer BinOp"),
+        };
+        match sum.value() {
+            ExprValue::BinOp { lhs, rhs, .. } => {
+                assert!(is_int(lhs, 5));
+                assert!(is_int(rhs, 5));
+            }
+            _ => panic!("Expected inner BinOp"),
+        }
+    }
+
+    #[test]
+    fn substitute_symbol_in_typecast() {
+        let result = replace_x(sym("x").cast_to(Type::signed_int(64)), int(7));
+        match result.value() {
+            ExprValue::Typecast(inner) => assert!(is_int(inner, 7)),
+            _ => panic!("Expected Typecast"),
+        }
+    }
+
+    #[test]
+    fn substitute_preserves_unrelated_symbols() {
+        // y + x → y + 1
+        let result = replace_x(sym("y").plus(sym("x")), int(1));
+        match result.value() {
+            ExprValue::BinOp { lhs, rhs, .. } => {
+                assert!(is_symbol(lhs, "y"));
+                assert!(is_int(rhs, 1));
+            }
+            _ => panic!("Expected BinOp"),
+        }
+    }
+}
diff --git a/docs/dev/pure-expression-inliner.md b/docs/dev/pure-expression-inliner.md
@@ -0,0 +1,73 @@
+# Pure Expression Inliner
+
+## Overview
+
+The pure expression inliner (`inline_as_pure_expr`) is a function call inlining
+mechanism that produces side-effect-free expression trees. Unlike the original
+`inline_function_calls_in_expr` which wraps inlined bodies in CBMC
+`StatementExpression` nodes, this produces expressions using only pure
+constructs: `BinOp`, `UnOp`, `If` (ternary), `Typecast`, etc.
+
+## Motivation
+
+CBMC's quantifier expressions (`forall`, `exists`) reject side effects in their
+bodies. The original inliner produced `StatementExpression` nodes which CBMC
+treats as side effects, causing invariant violations. The pure inliner eliminates
+this by producing expression trees that CBMC can process directly.
+
+## How It Works
+
+For a function call `f(arg1, arg2)` where `f` is defined as:
+```c
+ret_type f(param1, param2) {
+    local1 = expr1(param1);
+    local2 = expr2(local1, param2);
+    return local2;
+}
+```
+
+The pure inliner:
+1. Collects all assignments: `{local1 → expr1(param1), local2 → expr2(local1, param2)}`
+2. Finds the return symbol: `local2`
+3. Resolves intermediates: `local2` → `expr2(local1, param2)` → `expr2(expr1(param1), param2)`
+4. Substitutes parameters: `expr2(expr1(arg1), arg2)`
+5. Flattens `StatementExpression` nodes (e.g., checked arithmetic → just the operation)
+6. Recursively inlines any remaining function calls
+
+## Soundness Implications
+
+**Checked arithmetic in quantifier bodies**: When flattening `StatementExpression`
+nodes (e.g., from checked division or remainder), the pure inliner drops the
+`Assert` and `Assume` statements that check for overflow and division by zero.
+This means:
+
+- **Division by zero** inside a quantifier body will NOT be detected. For example,
+  `forall!(|i in (0, 10)| arr[i] / x == 0)` where `x` could be zero will not
+  produce a division-by-zero check.
+- **Arithmetic overflow** inside a quantifier body will NOT be detected.
+
+This is a known trade-off: CBMC requires pure expressions in quantifier bodies,
+and runtime checks are inherently side effects. Users should ensure that
+arithmetic operations in quantifier predicates cannot overflow or divide by zero.
+
+**Future improvement**: The dropped assertions could be hoisted outside the
+quantifier as preconditions, preserving soundness while keeping the quantifier
+body pure.
+
+## Limitations
+
+- **No control flow**: Functions whose `if`/`else` or `match` arms each assign
+  the return variable are not fully supported. The inliner keeps only the last
+  assignment, so the result may be wrong on paths that take a different branch.
+- **No loops**: Functions containing loops cannot be inlined as pure expressions.
+- **No recursion**: Recursive functions are detected and cause a panic.
+- **Checked arithmetic**: Overflow/division-by-zero checks (`Assert` + `Assume`
+  statements) are dropped when flattening `StatementExpression` nodes. This
+  means the pure expression doesn't include these runtime checks.
+
+## Files
+
+- `cprover_bindings/src/goto_program/expr.rs` — `Expr::substitute_symbol()`
+- `kani-compiler/src/codegen_cprover_gotoc/context/goto_ctx.rs` — `inline_as_pure_expr()`,
+  `inline_call_as_pure_expr()`, `collect_assignments_from_stmt()`,
+  `find_return_symbol_in_stmt()`, `resolve_intermediates_iterative()`
diff --git a/kani-compiler/src/codegen_cprover_gotoc/context/goto_ctx.rs b/kani-compiler/src/codegen_cprover_gotoc/context/goto_ctx.rs