Skip to content

Commit e861fa8

Browse files
haasonsaasclaude
andauthored
feat: Add Lean 4 formalization with hybrid Lean/Coq resolution (#3)
* feat: Add Lean 4 formalization with hybrid Lean/Coq resolution Adds three modules following the existing Coq prover patterns: - LeanTranslator: translates claims to Lean 4 theorems via IR or regex, using omega/native_decide tactics - LeanProver: invokes the lean binary with sorry/admit detection, caching, timeout handling, and proper ProofStatus assignment - HybridLeanCoqResolver: tries one backend first with configurable fallback, exposing prove_specification() for drop-in use 16 new tests covering translation, prover status paths, assumption detection, timeout, and hybrid fallback behavior. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: Address review feedback on Lean prover status and witness inference - LeanProver: use ProofStatus.default_for_solver_result() for failed proofs instead of hardcoding REFUTED — a tactic insufficiency is INCONCLUSIVE, not a definitive disproof - LeanTranslator._exists: infer witness from property bounds (x > 5 → 6, x >= 10 → 10, x = 3 → 3) instead of hardcoding 1 - Add test for witness inference Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: Quantify free variables in implication theorems _implication now extracts single-letter free variables from the hypothesis/conclusion and wraps them in ∀ binders, matching the Coq translator's behavior. Without this, implications like "if x > 5 then x > 3" generated invalid Lean 4 with undeclared variables. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: Use native_decide for multiplication, fix translation failure status - _multiplication: use native_decide instead of omega for nonlinear arithmetic, matching factorial/fibonacci/gcd - HybridLeanCoqResolver: translation failures now return INCONCLUSIVE instead of UNAVAILABLE (which means binary-not-found) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: Preserve informative result when both provers fail When Lean fails and Coq fallback also fails (e.g. UNAVAILABLE), return the Lean result instead of discarding it. Symmetric logic applied to both _lean_then_coq and _coq_then_lean — the initial prover's result is preserved when the fallback doesn't prove either. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 56ae8a3 commit e861fa8

5 files changed

Lines changed: 894 additions & 0 deletions

File tree

formal_verification/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66
"""
77

88
from .detector import FormalVerificationConflictDetector
9+
from .hybrid_lean_coq_resolver import HybridLeanCoqResolver
10+
from .lean_prover import LeanProver
11+
from .lean_translator import LeanTranslator
912
from .proof_protocol import (
1013
PreservationAuditor,
1114
build_claim_ir,
@@ -25,6 +28,9 @@
2528
"CoqProver",
2629
"FormalSpec",
2730
"FormalVerificationConflictDetector",
31+
"HybridLeanCoqResolver",
32+
"LeanProver",
33+
"LeanTranslator",
2834
"PreservationAudit",
2935
"PreservationAuditor",
3036
"PreservationLabel",
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
"""Hybrid Lean + Coq conflict resolver.
2+
3+
Tries one prover first, falls back to the other on failure. Exposes
4+
the standard ``prove_specification`` interface so it plugs into the
5+
existing FormalVerificationConflictDetector without changes.
6+
"""
7+
8+
import logging
9+
10+
from .lean_prover import LeanProver
11+
from .lean_translator import LeanTranslator
12+
from .prover import CoqProver
13+
from .types import FormalSpec, ProofResult
14+
15+
logger = logging.getLogger(__name__)
16+
17+
18+
class HybridLeanCoqResolver:
19+
"""Lean-first prover with Coq fallback (or vice versa)."""
20+
21+
def __init__(
22+
self,
23+
prefer_lean: bool = True,
24+
use_fallback: bool = True,
25+
timeout_seconds: int = 30,
26+
):
27+
"""Initialize hybrid resolver.
28+
29+
Args:
30+
prefer_lean: If True, try Lean first; otherwise try Coq first.
31+
use_fallback: If True, try the other prover when the first fails.
32+
timeout_seconds: Per-prover timeout.
33+
"""
34+
self.prefer_lean = prefer_lean
35+
self.use_fallback = use_fallback
36+
self.lean_translator = LeanTranslator()
37+
self.lean_prover = LeanProver(timeout_seconds=timeout_seconds)
38+
self.coq_prover = CoqProver(timeout_seconds=timeout_seconds)
39+
40+
def prove_specification(self, spec: FormalSpec) -> ProofResult:
41+
"""Prove a specification, trying both backends if needed.
42+
43+
Args:
44+
spec: Formal specification (may contain Coq or Lean code).
45+
46+
Returns:
47+
Best ProofResult obtained across backends.
48+
"""
49+
if self.prefer_lean:
50+
return self._lean_then_coq(spec)
51+
return self._coq_then_lean(spec)
52+
53+
def _lean_then_coq(self, spec: FormalSpec) -> ProofResult:
54+
"""Try Lean first, fall back to Coq."""
55+
lean_spec = self._ensure_lean_spec(spec)
56+
if lean_spec is not None:
57+
lean_result = self.lean_prover.prove_specification(lean_spec)
58+
if lean_result.proven:
59+
return lean_result
60+
else:
61+
lean_result = None
62+
63+
if not self.use_fallback:
64+
if lean_result is not None:
65+
return lean_result
66+
return self._untranslatable(spec, "lean")
67+
68+
coq_result = self.coq_prover.prove_specification(spec)
69+
if coq_result.proven:
70+
return coq_result
71+
# Prefer the more informative result from whichever prover ran.
72+
return lean_result if lean_result is not None else coq_result
73+
74+
def _coq_then_lean(self, spec: FormalSpec) -> ProofResult:
75+
"""Try Coq first, fall back to Lean."""
76+
coq_result = self.coq_prover.prove_specification(spec)
77+
if coq_result.proven:
78+
return coq_result
79+
80+
if not self.use_fallback:
81+
return coq_result
82+
83+
lean_spec = self._ensure_lean_spec(spec)
84+
if lean_spec is None:
85+
return coq_result
86+
87+
lean_result = self.lean_prover.prove_specification(lean_spec)
88+
if lean_result.proven:
89+
return lean_result
90+
# Both failed — prefer the initial result.
91+
return coq_result
92+
93+
def _ensure_lean_spec(self, spec: FormalSpec) -> FormalSpec | None:
94+
"""Translate the spec's claim to Lean if needed."""
95+
return self.lean_translator.translate(spec.claim)
96+
97+
@staticmethod
98+
def _untranslatable(spec: FormalSpec, prover: str) -> ProofResult:
99+
"""Return an INCONCLUSIVE result when translation failed."""
100+
return ProofResult(
101+
spec=spec,
102+
proven=False,
103+
proof_time_ms=0,
104+
error_message=f"Could not translate claim for {prover} prover",
105+
counter_example=None,
106+
proof_output="",
107+
prover_name=prover,
108+
solver_status="inconclusive",
109+
)

formal_verification/lean_prover.py

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
"""Lean 4 theorem prover interface.
2+
3+
Proves Lean 4 theorems by invoking the ``lean`` binary, following the
4+
same FormalSpec -> ProofResult contract as CoqProver.
5+
"""
6+
7+
import logging
8+
import re
9+
import subprocess
10+
import tempfile
11+
import time
12+
from pathlib import Path
13+
14+
from .proof_cache import ProofCache
15+
from .types import FormalSpec, ProofResult, ProofStatus
16+
17+
logger = logging.getLogger(__name__)
18+
19+
# Patterns that indicate an unverified assumption in Lean 4 code.
20+
_ASSUMPTION_PATTERNS: dict[str, str] = {
21+
"sorry": r"\bsorry\b",
22+
"admit": r"\badmit\b",
23+
"axiom": r"\baxiom\b",
24+
}
25+
26+
27+
class LeanProver:
28+
"""Interface to the Lean 4 type-checker for formal verification."""
29+
30+
def __init__(self, timeout_seconds: int = 30, use_cache: bool = True):
31+
"""Initialize Lean prover.
32+
33+
Args:
34+
timeout_seconds: Maximum seconds for a proof attempt.
35+
use_cache: Whether to cache proof results.
36+
"""
37+
self.timeout_seconds = timeout_seconds
38+
self.use_cache = use_cache
39+
self.cache = ProofCache() if use_cache else None
40+
self.lean_available = self._check_binary("lean")
41+
42+
if not self.lean_available:
43+
logger.warning("Lean 4 theorem prover not available")
44+
45+
@staticmethod
46+
def _check_binary(binary: str) -> bool:
47+
"""Check if a binary is installed and callable."""
48+
try:
49+
result = subprocess.run(
50+
[binary, "--version"], capture_output=True, timeout=5
51+
)
52+
return result.returncode == 0
53+
except (subprocess.TimeoutExpired, FileNotFoundError):
54+
return False
55+
56+
@staticmethod
57+
def _detect_unverified_assumptions(lean_code: str) -> list[str]:
58+
"""Detect assumptions that prevent a sound proof."""
59+
markers: list[str] = []
60+
for label, pattern in _ASSUMPTION_PATTERNS.items():
61+
if re.search(pattern, lean_code):
62+
markers.append(label)
63+
return markers
64+
65+
def prove_specification(self, spec: FormalSpec) -> ProofResult:
66+
"""Attempt to prove a formal specification using Lean 4.
67+
68+
Args:
69+
spec: The formal specification to prove (Lean code in coq_code).
70+
71+
Returns:
72+
ProofResult with proof status and timing information.
73+
"""
74+
lean_code = spec.coq_code
75+
76+
if self.use_cache and self.cache:
77+
cached = self.cache.get(spec)
78+
if cached:
79+
return cached
80+
81+
start_time = time.time()
82+
83+
# Reject code that contains sorry / admit / axiom.
84+
assumptions = self._detect_unverified_assumptions(lean_code)
85+
if assumptions:
86+
return ProofResult(
87+
spec=spec,
88+
proven=False,
89+
proof_time_ms=(time.time() - start_time) * 1000,
90+
error_message=(
91+
"Specification contains unverified assumptions: "
92+
+ ", ".join(assumptions)
93+
),
94+
counter_example=None,
95+
proof_output="",
96+
prover_name="lean",
97+
solver_status=ProofStatus.FORMALIZED_UNPROVED.value,
98+
assumptions_present=True,
99+
)
100+
101+
if not self.lean_available:
102+
return ProofResult(
103+
spec=spec,
104+
proven=False,
105+
proof_time_ms=0,
106+
error_message="Lean 4 theorem prover not available",
107+
counter_example=None,
108+
proof_output="",
109+
prover_name="lean",
110+
solver_status=ProofStatus.UNAVAILABLE.value,
111+
)
112+
113+
try:
114+
result = self._run_lean(lean_code, start_time, spec)
115+
except subprocess.TimeoutExpired:
116+
logger.warning("Lean proof timeout for: %s", spec.spec_text)
117+
result = ProofResult(
118+
spec=spec,
119+
proven=False,
120+
proof_time_ms=self.timeout_seconds * 1000,
121+
error_message="Proof attempt timed out",
122+
counter_example=None,
123+
proof_output="",
124+
prover_name="lean",
125+
solver_status=ProofStatus.TIMEOUT.value,
126+
)
127+
128+
if self.use_cache and self.cache:
129+
self.cache.put(spec, result)
130+
131+
return result
132+
133+
def _run_lean(
134+
self, lean_code: str, start_time: float, spec: FormalSpec
135+
) -> ProofResult:
136+
"""Invoke ``lean`` on a temporary file and interpret the result."""
137+
with tempfile.TemporaryDirectory() as tmp:
138+
source = Path(tmp) / "proof.lean"
139+
source.write_text(lean_code, encoding="utf-8")
140+
141+
proc = subprocess.run(
142+
["lean", source.name],
143+
capture_output=True,
144+
timeout=self.timeout_seconds,
145+
cwd=tmp,
146+
)
147+
148+
elapsed_ms = (time.time() - start_time) * 1000
149+
stdout = proc.stdout.decode("utf-8") if proc.stdout else ""
150+
stderr = proc.stderr.decode("utf-8") if proc.stderr else ""
151+
152+
if proc.returncode == 0:
153+
logger.debug("Lean proof succeeded for: %s", spec.spec_text)
154+
return ProofResult(
155+
spec=spec,
156+
proven=True,
157+
proof_time_ms=elapsed_ms,
158+
error_message=None,
159+
counter_example=None,
160+
proof_output=stdout,
161+
prover_name="lean",
162+
solver_status=ProofStatus.MACHINE_CHECKED.value,
163+
checker_name="lean",
164+
)
165+
166+
logger.debug(
167+
"Lean proof failed for: %s, error: %s",
168+
spec.spec_text,
169+
stderr[:100],
170+
)
171+
status = ProofStatus.default_for_solver_result(
172+
proven=False, prover_name="lean", counter_example=None,
173+
)
174+
return ProofResult(
175+
spec=spec,
176+
proven=False,
177+
proof_time_ms=elapsed_ms,
178+
error_message=stderr or "Lean type-check failed",
179+
counter_example=None,
180+
proof_output=stdout,
181+
prover_name="lean",
182+
solver_status=status.value,
183+
)

0 commit comments

Comments
 (0)