-
Notifications
You must be signed in to change notification settings - Fork 23
feat: Add Lean 4 formalization with hybrid Lean/Coq resolution #3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
4f52637
9dfdfda
5bfa289
d155229
9b8792a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,109 @@ | ||
| """Hybrid Lean + Coq conflict resolver. | ||
|
|
||
| Tries one prover first, falls back to the other on failure. Exposes | ||
| the standard ``prove_specification`` interface so it plugs into the | ||
| existing FormalVerificationConflictDetector without changes. | ||
| """ | ||
|
|
||
| import logging | ||
|
|
||
| from .lean_prover import LeanProver | ||
| from .lean_translator import LeanTranslator | ||
| from .prover import CoqProver | ||
| from .types import FormalSpec, ProofResult | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| class HybridLeanCoqResolver: | ||
| """Lean-first prover with Coq fallback (or vice versa).""" | ||
|
|
||
| def __init__( | ||
| self, | ||
| prefer_lean: bool = True, | ||
| use_fallback: bool = True, | ||
| timeout_seconds: int = 30, | ||
| ): | ||
| """Initialize hybrid resolver. | ||
|
|
||
| Args: | ||
| prefer_lean: If True, try Lean first; otherwise try Coq first. | ||
| use_fallback: If True, try the other prover when the first fails. | ||
| timeout_seconds: Per-prover timeout. | ||
| """ | ||
| self.prefer_lean = prefer_lean | ||
| self.use_fallback = use_fallback | ||
| self.lean_translator = LeanTranslator() | ||
| self.lean_prover = LeanProver(timeout_seconds=timeout_seconds) | ||
| self.coq_prover = CoqProver(timeout_seconds=timeout_seconds) | ||
|
|
||
| def prove_specification(self, spec: FormalSpec) -> ProofResult: | ||
| """Prove a specification, trying both backends if needed. | ||
|
|
||
| Args: | ||
| spec: Formal specification (may contain Coq or Lean code). | ||
|
|
||
| Returns: | ||
| Best ProofResult obtained across backends. | ||
| """ | ||
| if self.prefer_lean: | ||
| return self._lean_then_coq(spec) | ||
| return self._coq_then_lean(spec) | ||
|
|
||
| def _lean_then_coq(self, spec: FormalSpec) -> ProofResult: | ||
| """Try Lean first, fall back to Coq.""" | ||
| lean_spec = self._ensure_lean_spec(spec) | ||
| if lean_spec is not None: | ||
| lean_result = self.lean_prover.prove_specification(lean_spec) | ||
| if lean_result.proven: | ||
| return lean_result | ||
| else: | ||
| lean_result = None | ||
|
|
||
| if not self.use_fallback: | ||
| if lean_result is not None: | ||
| return lean_result | ||
| return self._untranslatable(spec, "lean") | ||
|
|
||
| coq_result = self.coq_prover.prove_specification(spec) | ||
| if coq_result.proven: | ||
| return coq_result | ||
| # Prefer the more informative result from whichever prover ran. | ||
| return lean_result if lean_result is not None else coq_result | ||
|
|
||
| def _coq_then_lean(self, spec: FormalSpec) -> ProofResult: | ||
| """Try Coq first, fall back to Lean.""" | ||
| coq_result = self.coq_prover.prove_specification(spec) | ||
| if coq_result.proven: | ||
| return coq_result | ||
|
|
||
| if not self.use_fallback: | ||
| return coq_result | ||
|
|
||
| lean_spec = self._ensure_lean_spec(spec) | ||
| if lean_spec is None: | ||
| return coq_result | ||
|
|
||
| lean_result = self.lean_prover.prove_specification(lean_spec) | ||
| if lean_result.proven: | ||
| return lean_result | ||
| # Both failed — prefer the initial result. | ||
| return coq_result | ||
|
|
||
| def _ensure_lean_spec(self, spec: FormalSpec) -> FormalSpec | None: | ||
| """Translate the spec's claim to Lean if needed.""" | ||
| return self.lean_translator.translate(spec.claim) | ||
|
|
||
| @staticmethod | ||
| def _untranslatable(spec: FormalSpec, prover: str) -> ProofResult: | ||
| """Return an INCONCLUSIVE result when translation failed.""" | ||
| return ProofResult( | ||
| spec=spec, | ||
| proven=False, | ||
| proof_time_ms=0, | ||
| error_message=f"Could not translate claim for {prover} prover", | ||
| counter_example=None, | ||
| proof_output="", | ||
| prover_name=prover, | ||
| solver_status="inconclusive", | ||
| ) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,183 @@ | ||
| """Lean 4 theorem prover interface. | ||
|
|
||
| Proves Lean 4 theorems by invoking the ``lean`` binary, following the | ||
| same FormalSpec -> ProofResult contract as CoqProver. | ||
| """ | ||
|
|
||
| import logging | ||
| import re | ||
| import subprocess | ||
| import tempfile | ||
| import time | ||
| from pathlib import Path | ||
|
|
||
| from .proof_cache import ProofCache | ||
| from .types import FormalSpec, ProofResult, ProofStatus | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
| # Patterns that indicate an unverified assumption in Lean 4 code. | ||
| _ASSUMPTION_PATTERNS: dict[str, str] = { | ||
| "sorry": r"\bsorry\b", | ||
| "admit": r"\badmit\b", | ||
| "axiom": r"\baxiom\b", | ||
| } | ||
|
|
||
|
|
||
| class LeanProver: | ||
| """Interface to the Lean 4 type-checker for formal verification.""" | ||
|
|
||
| def __init__(self, timeout_seconds: int = 30, use_cache: bool = True): | ||
| """Initialize Lean prover. | ||
|
|
||
| Args: | ||
| timeout_seconds: Maximum seconds for a proof attempt. | ||
| use_cache: Whether to cache proof results. | ||
| """ | ||
| self.timeout_seconds = timeout_seconds | ||
| self.use_cache = use_cache | ||
| self.cache = ProofCache() if use_cache else None | ||
| self.lean_available = self._check_binary("lean") | ||
|
|
||
| if not self.lean_available: | ||
| logger.warning("Lean 4 theorem prover not available") | ||
|
|
||
| @staticmethod | ||
| def _check_binary(binary: str) -> bool: | ||
| """Check if a binary is installed and callable.""" | ||
| try: | ||
| result = subprocess.run( | ||
| [binary, "--version"], capture_output=True, timeout=5 | ||
| ) | ||
| return result.returncode == 0 | ||
| except (subprocess.TimeoutExpired, FileNotFoundError): | ||
| return False | ||
|
|
||
| @staticmethod | ||
| def _detect_unverified_assumptions(lean_code: str) -> list[str]: | ||
| """Detect assumptions that prevent a sound proof.""" | ||
| markers: list[str] = [] | ||
| for label, pattern in _ASSUMPTION_PATTERNS.items(): | ||
| if re.search(pattern, lean_code): | ||
| markers.append(label) | ||
| return markers | ||
|
|
||
| def prove_specification(self, spec: FormalSpec) -> ProofResult: | ||
| """Attempt to prove a formal specification using Lean 4. | ||
|
|
||
| Args: | ||
| spec: The formal specification to prove (Lean code in coq_code). | ||
|
|
||
| Returns: | ||
| ProofResult with proof status and timing information. | ||
| """ | ||
| lean_code = spec.coq_code | ||
|
|
||
| if self.use_cache and self.cache: | ||
| cached = self.cache.get(spec) | ||
| if cached: | ||
| return cached | ||
|
|
||
| start_time = time.time() | ||
|
|
||
| # Reject code that contains sorry / admit / axiom. | ||
| assumptions = self._detect_unverified_assumptions(lean_code) | ||
| if assumptions: | ||
| return ProofResult( | ||
| spec=spec, | ||
| proven=False, | ||
| proof_time_ms=(time.time() - start_time) * 1000, | ||
| error_message=( | ||
| "Specification contains unverified assumptions: " | ||
| + ", ".join(assumptions) | ||
| ), | ||
| counter_example=None, | ||
| proof_output="", | ||
| prover_name="lean", | ||
| solver_status=ProofStatus.FORMALIZED_UNPROVED.value, | ||
| assumptions_present=True, | ||
| ) | ||
|
|
||
| if not self.lean_available: | ||
| return ProofResult( | ||
| spec=spec, | ||
| proven=False, | ||
| proof_time_ms=0, | ||
| error_message="Lean 4 theorem prover not available", | ||
| counter_example=None, | ||
| proof_output="", | ||
| prover_name="lean", | ||
| solver_status=ProofStatus.UNAVAILABLE.value, | ||
| ) | ||
|
|
||
| try: | ||
| result = self._run_lean(lean_code, start_time, spec) | ||
| except subprocess.TimeoutExpired: | ||
| logger.warning("Lean proof timeout for: %s", spec.spec_text) | ||
| result = ProofResult( | ||
| spec=spec, | ||
| proven=False, | ||
| proof_time_ms=self.timeout_seconds * 1000, | ||
| error_message="Proof attempt timed out", | ||
| counter_example=None, | ||
| proof_output="", | ||
| prover_name="lean", | ||
| solver_status=ProofStatus.TIMEOUT.value, | ||
| ) | ||
|
|
||
| if self.use_cache and self.cache: | ||
| self.cache.put(spec, result) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Timeout results incorrectly cached unlike CoqProverMedium Severity The Reviewed by Cursor Bugbot for commit 9b8792a. Configure here. |
||
|
|
||
| return result | ||
|
Comment on lines
+121
to
+131
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Bug: An unhandled Suggested FixExpand the Prompt for AI Agent |
||
|
|
||
| def _run_lean( | ||
| self, lean_code: str, start_time: float, spec: FormalSpec | ||
| ) -> ProofResult: | ||
| """Invoke ``lean`` on a temporary file and interpret the result.""" | ||
| with tempfile.TemporaryDirectory() as tmp: | ||
| source = Path(tmp) / "proof.lean" | ||
| source.write_text(lean_code, encoding="utf-8") | ||
|
|
||
| proc = subprocess.run( | ||
| ["lean", source.name], | ||
| capture_output=True, | ||
| timeout=self.timeout_seconds, | ||
| cwd=tmp, | ||
| ) | ||
|
|
||
| elapsed_ms = (time.time() - start_time) * 1000 | ||
| stdout = proc.stdout.decode("utf-8") if proc.stdout else "" | ||
| stderr = proc.stderr.decode("utf-8") if proc.stderr else "" | ||
|
|
||
| if proc.returncode == 0: | ||
| logger.debug("Lean proof succeeded for: %s", spec.spec_text) | ||
| return ProofResult( | ||
| spec=spec, | ||
| proven=True, | ||
| proof_time_ms=elapsed_ms, | ||
| error_message=None, | ||
| counter_example=None, | ||
| proof_output=stdout, | ||
| prover_name="lean", | ||
| solver_status=ProofStatus.MACHINE_CHECKED.value, | ||
| checker_name="lean", | ||
| ) | ||
|
|
||
| logger.debug( | ||
| "Lean proof failed for: %s, error: %s", | ||
| spec.spec_text, | ||
| stderr[:100], | ||
| ) | ||
| status = ProofStatus.default_for_solver_result( | ||
| proven=False, prover_name="lean", counter_example=None, | ||
| ) | ||
| return ProofResult( | ||
| spec=spec, | ||
| proven=False, | ||
| proof_time_ms=elapsed_ms, | ||
| error_message=stderr or "Lean type-check failed", | ||
| counter_example=None, | ||
| proof_output=stdout, | ||
| prover_name="lean", | ||
| solver_status=status.value, | ||
| ) | ||
|
cursor[bot] marked this conversation as resolved.
|
||


Uh oh!
There was an error while loading. Please reload this page.