Skip to content

Commit 9dfdfda

Browse files
haasonsaasclaude
andcommitted
fix: Address review feedback on Lean prover status and witness inference
- LeanProver: use ProofStatus.default_for_solver_result() for failed proofs instead of hardcoding REFUTED — a tactic insufficiency is INCONCLUSIVE, not a definitive disproof - LeanTranslator._exists: infer witness from property bounds (x > 5 → 6, x >= 10 → 10, x = 3 → 3) instead of hardcoding 1 - Add test for witness inference Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 4f52637 commit 9dfdfda

3 files changed

Lines changed: 58 additions & 6 deletions

File tree

formal_verification/lean_prover.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,9 @@ def _run_lean(
168168
spec.spec_text,
169169
stderr[:100],
170170
)
171+
status = ProofStatus.default_for_solver_result(
172+
proven=False, prover_name="lean", counter_example=None,
173+
)
171174
return ProofResult(
172175
spec=spec,
173176
proven=False,
@@ -176,5 +179,5 @@ def _run_lean(
176179
counter_example=None,
177180
proof_output=stdout,
178181
prover_name="lean",
179-
solver_status=ProofStatus.REFUTED.value,
182+
solver_status=status.value,
180183
)

formal_verification/lean_translator.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,9 +244,10 @@ def _exists(
244244
self, claim: Claim, variable: str, prop: str
245245
) -> FormalSpec:
246246
prop_lean = self._to_lean_expr(prop)
247+
witness = self._infer_witness(variable, prop_lean)
247248
lean = (
248249
f"theorem exists_claim : ∃ {variable} : Nat, "
249-
f"{prop_lean} := ⟨1, by omega⟩"
250+
f"{prop_lean} := ⟨{witness}, by omega⟩"
250251
)
251252
return FormalSpec(
252253
claim=claim,
@@ -256,6 +257,35 @@ def _exists(
256257
claim_ir=claim.claim_ir,
257258
)
258259

260+
@staticmethod
261+
def _infer_witness(variable: str, prop: str) -> str:
262+
"""Extract a plausible witness from a Lean property expression.
263+
264+
Handles patterns like ``x > 5`` (witness 6), ``x = 3`` (witness 3),
265+
``x >= 10`` (witness 10). Falls back to ``0`` when no bound is found.
266+
"""
267+
# "x > N" → N + 1
268+
m = re.search(rf"\b{re.escape(variable)}\s*>\s*(\d+)", prop)
269+
if m:
270+
return str(int(m[1]) + 1)
271+
# "x >= N" or "x ≥ N" → N
272+
m = re.search(rf"\b{re.escape(variable)}\s*(?:>=|≥)\s*(\d+)", prop)
273+
if m:
274+
return m[1]
275+
# "x = N" → N
276+
m = re.search(rf"\b{re.escape(variable)}\s*=\s*(\d+)", prop)
277+
if m:
278+
return m[1]
279+
# "N < x" → N + 1
280+
m = re.search(rf"(\d+)\s*<\s*{re.escape(variable)}\b", prop)
281+
if m:
282+
return str(int(m[1]) + 1)
283+
# "N <= x" or "N ≤ x" → N
284+
m = re.search(rf"(\d+)\s*(?:<=|≤)\s*{re.escape(variable)}\b", prop)
285+
if m:
286+
return m[1]
287+
return "0"
288+
259289
@staticmethod
260290
def _to_lean_expr(text: str) -> str:
261291
"""Convert natural language fragments to Lean syntax."""

tests/test_formal_verification.py

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -702,6 +702,25 @@ def test_translate_gcd(self):
702702
assert spec is not None
703703
assert "Nat.gcd 12 8 = 4" in spec.coq_code
704704

705+
def test_exists_infers_witness_from_bound(self):
706+
"""Existential witness is derived from the property, not hardcoded."""
707+
translator = LeanTranslator()
708+
ir = build_claim_ir("exists x, x > 5")
709+
if ir is None:
710+
# If IR parsing doesn't handle this, test regex path
711+
claim = Claim(
712+
"a", "exists x such that x > 5",
713+
PropertyType.CORRECTNESS, 1.0, 0,
714+
)
715+
else:
716+
claim = Claim(
717+
"a", "exists x, x > 5",
718+
PropertyType.CORRECTNESS, 1.0, 0, claim_ir=ir,
719+
)
720+
spec = translator.translate(claim)
721+
if spec is not None:
722+
assert "⟨6," in spec.coq_code
723+
705724
def test_untranslatable_returns_none(self):
706725
"""Return None for claims that cannot be translated."""
707726
translator = LeanTranslator()
@@ -775,7 +794,7 @@ def test_successful_proof(self, mock_run):
775794

776795
@patch("subprocess.run")
777796
def test_failed_proof(self, mock_run):
778-
"""Failed lean invocation returns refuted."""
797+
"""Failed lean invocation returns inconclusive."""
779798
mock_run.return_value = Mock(
780799
returncode=1, stdout=b"", stderr=b"type mismatch"
781800
)
@@ -789,7 +808,7 @@ def test_failed_proof(self, mock_run):
789808
result = prover.prove_specification(spec)
790809

791810
assert result.proven is False
792-
assert result.solver_status == "refuted"
811+
assert result.solver_status == "inconclusive"
793812
assert "type mismatch" in result.error_message
794813

795814
@patch("subprocess.run")
@@ -836,7 +855,7 @@ def test_lean_failure_falls_back_to_coq(self):
836855
resolver = HybridLeanCoqResolver(prefer_lean=True, use_fallback=True)
837856
lean_fail = ProofResult(
838857
None, False, 10.0, "failed", None, prover_name="lean",
839-
solver_status="refuted",
858+
solver_status="inconclusive",
840859
)
841860
coq_ok = ProofResult(
842861
None, True, 20.0, None, None, prover_name="coq",
@@ -859,7 +878,7 @@ def test_no_fallback_returns_first_failure(self):
859878
resolver = HybridLeanCoqResolver(prefer_lean=True, use_fallback=False)
860879
lean_fail = ProofResult(
861880
None, False, 5.0, "failed", None, prover_name="lean",
862-
solver_status="refuted",
881+
solver_status="inconclusive",
863882
)
864883

865884
with (

0 commit comments

Comments
 (0)