Skip to content

Commit 56e81a0

Browse files
committed
fix: fixed equivalence evaluation by setting up bijection between the two formulas atoms
1 parent fda77f1 commit 56e81a0

3 files changed

Lines changed: 238 additions & 31 deletions

File tree

evaluation_function/domain/evaluators.py

Lines changed: 38 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from itertools import product
1+
from itertools import product, permutations
22
from typing import Mapping, Set
33
from .formula import (
44
Formula,
@@ -71,6 +71,8 @@ def _extract_atoms(formula: Formula) -> Set[Atom]:
7171

7272

7373
class EquivalenceEvaluator:
74+
"""Checks if two formulas are equivalent up to renaming of atoms (so e.g. 's' and 'p' are equivalent)."""
75+
7476
def __init__(self, formula1: Formula, formula2: Formula):
7577
self._formula1 = formula1
7678
self._formula2 = formula2
@@ -80,27 +82,41 @@ def evaluate(self) -> bool:
8082
return ok
8183

8284
def evaluate_with_counterexample(self) -> tuple[bool, dict | None]:
83-
"""Returns (are_equivalent, counterexample_or_none). Counterexample has assignment, response_value, expected_value."""
84-
atoms1 = _extract_atoms(self._formula1)
85-
atoms2 = _extract_atoms(self._formula2)
86-
all_atoms = list(atoms1 | atoms2)
87-
88-
for assignment_values in product([False, True], repeat=len(all_atoms)):
89-
assignment_dict = {atom: val for atom, val in zip(all_atoms, assignment_values)}
90-
assignment = Assignment(assignment_dict)
91-
92-
evaluator1 = FormulaEvaluator(self._formula1, assignment)
93-
evaluator2 = FormulaEvaluator(self._formula2, assignment)
94-
v1, v2 = evaluator1.evaluate(), evaluator2.evaluate()
95-
96-
if v1 != v2:
97-
assignment_str = {atom.name: val for atom, val in assignment_dict.items()}
98-
return False, {
99-
"assignment": assignment_str,
100-
"response_value": v1,
101-
"expected_value": v2,
102-
}
103-
return True, None
85+
"""Returns (are_equivalent, counterexample_or_none). Equivalent = same truth behaviour under some renaming of atoms."""
86+
atoms1 = sorted(_extract_atoms(self._formula1), key=lambda a: a.name)
87+
atoms2 = sorted(_extract_atoms(self._formula2), key=lambda a: a.name)
88+
89+
if len(atoms1) != len(atoms2):
90+
return False, {
91+
"assignment": {},
92+
"response_value": None,
93+
"expected_value": None,
94+
"reason": f"different number of atoms: {len(atoms1)} vs {len(atoms2)}",
95+
}
96+
97+
n = len(atoms1)
98+
first_counterexample = None
99+
for perm in permutations(range(n)):
100+
# perm[j] = index in atoms2 that atoms1[j] is renamed to; so atoms1[j] gets value of atoms2[perm[j]]
101+
for assignment_values in product([False, True], repeat=n):
102+
assignment2_dict = {atoms2[i]: assignment_values[i] for i in range(n)}
103+
assignment1_dict = {atoms1[j]: assignment_values[perm[j]] for j in range(n)}
104+
a1 = Assignment(assignment1_dict)
105+
a2 = Assignment(assignment2_dict)
106+
v1 = FormulaEvaluator(self._formula1, a1).evaluate()
107+
v2 = FormulaEvaluator(self._formula2, a2).evaluate()
108+
if v1 != v2:
109+
if first_counterexample is None:
110+
first_counterexample = {
111+
"assignment": {atoms2[i].name: assignment_values[i] for i in range(n)},
112+
"response_value": v1,
113+
"expected_value": v2,
114+
}
115+
break
116+
else:
117+
return True, None
118+
119+
return False, first_counterexample
104120

105121

106122
class SatisfiabilityEvaluator:

evaluation_function/evaluation.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -126,11 +126,14 @@ def evaluation_function(
126126
f"Comparing your formula \"{response_formula}\" with expected \"{equivalent}\". They are not equivalent."
127127
))
128128
if counterex:
129-
asn = ", ".join(f"{k}={counterex['assignment'][k]}" for k in sorted(counterex["assignment"]))
130-
feedback.append((
131-
"counterexample",
132-
f"Under assignment ({asn}): your formula = {counterex['response_value']}, expected formula = {counterex['expected_value']}."
133-
))
129+
if counterex.get("reason"):
130+
feedback.append(("counterexample", counterex["reason"]))
131+
elif counterex.get("assignment") is not None:
132+
asn = ", ".join(f"{k}={counterex['assignment'][k]}" for k in sorted(counterex["assignment"]))
133+
feedback.append((
134+
"counterexample",
135+
f"Under assignment ({asn}): your formula = {counterex['response_value']}, expected formula = {counterex['expected_value']}."
136+
))
134137
elif tautology:
135138
ev = TautologyEvaluator(formula)
136139
is_correct, counterex = ev.evaluate_with_counterexample()

evaluation_function/evaluation_test.py

Lines changed: 192 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def test_evaluation_default(self):
3232

3333
def test_check_tautology(self):
3434
response = {"formula": "p ∨ ¬p"}
35-
answer = {"satisability": False, "tautology": True, "equivalent": None, "truthTable": None}
35+
answer = {"satisfiability": False, "tautology": True, "equivalent": None, "validTruthTable": False}
3636
params = Params()
3737

3838
result = evaluation_function(response, answer, params).to_dict()
@@ -41,7 +41,7 @@ def test_check_tautology(self):
4141

4242
def test_check_tautology_fail(self):
4343
response = {"formula": "p ∧ ¬p"}
44-
answer = {"satisability": False, "tautology": True, "equivalent": None, "truthTable": None}
44+
answer = {"satisfiability": False, "tautology": True, "equivalent": None, "validTruthTable": False}
4545
params = Params()
4646

4747
result = evaluation_function(response, answer, params).to_dict()
@@ -84,6 +84,16 @@ def test_check_equivalence_fail(self):
8484

8585
self.assertFalse(result.get("is_correct"))
8686

87+
def test_check_equivalence_different_atoms(self):
88+
"""Formulas with same structure but different atom names (e.g. 's' vs 'p') are equivalent up to renaming."""
89+
response = {"formula": "s"}
90+
answer = {"satisfiability": False, "tautology": False, "equivalent": "p", "validTruthTable": False}
91+
params = Params()
92+
93+
result = evaluation_function(response, answer, params).to_dict()
94+
95+
self.assertTrue(result.get("is_correct"))
96+
8797
def test_truth_table_valid(self):
8898
response = {
8999
"formula": "p ∧ q",
@@ -126,7 +136,7 @@ def test_truth_table_invalid(self):
126136

127137
def test_invalid_response_type(self):
128138
response = "just a string" # Invalid type
129-
answer = {"satisability": False, "tautology": True, "equivalent": None, "truthTable": None}
139+
answer = {"satisfiability": False, "tautology": True, "equivalent": None, "validTruthTable": False}
130140
params = Params()
131141

132142
result = evaluation_function(response, answer, params).to_dict()
@@ -136,7 +146,7 @@ def test_invalid_response_type(self):
136146

137147
def test_missing_formula_field(self):
138148
response = {"wrongField": "p"}
139-
answer = {"satisability": False, "tautology": True, "equivalent": None, "truthTable": None}
149+
answer = {"satisfiability": False, "tautology": True, "equivalent": None, "validTruthTable": False}
140150
params = Params()
141151

142152
result = evaluation_function(response, answer, params).to_dict()
@@ -161,3 +171,181 @@ def test_multiple_params_selected(self):
161171

162172
self.assertFalse(result.get("is_correct"))
163173

174+
# --- Equivalence (extra) ---
175+
176+
def test_equivalence_same_structure_three_atoms(self):
177+
"""Same structure with different atom names: (a ∧ b) ∧ c vs (p ∧ q) ∧ r."""
178+
response = {"formula": "(a ∧ b) ∧ c"}
179+
answer = {"satisfiability": False, "tautology": False, "equivalent": "(p ∧ q) ∧ r", "validTruthTable": False}
180+
params = Params()
181+
result = evaluation_function(response, answer, params).to_dict()
182+
self.assertTrue(result.get("is_correct"))
183+
184+
def test_equivalence_different_number_of_atoms(self):
185+
"""Single atom vs binary: not equivalent (different structure)."""
186+
response = {"formula": "p"}
187+
answer = {"satisfiability": False, "tautology": False, "equivalent": "p ∧ q", "validTruthTable": False}
188+
params = Params()
189+
result = evaluation_function(response, answer, params).to_dict()
190+
self.assertFalse(result.get("is_correct"))
191+
self.assertIn("feedback", result)
192+
193+
def test_equivalence_negation_same_structure(self):
194+
"""¬s vs ¬p: equivalent up to renaming."""
195+
response = {"formula": "¬s"}
196+
answer = {"satisfiability": False, "tautology": False, "equivalent": "¬p", "validTruthTable": False}
197+
params = Params()
198+
result = evaluation_function(response, answer, params).to_dict()
199+
self.assertTrue(result.get("is_correct"))
200+
201+
def test_equivalence_implication_same_structure(self):
202+
"""a → b vs p → q: equivalent up to renaming."""
203+
response = {"formula": "a → b"}
204+
answer = {"satisfiability": False, "tautology": False, "equivalent": "p → q", "validTruthTable": False}
205+
params = Params()
206+
result = evaluation_function(response, answer, params).to_dict()
207+
self.assertTrue(result.get("is_correct"))
208+
209+
def test_equivalence_failure_includes_feedback(self):
210+
"""When equivalence fails, feedback should mention formulas and counterexample."""
211+
response = {"formula": "p ∨ q"}
212+
answer = {"satisfiability": False, "tautology": False, "equivalent": "p ∧ q", "validTruthTable": False}
213+
params = Params()
214+
result = evaluation_function(response, answer, params).to_dict()
215+
self.assertFalse(result.get("is_correct"))
216+
feedback = result.get("feedback_items", result.get("feedback", []))
217+
feedback_str = str(feedback).lower()
218+
self.assertTrue("equivalent" in feedback_str or "formula" in feedback_str or "counterexample" in feedback_str)
219+
220+
# --- Tautology (extra) ---
221+
222+
def test_tautology_implication_self(self):
223+
"""p → p is a tautology."""
224+
response = {"formula": "p → p"}
225+
answer = {"satisfiability": False, "tautology": True, "equivalent": None, "validTruthTable": False}
226+
params = Params()
227+
result = evaluation_function(response, answer, params).to_dict()
228+
self.assertTrue(result.get("is_correct"))
229+
230+
def test_tautology_fail_single_atom(self):
231+
"""Single atom p is not a tautology."""
232+
response = {"formula": "p"}
233+
answer = {"satisfiability": False, "tautology": True, "equivalent": None, "validTruthTable": False}
234+
params = Params()
235+
result = evaluation_function(response, answer, params).to_dict()
236+
self.assertFalse(result.get("is_correct"))
237+
238+
def test_tautology_failure_includes_feedback(self):
239+
"""Tautology failure should include feedback."""
240+
response = {"formula": "p"}
241+
answer = {"satisfiability": False, "tautology": True, "equivalent": None, "validTruthTable": False}
242+
params = Params()
243+
result = evaluation_function(response, answer, params).to_dict()
244+
self.assertFalse(result.get("is_correct"))
245+
self.assertIn("feedback", result)
246+
247+
# --- Satisfiability (extra) ---
248+
249+
def test_satisfiability_disjunction(self):
250+
"""p ∨ q is satisfiable."""
251+
response = {"formula": "p ∨ q"}
252+
answer = {"satisfiability": True, "tautology": False, "equivalent": None, "validTruthTable": False}
253+
params = Params()
254+
result = evaluation_function(response, answer, params).to_dict()
255+
self.assertTrue(result.get("is_correct"))
256+
257+
def test_satisfiability_failure_includes_feedback(self):
258+
"""Unsatisfiable formula should return False and include feedback."""
259+
response = {"formula": "p ∧ ¬p"}
260+
answer = {"satisfiability": True, "tautology": False, "equivalent": None, "validTruthTable": False}
261+
params = Params()
262+
result = evaluation_function(response, answer, params).to_dict()
263+
self.assertFalse(result.get("is_correct"))
264+
self.assertIn("feedback", result)
265+
266+
# --- Truth table (extra) ---
267+
268+
def test_truth_table_required_but_missing(self):
269+
"""When answer expects truth table but response has no truthTable, should fail."""
270+
response = {"formula": "p ∧ q"}
271+
answer = {"satisfiability": False, "tautology": False, "equivalent": None, "validTruthTable": True}
272+
params = Params()
273+
result = evaluation_function(response, answer, params).to_dict()
274+
self.assertFalse(result.get("is_correct"))
275+
276+
def test_truth_table_single_atom(self):
277+
"""Truth table for single atom p: 2 rows."""
278+
response = {
279+
"formula": "p",
280+
"truthTable": {
281+
"variables": ["p"],
282+
"cells": [["tt"], ["ff"]]
283+
}
284+
}
285+
answer = {"satisfiability": False, "tautology": False, "equivalent": None, "validTruthTable": True}
286+
params = Params()
287+
result = evaluation_function(response, answer, params).to_dict()
288+
self.assertTrue(result.get("is_correct"))
289+
290+
def test_truth_table_invalid_cell_token(self):
291+
"""Cell value that is not tt/ff should fail."""
292+
response = {
293+
"formula": "p",
294+
"truthTable": {
295+
"variables": ["p"],
296+
"cells": [["tt"], ["invalid"]]
297+
}
298+
}
299+
answer = {"satisfiability": False, "tautology": False, "equivalent": None, "validTruthTable": True}
300+
params = Params()
301+
result = evaluation_function(response, answer, params).to_dict()
302+
self.assertFalse(result.get("is_correct"))
303+
304+
def test_truth_table_missing_combinations(self):
305+
"""Only one row for two atoms should fail (missing combinations)."""
306+
response = {
307+
"formula": "p ∧ q",
308+
"truthTable": {
309+
"variables": ["p", "q", "p ∧ q"],
310+
"cells": [["tt", "tt", "tt"]]
311+
}
312+
}
313+
answer = {"satisfiability": False, "tautology": False, "equivalent": None, "validTruthTable": True}
314+
params = Params()
315+
result = evaluation_function(response, answer, params).to_dict()
316+
self.assertFalse(result.get("is_correct"))
317+
318+
# --- Input / validation (extra) ---
319+
320+
def test_answer_not_dict(self):
321+
"""Answer must be a dict; string answer gives incorrect."""
322+
response = {"formula": "p"}
323+
answer = 42
324+
params = Params()
325+
result = evaluation_function(response, answer, params).to_dict()
326+
self.assertFalse(result.get("is_correct"))
327+
328+
def test_response_as_json_string(self):
329+
"""Response can be a JSON string (parsed before use)."""
330+
response = '{"formula": "p ∨ ¬p"}'
331+
answer = {"satisfiability": False, "tautology": True, "equivalent": None, "validTruthTable": False}
332+
params = Params()
333+
result = evaluation_function(response, answer, params).to_dict()
334+
self.assertTrue(result.get("is_correct"))
335+
336+
def test_formula_not_string(self):
337+
"""Response formula must be a string."""
338+
response = {"formula": 123}
339+
answer = {"satisfiability": False, "tautology": True, "equivalent": None, "validTruthTable": False}
340+
params = Params()
341+
result = evaluation_function(response, answer, params).to_dict()
342+
self.assertFalse(result.get("is_correct"))
343+
344+
def test_truth_table_mode_with_truth_table_false(self):
345+
"""validTruthTable False means we are not in truth table mode."""
346+
response = {"formula": "p ∨ ¬p"}
347+
answer = {"satisfiability": False, "tautology": True, "equivalent": None, "validTruthTable": False}
348+
params = Params()
349+
result = evaluation_function(response, answer, params).to_dict()
350+
self.assertTrue(result.get("is_correct"))
351+

0 commit comments

Comments
 (0)