Skip to content

Commit a7a6f35

Browse files
committed
updated evaluation function to pass
1 parent f6e60eb commit a7a6f35

3 files changed

Lines changed: 137 additions & 19 deletions

File tree

evaluation_function/evaluation.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from typing import Any
22
from lf_toolkit.evaluation import Result, Params
33

4-
from evaluation_function.domain.evaluators import *
4+
from evaluation_function.domain.evaluators import _extract_atoms, EquivalenceEvaluator, SatisfiabilityEvaluator, TautologyEvaluator
55
from evaluation_function.domain.formula import *
66

77
from evaluation_function.parsing.parser import formula_parser
@@ -40,7 +40,16 @@ def evaluation_function(
4040

4141

4242
if not isinstance(answer, str):
43-
raise Exception("Answer must be a string/text.")
43+
return Result(
44+
is_correct=False,
45+
feedback_items=[("incorrect input", "missing answer object")]
46+
)
47+
48+
if not isinstance(response, dict):
49+
return Result(
50+
is_correct=False,
51+
feedback_items=[("incorrect input", "missing response object")]
52+
)
4453

4554

4655
response_formula = response.get("formula", None)

evaluation_function/evaluation_test.py

Lines changed: 99 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -22,60 +22,142 @@ class TestEvaluationFunction(unittest.TestCase):
2222
"""
2323

2424
def test_evaluation_default(self):
25-
response, answer, params = "Hello, World", "Hello, World", Params()
25+
response = {"formula": "Hello, World"}
26+
answer = "Hello, World"
27+
params = Params()
2628

2729
result = evaluation_function(response, answer, params).to_dict()
2830

2931
self.assertEqual(result.get("is_correct"), False)
30-
self.assertFalse(len(result.get("feedback", [])) == 0)
3132

3233
def test_check_tautology(self):
33-
34-
response, answer, params = "p ∨ ¬p", "", {"tautology": True}
34+
response = {"formula": "p ∨ ¬p"}
35+
answer = ""
36+
params = {"tautology": True}
3537

3638
result = evaluation_function(response, answer, params).to_dict()
3739

3840
self.assertTrue(result.get("is_correct"))
3941

4042
def test_check_tautology_fail(self):
41-
42-
response, answer, params = "p ∧ ¬p", "", {"tautology": True}
43+
response = {"formula": "p ∧ ¬p"}
44+
answer = ""
45+
params = {"tautology": True}
4346

4447
result = evaluation_function(response, answer, params).to_dict()
4548

4649
self.assertFalse(result.get("is_correct"))
4750

48-
4951
def test_check_satisfiability(self):
50-
51-
response, answer, params = "p ∧ q", "", {"satisfiability": True}
52+
response = {"formula": "p ∧ q"}
53+
answer = ""
54+
params = {"satisfiability": True}
5255

5356
result = evaluation_function(response, answer, params).to_dict()
5457

5558
self.assertTrue(result.get("is_correct"))
5659

5760
def test_check_satisfiability_fail(self):
58-
59-
response, answer, params = "p ∧ ¬p", "", {"satisfiability": True}
61+
response = {"formula": "p ∧ ¬p"}
62+
answer = ""
63+
params = {"satisfiability": True}
6064

6165
result = evaluation_function(response, answer, params).to_dict()
6266

6367
self.assertFalse(result.get("is_correct"))
6468

65-
6669
def test_check_equivalence(self):
67-
68-
response, answer, params = "p ∧ q", "p ∧ (q ∨ q)", {"equivalence": True}
70+
response = {"formula": "p ∧ q"}
71+
answer = "p ∧ (q ∨ q)"
72+
params = {"equivalence": True}
6973

7074
result = evaluation_function(response, answer, params).to_dict()
7175

7276
self.assertTrue(result.get("is_correct"))
7377

7478
def test_check_equivalence_fail(self):
75-
76-
response, answer, params = "p ∧ q", "p", {"equivalence": True}
79+
response = {"formula": "p ∧ q"}
80+
answer = "p"
81+
params = {"equivalence": True}
7782

7883
result = evaluation_function(response, answer, params).to_dict()
7984

8085
self.assertFalse(result.get("is_correct"))
81-
86+
87+
def test_truth_table_valid(self):
88+
response = {
89+
"formula": "p ∧ q",
90+
"truthTable": {
91+
"variables": ["p", "q", "p ∧ q"],
92+
"cells": [
93+
["tt", "tt", "tt"],
94+
["tt", "ff", "ff"],
95+
["ff", "tt", "ff"],
96+
["ff", "ff", "ff"]
97+
]
98+
}
99+
}
100+
answer = "p ∧ q"
101+
params = {"equivalence": True}
102+
103+
result = evaluation_function(response, answer, params).to_dict()
104+
105+
self.assertTrue(result.get("is_correct"))
106+
107+
def test_truth_table_invalid(self):
108+
response = {
109+
"formula": "p ∧ q",
110+
"truthTable": {
111+
"variables": ["p", "q", "p ∧ q"],
112+
"cells": [
113+
["tt", "tt", "ff"], # Wrong value
114+
["tt", "ff", "ff"],
115+
["ff", "tt", "ff"],
116+
["ff", "ff", "ff"]
117+
]
118+
}
119+
}
120+
answer = "p ∧ q"
121+
params = {"equivalence": True}
122+
123+
result = evaluation_function(response, answer, params).to_dict()
124+
125+
self.assertFalse(result.get("is_correct"))
126+
127+
def test_invalid_response_type(self):
128+
response = "just a string" # Invalid type
129+
answer = "p"
130+
params = {"tautology": True}
131+
132+
result = evaluation_function(response, answer, params).to_dict()
133+
134+
self.assertFalse(result.get("is_correct"))
135+
self.assertIn("feedback", result)
136+
137+
def test_missing_formula_field(self):
138+
response = {"wrongField": "p"}
139+
answer = "p"
140+
params = {"tautology": True}
141+
142+
result = evaluation_function(response, answer, params).to_dict()
143+
144+
self.assertFalse(result.get("is_correct"))
145+
146+
def test_no_params_selected(self):
147+
response = {"formula": "p"}
148+
answer = "p"
149+
params = {}
150+
151+
result = evaluation_function(response, answer, params).to_dict()
152+
153+
self.assertFalse(result.get("is_correct"))
154+
155+
def test_multiple_params_selected(self):
156+
response = {"formula": "p"}
157+
answer = "p"
158+
params = {"tautology": True, "satisfiability": True}
159+
160+
result = evaluation_function(response, answer, params).to_dict()
161+
162+
self.assertFalse(result.get("is_correct"))
163+

evaluation_function/truth_table/truth_table_evaluate_tests.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,33 @@ def test_complex_formula(self):
181181
result = evaluate_truth_table(variables, cells, 2)
182182
self.assertTrue(result.is_correct)
183183

184+
def test_three_atoms(self):
185+
"""Test a valid truth table with three atoms"""
186+
variables = ["p", "q", "r", "p ∧ q ∧ r"]
187+
cells = [
188+
["tt", "tt", "tt", "tt"],
189+
["tt", "tt", "ff", "ff"],
190+
["tt", "ff", "tt", "ff"],
191+
["tt", "ff", "ff", "ff"],
192+
["ff", "tt", "tt", "ff"],
193+
["ff", "tt", "ff", "ff"],
194+
["ff", "ff", "tt", "ff"],
195+
["ff", "ff", "ff", "ff"]
196+
]
197+
result = evaluate_truth_table(variables, cells, 3)
198+
self.assertTrue(result.is_correct)
199+
200+
def test_only_atoms(self):
201+
"""Test truth table with only atom columns"""
202+
variables = ["p", "q"]
203+
cells = [
204+
["tt", "tt"],
205+
["tt", "ff"],
206+
["ff", "tt"],
207+
["ff", "ff"]
208+
]
209+
result = evaluate_truth_table(variables, cells, 2)
210+
self.assertTrue(result.is_correct)
184211

185212
if __name__ == '__main__':
186213
unittest.main()

0 commit comments

Comments
 (0)