Commit 169b43f

Merge pull request #45 from lambda-feedback/test-tr124-restructuring
Test tr124 restructuring
2 parents a79a280 + efa622f commit 169b43f

9 files changed

Lines changed: 229 additions & 26 deletions


app/context/symbolic.py

Lines changed: 41 additions & 5 deletions
@@ -40,6 +40,8 @@ def check_criterion(criterion, parameters_dict, generate_feedback=True):
         result = check_order(criterion, parameters_dict)
     elif label == "CONTAINS":
         result = check_contains_symbol(criterion, parameters_dict)
+    elif label == "PROPORTIONAL_TO":
+        result = check_proportionality(criterion, parameters_dict)
     elif label == "WHERE":
         crit = criterion.children[0]
         subs = criterion.children[1]
@@ -90,7 +92,10 @@ def do_comparison(comparison_symbol, expression):
         "<=": lambda expr: bool(expression.cancel().simplify().simplify() <= 0),
     }
     comparison = comparisons[comparison_symbol.strip()]
-    result = comparison(expression)
+    try:
+        result = comparison(expression)
+    except Exception:
+        result = None
     return result

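The try/except above matters because SymPy refuses to coerce an undecided relation to a Python bool. A minimal sketch of the failure mode the new branch catches (the names below are illustrative, not from the repository):

from sympy import Symbol

x = Symbol("x", real=True)
expression = x - 1  # the sign depends on x, so "x - 1 <= 0" is undecided
try:
    result = bool(expression <= 0)
except TypeError:
    # SymPy raises "cannot determine truth value of Relational" here;
    # do_comparison now returns None in this situation instead of crashing.
    result = None
print(result)  # None, which the criteria graph later reports as _UNKNOWN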

@@ -146,6 +151,20 @@ def check_order(criterion, parameters_dict, local_substitutions=[]):
     return result


+def check_proportionality(criterion, parameters_dict, local_substitutions=[]):
+    lhs_expr, rhs_expr = create_expressions_for_comparison(criterion, parameters_dict, local_substitutions)
+    result = None
+    if lhs_expr.cancel().simplify().simplify() != 0:
+        result = (rhs_expr/lhs_expr).cancel().simplify()
+    elif rhs_expr.cancel().simplify().simplify() != 0:
+        result = (lhs_expr/rhs_expr).cancel().simplify()
+    if result == 0 or result is None:
+        result = False
+    else:
+        result = result.is_constant()
+    return result
+
+
 def check_contains_symbol(criterion, parameters_dict, local_substitutions=[]):
     lhs_expr, rhs_expr = create_expressions_for_comparison(criterion, parameters_dict, local_substitutions)
     result = rhs_expr in lhs_expr.atoms()
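The new check_proportionality divides one expression by the other and asks whether the simplified ratio is a non-zero constant. A standalone sketch of the same idea using plain SymPy expressions in place of create_expressions_for_comparison (the helper is_proportional is illustrative only):

from sympy import symbols, pi

a, b, c, x = symbols("a b c x")

def is_proportional(lhs, rhs):
    # Divide by whichever side is non-zero and test whether the ratio is constant.
    ratio = None
    if lhs.cancel().simplify() != 0:
        ratio = (rhs/lhs).cancel().simplify()
    elif rhs.cancel().simplify() != 0:
        ratio = (lhs/rhs).cancel().simplify()
    if ratio is None or ratio == 0:
        return False
    return bool(ratio.is_constant())

print(is_proportional(a+b+c, 2*a+2*b+2*c))  # True, the ratio simplifies to 2
print(is_proportional(a+b+c, pi*(a+b+c)))   # True, the ratio is the constant pi
print(is_proportional(a+b+c, x*(a+b+c)))    # False, the ratio x is not a constant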
@@ -218,10 +237,14 @@ def mathematical_equivalence(unused_input):
         return {
             label+"_TRUE": None
         }
-    else:
+    elif result is False:
         return {
             label+"_FALSE": None
         }
+    else:
+        return {
+            label+"_UNKNOWN": None
+        }

 def set_equivalence(unused_input):
     matches = {"responses": [False]*len(response_list), "answers": [False]*len(answer_list)}
@@ -328,7 +351,7 @@ def same_symbols(unused_input):
     ans = parameters_dict["reserved_expressions"]["answer"]
     use_equality_equivalence = isinstance(res, Equality) or isinstance(ans, Equality)

-    # TODO: Make checking set quivalence its own context that calls symbolic comparisons instead
+    # TODO: Make checking set equivalence its own context that calls symbolic comparisons instead
     if use_set_equivalence is True:
         graph.add_evaluation_node(
             label,
@@ -718,14 +741,19 @@ def criterion_eval_node(criterion, parameters_dict, generate_feedback=True):
     def evaluation_node_internal(unused_input):
         result = check_criterion(criterion, parameters_dict, generate_feedback)
         label = criterion.content_string()
-        if result:
+        if result is True:
             return {
                 label+"_TRUE": feedback_string_generator_inputs
             }
-        else:
+        elif result is False:
             return {
                 label+"_FALSE": feedback_string_generator_inputs
             }
+        else:
+            return {
+                label+"_UNKNOWN": feedback_string_generator_inputs
+            }
+
     label = criterion.content_string()
     graph = CriteriaGraph(label)
     END = CriteriaGraph.END
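The switch from `if result:` to explicit `is True` / `is False` checks is what lets a None result reach its own tag; under plain truthiness None is falsy and would have been reported as _FALSE. A tiny illustration:

label = "answer <= response"
for result in (True, False, None):
    if result is True:
        tag = label + "_TRUE"
    elif result is False:
        tag = label + "_FALSE"
    else:
        tag = label + "_UNKNOWN"  # None no longer collapses into the FALSE branch
    print(result, "->", tag)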
@@ -747,6 +775,14 @@ def evaluation_node_internal(unused_input):
         feedback_string_generator=symbolic_feedback_string_generators["GENERIC"]("FALSE")
     )
     graph.attach(label+"_FALSE", END.label)
+    graph.attach(
+        label,
+        label+"_UNKNOWN",
+        summary="True",
+        details=label+" is false.",
+        feedback_string_generator=symbolic_feedback_string_generators["GENERIC"]("FALSE")
+    )
+    graph.attach(label+"_UNKNOWN", END.label)
     return graph


app/docs/user.md

Lines changed: 6 additions & 2 deletions
@@ -43,8 +43,12 @@ The `criteria` parameter reserves `response` and `answer` as keywords that will

 | Name | Syntax | Description | Example |
 |-------|:-------------------------------|:------------------------------------|:--------------------|
-| EQUAL | `EXPRESSION = EXPRESSION` | Checks if the expressions are equal | `answer = response` |
-| WHERE | `EXPRESSION = EXPRESSION where EXPRESSION = EXPRESSION, ... , EXPRESSION = EXPRESSION` | Checks if the equality on the left side of `where` are equal if the equalities in the comma-separated list on the right side of `where` | `answer = response` |
+| EQUAL | `EXPRESSION = EXPRESSION` | Checks if the expressions are equal | `answer = response` - Default way to check equality of expressions |
+| ORDER | `EXPRESSION ORDER EXPRESSION` | Checks if the expressions satisfy the given order relation. The ORDER operator can be `>`, `<`, `>=` or `<=` | `answer > response` - Checks if the answer is greater than the response |
+| WHERE | `EXPRESSION = EXPRESSION where EXPRESSION = EXPRESSION; ... ; EXPRESSION = EXPRESSION` | Checks if the equality on the left side of `where` holds when the equalities in the semicolon-separated list on the right side of `where` are substituted into it | `answer = response where x = 0` - Checks if the curves given by the answer and the response intersect when $x=0$. |
+| WRITTEN_AS | `EXPRESSION written as EXPRESSION` | Syntactical comparison, checks if the two expressions are written the same way. | `response written as answer` - Checks if the response is written in the same form as the answer (e.g. if the answer is `(x+1)(x+2)` then the response `x^2+3x+2` will not satisfy the criterion but `(x+3)(x+4)` will). |
+| PROPORTIONAL | `EXPRESSION proportional to EXPRESSION` | Checks if one expression is equivalent to the other expression multiplied by some constant. | `answer proportional to response` |
+| CONTAINS | `EXPRESSION contains EXPRESSION` | Checks if the left expression has the right expression as a subexpression. | `response contains x` - Checks if the response contains the symbol x |


 ## `elementary_functions`
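As a usage illustration of the keywords in the criteria table above: criteria are passed as a comma-separated string through the `criteria` parameter, as in this sketch mirroring the tests added in app/tests/example_tests.py (the response and answer strings here are illustrative):

# evaluation_function is the entry point defined in app/evaluation.py,
# imported the same way the example tests import it.
params = {
    "strict_syntax": False,
    "elementary_functions": True,
    "criteria": "response proportional to answer, response contains x",
}
result = evaluation_function("2x + 2y", "x + y", params, include_test_data=True)
print(result["is_correct"], result["tags"])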

app/evaluation.py

Lines changed: 23 additions & 12 deletions
@@ -55,7 +55,7 @@ def determine_context(parameters):
         input_symbols_reserved_codes.append(input_symbol[0])
         input_symbols_reserved_aliases += [ip for ip in input_symbol[1] if len(ip.strip()) > 0]

-    reserved_keywords_codes = {"where", "written as"}
+    reserved_keywords_codes = {"where", "written as", "contains", "proportional to"}
     reserved_keywords_aliases = {"plus_minus", "minus_plus"}
     for re in parameters["reserved_expressions_strings"].values():
         reserved_keywords_aliases = reserved_keywords_aliases.union(set(re.keys()))
@@ -205,6 +205,7 @@ def generate_feedback(main_criteria, criteria_graphs, evaluation_parameters):
     response = evaluation_parameters["reserved_expressions"]["response"]
     criteria_feedback = set()
     is_correct = True
+    custom_feedback = evaluation_parameters.get("custom_feedback",{})
     for (criterion_identifier, graph) in criteria_graphs.items():
         # TODO: Find better way to identify main criteria for criteria graph
         main_criteria = criterion_identifier+"_TRUE"
@@ -219,7 +220,7 @@ def generate_feedback(main_criteria, criteria_graphs, evaluation_parameters):
         # assumption that some way to return partial feedback
         # before script has executed completely will be available
         # in the future
-        evaluation_result.add_feedback_from_tags(criteria_feedback, graph)
+        evaluation_result.add_feedback_from_tags(criteria_feedback, graph, custom_feedback=custom_feedback)
     evaluation_result.is_correct = is_correct
     return

@@ -239,12 +240,6 @@ def evaluation_function(response, answer, params, include_test_data=False) -> di

     parameters = deepcopy(params)

-    # CONSIDER: Can this be moved into the preprocessing procedures in a consistent way?
-    # Can it be turned into its own context? Or moved into the determine_context procedure?
-    # What solution will be most consistently reusable?
-    if parameters.get("is_latex", False):
-        response = parse_latex(response, parameters.get("symbols", {}), False)
-
     reserved_expressions_strings = {
         "learner": {
             "response": response
@@ -269,13 +264,31 @@ def evaluation_function(response, answer, params, include_test_data=False) -> di
     else:
         evaluation_result.latex = preview["latex"]
         evaluation_result.simplified = preview["sympy"]
+
+    reserved_expressions_keys = list(reserved_expressions_strings["learner"].keys())+list(reserved_expressions_strings["task"].keys())
     parameters.update(
         {
             "context": context,
-            "parsing_parameters": context["parsing_parameters_generator"](parameters),
+            "reserved_keywords": context["reserved_keywords"]+reserved_expressions_keys,
+        }
+    )
+    parsing_parameters = context["parsing_parameters_generator"](parameters, unsplittable_symbols=reserved_expressions_keys)
+    parameters.update(
+        {
+            "parsing_parameters": parsing_parameters,
         }
     )

+    # CONSIDER: Can this be moved into the preprocessing procedures in a consistent way?
+    # Can it be turned into its own context? Or moved into the determine_context procedure?
+    # What solution will be most consistently reusable?
+    if parameters.get("is_latex", False):
+        parameters["reserved_expressions_strings"]["learner"].update(
+            {
+                "response": parse_latex(response, parameters.get("symbols", {}), False, parameters=parameters),
+            }
+        )
+
     # FIXME: Move this into expression_utilities
     if params.get("strict_syntax", True):
         if "^" in response:
@@ -287,13 +300,10 @@ def evaluation_function(response, answer, params, include_test_data=False) -> di
     if reserved_expressions_success is False:
         return evaluation_result.serialise(include_test_data)
     reserved_expressions_parsed = {**reserved_expressions["learner"], **reserved_expressions["task"]}
-    parameters.update({"reserved_keywords": parameters["context"]["reserved_keywords"]+list(reserved_expressions_parsed.keys())})

     criteria_parser = context["generate_criteria_parser"](reserved_expressions)
     criteria = create_criteria_dict(criteria_parser, parameters)

-    parsing_parameters = parameters["context"]["parsing_parameters_generator"](parameters, unsplittable_symbols=list(reserved_expressions_parsed.keys()))
-
     evaluation_parameters = FrozenValuesDictionary(
         {
             "reserved_expressions_strings": reserved_expressions_strings,
@@ -308,6 +318,7 @@ def evaluation_function(response, answer, params, include_test_data=False) -> di
             "numerical": parameters.get("numerical", False),
             "atol": parameters.get("atol", 0),
             "rtol": parameters.get("rtol", 0),
+            "custom_feedback": parameters.get("custom_feedback",{}),
         }
     )
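The new `custom_feedback` parameter lets a task author override the feedback string attached to a particular tag; the keys follow the `<criterion>_<RESULT>` pattern used in the tests further down. A sketch with illustrative message texts:

params = {
    "strict_syntax": False,
    "elementary_functions": True,
    "criteria": "answer <= response",
    "custom_feedback": {
        "answer <= response_TRUE": "Your expression is at least as large as the expected one.",
        "answer <= response_FALSE": "Your expression can be smaller than the expected one.",
    },
}
result = evaluation_function("x^2 + 1", "x^2", params, include_test_data=True)
print(result["feedback"])  # contains the custom string for whichever tag fired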

app/evaluation_tests.py

Lines changed: 3 additions & 3 deletions
@@ -23,10 +23,10 @@ class TestEvaluationFunction():
     """

     # Import tests that makes sure that mathematical expression comparison works as expected
-    from .tests.symbolic_evaluation_tests import TestEvaluationFunction as TestSymbolicComparison
+    #from .tests.symbolic_evaluation_tests import TestEvaluationFunction as TestSymbolicComparison

     # Import tests that makes sure that physical quantities are handled as expected
-    from .tests.physical_quantity_evaluation_tests import TestEvaluationFunction as TestQuantities
+    #from .tests.physical_quantity_evaluation_tests import TestEvaluationFunction as TestQuantities

     # Import tests that corresponds to examples in documentation and examples module
     from .tests.example_tests import TestEvaluationFunction as TestExamples
@@ -81,4 +81,4 @@ def test_CHEM40002_1_5_instance_2024_25(self):


 if __name__ == "__main__":
-    pytest.main(['-k not slow', '--tb=line', '--durations=10', os.path.abspath(__file__)])
+    pytest.main(['-xk not slow', '--tb=short', '--durations=10', os.path.abspath(__file__)])
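For reference, the updated pytest.main call is roughly equivalent to the command line below: -x stops at the first failure, -k "not slow" deselects tests marked slow, --tb=short shortens tracebacks and --durations=10 lists the ten slowest tests.

pytest -x -k "not slow" --tb=short --durations=10 app/evaluation_tests.py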

app/feedback/symbolic.py

Lines changed: 5 additions & 0 deletions
@@ -69,3 +69,8 @@
     "EXPONENTIAL": "Response and answer are both written on exponential form.", # None,
     "UNKNOWN": "The response is not written on the expected form.",
 }[tag]
+feedback_generators["PROPORTIONAL_TO"] = lambda tag: lambda inputs: {
+    "TRUE": None,
+    "FALSE": None,
+    "UNKNOWN": None,
+}[tag]
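The new PROPORTIONAL_TO entry currently maps every outcome to None, i.e. no canned feedback text yet. A filled-in version could look like the sketch below; the message strings are illustrative and not part of the commit:

feedback_generators["PROPORTIONAL_TO"] = lambda tag: lambda inputs: {
    "TRUE": None,  # a correct response usually needs no extra message
    "FALSE": "The response is not a constant multiple of the expected expression.",
    "UNKNOWN": "It could not be determined whether the response is proportional to the expected expression.",
}[tag]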

app/tests/example_tests.py

Lines changed: 136 additions & 0 deletions
@@ -507,6 +507,142 @@ def test_syntactical_comparison(self, response, answer, criteria, value, feedbac
         assert result["is_correct"] is value
         assert set(feedback_tags) == set(result["tags"])

+    @pytest.mark.parametrize(
+        "response, value, tags",
+        [
+            (
+                "2a+2b+2c",
+                True,
+                [
+                    "response proportional to answer_TRUE",
+                ],
+            ),
+            (
+                "a+2b+3c",
+                False,
+                [
+                    "response proportional to answer_FALSE",
+                ],
+            ),
+            (
+                "pi*(a+b+c)",
+                True,
+                [
+                    "response proportional to answer_TRUE",
+                ],
+            ),
+            (
+                "x*(a+b+c)",
+                False,
+                [
+                    "response proportional to answer_FALSE",
+                ],
+            ),
+        ]
+    )
+    def test_custom_comparison_with_criteria_proportional(self, response, value, tags):
+        params = {
+            "strict_syntax": False,
+            "elementary_functions": True,
+            "criteria": "response proportional to answer",
+        }
+        answer = "a+b+c"
+        result = evaluation_function(response, answer, params, include_test_data=True)
+        assert result["is_correct"] is value
+        assert set(tags) == set(result["tags"])
+
+    @pytest.mark.parametrize(
+        "response, value, tags",
+        [
+            (
+                "2*x^2+0.5+0.25*sin(x)^2",
+                False,
+                [
+                    "answer <= response_TRUE",
+                    "2+answer > response_UNKNOWN",
+                ]
+            ),
+        ]
+    )
+    def test_custom_comparison_with_criteria_order(self, response, value, tags):
+        params = {
+            "strict_syntax": False,
+            "elementary_functions": True,
+            "criteria": "answer <= response, 2+answer > response",
+            "symbol_assumptions": "('x', 'real')",
+        }
+        answer = "2*x^2"
+        result = evaluation_function(response, answer, params, include_test_data=True)
+        assert result["is_correct"] is value
+        assert set(tags) == set(result["tags"])
+
+    @pytest.mark.parametrize(
+        "response, value, tags",
+        [
+            (
+                "pi*n",
+                True,
+                [
+                    "sin(response)=0_TRUE",
+                    "sin(response)=0_SAME_SYMBOLS_TRUE",
+                    "response contains n_TRUE",
+                ],
+            ),
+        ]
+    )
+    def test_custom_comparison_with_criteria_contains(self, response, value, tags):
+        params = {
+            "strict_syntax": False,
+            "elementary_functions": True,
+            "criteria": "sin(response)=0, response contains n",
+            "symbols": {
+                "n": {
+                    "latex": r"\(n\)",
+                    "aliases": ["i", "k", "N", "I", "K"],
+                },
+            },
+            "symbol_assumptions": "('n', 'integer')"
+        }
+        answer = "0"
+        result = evaluation_function(response, answer, params, include_test_data=True)
+        assert result["is_correct"] is value
+        assert set(tags) == set(result["tags"])
+
+    @pytest.mark.parametrize(
+        "response, answer, criteria, value, feedback_tags, custom_feedback, additional_params",
+        [
+            (
+                "2*x^2+0.5+0.25*sin(x)^2",
+                "2x^2",
+                "answer <= response, 2+answer > response",
+                False,
+                [
+                    "answer <= response_TRUE",
+                    "2+answer > response_UNKNOWN",
+                ],
+                {
+                    "answer <= response_TRUE": "AAA",
+                    "2+answer > response_UNKNOWN": "BBB",
+                },
+                {
+                    "symbol_assumptions": "('x', 'real')",
+                }
+            ),
+        ]
+    )
+    def test_criteria_custom_feedback(self, response, answer, criteria, value, feedback_tags, custom_feedback, additional_params):
+        params = {
+            "strict_syntax": False,
+            "elementary_functions": True,
+            "criteria": criteria,
+            "custom_feedback": custom_feedback,
+        }
+        params.update(additional_params)
+        result = evaluation_function(response, answer, params, include_test_data=True)
+        assert result["is_correct"] is value
+        assert set(feedback_tags) == set(result["tags"])
+        for string in custom_feedback.values():
+            assert string in result["feedback"]

 if __name__ == "__main__":
     pytest.main(['-sk not slow', "--tb=line", os.path.abspath(__file__)])
