Skip to content

Commit bb64323

Browse files
Merge pull request #46 from lambda-feedback/test-tr124-restructuring
Test tr124 restructuring
2 parents 169b43f + fb1d3cc commit bb64323

8 files changed

Lines changed: 236 additions & 68 deletions

File tree

app/context/physical_quantity.py

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -279,12 +279,12 @@ def quantity_match(unused_inputs):
279279
# numerical tolerances can be applied appropriately
280280
if parsing_params.get('rtol', 0) > 0 or parsing_params.get('atol', 0) > 0:
281281
if (lhs_string == 'answer' and rhs_string == 'response') or (lhs_string == 'response' and rhs_string == 'answer'):
282-
ans = parameters["reserved_expressions"]["answer"]["standard"]["value"]
283-
res = parameters["reserved_expressions"]["response"]["standard"]["value"]
282+
ans = parameters["reserved_expressions"]["answer"]["standard"]["value"].simplify()
283+
res = parameters["reserved_expressions"]["response"]["standard"]["value"].simplify()
284284
if (ans is not None and ans.is_constant()) and (res is not None and res.is_constant()):
285-
if parsing_params.get('rtol', 0) > 0:
285+
if parsing_params.get('rtol', 0) > 0 and (ans != 0):
286286
value_match = bool(abs(float((ans-res)/ans)) < parsing_params['rtol'])
287-
elif parsing_params.get('atol', 0) > 0:
287+
elif parsing_params.get('atol', 0) > 0 or (ans == 0):
288288
value_match = bool(abs(float(ans-res)) < parsing_params['atol'])
289289

290290
substitutions = [(key, expr["standard"]["unit"]) for (key, expr) in reserved_expressions]
@@ -541,20 +541,6 @@ def expression_preprocess(name, expr, parameters):
541541
expr = expr[0:match_content.span()[0]]+match_content.group().replace("*", " ")+expr[match_content.span()[1]:]
542542
match_content = re.search(search_string, expr)
543543

544-
prefixes = set(x[0] for x in set_of_SI_prefixes)
545-
fundamental_units = set(x[0] for x in set_of_SI_base_unit_dimensions)
546-
units_string = parameters["units_string"]
547-
valid_units = set()
548-
for key in units_sets_dictionary.keys():
549-
if key in units_string:
550-
for unit in units_sets_dictionary[key]:
551-
valid_units = valid_units.union(set((unit[0], unit[1])+unit[3]+unit[4]))
552-
dimensions = set(x[2] for x in set_of_SI_base_unit_dimensions)
553-
unsplittable_symbols = list(prefixes | fundamental_units | valid_units | dimensions)
554-
preprocess_parameters = deepcopy(parameters)
555-
# TODO: find better way to prevent preprocessing from mangling reserved keywords for physical quantity criteria
556-
preprocess_parameters.update({"reserved_keywords": preprocess_parameters.get("reserved_keywords", [])+unsplittable_symbols+['matches']})
557-
expr = substitute_input_symbols(expr.strip(), preprocess_parameters)[0]
558544
success = True
559545
return success, expr, None
560546

@@ -572,7 +558,9 @@ def feedback_string_generator(tags, graph, parameters_dict):
572558
def parsing_parameters_generator(params, unsplittable_symbols=tuple(), symbol_assumptions=tuple()):
573559
parsing_parameters = create_sympy_parsing_params(params)
574560
parsing_parameters.update({
575-
"strictness": params.get("strictness", "natural")
561+
"strictness": params.get("strictness", "natural"),
562+
"rtol": float(params.get("rtol", 0)),
563+
"atol": float(params.get("atol", 0)),
576564
})
577565
return parsing_parameters
578566

app/context/symbolic.py

Lines changed: 76 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -101,46 +101,60 @@ def do_comparison(comparison_symbol, expression):
101101

102102
def check_equality(criterion, parameters_dict, local_substitutions=[]):
103103
lhs_expr, rhs_expr = create_expressions_for_comparison(criterion, parameters_dict, local_substitutions)
104-
result = do_comparison(criterion.content, lhs_expr-rhs_expr)
105-
106-
# TODO: Make numerical comparison its own context
107-
if result is False:
108-
error_below_rtol = None
109-
error_below_atol = None
110-
if parameters_dict.get("numerical", False) or float(parameters_dict.get("rtol", 0)) > 0 or float(parameters_dict.get("atol", 0)) > 0:
111-
# REMARK: 'pi' should be a reserved symbol but it is sometimes not treated as one, possibly because of input symbols.
112-
# The two lines below this comments fixes the issue but a more robust solution should be found for cases where there
113-
# are other reserved symbols.
114-
def replace_pi(expr):
115-
pi_symbol = pi
116-
for s in expr.free_symbols:
117-
if str(s) == 'pi':
118-
pi_symbol = s
119-
return expr.subs(pi_symbol, float(pi))
120-
# NOTE: This code assumes that the left hand side is the response and the right hand side is the answer
121-
# Separates LHS and RHS, parses and evaluates them
122-
res = N(replace_pi(lhs_expr))
123-
ans = N(replace_pi(rhs_expr))
124-
if float(parameters_dict.get("atol", 0)) > 0:
125-
try:
126-
absolute_error = abs(float(ans-res))
127-
error_below_atol = bool(absolute_error < float(parameters_dict["atol"]))
128-
except TypeError:
129-
error_below_atol = None
130-
else:
131-
error_below_atol = True
132-
if float(parameters_dict.get("rtol", 0)) > 0:
133-
try:
134-
relative_error = abs(float((ans-res)/ans))
135-
error_below_rtol = bool(relative_error < float(parameters_dict["rtol"]))
136-
except TypeError:
137-
error_below_rtol = None
138-
else:
139-
error_below_rtol = True
140-
if error_below_atol is None or error_below_rtol is None:
141-
result = False
142-
elif error_below_atol is True and error_below_rtol is True:
143-
result = True
104+
if isinstance(lhs_expr, Equality) and not isinstance(rhs_expr, Equality):
105+
result = False
106+
elif not isinstance(lhs_expr, Equality) and isinstance(rhs_expr, Equality):
107+
result = False
108+
else:
109+
result = do_comparison(criterion.content, lhs_expr-rhs_expr)
110+
# There are some types of expression, e.g. those containing hyperbolic trigonometric functions, that can behave
111+
# unpredictably when simplification is applied. For that reason we check several different combinations of
112+
# simplifications here in order to reduce the likelihood of false negatives.
113+
if result is False:
114+
result = do_comparison(criterion.content, lhs_expr-rhs_expr.simplify())
115+
if result is False:
116+
result = do_comparison(criterion.content, lhs_expr.simplify()-rhs_expr)
117+
if result is False:
118+
result = do_comparison(criterion.content, lhs_expr.simplify()-rhs_expr.simplify())
119+
120+
# TODO: Make numerical comparison its own context
121+
if result is False:
122+
error_below_rtol = None
123+
error_below_atol = None
124+
if parameters_dict.get("numerical", False) or float(parameters_dict.get("rtol", 0)) > 0 or float(parameters_dict.get("atol", 0)) > 0:
125+
# REMARK: 'pi' should be a reserved symbol but it is sometimes not treated as one, possibly because of input symbols.
126+
# The two lines below this comments fixes the issue but a more robust solution should be found for cases where there
127+
# are other reserved symbols.
128+
def replace_pi(expr):
129+
pi_symbol = pi
130+
for s in expr.free_symbols:
131+
if str(s) == 'pi':
132+
pi_symbol = s
133+
return expr.subs(pi_symbol, float(pi))
134+
# NOTE: This code assumes that the left hand side is the response and the right hand side is the answer
135+
# Separates LHS and RHS, parses and evaluates them
136+
res = N(replace_pi(lhs_expr))
137+
ans = N(replace_pi(rhs_expr))
138+
if float(parameters_dict.get("atol", 0)) > 0:
139+
try:
140+
absolute_error = abs(float(ans-res))
141+
error_below_atol = bool(absolute_error < float(parameters_dict["atol"]))
142+
except TypeError:
143+
error_below_atol = None
144+
else:
145+
error_below_atol = True
146+
if float(parameters_dict.get("rtol", 0)) > 0:
147+
try:
148+
relative_error = abs(float((ans-res)/ans))
149+
error_below_rtol = bool(relative_error < float(parameters_dict["rtol"]))
150+
except TypeError:
151+
error_below_rtol = None
152+
else:
153+
error_below_rtol = True
154+
if error_below_atol is None or error_below_rtol is None:
155+
result = False
156+
elif error_below_atol is True and error_below_rtol is True:
157+
result = True
144158

145159
return result
146160

@@ -252,7 +266,12 @@ def set_equivalence(unused_input):
252266
result = None
253267
for j, answer in enumerate(answer_list):
254268
current_pair = [("response", response), ("answer", answer)]
255-
result = check_equality(criterion, parameters_dict, local_substitutions=current_pair)
269+
if isinstance(response, Equality) and not isinstance(answer, Equality):
270+
result = False
271+
elif not isinstance(response, Equality) and isinstance(answer, Equality):
272+
result = False
273+
else:
274+
result = check_equality(criterion, parameters_dict, local_substitutions=current_pair)
256275
if result is True:
257276
matches["responses"][i] = True
258277
matches["answers"][j] = True
@@ -397,6 +416,14 @@ def same_symbols(unused_input):
397416
details="Checks if "+str(lhs)+" is equivalent to "+str(rhs)+".",
398417
evaluate=equality_equivalence
399418
)
419+
graph.attach(
420+
label,
421+
label+"_UNKNOWN",
422+
summary="Cannot determine if "+str(lhs)+" is equivalent to "+str(rhs),
423+
details="Cannot determine if "+str(lhs)+" is equivalent to "+str(rhs)+".",
424+
feedback_string_generator=symbolic_feedback_string_generators["INTERNAL"]("EQUALITY_EQUIVALENCE_UNKNOWN")
425+
)
426+
graph.attach(label+"_UNKNOWN", END.label)
400427
graph.attach(
401428
label,
402429
label+"_TRUE",
@@ -474,6 +501,14 @@ def same_symbols(unused_input):
474501
feedback_string_generator=symbolic_feedback_string_generators["response=answer"]("FALSE")
475502
)
476503
graph.attach(label+"_FALSE", END.label)
504+
graph.attach(
505+
label,
506+
label+"_UNKNOWN",
507+
summary="Cannot detrmine if "+str(lhs)+"="+str(rhs),
508+
details="Cannot detrmine if "+str(lhs)+" is equal to "+str(rhs)+".",
509+
feedback_string_generator=symbolic_feedback_string_generators["response=answer"]("UNKNOWN")
510+
)
511+
graph.attach(label+"_UNKNOWN", END.label)
477512
return graph
478513

479514

app/evaluation_tests.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,13 @@ class TestEvaluationFunction():
2323
"""
2424

2525
# Import tests that makes sure that mathematical expression comparison works as expected
26-
#from .tests.symbolic_evaluation_tests import TestEvaluationFunction as TestSymbolicComparison
26+
from .tests.symbolic_evaluation_tests import TestEvaluationFunction as TestSymbolicComparison
2727

2828
# Import tests that makes sure that physical quantities are handled as expected
29-
#from .tests.physical_quantity_evaluation_tests import TestEvaluationFunction as TestQuantities
29+
from .tests.physical_quantity_evaluation_tests import TestEvaluationFunction as TestQuantities
3030

3131
# Import tests that corresponds to examples in documentation and examples module
32-
from .tests.example_tests import TestEvaluationFunction as TestExamples
32+
#from .tests.example_tests import TestEvaluationFunction as TestExamples
3333

3434
def test_eval_function_can_handle_latex_input(self):
3535
response = r"\sin x + x^{7}"

app/feedback/symbolic.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
"EQUALITY_NOT_EXPRESSION": "The response was an equality but was expected to be an expression.",
2626
"EQUALITIES_EQUIVALENT": None,
2727
"EQUALITIES_NOT_EQUIVALENT": "The response is not the expected equality.",
28+
"EQUALITY_EQUIVALENCE_UNKNOWN": "Cannot determine if the given equality is equivalent to the expected equality.",
2829
"WITHIN_TOLERANCE": None, # "The difference between the response the answer is within specified error tolerance.",
2930
"NOT_NUMERICAL": None, # "The expression cannot be evaluated numerically.",
3031
}[tag]

app/preview_implementations/symbolic_preview.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ def preview_function(response: str, params: Params) -> Result:
108108
sympy_out = []
109109
for expression in expression_list:
110110
latex_out.append(sympy_to_latex(expression, symbols, settings={"mul_symbol": r" \cdot "}))
111-
sympy_out.append(str(expression))
111+
sympy_out.append(response)
112112

113113
if len(sympy_out) == 1:
114114
sympy_out = sympy_out[0]

app/tests/physical_quantity_evaluation_tests.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,66 @@ def test_legacy_strictness(self):
325325
result = evaluation_function(res, ans, params, include_test_data=True)
326326
assert result["is_correct"] is True
327327

328+
def test_physical_quantity_with_rtol(self):
329+
ans = "7500 m/s"
330+
res = "7504.1 m/s"
331+
params = {
332+
'rtol': 0.05,
333+
'strict_syntax': False,
334+
'physical_quantity': True,
335+
'elementary_functions': True,
336+
}
337+
result = evaluation_function(res, ans, params, include_test_data=True)
338+
assert result["is_correct"] is True
339+
340+
def test_physical_quantity_with_atol(self):
341+
ans = "7500 m/s"
342+
res = "7504.1 m/s"
343+
params = {
344+
'atol': 5,
345+
'strict_syntax': False,
346+
'physical_quantity': True,
347+
'elementary_functions': True,
348+
}
349+
result = evaluation_function(res, ans, params, include_test_data=True)
350+
assert result["is_correct"] is True
351+
352+
# def test_rad_vs_Hz(self):
353+
# ans = "28.53 rad/s"
354+
# res = "4.5405 H"
355+
# params = {
356+
# 'rtol': 0.03,
357+
# 'strict_syntax': False,
358+
# 'physical_quantity': True,
359+
# 'elementary_functions': True,
360+
# }
361+
# result = evaluation_function(res, ans, params, include_test_data=True)
362+
# assert result["is_correct"] is True
363+
364+
def test_tolerance_given_as_string(self):
365+
ans = "4.52 kg"
366+
res = "13.74 kg"
367+
params = {
368+
'rtol': '0.015',
369+
'strict_syntax': False,
370+
'physical_quantity': True,
371+
'elementary_functions': True,
372+
}
373+
result = evaluation_function(res, ans, params, include_test_data=True)
374+
assert result["is_correct"] is False
375+
376+
def test_answer_zero_value(self):
377+
ans = "0 m"
378+
res = "1 m"
379+
params = {
380+
'rtol': 0,
381+
'atol': 0,
382+
'strict_syntax': False,
383+
'physical_quantity': True,
384+
'elementary_functions': True,
385+
}
386+
result = evaluation_function(res, ans, params, include_test_data=True)
387+
assert result["is_correct"] is False
328388

329389
if __name__ == "__main__":
330390
pytest.main(['-xk not slow', "--no-header", os.path.abspath(__file__)])

app/tests/symbolic_evaluation_tests.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1881,5 +1881,84 @@ def test_alternatives_to_input_symbols_takes_priority_over_elementary_function_a
18811881
result = evaluation_function(response, answer, params)
18821882
assert result["is_correct"] is False
18831883

1884+
def test_elementary_function_symbol_with_implicit_multiplication_on_both_sides(self):
1885+
response = "momega^3 l^2/2(exp(2omegat)-exp(-2omegat))"
1886+
answer = "m*omega^3*l^2*sinh(2*omega*t)"
1887+
params = {
1888+
'strict_syntax': False,
1889+
'elementary_functions': True,
1890+
}
1891+
result = evaluation_function(response, answer, params)
1892+
assert result["is_correct"] is True
1893+
1894+
def test_response_for_which_correctness_cannot_be_determined(self):
1895+
response = "2 pi e^{-a |omega|}" # The expression in {...} is interpreted as elements in a set instead of a math expression
1896+
answer = "2 pi e^(-a|omega|)"
1897+
params = {
1898+
'atol': 0,
1899+
'rtol': 0,
1900+
'strict_syntax': False,
1901+
'physical_quantity': False,
1902+
'elementary_functions': True,
1903+
}
1904+
result = evaluation_function(response, answer, params)
1905+
assert result["is_correct"] is False
1906+
1907+
def test_unexpected_equalities_in_response_that_generates_set(self):
1908+
response = "z= plus_minus 1 + 2*i" # plus_minus generates a set of two equalities
1909+
answer = "2i plus_minus 1" # plus_minus generates a set of two expressions
1910+
params = {
1911+
'atol': 0,
1912+
'rtol': 0,
1913+
'strict_syntax': False,
1914+
'physical_quantity': False,
1915+
'elementary_functions': True,
1916+
}
1917+
result = evaluation_function(response, answer, params)
1918+
assert result["is_correct"] is False
1919+
1920+
def test_infinity_alias(self):
1921+
response = "2.694"
1922+
answer = "infinity"
1923+
params = {
1924+
'strict_syntax': False,
1925+
'elementary_functions': True,
1926+
}
1927+
result = evaluation_function(response, answer, params)
1928+
assert result["is_correct"] is False
1929+
1930+
def test_equality_expression_mismatch_with_custom_criteria(self):
1931+
response = "x-0.883x^2=0.251X^3"
1932+
answer = "0.4842x-0.1163x^3"
1933+
params = {
1934+
'atol': 0,
1935+
'rtol': 0.03,
1936+
'criteria': 'response=answer where x=0, diff(response,x)=diff(answer,x) where x=0, diff(response,x,2)=diff(answer,x,2) where x=0, diff(response,x,3)=diff(answer,x,3) where x=0',
1937+
'strict_syntax': False,
1938+
'physical_quantity': False,
1939+
'elementary_functions': True,
1940+
}
1941+
result = evaluation_function(response, answer, params)
1942+
assert result["is_correct"] is False
1943+
1944+
def test_input_symbols_takes_priority_when_containing_elementary_function_names_as_substring(self):
1945+
response = "Vmax - Vmaxe^-(t/tau)"
1946+
answer = "Vmax*(1-exp(-t/tau))"
1947+
params = {
1948+
"atol": 0,
1949+
"rtol": 0,
1950+
"strict_syntax": False,
1951+
"elementary_functions": True,
1952+
"physical_quantity": False,
1953+
"symbols": {
1954+
"Vmax": {"aliases": ["V_max"], "latex": r"$V_{max}$"},
1955+
"RS": {"aliases": ["R_S", "rs"], "latex": r"$R_S$"},
1956+
"RF": {"aliases": ["R_F", "rf"], "latex": r"$R_f$"},
1957+
"tau": {"aliases": [], "latex": r"$\tau$"},
1958+
},
1959+
}
1960+
result = evaluation_function(response, answer, params)
1961+
assert result["is_correct"] is True
1962+
18841963
if __name__ == "__main__":
18851964
pytest.main(['-xk not slow', "--tb=line", '--durations=10', os.path.abspath(__file__)])

0 commit comments

Comments
 (0)