Skip to content

Commit f8ebd41

Browse files
committed
split
1 parent 1e9ab7e commit f8ebd41

1 file changed

Lines changed: 24 additions & 6 deletions

File tree

tests/unit/rl_utils_test.py

Lines changed: 24 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -111,25 +111,33 @@ class TestNormalizeFinalAnswer(unittest.TestCase):
111111
"""Tests for utils_rl.normalize_final_answer."""
112112

113113
@pytest.mark.cpu_only
114-
def test_normalize_final_answer(self):
114+
def test_comma_boxed_and_currency(self):
115115
# Comma-separated numbers, \\boxed{}, and leading $ are all normalized to plain integers
116116
self.assertEqual(utils_rl.normalize_final_answer("1,000"), "1000")
117117
self.assertEqual(utils_rl.normalize_final_answer("$1,000"), "1000")
118118
self.assertEqual(utils_rl.normalize_final_answer("\\boxed{1,000}"), "1000")
119119

120+
@pytest.mark.cpu_only
121+
def test_equation_splitting_and_unit_removal(self):
120122
# Expressions with '=' are split on '='; trailing unit words are stripped
121123
self.assertEqual(utils_rl.normalize_final_answer("x = 10"), "10")
122124
self.assertEqual(utils_rl.normalize_final_answer("total = 100 meters"), "100")
123125
self.assertEqual(utils_rl.normalize_final_answer("42 mph"), "42")
124126

127+
@pytest.mark.cpu_only
128+
def test_latex_wrappers(self):
125129
# \\text{}, \\textbf{}, and \\overline{} wrappers are removed, leaving inner content
126130
self.assertEqual(utils_rl.normalize_final_answer("\\text{hello}"), "hello")
127131
self.assertEqual(utils_rl.normalize_final_answer("\\textbf{42}"), "42")
128132
self.assertEqual(utils_rl.normalize_final_answer("\\overline{AB}"), "AB")
129133

134+
@pytest.mark.cpu_only
135+
def test_dollar_math_extraction(self):
130136
# Content inside $...$ is extracted
131137
self.assertEqual(utils_rl.normalize_final_answer("The answer is $\\frac{1}{2}$"), "\\frac{1}{2}")
132138

139+
@pytest.mark.cpu_only
140+
def test_shorthand_frac_and_sqrt(self):
133141
# Shorthand \\fracab and \\sqrta are expanded to their full LaTeX forms
134142
self.assertEqual(utils_rl.normalize_final_answer("\\fracab"), "\\frac{a}{b}")
135143
self.assertEqual(utils_rl.normalize_final_answer("\\sqrta"), "\\sqrt{a}")
@@ -150,22 +158,32 @@ def _score(self, completion):
150158
return utils_rl.match_format_approximately(None, completion, self.config)
151159

152160
@pytest.mark.cpu_only
153-
def test_partial_format_scores(self):
154-
"""Scores cover the full range depending on how many tags appear exactly once."""
161+
def test_score_all_tags_present_exactly_once(self):
155162
# All four tags present exactly once -> 4 * 0.5 = 2.0
156163
self.assertEqual(self._score(["<reasoning>think</reasoning><answer>42</answer>"])[0], 2.0)
164+
165+
@pytest.mark.cpu_only
166+
def test_score_no_tags_present(self):
157167
# No tags at all -> 4 * -0.5 = -2.0
158168
self.assertEqual(self._score(["The answer is 42."])[0], -2.0)
169+
170+
@pytest.mark.cpu_only
171+
def test_score_only_answer_tags_present(self):
159172
# Only <answer>...</answer> present -> 2 * 0.5 + 2 * -0.5 = 0.0
160173
self.assertEqual(self._score(["<answer>42</answer>"])[0], 0.0)
174+
175+
@pytest.mark.cpu_only
176+
def test_score_duplicate_reasoning_start_tag(self):
161177
# Duplicate <reasoning> tag -> 3 * 0.5 + 1 * -0.5 = 1.0
162178
self.assertEqual(self._score(["<reasoning><reasoning>think</reasoning><answer>42</answer>"])[0], 1.0)
179+
180+
@pytest.mark.cpu_only
181+
def test_score_multiple_completions(self):
163182
# Multiple completions at once -> one score per entry
164-
multi_completions = [
183+
scores = self._score([
165184
"<reasoning>think</reasoning><answer>42</answer>", # 2.0
166185
"no tags here", # -2.0
167-
]
168-
scores = self._score(multi_completions)
186+
])
169187
self.assertEqual(len(scores), 2)
170188
self.assertEqual(scores[0], 2.0)
171189
self.assertEqual(scores[1], -2.0)

0 commit comments

Comments
 (0)