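Fix rl_utils_test: replace stray string-literal statements with comments and merge the match_format_approximately score tests into one test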

hengtaoguo · hengtaoguo · commit da0bda8ad2cb · 2026-03-11T18:50:14.000Z
diff --git a/tests/unit/rl_utils_test.py b/tests/unit/rl_utils_test.py
@@ -112,30 +112,30 @@ class TestNormalizeFinalAnswer(unittest.TestCase):
 
   @pytest.mark.cpu_only
   def test_normalize_final_answer(self):
-    """Comma-separated numbers, \\boxed{}, and leading $ are all normalized to plain integers."""
+    # Comma-separated numbers, \boxed{}, and leading $ are all normalized to plain integers
     self.assertEqual(utils_rl.normalize_final_answer("1,000"), "1000")
     self.assertEqual(utils_rl.normalize_final_answer("$1,000"), "1000")
     self.assertEqual(utils_rl.normalize_final_answer("\\boxed{1,000}"), "1000")
 
-    """Expressions with '=' are split on '='; trailing unit words are stripped."""
+    # Expressions with '=' are split on '='; trailing unit words are stripped
     self.assertEqual(utils_rl.normalize_final_answer("x = 10"), "10")
     self.assertEqual(utils_rl.normalize_final_answer("total = 100 meters"), "100")
     self.assertEqual(utils_rl.normalize_final_answer("42 mph"), "42")
 
-    """\\text{}, \\textbf{}, and \\overline{} wrappers are removed, leaving inner content."""
+    # \text{}, \textbf{}, and \overline{} wrappers are removed, leaving inner content
     self.assertEqual(utils_rl.normalize_final_answer("\\text{hello}"), "hello")
     self.assertEqual(utils_rl.normalize_final_answer("\\textbf{42}"), "42")
     self.assertEqual(utils_rl.normalize_final_answer("\\overline{AB}"), "AB")
 
-    """Content inside $...$ is extracted."""
+    # Content inside $...$ is extracted
     self.assertEqual(utils_rl.normalize_final_answer("The answer is $\\frac{1}{2}$"), "\\frac{1}{2}")
 
-    """Shorthand \\fracab and \\sqrta are expanded to their full LaTeX forms."""
+    # Shorthand \fracab and \sqrta are expanded to their full LaTeX forms
     self.assertEqual(utils_rl.normalize_final_answer("\\fracab"), "\\frac{a}{b}")
     self.assertEqual(utils_rl.normalize_final_answer("\\sqrta"), "\\sqrt{a}")
 
 
-class TestMatchFormatApproximately(unittest.TestCase):
+class TestMatchFormatApproximatelyScores(unittest.TestCase):
   """Tests for utils_rl.match_format_approximately.
 
   Each tag that appears exactly once contributes reward_partial_format_match (0.5).
@@ -147,40 +147,25 @@ def setUp(self):
     self.config = _make_config()
 
   def _score(self, completion):
-    return utils_rl.match_format_approximately(None, [completion], self.config)
+    return utils_rl.match_format_approximately(None, completion, self.config)
 
   @pytest.mark.cpu_only
-  def test_score_0_no_tags_present(self):
-    """No tags at all -> each of the 4 tags triggers penalty -> score = 4 * -0.5 = -2.0."""
-    completion = "The answer is 42."
-    self.assertEqual(self._score(completion)[0], -2.0)
-
-  @pytest.mark.cpu_only
-  def test_score_1_duplicate_reasoning_start_tag(self):
-    """Duplicate <reasoning> tag -> that tag penalised; other three correct -> 3*0.5 + (-0.5) = 1.0."""
-    completion = "<reasoning><reasoning>think</reasoning><answer>42</answer>"
-    self.assertEqual(self._score(completion)[0], 1.0)
-
-  @pytest.mark.cpu_only
-  def test_score_2_only_answer_tags_present(self):
-    """Only answer open/close tags present once -> 2 rewards + 2 penalties = 0.0."""
-    completion = "<answer>42</answer>"
-    self.assertEqual(self._score(completion)[0], 0.0)
-
-  @pytest.mark.cpu_only
-  def test_score_4_all_tags_present_exactly_once(self):
-    """All four tags appear exactly once -> score = 4 * 0.5 = 2.0."""
-    completion = "<reasoning>think</reasoning><answer>42</answer>"
-    self.assertEqual(self._score(completion)[0], 2.0)
-
-  @pytest.mark.cpu_only
-  def test_score_multiple_completions(self):
-    """Passing multiple completions returns one score per completion."""
-    completions = [
+  def test_partial_format_scores(self):
+    """Scores cover the full range depending on how many tags appear exactly once."""
+    # All four tags present exactly once -> 4 * 0.5 = 2.0
+    self.assertEqual(self._score(["<reasoning>think</reasoning><answer>42</answer>"])[0], 2.0)
+    # No tags at all -> 4 * -0.5 = -2.0
+    self.assertEqual(self._score(["The answer is 42."])[0], -2.0)
+    # Only <answer>...</answer> present -> 2 * 0.5 + 2 * -0.5 = 0.0
+    self.assertEqual(self._score(["<answer>42</answer>"])[0], 0.0)
+    # Duplicate <reasoning> tag -> 3 * 0.5 + 1 * -0.5 = 1.0
+    self.assertEqual(self._score(["<reasoning><reasoning>think</reasoning><answer>42</answer>"])[0], 1.0)
+    # Multiple completions at once -> one score per entry
+    multi_completions = [
         "<reasoning>think</reasoning><answer>42</answer>",  # 2.0
         "no tags here",  # -2.0
     ]
-    scores = utils_rl.match_format_approximately(None, completions, self.config)
+    scores = self._score(multi_completions)
     self.assertEqual(len(scores), 2)
     self.assertEqual(scores[0], 2.0)
     self.assertEqual(scores[1], -2.0)