Skip to content

Commit d011b2f

Browse files
abrichr and claude authored
fix: constrained decoding preserves chain-of-thought reasoning (#200)
The Thought/Action format from SYSTEM_PROMPT is now enforced by the constrained decoding regex:

    Thought: <1 to 500 chars of single-line reasoning>
    Action: CLICK(x=0.50, y=0.30)

This gives the model a reasoning budget while guaranteeing parseable output. The prior regex either had no prefix (so the model couldn't reason) or used (.|\n)* (which Outlines couldn't compile into a DFA).

Also exposes _ACTION_RE (the action-only regex) for use by the parser.

Tests updated: 30 pass (was 21).

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 9e9bbc8 commit d011b2f

2 files changed

Lines changed: 52 additions & 22 deletions

File tree

openadapt_evals/training/standalone/trainer.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -90,17 +90,21 @@ def __init__(
9090

9191
# --- Constrained decoding -------------------------------------------
9292

93-
# Regex matching valid action formats. No free-text prefix — the
94-
# model MUST output an action as its very first token. This is
95-
# intentional: constrained decoding forces structured output.
96-
# If the model needs chain-of-thought, disable constrained_decoding
97-
# and rely on prompt instructions instead.
98-
_ACTION_REGEX = (
93+
# Regex matching the required Thought/Action format from SYSTEM_PROMPT.
94+
# The model gets up to 500 chars of chain-of-thought reasoning, then
95+
# MUST output exactly one valid action. This preserves the model's
96+
# ability to reason while guaranteeing parseable output.
97+
#
98+
# Format: Thought: <reasoning>\nAction: <action>
99+
_ACTION_RE = (
99100
r"CLICK\(x=0\.\d{1,3},\s*y=0\.\d{1,3}\)"
100101
r'|TYPE\(text="[^"]{0,200}"\)'
101102
r"|WAIT\(\)"
102103
r"|DONE\(\)"
103104
)
105+
_ACTION_REGEX = (
106+
r"Thought: [^\n]{1,500}\nAction: (" + _ACTION_RE + r")"
107+
)
104108
# Sentinel: None = not yet attempted, list = success, False = failed
105109
_constrained_processor_cache: Any = None
106110

tests/test_standalone_trainer.py

Lines changed: 42 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,39 +20,65 @@
2020

2121

2222
class TestActionRegex:
23-
"""Verify the action format regex matches valid actions and rejects junk."""
23+
"""Verify the Thought/Action format regex matches valid output and rejects junk."""
2424

25-
regex = GRPOTrainer._ACTION_REGEX
25+
full_regex = GRPOTrainer._ACTION_REGEX
26+
action_regex = GRPOTrainer._ACTION_RE
27+
28+
# -- Full Thought/Action format tests --
2629

2730
@pytest.mark.parametrize(
28-
"action",
31+
"output",
2932
[
30-
"CLICK(x=0.50, y=0.30)",
31-
"CLICK(x=0.0, y=0.0)",
32-
"CLICK(x=0.999, y=0.123)",
33-
'TYPE(text="hello world")',
34-
'TYPE(text="")',
35-
'TYPE(text="notepad")',
36-
"WAIT()",
37-
"DONE()",
33+
"Thought: I need to click the start menu.\nAction: CLICK(x=0.50, y=0.30)",
34+
"Thought: Type notepad in the search box.\nAction: TYPE(text=\"notepad\")",
35+
"Thought: Wait for the UI to load.\nAction: WAIT()",
36+
"Thought: The task is complete.\nAction: DONE()",
37+
"Thought: Click the Chrome icon on the desktop to open Chrome.\nAction: CLICK(x=0.05, y=0.20)",
38+
"Thought: x\nAction: CLICK(x=0.0, y=0.0)",
3839
],
3940
)
40-
def test_valid_actions_match(self, action: str) -> None:
41-
assert re.match(self.regex, action), f"Expected match: {action!r}"
41+
def test_valid_thought_action_matches(self, output: str) -> None:
42+
assert re.match(self.full_regex, output), f"Expected match: {output!r}"
4243

4344
@pytest.mark.parametrize(
4445
"text",
4546
[
47+
# No Thought prefix
48+
"CLICK(x=0.50, y=0.30)",
49+
"Action: CLICK(x=0.50, y=0.30)",
50+
# Free-text reasoning without structure
4651
"** Let me think about this...",
4752
"1. Analyze the user's goal",
4853
"The user wants to open Task Manager",
4954
"",
50-
"CLICK",
51-
"click(0.5, 0.3)",
55+
# Missing Action line
56+
"Thought: I should click here.",
57+
# Wrong action format
58+
"Thought: Click\nAction: click(0.5, 0.3)",
59+
"Thought: Click\nAction: CLICK",
5260
],
5361
)
5462
def test_invalid_text_rejected(self, text: str) -> None:
55-
assert not re.match(self.regex, text), f"Should NOT match: {text!r}"
63+
assert not re.match(self.full_regex, text), f"Should NOT match: {text!r}"
64+
65+
# -- Action-only regex tests (used by parser) --
66+
67+
@pytest.mark.parametrize(
68+
"action",
69+
[
70+
"CLICK(x=0.50, y=0.30)",
71+
"CLICK(x=0.0, y=0.0)",
72+
"CLICK(x=0.999, y=0.123)",
73+
'TYPE(text="hello world")',
74+
'TYPE(text="")',
75+
'TYPE(text="notepad")',
76+
"WAIT()",
77+
"DONE()",
78+
],
79+
)
80+
def test_action_only_regex_matches(self, action: str) -> None:
81+
assert re.match(self.action_regex, action), f"Expected match: {action!r}"
5682

5783

5884
# ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)