1313from openadapt_evals .adapters .rl_env import RLEnvironment
1414from openadapt_evals .adapters .verl_env import (
1515 WAADesktopEnv ,
16+ _ACTION_PATTERN ,
1617 _build_obs_dict ,
1718 _parse_action_str ,
1819)
@@ -52,14 +53,37 @@ def test_done(self):
5253 action = _parse_action_str ("DONE()" )
5354 assert action .type == "done"
5455
def test_scroll_with_direction(self):
    """Check that a SCROLL command with direction="down" parses as a downward scroll."""
    command = 'SCROLL(x=0.50, y=0.50, direction="down")'
    parsed = _parse_action_str(command)
    assert parsed.type == "scroll"
    assert parsed.scroll_direction == "down"
60+
def test_scroll_up(self):
    """Check that direction="up" is carried through to ``scroll_direction``."""
    command = 'SCROLL(x=0.50, y=0.50, direction="up")'
    parsed = _parse_action_str(command)
    assert parsed.scroll_direction == "up"
64+
def test_scroll_default_direction(self):
    """Check that a SCROLL command without a direction argument scrolls "down"."""
    command = "SCROLL(x=0.50, y=0.50)"
    parsed = _parse_action_str(command)
    assert parsed.scroll_direction == "down"
5868
def test_invalid_returns_done(self):
    """Check that text containing no recognizable action parses to a "done" action."""
    unparseable = "random garbage text"
    parsed = _parse_action_str(unparseable)
    assert parsed.type == "done"
6272
def test_drag_with_end_coords(self):
    """Check that a DRAG command with start and end coordinates parses every field."""
    parsed = _parse_action_str("DRAG(x=0.20, y=0.30, end_x=0.80, end_y=0.70)")
    assert parsed.type == "drag"

    # Coordinates are floats, so compare with a tolerance rather than exactly.
    expected_coords = (
        ("x", 0.20),
        ("y", 0.30),
        ("end_x", 0.80),
        ("end_y", 0.70),
    )
    for attr_name, expected in expected_coords:
        assert getattr(parsed, attr_name) == pytest.approx(expected)
80+
def test_drag_without_end_coords(self):
    """Check that a DRAG command lacking end coordinates leaves both end fields as None."""
    parsed = _parse_action_str("DRAG(x=0.20, y=0.30)")
    assert parsed.type == "drag"
    for attr_name in ("end_x", "end_y"):
        assert getattr(parsed, attr_name) is None
86+
6387 def test_with_thinking (self ):
6488 action = _parse_action_str (
6589 "<think>I need to click the button</think>\n CLICK(x=0.25, y=0.75)"
@@ -68,14 +92,21 @@ def test_with_thinking(self):
6892 assert action .x == pytest .approx (0.25 )
6993 assert action .y == pytest .approx (0.75 )
7094
def test_invalid_action_not_matched(self):
    """Unparseable input should not match the action pattern."""
    match = _ACTION_PATTERN.search("random garbage")
    assert match is None
98+
def test_explicit_done_is_matched(self):
    """Explicit DONE() should match the action pattern."""
    match = _ACTION_PATTERN.search("DONE()")
    assert match is not None
102+
71103
72104# --- Observation building tests ---
73105
74106
75107class TestBuildObsDict :
76108 def test_with_screenshot (self ):
77109 """Test obs dict with PNG bytes."""
78- # Create a minimal valid PNG (1x1 red pixel)
79110 from PIL import Image
80111 import io
81112
@@ -137,6 +168,7 @@ def test_system_prompt(self):
137168 assert "obs_str" in result
138169 assert "CLICK" in result ["obs_str" ]
139170 assert "TYPE" in result ["obs_str" ]
171+ assert "DRAG" in result ["obs_str" ]
140172 assert "DONE" in result ["obs_str" ]
141173
142174 def test_reset_returns_obs_dict (self ):
@@ -165,7 +197,6 @@ def test_step_done_triggers_eval(self):
165197 asyncio .run (env .reset (seed = 42 ))
166198 obs , reward , done , info = asyncio .run (env .step ("DONE()" ))
167199 assert done is True
168- # Reward should be a float from evaluation (mock evaluator)
169200 assert isinstance (reward , float )
170201
171202 def test_max_steps_triggers_done (self ):
@@ -186,15 +217,13 @@ def test_close(self):
186217 assert env ._rl_env is None
187218
188219 def test_full_episode_flow (self ):
189- """Test a complete episode: reset → multiple steps → done → evaluate."""
220+ """Test a complete episode: reset -> multiple steps -> done -> evaluate."""
190221 env = _make_mock_env ()
191222 env ._max_steps = 5
192223
193- # Reset
194224 obs , info = asyncio .run (env .reset (seed = 1 ))
195225 assert "obs_str" in obs
196226
197- # Take some actions
198227 obs , r , done , _ = asyncio .run (env .step ("CLICK(x=0.05, y=0.08)" ))
199228 assert not done
200229 assert r == 0.0
@@ -203,28 +232,70 @@ def test_full_episode_flow(self):
203232 assert not done
204233 assert r == 0.0
205234
206- # Finish
207235 obs , r , done , info = asyncio .run (env .step ("DONE()" ))
208236 assert done
209237 assert isinstance (r , float )
210238
def test_protocol_has_required_methods(self):
    """Verify WAADesktopEnv has all GymImageEnv protocol methods."""
    required_methods = ("reset", "step", "close", "system_prompt", "health_check")
    env = _make_mock_env()
    for method_name in required_methods:
        # Each name must exist on the env and be callable.
        assert hasattr(env, method_name)
        assert callable(getattr(env, method_name))
222245
def test_obs_contains_image_placeholder(self):
    """Check that the observation returned by reset carries a string ``obs_str``.

    Only the presence and type of ``obs_str`` are asserted; the test does
    not inspect its contents.
    """
    env = _make_mock_env()
    observation, _info = asyncio.run(env.reset(seed=42))
    assert "obs_str" in observation
    obs_text = observation["obs_str"]
    assert isinstance(obs_text, str)
252+
253+ # --- health_check tests ---
254+
def test_health_check_not_initialized(self):
    """Health check before reset returns not_initialized."""
    # Allocate without running __init__, then set the attributes by hand.
    env = WAADesktopEnv.__new__(WAADesktopEnv)
    initial_state = (
        ("_rl_env", None),
        ("_server_url", "mock"),
        ("_step_count", 0),
    )
    for attr_name, value in initial_state:
        setattr(env, attr_name, value)

    health = asyncio.run(env.health_check())
    assert health["status"] == "not_initialized"
263+
def test_health_check_ready_after_episode(self):
    """Health check after completed episode returns ready."""
    env = _make_mock_env()
    # Run one full episode: reset, then end it with DONE().
    for coroutine in (env.reset, env.step):
        if coroutine == env.reset:
            asyncio.run(coroutine(seed=42))
        else:
            asyncio.run(coroutine("DONE()"))
    health = asyncio.run(env.health_check())
    assert health["status"] == "ready"
271+
def test_health_check_busy_mid_episode(self):
    """Health check mid-episode returns busy."""
    env = _make_mock_env()
    asyncio.run(env.reset(seed=42))
    # Take one step that is not DONE() before checking health.
    click_command = "CLICK(x=0.5, y=0.5)"
    asyncio.run(env.step(click_command))
    health = asyncio.run(env.health_check())
    assert health["status"] == "busy"
279+
280+ # --- is_action_valid tests ---
281+
def test_is_action_valid_for_parsed_action(self):
    """Actions that parse successfully should be marked valid."""
    env = _make_mock_env()
    asyncio.run(env.reset(seed=42))
    step_result = asyncio.run(env.step("CLICK(x=0.5, y=0.5)"))
    _obs, _reward, _done, info = step_result
    assert info["is_action_valid"] is True
288+
def test_is_action_valid_for_done(self):
    """Explicit DONE() should be marked valid."""
    env = _make_mock_env()
    asyncio.run(env.reset(seed=42))
    step_result = asyncio.run(env.step("DONE()"))
    _obs, _reward, _done, info = step_result
    assert info["is_action_valid"] is True
295+
def test_is_action_invalid_for_garbage(self):
    """Unparseable input should be marked invalid."""
    env = _make_mock_env()
    asyncio.run(env.reset(seed=42))
    step_result = asyncio.run(env.step("random garbage"))
    _obs, _reward, _done, info = step_result
    assert info["is_action_valid"] is False
0 commit comments