|
8 | 8 | from hud.tools.computer.anthropic import AnthropicComputerTool |
9 | 9 | from hud.tools.computer.gemini import GeminiComputerTool |
10 | 10 | from hud.tools.computer.glm import GLMComputerTool |
11 | | -from hud.tools.computer.hud import HudComputerTool |
| 11 | +from hud.tools.computer.hud import AgentCoordinate, HudComputerTool |
12 | 12 | from hud.tools.computer.openai import OpenAIComputerTool |
13 | 13 | from hud.tools.computer.qwen import QwenComputerTool |
14 | 14 | from hud.tools.executors.base import BaseExecutor |
@@ -36,6 +36,11 @@ async def drag(self, path, pattern=None, hold_keys=None, take_screenshot=True): |
36 | 36 | return await super().drag(path, pattern, hold_keys, take_screenshot=False) |
37 | 37 |
|
38 | 38 |
|
| 39 | +class EmptyErrorExecutor(BaseExecutor): |
| 40 | + async def click(self, *args, **kwargs): |
| 41 | + return ContentResult(error="") |
| 42 | + |
| 43 | + |
39 | 44 | @pytest.mark.asyncio |
40 | 45 | async def test_hud_computer_screenshot(): |
41 | 46 | comp = HudComputerTool() |
@@ -75,15 +80,42 @@ async def test_anthropic_computer_screenshot(): |
75 | 80 |
|
76 | 81 |
|
77 | 82 | @pytest.mark.asyncio |
78 | | -async def test_gemini_computer_click_reports_agent_coordinates(): |
| 83 | +async def test_gemini_computer_scaling_preserves_model_coordinates(): |
79 | 84 | comp = GeminiComputerTool() |
| 85 | + x, y = comp._scale_coordinates(214, 420) |
| 86 | + |
| 87 | + assert x is not None |
| 88 | + assert y is not None |
| 89 | + assert int(x) != 214 |
| 90 | + assert int(y) != 420 |
| 91 | + assert getattr(x, "agent_value") == 214 |
| 92 | + assert getattr(y, "agent_value") == 420 |
| 93 | + |
| 94 | + |
| 95 | +@pytest.mark.asyncio |
| 96 | +async def test_gemini_computer_click_reports_model_coordinates(): |
| 97 | + comp = GeminiComputerTool(executor=BaseExecutor()) |
| 98 | + |
80 | 99 | blocks = await comp(action="click_at", x=214, y=420) |
81 | 100 |
|
82 | 101 | assert any( |
83 | | - "(214, 420)" in content.text for content in blocks if isinstance(content, TextContent) |
| 102 | + "Clicked at (214, 420)" in content.text |
| 103 | + for content in blocks |
| 104 | + if isinstance(content, TextContent) |
84 | 105 | ) |
85 | 106 |
|
86 | 107 |
|
| 108 | +@pytest.mark.asyncio |
| 109 | +async def test_gemini_computer_does_not_mask_empty_error(): |
| 110 | + comp = GeminiComputerTool(executor=EmptyErrorExecutor()) |
| 111 | + |
| 112 | + blocks = await comp(action="click_at", x=214, y=420) |
| 113 | + text = "\n".join(content.text for content in blocks if isinstance(content, TextContent)) |
| 114 | + |
| 115 | + assert "Clicked at (214, 420)" not in text |
| 116 | + assert "Tool execution failed with no error output" in text |
| 117 | + |
| 118 | + |
87 | 119 | @pytest.mark.asyncio |
88 | 120 | async def test_anthropic_computer_zoom(): |
89 | 121 | """Test zoom action on AnthropicComputerTool. |
@@ -125,40 +157,44 @@ async def test_anthropic_computer_zoom(): |
125 | 157 |
|
126 | 158 | @pytest.mark.asyncio |
127 | 159 | async def test_openai_computer_click(): |
128 | | - comp = OpenAIComputerTool() |
| 160 | + comp = OpenAIComputerTool(executor=BaseExecutor()) |
129 | 161 | blocks = await comp(type="click", x=5, y=5) |
130 | 162 | assert blocks |
131 | | - assert any("(5, 5)" in content.text for content in blocks if isinstance(content, TextContent)) |
132 | 163 |
|
133 | 164 |
|
134 | 165 | @pytest.mark.asyncio |
135 | | -async def test_anthropic_computer_click_reports_agent_coordinates(): |
136 | | - comp = AnthropicComputerTool() |
137 | | - blocks = await comp(action="left_click", coordinate=[123, 456], text=None) |
| 166 | +async def test_anthropic_computer_scaling_preserves_agent_coordinates(): |
| 167 | + comp = AnthropicComputerTool(executor=BaseExecutor()) |
| 168 | + x, y = comp._scale_coordinates(123, 456) |
138 | 169 |
|
139 | | - assert any( |
140 | | - "(123, 456)" in content.text for content in blocks if isinstance(content, TextContent) |
141 | | - ) |
| 170 | + assert x is not None |
| 171 | + assert y is not None |
| 172 | + assert getattr(x, "agent_value") == 123 |
| 173 | + assert getattr(y, "agent_value") == 456 |
142 | 174 |
|
143 | 175 |
|
144 | 176 | @pytest.mark.asyncio |
145 | | -async def test_qwen_computer_click_reports_agent_coordinates(): |
146 | | - comp = QwenComputerTool() |
147 | | - blocks = await comp(action="left_click", coordinate=[123, 456]) |
| 177 | +async def test_qwen_computer_scaling_preserves_agent_coordinates(): |
| 178 | + comp = QwenComputerTool(executor=BaseExecutor()) |
| 179 | + x, y = comp._scale_coordinates(123, 456) |
148 | 180 |
|
149 | | - assert any( |
150 | | - "(123, 456)" in content.text for content in blocks if isinstance(content, TextContent) |
151 | | - ) |
| 181 | + assert x is not None |
| 182 | + assert y is not None |
| 183 | + assert getattr(x, "agent_value") == 123 |
| 184 | + assert getattr(y, "agent_value") == 456 |
152 | 185 |
|
153 | 186 |
|
154 | 187 | @pytest.mark.asyncio |
155 | | -async def test_glm_computer_click_reports_agent_coordinates(): |
156 | | - comp = GLMComputerTool() |
157 | | - blocks = await comp(action="left_click", start_box="[123,456]") |
| 188 | +async def test_glm_computer_scaling_preserves_model_coordinates(): |
| 189 | + comp = GLMComputerTool(executor=BaseExecutor()) |
| 190 | + x, y = comp._scale_coordinates(123, 456) |
158 | 191 |
|
159 | | - assert any( |
160 | | - "(123, 456)" in content.text for content in blocks if isinstance(content, TextContent) |
161 | | - ) |
| 192 | + assert x is not None |
| 193 | + assert y is not None |
| 194 | + assert int(x) != 123 |
| 195 | + assert int(y) != 456 |
| 196 | + assert getattr(x, "agent_value") == 123 |
| 197 | + assert getattr(y, "agent_value") == 456 |
162 | 198 |
|
163 | 199 |
|
164 | 200 | def test_normalized_coordinate_max_stays_in_display_bounds(): |
@@ -217,6 +253,28 @@ async def test_xdo_drag_executes_interpolated_mouse_moves(): |
217 | 253 | assert mouse_moves[-1] == "mousemove 120 0" |
218 | 254 |
|
219 | 255 |
|
| 256 | +@pytest.mark.asyncio |
| 257 | +async def test_xdo_commands_use_execution_pixels_for_agent_coordinates(): |
| 258 | + executor = RecordingXDOExecutor() |
| 259 | + |
| 260 | + await executor.click(x=AgentCoordinate(309, 214), y=AgentCoordinate(396, 420)) |
| 261 | + |
| 262 | + assert executor.commands[-1] == "mousemove 309 396 click 1" |
| 263 | + |
| 264 | + |
| 265 | +@pytest.mark.asyncio |
| 266 | +async def test_xdo_nonzero_empty_stderr_surfaces_error(monkeypatch): |
| 267 | + async def fake_run(command: str): |
| 268 | + return 1, "", "" |
| 269 | + |
| 270 | + monkeypatch.setattr("hud.tools.executors.xdo.run", fake_run) |
| 271 | + executor = XDOExecutor() |
| 272 | + |
| 273 | + result = await executor.execute("mousemove 1 2", take_screenshot=False) |
| 274 | + |
| 275 | + assert result.error == "Command failed with exit code 1" |
| 276 | + |
| 277 | + |
220 | 278 | class TestHudComputerToolExtended: |
221 | 279 | """Extended tests for HudComputerTool covering edge cases and platform logic.""" |
222 | 280 |
|
|
0 commit comments