Skip to content

Commit e9c5a97

Browse files
committed
feat(locators): optimise serialization for anthropic claude 3.5 sonnet
1 parent 2b252c6 commit e9c5a97

2 files changed

Lines changed: 14 additions & 7 deletions

File tree

src/askui/models/locators.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -103,7 +103,10 @@ def serialize(self, locator: Locator) -> str:
103103
raise ValueError(f"Unsupported locator type: {type(locator)}")
104104

105105
def _serialize_class(self, class_: Class) -> str:
106-
return class_.class_name or "ui element"
106+
if class_.class_name:
107+
return f"an arbitrary {class_.class_name} shown"
108+
else:
109+
return "an arbitrary ui element (e.g., text, button, textfield, etc.)"
107110

108111
def _serialize_description(self, description: Description) -> str:
109112
return description.description

tests/e2e/agent/test_locate.py

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -34,9 +34,6 @@ def github_login_screenshot(path_fixtures: pathlib.Path) -> Image.Image:
3434
"askui",
3535
"anthropic-claude-3-5-sonnet-20241022",
3636
])
37-
@pytest.mark.xfail(
38-
reason="Location may be inconsistent depending on the model used",
39-
)
4037
class TestVisionAgentLocate:
4138
"""Test class for VisionAgent.locate() method."""
4239

@@ -47,19 +44,26 @@ def test_locate_with_string_locator(self, vision_agent: VisionAgent, github_logi
4744
assert 450 <= x <= 570
4845
assert 190 <= y <= 260
4946

50-
def test_locate_with_class_locator(self, vision_agent: VisionAgent, github_login_screenshot: Image.Image, model_name: str) -> None:
47+
def test_locate_with_textfield_class_locator(self, vision_agent: VisionAgent, github_login_screenshot: Image.Image, model_name: str) -> None:
5148
"""Test locating elements using a class locator."""
5249
locator = Class("textfield")
5350
x, y = vision_agent.locate(locator, github_login_screenshot, model_name=model_name)
5451
assert 50 <= x <= 860 or 350 <= x <= 570 or 350 <= x <= 570
5552
assert 0 <= y <= 80 or 210 <= y <= 280 or 160 <= y <= 230
53+
54+
def test_locate_with_unspecified_class_locator(self, vision_agent: VisionAgent, github_login_screenshot: Image.Image, model_name: str) -> None:
55+
"""Test locating elements using a class locator."""
56+
locator = Class()
57+
x, y = vision_agent.locate(locator, github_login_screenshot, model_name=model_name)
58+
assert 0 <= x <= github_login_screenshot.width
59+
assert 0 <= y <= github_login_screenshot.height
5660

5761
def test_locate_with_description_locator(self, vision_agent: VisionAgent, github_login_screenshot: Image.Image, model_name: str) -> None:
5862
"""Test locating elements using a description locator."""
59-
locator = Description("Green sign in button")
63+
locator = Description("Username textfield")
6064
x, y = vision_agent.locate(locator, github_login_screenshot, model_name=model_name)
6165
assert 350 <= x <= 570
62-
assert 240 <= y <= 310
66+
assert 160 <= y <= 230
6367

6468
def test_locate_with_similar_text_locator(self, vision_agent: VisionAgent, github_login_screenshot: Image.Image, model_name: str) -> None:
6569
"""Test locating elements using a text locator."""

0 commit comments

Comments
 (0)