Skip to content

Commit 690a801

Browse files
committed
Fix Anthropic CUA and add self-heal back to agent examples
1 parent 744b78f commit 690a801

File tree

3 files changed

+99
-93
lines changed

3 files changed

+99
-93
lines changed

examples/agent_example.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ async def main():
4040
project_id=os.getenv("BROWSERBASE_PROJECT_ID"),
4141
system_prompt="You are a browser automation assistant that helps users navigate websites effectively.",
4242
model_client_options={"apiKey": os.getenv("MODEL_API_KEY")},
43+
self_heal=True,
4344
verbose=2,
4445
)
4546

examples/agent_example_local.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ async def main():
3838
env="LOCAL",
3939
system_prompt="You are a browser automation assistant that helps users navigate websites effectively.",
4040
model_client_options={"apiKey": os.getenv("MODEL_API_KEY")},
41+
self_heal=True,
4142
verbose=2,
4243
)
4344

stagehand/agent/anthropic_cua.py

Lines changed: 97 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from anthropic import Anthropic, AnthropicError
66
from dotenv import load_dotenv
7+
from pydantic import TypeAdapter
78

89
from ..handlers.cua_handler import CUAHandler, StagehandFunctionName
910
from ..types.agent import (
@@ -362,7 +363,7 @@ def _convert_tool_use_to_agent_action(
362363
)
363364
return None
364365

365-
action_model_payload: Optional[AgentActionType] = None
366+
action_payload_dict: Optional[dict[str, Any]] = None
366367
reasoning = tool_input.get("reasoning")
367368

368369
try:
@@ -375,52 +376,53 @@ def _convert_tool_use_to_agent_action(
375376
)
376377

377378
if action_type_str == "left_click":
378-
action_model_payload = AgentActionType(
379-
type="click",
380-
x=x,
381-
y=y,
382-
button="left",
383-
)
379+
action_payload_dict = {
380+
"type": "click",
381+
"x": x,
382+
"y": y,
383+
"button": "left",
384+
}
384385
action_type_str = "click" # Normalize
385386

386387
elif action_type_str == "right_click":
387-
action_model_payload = AgentActionType(
388-
type="click",
389-
x=x,
390-
y=y,
391-
button="right",
392-
)
388+
action_payload_dict = {
389+
"type": "click",
390+
"x": x,
391+
"y": y,
392+
"button": "right",
393+
}
393394
action_type_str = "click" # Normalize
394395

395396
elif action_type_str == "middle_click":
396-
action_model_payload = AgentActionType(
397-
type="click",
398-
x=x,
399-
y=y,
400-
button="middle",
401-
)
397+
action_payload_dict = {
398+
"type": "click",
399+
"x": x,
400+
"y": y,
401+
"button": "middle",
402+
}
402403
action_type_str = "click" # Normalize
403404

404405
elif action_type_str == "double_click":
405-
action_model_payload = AgentActionType(
406-
type="double_click",
407-
x=x,
408-
y=y,
409-
)
406+
action_payload_dict = {
407+
"type": "double_click",
408+
"x": x,
409+
"y": y,
410+
}
410411

411412
elif action_type_str == "triple_click":
412413
# Handle as double_click for now since we don't have a dedicated triple click
413-
action_model_payload = AgentActionType(
414-
type="double_click",
415-
x=x,
416-
y=y,
417-
)
414+
action_payload_dict = {
415+
"type": "double_click",
416+
"x": x,
417+
"y": y,
418+
}
418419
action_type_str = "double_click" # Normalize
419420

420421
elif action_type_str == "type":
421-
action_model_payload = AgentActionType(
422-
type="type", text=tool_input.get("text", "")
423-
)
422+
action_payload_dict = {
423+
"type": "type",
424+
"text": tool_input.get("text", ""),
425+
}
424426

425427
elif action_type_str == "key":
426428
key_text = tool_input.get("text", "")
@@ -429,10 +431,10 @@ def _convert_tool_use_to_agent_action(
429431
keys = [
430432
self.key_to_playwright(k.strip()) for k in key_text.split("+")
431433
]
432-
action_model_payload = AgentActionType(
433-
type="keypress",
434-
keys=keys,
435-
)
434+
action_payload_dict = {
435+
"type": "keypress",
436+
"keys": keys,
437+
}
436438
action_type_str = "keypress" # Normalize
437439

438440
elif action_type_str == "hold_key":
@@ -446,10 +448,10 @@ def _convert_tool_use_to_agent_action(
446448
self.key_to_playwright(k.strip()) for k in key_text.split("+")
447449
]
448450
# For now, handle as a regular keypress
449-
action_model_payload = AgentActionType(
450-
type="keypress",
451-
keys=keys,
452-
)
451+
action_payload_dict = {
452+
"type": "keypress",
453+
"keys": keys,
454+
}
453455
action_type_str = "keypress" # Normalize
454456

455457
elif action_type_str == "scroll":
@@ -469,20 +471,20 @@ def _convert_tool_use_to_agent_action(
469471
elif scroll_direction == "left":
470472
scroll_x = -scroll_amount * scroll_multiplier
471473

472-
action_model_payload = AgentActionType(
473-
type="scroll",
474-
x=x or 0, # Default to 0 if none
475-
y=y or 0, # Default to 0 if none
476-
scroll_x=scroll_x,
477-
scroll_y=scroll_y,
478-
)
474+
action_payload_dict = {
475+
"type": "scroll",
476+
"x": x or 0, # Default to 0 if none
477+
"y": y or 0, # Default to 0 if none
478+
"scroll_x": scroll_x,
479+
"scroll_y": scroll_y,
480+
}
479481

480482
elif action_type_str == "mouse_move":
481-
action_model_payload = AgentActionType(
482-
type="move",
483-
x=x,
484-
y=y,
485-
)
483+
action_payload_dict = {
484+
"type": "move",
485+
"x": x,
486+
"y": y,
487+
}
486488
action_type_str = "move" # Normalize
487489

488490
elif action_type_str == "left_click_drag":
@@ -499,14 +501,13 @@ def _convert_tool_use_to_agent_action(
499501
and x is not None
500502
and y is not None
501503
):
502-
path_points = [
503-
Point(x=start_x, y=start_y),
504-
Point(x=x, y=y),
505-
]
506-
action_model_payload = AgentActionType(
507-
type="drag",
508-
path=path_points,
509-
)
504+
action_payload_dict = {
505+
"type": "drag",
506+
"path": [
507+
{"x": start_x, "y": start_y},
508+
{"x": x, "y": y},
509+
],
510+
}
510511
action_type_str = "drag" # Normalize
511512
else:
512513
self.logger.error(
@@ -517,54 +518,54 @@ def _convert_tool_use_to_agent_action(
517518

518519
elif action_type_str == "left_mouse_down":
519520
# Currently not directly supported - handle as a click for now
520-
action_model_payload = AgentActionType(
521-
type="click",
522-
x=x,
523-
y=y,
524-
button="left",
525-
)
521+
action_payload_dict = {
522+
"type": "click",
523+
"x": x,
524+
"y": y,
525+
"button": "left",
526+
}
526527
action_type_str = "click" # Normalize
527528

528529
elif action_type_str == "left_mouse_up":
529530
# Currently not directly supported - handle as a click for now
530-
action_model_payload = AgentActionType(
531-
type="click",
532-
x=x,
533-
y=y,
534-
button="left",
535-
)
531+
action_payload_dict = {
532+
"type": "click",
533+
"x": x,
534+
"y": y,
535+
"button": "left",
536+
}
536537
action_type_str = "click" # Normalize
537538

538539
elif action_type_str == "wait":
539540
duration = tool_input.get("duration", 1) # Default 1 second
540541
# Convert seconds to milliseconds
541-
action_model_payload = AgentActionType(
542-
type="wait",
543-
miliseconds=int(duration * 1000),
544-
)
542+
action_payload_dict = {
543+
"type": "wait",
544+
"miliseconds": int(duration * 1000),
545+
}
545546

546547
elif action_type_str == "screenshot":
547-
action_model_payload = AgentActionType(
548-
type="screenshot",
549-
)
548+
action_payload_dict = {
549+
"type": "screenshot",
550+
}
550551

551552
elif action_type_str == "cursor_position":
552553
# This is a read operation, not directly supported
553554
# Return a no-op for now
554-
action_model_payload = AgentActionType(
555-
type="screenshot", # Use screenshot as a way to show cursor position
556-
)
555+
action_payload_dict = {
556+
"type": "screenshot", # Use screenshot as a way to show cursor position
557+
}
557558
action_type_str = "screenshot" # Normalize
558559

559560
elif action_type_str == "function":
560561
if tool_name == "goto":
561562
url = tool_input.get("url")
562563
if url:
563-
action_model_payload = AgentActionType(
564-
type="function",
565-
name="goto",
566-
arguments=FunctionArguments(url=url),
567-
)
564+
action_payload_dict = {
565+
"type": "function",
566+
"name": "goto",
567+
"arguments": {"url": url},
568+
}
568569
action_type_str = "function"
569570
else:
570571
self.logger.error(
@@ -573,11 +574,11 @@ def _convert_tool_use_to_agent_action(
573574
)
574575
return None
575576
elif tool_name == "navigate_back":
576-
action_model_payload = AgentActionType(
577-
type="function",
578-
name="navigate_back",
579-
arguments=FunctionArguments(),
580-
)
577+
action_payload_dict = {
578+
"type": "function",
579+
"name": "navigate_back",
580+
"arguments": None,
581+
}
581582
action_type_str = "function"
582583
else:
583584
self.logger.error(
@@ -586,7 +587,10 @@ def _convert_tool_use_to_agent_action(
586587
)
587588
return None
588589

589-
if action_model_payload is not None:
590+
if action_payload_dict is not None:
591+
action_model_payload = TypeAdapter(AgentActionType).validate_python(
592+
action_payload_dict
593+
)
590594
return AgentAction(
591595
action_type=action_type_str,
592596
action=action_model_payload,

0 commit comments

Comments
 (0)