44
55from anthropic import Anthropic , AnthropicError
66from dotenv import load_dotenv
7+ from pydantic import TypeAdapter
78
89from ..handlers .cua_handler import CUAHandler , StagehandFunctionName
910from ..types .agent import (
@@ -362,7 +363,7 @@ def _convert_tool_use_to_agent_action(
362363 )
363364 return None
364365
365- action_model_payload : Optional [AgentActionType ] = None
366+ action_payload_dict : Optional [dict [ str , Any ] ] = None
366367 reasoning = tool_input .get ("reasoning" )
367368
368369 try :
@@ -375,52 +376,53 @@ def _convert_tool_use_to_agent_action(
375376 )
376377
377378 if action_type_str == "left_click" :
378- action_model_payload = AgentActionType (
379- type = "click" ,
380- x = x ,
381- y = y ,
382- button = "left" ,
383- )
379+ action_payload_dict = {
380+ " type" : "click" ,
381+ "x" : x ,
382+ "y" : y ,
383+ " button" : "left" ,
384+ }
384385 action_type_str = "click" # Normalize
385386
386387 elif action_type_str == "right_click" :
387- action_model_payload = AgentActionType (
388- type = "click" ,
389- x = x ,
390- y = y ,
391- button = "right" ,
392- )
388+ action_payload_dict = {
389+ " type" : "click" ,
390+ "x" : x ,
391+ "y" : y ,
392+ " button" : "right" ,
393+ }
393394 action_type_str = "click" # Normalize
394395
395396 elif action_type_str == "middle_click" :
396- action_model_payload = AgentActionType (
397- type = "click" ,
398- x = x ,
399- y = y ,
400- button = "middle" ,
401- )
397+ action_payload_dict = {
398+ " type" : "click" ,
399+ "x" : x ,
400+ "y" : y ,
401+ " button" : "middle" ,
402+ }
402403 action_type_str = "click" # Normalize
403404
404405 elif action_type_str == "double_click" :
405- action_model_payload = AgentActionType (
406- type = "double_click" ,
407- x = x ,
408- y = y ,
409- )
406+ action_payload_dict = {
407+ " type" : "double_click" ,
408+ "x" : x ,
409+ "y" : y ,
410+ }
410411
411412 elif action_type_str == "triple_click" :
412413 # Handle as double_click for now since we don't have a dedicated triple click
413- action_model_payload = AgentActionType (
414- type = "double_click" ,
415- x = x ,
416- y = y ,
417- )
414+ action_payload_dict = {
415+ " type" : "double_click" ,
416+ "x" : x ,
417+ "y" : y ,
418+ }
418419 action_type_str = "double_click" # Normalize
419420
420421 elif action_type_str == "type" :
421- action_model_payload = AgentActionType (
422- type = "type" , text = tool_input .get ("text" , "" )
423- )
422+ action_payload_dict = {
423+ "type" : "type" ,
424+ "text" : tool_input .get ("text" , "" ),
425+ }
424426
425427 elif action_type_str == "key" :
426428 key_text = tool_input .get ("text" , "" )
@@ -429,10 +431,10 @@ def _convert_tool_use_to_agent_action(
429431 keys = [
430432 self .key_to_playwright (k .strip ()) for k in key_text .split ("+" )
431433 ]
432- action_model_payload = AgentActionType (
433- type = "keypress" ,
434- keys = keys ,
435- )
434+ action_payload_dict = {
435+ " type" : "keypress" ,
436+ " keys" : keys ,
437+ }
436438 action_type_str = "keypress" # Normalize
437439
438440 elif action_type_str == "hold_key" :
@@ -446,10 +448,10 @@ def _convert_tool_use_to_agent_action(
446448 self .key_to_playwright (k .strip ()) for k in key_text .split ("+" )
447449 ]
448450 # For now, handle as a regular keypress
449- action_model_payload = AgentActionType (
450- type = "keypress" ,
451- keys = keys ,
452- )
451+ action_payload_dict = {
452+ " type" : "keypress" ,
453+ " keys" : keys ,
454+ }
453455 action_type_str = "keypress" # Normalize
454456
455457 elif action_type_str == "scroll" :
@@ -469,20 +471,20 @@ def _convert_tool_use_to_agent_action(
469471 elif scroll_direction == "left" :
470472 scroll_x = - scroll_amount * scroll_multiplier
471473
472- action_model_payload = AgentActionType (
473- type = "scroll" ,
474- x = x or 0 , # Default to 0 if none
475- y = y or 0 , # Default to 0 if none
476- scroll_x = scroll_x ,
477- scroll_y = scroll_y ,
478- )
474+ action_payload_dict = {
475+ " type" : "scroll" ,
476+ "x" : x or 0 , # Default to 0 if none
477+ "y" : y or 0 , # Default to 0 if none
478+ " scroll_x" : scroll_x ,
479+ " scroll_y" : scroll_y ,
480+ }
479481
480482 elif action_type_str == "mouse_move" :
481- action_model_payload = AgentActionType (
482- type = "move" ,
483- x = x ,
484- y = y ,
485- )
483+ action_payload_dict = {
484+ " type" : "move" ,
485+ "x" : x ,
486+ "y" : y ,
487+ }
486488 action_type_str = "move" # Normalize
487489
488490 elif action_type_str == "left_click_drag" :
@@ -499,14 +501,13 @@ def _convert_tool_use_to_agent_action(
499501 and x is not None
500502 and y is not None
501503 ):
502- path_points = [
503- Point (x = start_x , y = start_y ),
504- Point (x = x , y = y ),
505- ]
506- action_model_payload = AgentActionType (
507- type = "drag" ,
508- path = path_points ,
509- )
504+ action_payload_dict = {
505+ "type" : "drag" ,
506+ "path" : [
507+ {"x" : start_x , "y" : start_y },
508+ {"x" : x , "y" : y },
509+ ],
510+ }
510511 action_type_str = "drag" # Normalize
511512 else :
512513 self .logger .error (
@@ -517,54 +518,54 @@ def _convert_tool_use_to_agent_action(
517518
518519 elif action_type_str == "left_mouse_down" :
519520 # Currently not directly supported - handle as a click for now
520- action_model_payload = AgentActionType (
521- type = "click" ,
522- x = x ,
523- y = y ,
524- button = "left" ,
525- )
521+ action_payload_dict = {
522+ " type" : "click" ,
523+ "x" : x ,
524+ "y" : y ,
525+ " button" : "left" ,
526+ }
526527 action_type_str = "click" # Normalize
527528
528529 elif action_type_str == "left_mouse_up" :
529530 # Currently not directly supported - handle as a click for now
530- action_model_payload = AgentActionType (
531- type = "click" ,
532- x = x ,
533- y = y ,
534- button = "left" ,
535- )
531+ action_payload_dict = {
532+ " type" : "click" ,
533+ "x" : x ,
534+ "y" : y ,
535+ " button" : "left" ,
536+ }
536537 action_type_str = "click" # Normalize
537538
538539 elif action_type_str == "wait" :
539540 duration = tool_input .get ("duration" , 1 ) # Default 1 second
540541 # Convert seconds to milliseconds
541- action_model_payload = AgentActionType (
542- type = "wait" ,
543- miliseconds = int (duration * 1000 ),
544- )
542+ action_payload_dict = {
543+ " type" : "wait" ,
544+ " miliseconds" : int (duration * 1000 ),
545+ }
545546
546547 elif action_type_str == "screenshot" :
547- action_model_payload = AgentActionType (
548- type = "screenshot" ,
549- )
548+ action_payload_dict = {
549+ " type" : "screenshot" ,
550+ }
550551
551552 elif action_type_str == "cursor_position" :
552553 # This is a read operation, not directly supported
553554 # Return a no-op for now
554- action_model_payload = AgentActionType (
555- type = "screenshot" , # Use screenshot as a way to show cursor position
556- )
555+ action_payload_dict = {
556+ " type" : "screenshot" , # Use screenshot as a way to show cursor position
557+ }
557558 action_type_str = "screenshot" # Normalize
558559
559560 elif action_type_str == "function" :
560561 if tool_name == "goto" :
561562 url = tool_input .get ("url" )
562563 if url :
563- action_model_payload = AgentActionType (
564- type = "function" ,
565- name = "goto" ,
566- arguments = FunctionArguments ( url = url ) ,
567- )
564+ action_payload_dict = {
565+ " type" : "function" ,
566+ " name" : "goto" ,
567+ " arguments" : { " url" : url } ,
568+ }
568569 action_type_str = "function"
569570 else :
570571 self .logger .error (
@@ -573,11 +574,11 @@ def _convert_tool_use_to_agent_action(
573574 )
574575 return None
575576 elif tool_name == "navigate_back" :
576- action_model_payload = AgentActionType (
577- type = "function" ,
578- name = "navigate_back" ,
579- arguments = FunctionArguments () ,
580- )
577+ action_payload_dict = {
578+ " type" : "function" ,
579+ " name" : "navigate_back" ,
580+ " arguments" : None ,
581+ }
581582 action_type_str = "function"
582583 else :
583584 self .logger .error (
@@ -586,7 +587,10 @@ def _convert_tool_use_to_agent_action(
586587 )
587588 return None
588589
589- if action_model_payload is not None :
590+ if action_payload_dict is not None :
591+ action_model_payload = TypeAdapter (AgentActionType ).validate_python (
592+ action_payload_dict
593+ )
590594 return AgentAction (
591595 action_type = action_type_str ,
592596 action = action_model_payload ,
0 commit comments