@@ -72,9 +72,7 @@ def add_element(
7272 ) -> None :
7373 """Add an element to the registry."""
7474 eid = int (element_id ) if isinstance (element_id , str ) else element_id
75- self .elements [eid ] = UIElement (
76- element_id = eid , role = role , name = name , bbox = bbox
77- )
75+ self .elements [eid ] = UIElement (element_id = eid , role = role , name = name , bbox = bbox )
7876
7977 def get_element (self , element_id : int ) -> UIElement | None :
8078 """Get element by ID."""
@@ -137,7 +135,9 @@ def process_node(node: dict[str, Any]) -> None:
137135 node_id = node .get ("id" , node .get ("node_id" , node .get ("element_id" )))
138136 if node_id is not None :
139137 try :
140- eid = int (str (node_id ).replace ("e" , "" ).replace ("[" , "" ).replace ("]" , "" ))
138+ eid = int (
139+ str (node_id ).replace ("e" , "" ).replace ("[" , "" ).replace ("]" , "" )
140+ )
141141 bbox = node .get ("bbox" , node .get ("bounds" ))
142142 if bbox and len (bbox ) >= 4 :
143143 registry .add_element (
@@ -277,7 +277,9 @@ def to_pyautogui(
277277 py = int (self .y * screen_height )
278278 return f"pyautogui.click({ px } , { py } )"
279279 elif self .element_id is not None :
280- return f"# CLICK element { self .element_id } (needs coordinate conversion)"
280+ return (
281+ f"# CLICK element { self .element_id } (needs coordinate conversion)"
282+ )
281283 elif self .action_type == "type" :
282284 text = self .text or ""
283285 return f"pyautogui.write('{ text } ')"
@@ -444,10 +446,10 @@ def _try_json_parse(self, response: str) -> ParsedAction:
444446 """Try to extract and parse JSON from response."""
445447 # Try to find JSON object in response
446448 json_patterns = [
447- r' ```json\s*(\{[^`]*\})\s*```' , # Markdown code block
448- r' ```\s*(\{[^`]*\})\s*```' , # Plain code block
449- r' (\{[^{}]*\})' , # Simple JSON object
450- r' (\{[^{}]*\{[^{}]*\}[^{}]*\})' , # Nested JSON (max 1 level)
449+ r" ```json\s*(\{[^`]*\})\s*```" , # Markdown code block
450+ r" ```\s*(\{[^`]*\})\s*```" , # Plain code block
451+ r" (\{[^{}]*\})" , # Simple JSON object
452+ r" (\{[^{}]*\{[^{}]*\}[^{}]*\})" , # Nested JSON (max 1 level)
451453 ]
452454
453455 for pattern in json_patterns :
@@ -621,7 +623,7 @@ def _try_pyautogui_parse(self, response: str) -> ParsedAction:
621623 """Try to parse PyAutoGUI-style code."""
622624 # pyautogui.click(x, y)
623625 click_match = re .search (
624- r' pyautogui\.click\s*\(\s*(\d+)\s*,\s*(\d+)\s*\)' ,
626+ r" pyautogui\.click\s*\(\s*(\d+)\s*,\s*(\d+)\s*\)" ,
625627 response ,
626628 re .IGNORECASE ,
627629 )
@@ -633,7 +635,7 @@ def _try_pyautogui_parse(self, response: str) -> ParsedAction:
633635
634636 # pyautogui.doubleClick(x, y)
635637 dclick_match = re .search (
636- r' pyautogui\.doubleClick\s*\(\s*(\d+)\s*,\s*(\d+)\s*\)' ,
638+ r" pyautogui\.doubleClick\s*\(\s*(\d+)\s*,\s*(\d+)\s*\)" ,
637639 response ,
638640 re .IGNORECASE ,
639641 )
@@ -668,7 +670,7 @@ def _try_pyautogui_parse(self, response: str) -> ParsedAction:
668670
669671 # pyautogui.hotkey('key1', 'key2')
670672 hotkey_match = re .search (
671- r' pyautogui\.hotkey\s*\(\s*(.+?)\s*\)' ,
673+ r" pyautogui\.hotkey\s*\(\s*(.+?)\s*\)" ,
672674 response ,
673675 re .IGNORECASE ,
674676 )
@@ -687,7 +689,7 @@ def _try_pyautogui_parse(self, response: str) -> ParsedAction:
687689
688690 # pyautogui.scroll(amount)
689691 scroll_match = re .search (
690- r' pyautogui\.scroll\s*\(\s*(-?\d+)\s*\)' ,
692+ r" pyautogui\.scroll\s*\(\s*(-?\d+)\s*\)" ,
691693 response ,
692694 re .IGNORECASE ,
693695 )
@@ -700,13 +702,15 @@ def _try_pyautogui_parse(self, response: str) -> ParsedAction:
700702 amount = abs (clicks ),
701703 )
702704
703- return ParsedAction (action_type = "unknown" , parse_error = "No PyAutoGUI pattern matched" )
705+ return ParsedAction (
706+ action_type = "unknown" , parse_error = "No PyAutoGUI pattern matched"
707+ )
704708
705709 def _try_regex_parse (self , response : str ) -> ParsedAction :
706710 """Try regex patterns for function-style actions."""
707711 # CLICK(x, y) - normalized coordinates
708712 click_norm = re .search (
709- r' CLICK\s*\(\s*(0?\.\d+)\s*,\s*(0?\.\d+)\s*\)' ,
713+ r" CLICK\s*\(\s*(0?\.\d+)\s*,\s*(0?\.\d+)\s*\)" ,
710714 response ,
711715 re .IGNORECASE ,
712716 )
@@ -719,7 +723,7 @@ def _try_regex_parse(self, response: str) -> ParsedAction:
719723
720724 # CLICK(x, y) - larger numbers (pixels)
721725 click_pixel = re .search (
722- r' CLICK\s*\(\s*(\d+(?:\.\d+)?)\s*,\s*(\d+(?:\.\d+)?)\s*\)' ,
726+ r" CLICK\s*\(\s*(\d+(?:\.\d+)?)\s*,\s*(\d+(?:\.\d+)?)\s*\)" ,
723727 response ,
724728 re .IGNORECASE ,
725729 )
@@ -731,7 +735,7 @@ def _try_regex_parse(self, response: str) -> ParsedAction:
731735
732736 # CLICK([id]) - element ID
733737 click_element = re .search (
734- r' CLICK\s*\(\s*\[\s*(\d+)\s*\]\s*\)' ,
738+ r" CLICK\s*\(\s*\[\s*(\d+)\s*\]\s*\)" ,
735739 response ,
736740 re .IGNORECASE ,
737741 )
@@ -743,7 +747,7 @@ def _try_regex_parse(self, response: str) -> ParsedAction:
743747
744748 # CLICK(id) without brackets
745749 click_id = re .search (
746- r' CLICK\s*\(\s*(\d+)\s*\)' ,
750+ r" CLICK\s*\(\s*(\d+)\s*\)" ,
747751 response ,
748752 re .IGNORECASE ,
749753 )
@@ -764,50 +768,65 @@ def _try_regex_parse(self, response: str) -> ParsedAction:
764768
765769 # KEY(key) or KEY(mod+key)
766770 key_match = re .search (
767- r' KEY\s*\(\s*([a-zA-Z0-9_+]+)\s*\)' ,
771+ r" KEY\s*\(\s*([a-zA-Z0-9_+]+)\s*\)" ,
768772 response ,
769773 re .IGNORECASE ,
770774 )
771775 if key_match :
772776 key_str = key_match .group (1 ).lower ()
773- if '+' in key_str :
774- parts = key_str .split ('+' )
777+ if "+" in key_str :
778+ parts = key_str .split ("+" )
775779 modifiers = parts [:- 1 ]
776780 key = parts [- 1 ]
777781 return ParsedAction (action_type = "key" , key = key , modifiers = modifiers )
778782 return ParsedAction (action_type = "key" , key = key_str )
779783
780784 # SCROLL(direction) or SCROLL(direction, amount)
781785 scroll_match = re .search (
782- r' SCROLL\s*\(\s*([a-zA-Z]+)(?:\s*,\s*(\d+))?\s*\)' ,
786+ r" SCROLL\s*\(\s*([a-zA-Z]+)(?:\s*,\s*(\d+))?\s*\)" ,
783787 response ,
784788 re .IGNORECASE ,
785789 )
786790 if scroll_match :
787791 direction = scroll_match .group (1 ).lower ()
788792 amount = int (scroll_match .group (2 )) if scroll_match .group (2 ) else 3
789- return ParsedAction (action_type = "scroll" , direction = direction , amount = amount )
793+ return ParsedAction (
794+ action_type = "scroll" , direction = direction , amount = amount
795+ )
790796
791- return ParsedAction (action_type = "unknown" , parse_error = "No regex pattern matched" )
797+ return ParsedAction (
798+ action_type = "unknown" , parse_error = "No regex pattern matched"
799+ )
792800
793801 def _try_keyword_parse (self , response : str ) -> ParsedAction :
794802 """Try special keywords."""
795803 response_upper = response .upper ().strip ()
796804
797805 # DONE() or just DONE
798- if re .search (r'\bDONE\s*\(\s*\)\s*$' , response , re .IGNORECASE ) or response_upper == "DONE" :
806+ if (
807+ re .search (r"\bDONE\s*\(\s*\)\s*$" , response , re .IGNORECASE )
808+ or response_upper == "DONE"
809+ ):
799810 return ParsedAction (action_type = "done" )
800811
801812 # WAIT() or WAIT
802- if re .search (r'\bWAIT\s*\(\s*\)\s*$' , response , re .IGNORECASE ) or response_upper == "WAIT" :
813+ if (
814+ re .search (r"\bWAIT\s*\(\s*\)\s*$" , response , re .IGNORECASE )
815+ or response_upper == "WAIT"
816+ ):
803817 return ParsedAction (action_type = "wait" )
804818
805819 # FAIL() or FAIL
806- if re .search (r'\bFAIL\s*\(\s*\)\s*$' , response , re .IGNORECASE ) or response_upper == "FAIL" :
820+ if (
821+ re .search (r"\bFAIL\s*\(\s*\)\s*$" , response , re .IGNORECASE )
822+ or response_upper == "FAIL"
823+ ):
807824 return ParsedAction (action_type = "fail" )
808825
809826 # Look for "task is complete" or similar phrases
810- if re .search (r'task\s+(?:is\s+)?(?:complete|done|finished)' , response , re .IGNORECASE ):
827+ if re .search (
828+ r"task\s+(?:is\s+)?(?:complete|done|finished)" , response , re .IGNORECASE
829+ ):
811830 return ParsedAction (
812831 action_type = "done" ,
813832 confidence = 0.7 ,
@@ -842,7 +861,7 @@ def _normalize_element_id(self, element_id: Any) -> int | None:
842861
843862 if isinstance (element_id , str ):
844863 # Extract number from "e17", "[17]", "element_17" etc.
845- match = re .search (r' \d+' , element_id )
864+ match = re .search (r" \d+" , element_id )
846865 if match :
847866 return int (match .group ())
848867
0 commit comments