2121_element_map = {} # ref_id -> element
2222
2323
async def _ensure_playwright():
    """Lazily import Playwright's async API and cache its context manager.

    On first use the object returned by ``async_playwright()`` is stored in
    the module-level ``_playwright``; later calls return immediately. The
    object is only created here, not started.

    Raises:
        RuntimeError: If the ``playwright`` package cannot be imported. The
            original ``ImportError`` is attached as ``__cause__``.
    """
    global _playwright
    if _playwright is not None:
        return

    # Keep the try body down to the import so unrelated failures are not
    # misreported as a missing installation.
    try:
        from playwright.async_api import async_playwright
    except ImportError as exc:
        raise RuntimeError(
            "playwright is not installed. Run: pip install playwright && playwright install chromium"
        ) from exc

    _playwright = async_playwright()
3636
3737
38- def _get_browser ():
38+ async def _get_browser ():
3939 """Get or create a persistent browser instance."""
4040 global _browser , _context , _page , _console_logs
41- _ensure_playwright ()
41+ await _ensure_playwright ()
4242
4343 if _browser is None :
44- pw = _playwright .start ()
44+ pw = await _playwright .start ()
4545 headless = os .environ .get ("ICLAW_BROWSER_HEADLESS" , "1" ) == "1"
46- _browser = pw .chromium .launch (headless = headless )
47- _context = _browser .new_context (
46+ _browser = await pw .chromium .launch (headless = headless )
47+ _context = await _browser .new_context (
4848 viewport = {"width" : 1280 , "height" : 720 },
4949 user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" ,
5050 )
51- _page = _context .new_page ()
51+ _page = await _context .new_page ()
5252 _console_logs = []
5353
5454 # Capture console messages
@@ -74,13 +74,13 @@ def _get_browser():
7474 return _page
7575
7676
77- def _build_element_map (page ):
77+ async def _build_element_map (page ):
7878 """Build a map of ref_id -> interactive elements on the page."""
7979 global _element_map
8080 _element_map = {}
8181
8282 # Get all interactive elements
83- elements = page .query_selector_all (
83+ elements = await page .query_selector_all (
8484 "a, button, input, textarea, select, [role='button'], [role='link'], "
8585 "[role='tab'], [role='menuitem'], [onclick], [tabindex]"
8686 )
@@ -92,19 +92,19 @@ def _build_element_map(page):
9292 return _element_map
9393
9494
95- def _get_element_snapshot (page ) -> str :
95+ async def _get_element_snapshot (page ) -> str :
9696 """Build a text snapshot of the page with ref IDs for interactive elements."""
97- _build_element_map (page )
97+ await _build_element_map (page )
9898
9999 # Get page title and URL
100- title = page .title ()
100+ title = await page .title ()
101101 url = page .url
102102
103103 # Get accessibility tree
104104 snapshot_lines = [f"Page: { title } " , f"URL: { url } " , "" ]
105105
106106 # Get all visible text elements with their ref IDs
107- result = page .evaluate ("""() => {
107+ result = await page .evaluate ("""() => {
108108 const items = [];
109109 let refCounter = 0;
110110
@@ -202,29 +202,29 @@ def _get_element_snapshot(page) -> str:
202202# --- Public API functions (called by tool dispatch) ---
203203
204204
async def browser_navigate(url: str) -> str:
    """Navigate to a URL and return a snapshot.

    Args:
        url: Address to load in the page returned by ``_get_browser()``.

    Returns:
        ``"Navigated to <url>"`` followed by the element snapshot, or an
        ``"Error navigating to <url>: ..."`` message when the navigation
        itself fails.
    """
    log_verbose(f"[browser] Navigating to {url}")
    page = await _get_browser()

    # A failed navigation must not be reported as success, so it is handled
    # separately from the tolerated networkidle timeout below.
    try:
        await page.goto(url, wait_until="domcontentloaded", timeout=30000)
    except Exception as e:
        return f"Error navigating to {url}: {e}"

    try:
        await page.wait_for_load_state("networkidle", timeout=10000)
    except Exception:
        # networkidle may timeout on heavy pages, that's ok
        pass

    snapshot = await _get_element_snapshot(page)
    return f"Navigated to {url}\n\n{snapshot}"
218218
219219
220- def browser_snapshot (full : bool = False ) -> str :
220+ async def browser_snapshot (full : bool = False ) -> str :
221221 """Get current page snapshot with element refs."""
222- page = _get_browser ()
223- snapshot = _get_element_snapshot (page )
222+ page = await _get_browser ()
223+ snapshot = await _get_element_snapshot (page )
224224
225225 if full :
226226 # Also get full page text
227- full_text = page .evaluate ("() => document.body.innerText" )
227+ full_text = await page .evaluate ("() => document.body.innerText" )
228228 # Truncate to reasonable size
229229 if len (full_text ) > 8000 :
230230 full_text = full_text [:8000 ] + "\n ... (truncated)"
@@ -233,88 +233,88 @@ def browser_snapshot(full: bool = False) -> str:
233233 return snapshot
234234
235235
async def browser_click(ref: str) -> str:
    """Click an element by its ref ID.

    Args:
        ref: Key into the module-level ``_element_map``.

    Returns:
        ``"Clicked <ref>"`` followed by a fresh snapshot, or an ``"Error..."``
        message when the ref is unknown or the click itself fails.
    """
    log_verbose(f"[browser] Clicking {ref}")
    page = await _get_browser()

    if ref not in _element_map:
        # Try rebuilding the map
        await _build_element_map(page)
        if ref not in _element_map:
            return f"Error: Element {ref} not found. Take a new snapshot first."

    el = _element_map[ref]
    # Report click failures instead of claiming success, matching the error
    # style used by browser_type.
    try:
        await el.scroll_into_view_if_needed()
        await el.click(timeout=5000)
    except Exception as e:
        return f"Error clicking {ref}: {e}"

    try:
        await page.wait_for_load_state("domcontentloaded", timeout=5000)
    except Exception:
        # Best-effort wait: the click already happened, so a slow load is
        # not treated as a failure.
        pass

    snapshot = await _get_element_snapshot(page)
    return f"Clicked {ref}\n\n{snapshot}"
257257
258258
async def browser_type(ref: str, text: str, submit: bool = False) -> str:
    """Type text into an element. If submit=True, press Enter after.

    Args:
        ref: Key into the module-level ``_element_map``.
        text: Value passed to the element's ``fill``.
        submit: When true, press Enter on the element after filling it.

    Returns:
        ``"Typed into <ref>"`` followed by a fresh snapshot, or an
        ``"Error..."`` message when the ref is unknown or typing fails.
    """
    log_verbose(f"[browser] Typing into {ref}: {text[:50]}...")
    page = await _get_browser()

    if ref not in _element_map:
        await _build_element_map(page)
        if ref not in _element_map:
            return f"Error: Element {ref} not found. Take a new snapshot first."

    el = _element_map[ref]
    try:
        await el.scroll_into_view_if_needed()
        await el.click()
        await el.fill(text)
        if submit:
            await el.press("Enter")
    except Exception as e:
        return f"Error typing into {ref}: {e}"

    if submit:
        try:
            await page.wait_for_load_state("domcontentloaded", timeout=5000)
        except Exception:
            # The text was entered and submitted; a slow page load afterwards
            # is not a typing error, so still return the snapshot.
            pass

    snapshot = await _get_element_snapshot(page)
    return f"Typed into {ref}\n\n{snapshot}"
282282
283283
async def browser_press(key: str) -> str:
    """Press a keyboard key (Enter, Tab, Escape, ArrowDown, etc.).

    Args:
        key: Key name passed to ``page.keyboard.press``.

    Returns:
        ``"Pressed <key>"`` followed by a fresh snapshot, or an
        ``"Error pressing <key>: ..."`` message when the key press fails.
    """
    log_verbose(f"[browser] Pressing {key}")
    page = await _get_browser()

    # A rejected key press must be reported, not reported as "Pressed".
    try:
        await page.keyboard.press(key)
    except Exception as e:
        return f"Error pressing {key}: {e}"

    try:
        await page.wait_for_load_state("domcontentloaded", timeout=3000)
    except Exception:
        # Best-effort wait after the key press.
        pass

    snapshot = await _get_element_snapshot(page)
    return f"Pressed {key}\n\n{snapshot}"
297297
298298
async def browser_scroll(direction: str = "down") -> str:
    """Scroll the page up or down.

    Args:
        direction: ``"down"`` or ``"up"`` (case and surrounding whitespace
            are ignored).

    Returns:
        ``"Scrolled <direction>"`` followed by a fresh snapshot, or an
        ``"Error..."`` message for any other direction.
    """
    deltas = {"down": 500, "up": -500}
    normalized = direction.strip().lower() if isinstance(direction, str) else direction

    # Validate before touching the browser: an unknown direction used to
    # scroll up silently and was then echoed back as if it had been honoured.
    if normalized not in deltas:
        return f"Error: Unknown scroll direction {direction!r}. Use 'up' or 'down'."

    page = await _get_browser()
    await page.evaluate(f"window.scrollBy(0, {deltas[normalized]})")

    snapshot = await _get_element_snapshot(page)
    return f"Scrolled {direction}\n\n{snapshot}"
307307
308308
309- def browser_screenshot (save_path : Optional [str ] = None ) -> str :
309+ async def browser_screenshot (save_path : Optional [str ] = None ) -> str :
310310 """Take a screenshot. Returns path to the saved image."""
311- page = _get_browser ()
311+ page = await _get_browser ()
312312
313313 if save_path is None :
314314 fd , save_path = tempfile .mkstemp (suffix = ".png" , prefix = "iclaw_browser_" )
315315 os .close (fd )
316316
317- page .screenshot (path = save_path , full_page = False )
317+ await page .screenshot (path = save_path , full_page = False )
318318
319319 # Also return a base64 thumbnail for vision models
320320 with open (save_path , "rb" ) as f :
@@ -323,7 +323,7 @@ def browser_screenshot(save_path: Optional[str] = None) -> str:
323323 return f"Screenshot saved to { save_path } \n Base64 ({ len (b64 )} chars): data:image/png;base64,{ b64 [:200 ]} ..."
324324
325325
326- def browser_console (clear : bool = False ) -> str :
326+ async def browser_console (clear : bool = False ) -> str :
327327 """Get browser console output."""
328328 global _console_logs
329329
@@ -341,34 +341,34 @@ def browser_console(clear: bool = False) -> str:
341341 return "\n " .join (output )
342342
343343
async def browser_back() -> str:
    """Go one entry back in the browser history and return a fresh snapshot.

    Returns:
        ``"Navigated back"`` followed by the element snapshot of the page.
    """
    current_page = await _get_browser()
    await current_page.go_back(timeout=10000, wait_until="domcontentloaded")
    page_snapshot = await _get_element_snapshot(current_page)
    return f"Navigated back\n\n{page_snapshot}"
350350
351351
async def browser_close() -> str:
    """Close the browser and clean up.

    Closes the page, context and browser (in that order), clears the console
    log and element map, and resets the module-level handles to ``None``.

    Returns:
        ``"Browser closed."`` on a clean shutdown, otherwise
        ``"Browser closed with errors: ..."`` listing each failed close.
    """
    global _browser, _context, _page, _console_logs, _element_map

    # Detach the handles up front so module state is reset even if one of the
    # close() calls below raises.
    handles = (("page", _page), ("context", _context), ("browser", _browser))
    _page = None
    _context = None
    _browser = None
    _console_logs.clear()
    _element_map.clear()

    # Attempt every close; one failure must not leak the remaining resources.
    failures = []
    for label, handle in handles:
        if not handle:
            continue
        try:
            await handle.close()
        except Exception as e:
            failures.append(f"{label}: {e}")

    if failures:
        return "Browser closed with errors: " + "; ".join(failures)
    return "Browser closed."
369369
370370
371- def dispatch_browser_call (function_name : str , args : dict ) -> str :
371+ async def dispatch_browser_call (function_name : str , args : dict ) -> str :
372372 """Dispatch a browser tool call to the appropriate function."""
373373 dispatch = {
374374 "browser_navigate" : lambda a : browser_navigate (a ["url" ]),
@@ -390,6 +390,6 @@ def dispatch_browser_call(function_name: str, args: dict) -> str:
390390 return f"Unknown browser function: { function_name } "
391391
392392 try :
393- return handler (args )
393+ return await handler (args )
394394 except Exception as e :
395395 return f"Browser error: { e } "
0 commit comments