Skip to content

Commit 364961d

Browse files
committed
refactor: convert browser_tool from sync to async Playwright
- Switch from playwright.sync_api to playwright.async_api
- Add async/await to all browser functions and helpers
- Update dispatch_browser_call and main.py call sites to await
1 parent f148313 commit 364961d

2 files changed

Lines changed: 69 additions & 65 deletions

File tree

iclaw/main.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -356,7 +356,9 @@ async def _main():
356356
if not url.startswith(("http://", "https://")):
357357
url = "https://" + url
358358
try:
359-
output = dispatch_browser_call("browser_navigate", {"url": url})
359+
output = await dispatch_browser_call(
360+
"browser_navigate", {"url": url}
361+
)
360362
print(output)
361363
except Exception as e:
362364
print(f"Browser error: {e}", file=sys.stderr)
@@ -522,7 +524,9 @@ async def _main():
522524
)
523525

524526
if function_name in BROWSER_TOOL_NAMES:
525-
output = dispatch_browser_call(function_name, function_args)
527+
output = await dispatch_browser_call(
528+
function_name, function_args
529+
)
526530
tool_logs.append(
527531
{
528532
"timestamp": time.time(),

iclaw/tools/browser_tool.py

Lines changed: 63 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -21,34 +21,34 @@
2121
_element_map = {} # ref_id -> element
2222

2323

24-
def _ensure_playwright():
24+
async def _ensure_playwright():
2525
"""Lazy import and install check."""
2626
global _playwright
2727
if _playwright is None:
2828
try:
29-
from playwright.sync_api import sync_playwright
29+
from playwright.async_api import async_playwright
3030

31-
_playwright = sync_playwright()
31+
_playwright = async_playwright()
3232
except ImportError:
3333
raise RuntimeError(
3434
"playwright is not installed. Run: pip install playwright && playwright install chromium"
3535
)
3636

3737

38-
def _get_browser():
38+
async def _get_browser():
3939
"""Get or create a persistent browser instance."""
4040
global _browser, _context, _page, _console_logs
41-
_ensure_playwright()
41+
await _ensure_playwright()
4242

4343
if _browser is None:
44-
pw = _playwright.start()
44+
pw = await _playwright.start()
4545
headless = os.environ.get("ICLAW_BROWSER_HEADLESS", "1") == "1"
46-
_browser = pw.chromium.launch(headless=headless)
47-
_context = _browser.new_context(
46+
_browser = await pw.chromium.launch(headless=headless)
47+
_context = await _browser.new_context(
4848
viewport={"width": 1280, "height": 720},
4949
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
5050
)
51-
_page = _context.new_page()
51+
_page = await _context.new_page()
5252
_console_logs = []
5353

5454
# Capture console messages
@@ -74,13 +74,13 @@ def _get_browser():
7474
return _page
7575

7676

77-
def _build_element_map(page):
77+
async def _build_element_map(page):
7878
"""Build a map of ref_id -> interactive elements on the page."""
7979
global _element_map
8080
_element_map = {}
8181

8282
# Get all interactive elements
83-
elements = page.query_selector_all(
83+
elements = await page.query_selector_all(
8484
"a, button, input, textarea, select, [role='button'], [role='link'], "
8585
"[role='tab'], [role='menuitem'], [onclick], [tabindex]"
8686
)
@@ -92,19 +92,19 @@ def _build_element_map(page):
9292
return _element_map
9393

9494

95-
def _get_element_snapshot(page) -> str:
95+
async def _get_element_snapshot(page) -> str:
9696
"""Build a text snapshot of the page with ref IDs for interactive elements."""
97-
_build_element_map(page)
97+
await _build_element_map(page)
9898

9999
# Get page title and URL
100-
title = page.title()
100+
title = await page.title()
101101
url = page.url
102102

103103
# Get accessibility tree
104104
snapshot_lines = [f"Page: {title}", f"URL: {url}", ""]
105105

106106
# Get all visible text elements with their ref IDs
107-
result = page.evaluate("""() => {
107+
result = await page.evaluate("""() => {
108108
const items = [];
109109
let refCounter = 0;
110110
@@ -202,29 +202,29 @@ def _get_element_snapshot(page) -> str:
202202
# --- Public API functions (called by tool dispatch) ---
203203

204204

205-
def browser_navigate(url: str) -> str:
205+
async def browser_navigate(url: str) -> str:
206206
"""Navigate to a URL and return a snapshot."""
207207
log_verbose(f"[browser] Navigating to {url}")
208-
page = _get_browser()
208+
page = await _get_browser()
209209
try:
210-
page.goto(url, wait_until="domcontentloaded", timeout=30000)
211-
page.wait_for_load_state("networkidle", timeout=10000)
210+
await page.goto(url, wait_until="domcontentloaded", timeout=30000)
211+
await page.wait_for_load_state("networkidle", timeout=10000)
212212
except Exception:
213213
# networkidle may timeout on heavy pages, that's ok
214214
pass
215215

216-
snapshot = _get_element_snapshot(page)
216+
snapshot = await _get_element_snapshot(page)
217217
return f"Navigated to {url}\n\n{snapshot}"
218218

219219

220-
def browser_snapshot(full: bool = False) -> str:
220+
async def browser_snapshot(full: bool = False) -> str:
221221
"""Get current page snapshot with element refs."""
222-
page = _get_browser()
223-
snapshot = _get_element_snapshot(page)
222+
page = await _get_browser()
223+
snapshot = await _get_element_snapshot(page)
224224

225225
if full:
226226
# Also get full page text
227-
full_text = page.evaluate("() => document.body.innerText")
227+
full_text = await page.evaluate("() => document.body.innerText")
228228
# Truncate to reasonable size
229229
if len(full_text) > 8000:
230230
full_text = full_text[:8000] + "\n... (truncated)"
@@ -233,88 +233,88 @@ def browser_snapshot(full: bool = False) -> str:
233233
return snapshot
234234

235235

236-
def browser_click(ref: str) -> str:
236+
async def browser_click(ref: str) -> str:
237237
"""Click an element by its ref ID."""
238238
log_verbose(f"[browser] Clicking {ref}")
239-
page = _get_browser()
239+
page = await _get_browser()
240240

241241
if ref not in _element_map:
242242
# Try rebuilding the map
243-
_build_element_map(page)
243+
await _build_element_map(page)
244244
if ref not in _element_map:
245245
return f"Error: Element {ref} not found. Take a new snapshot first."
246246

247247
try:
248248
el = _element_map[ref]
249-
el.scroll_into_view_if_needed()
250-
el.click(timeout=5000)
251-
page.wait_for_load_state("domcontentloaded", timeout=5000)
249+
await el.scroll_into_view_if_needed()
250+
await el.click(timeout=5000)
251+
await page.wait_for_load_state("domcontentloaded", timeout=5000)
252252
except Exception:
253253
pass
254254

255-
snapshot = _get_element_snapshot(page)
255+
snapshot = await _get_element_snapshot(page)
256256
return f"Clicked {ref}\n\n{snapshot}"
257257

258258

259-
def browser_type(ref: str, text: str, submit: bool = False) -> str:
259+
async def browser_type(ref: str, text: str, submit: bool = False) -> str:
260260
"""Type text into an element. If submit=True, press Enter after."""
261261
log_verbose(f"[browser] Typing into {ref}: {text[:50]}...")
262-
page = _get_browser()
262+
page = await _get_browser()
263263

264264
if ref not in _element_map:
265-
_build_element_map(page)
265+
await _build_element_map(page)
266266
if ref not in _element_map:
267267
return f"Error: Element {ref} not found. Take a new snapshot first."
268268

269269
try:
270270
el = _element_map[ref]
271-
el.scroll_into_view_if_needed()
272-
el.click()
273-
el.fill(text)
271+
await el.scroll_into_view_if_needed()
272+
await el.click()
273+
await el.fill(text)
274274
if submit:
275-
el.press("Enter")
276-
page.wait_for_load_state("domcontentloaded", timeout=5000)
275+
await el.press("Enter")
276+
await page.wait_for_load_state("domcontentloaded", timeout=5000)
277277
except Exception as e:
278278
return f"Error typing into {ref}: {e}"
279279

280-
snapshot = _get_element_snapshot(page)
280+
snapshot = await _get_element_snapshot(page)
281281
return f"Typed into {ref}\n\n{snapshot}"
282282

283283

284-
def browser_press(key: str) -> str:
284+
async def browser_press(key: str) -> str:
285285
"""Press a keyboard key (Enter, Tab, Escape, ArrowDown, etc.)."""
286286
log_verbose(f"[browser] Pressing {key}")
287-
page = _get_browser()
287+
page = await _get_browser()
288288

289289
try:
290-
page.keyboard.press(key)
291-
page.wait_for_load_state("domcontentloaded", timeout=3000)
290+
await page.keyboard.press(key)
291+
await page.wait_for_load_state("domcontentloaded", timeout=3000)
292292
except Exception:
293293
pass
294294

295-
snapshot = _get_element_snapshot(page)
295+
snapshot = await _get_element_snapshot(page)
296296
return f"Pressed {key}\n\n{snapshot}"
297297

298298

299-
def browser_scroll(direction: str = "down") -> str:
299+
async def browser_scroll(direction: str = "down") -> str:
300300
"""Scroll the page up or down."""
301-
page = _get_browser()
301+
page = await _get_browser()
302302
delta = 500 if direction == "down" else -500
303-
page.evaluate(f"window.scrollBy(0, {delta})")
303+
await page.evaluate(f"window.scrollBy(0, {delta})")
304304

305-
snapshot = _get_element_snapshot(page)
305+
snapshot = await _get_element_snapshot(page)
306306
return f"Scrolled {direction}\n\n{snapshot}"
307307

308308

309-
def browser_screenshot(save_path: Optional[str] = None) -> str:
309+
async def browser_screenshot(save_path: Optional[str] = None) -> str:
310310
"""Take a screenshot. Returns path to the saved image."""
311-
page = _get_browser()
311+
page = await _get_browser()
312312

313313
if save_path is None:
314314
fd, save_path = tempfile.mkstemp(suffix=".png", prefix="iclaw_browser_")
315315
os.close(fd)
316316

317-
page.screenshot(path=save_path, full_page=False)
317+
await page.screenshot(path=save_path, full_page=False)
318318

319319
# Also return a base64 thumbnail for vision models
320320
with open(save_path, "rb") as f:
@@ -323,7 +323,7 @@ def browser_screenshot(save_path: Optional[str] = None) -> str:
323323
return f"Screenshot saved to {save_path}\nBase64 ({len(b64)} chars): data:image/png;base64,{b64[:200]}..."
324324

325325

326-
def browser_console(clear: bool = False) -> str:
326+
async def browser_console(clear: bool = False) -> str:
327327
"""Get browser console output."""
328328
global _console_logs
329329

@@ -341,34 +341,34 @@ def browser_console(clear: bool = False) -> str:
341341
return "\n".join(output)
342342

343343

344-
def browser_back() -> str:
344+
async def browser_back() -> str:
345345
"""Navigate back in browser history."""
346-
page = _get_browser()
347-
page.go_back(wait_until="domcontentloaded", timeout=10000)
348-
snapshot = _get_element_snapshot(page)
346+
page = await _get_browser()
347+
await page.go_back(wait_until="domcontentloaded", timeout=10000)
348+
snapshot = await _get_element_snapshot(page)
349349
return f"Navigated back\n\n{snapshot}"
350350

351351

352-
def browser_close() -> str:
352+
async def browser_close() -> str:
353353
"""Close the browser and clean up."""
354354
global _browser, _context, _page, _console_logs, _element_map
355355

356356
if _page:
357-
_page.close()
357+
await _page.close()
358358
_page = None
359359
if _context:
360-
_context.close()
360+
await _context.close()
361361
_context = None
362362
if _browser:
363-
_browser.close()
363+
await _browser.close()
364364
_browser = None
365365
_console_logs.clear()
366366
_element_map.clear()
367367

368368
return "Browser closed."
369369

370370

371-
def dispatch_browser_call(function_name: str, args: dict) -> str:
371+
async def dispatch_browser_call(function_name: str, args: dict) -> str:
372372
"""Dispatch a browser tool call to the appropriate function."""
373373
dispatch = {
374374
"browser_navigate": lambda a: browser_navigate(a["url"]),
@@ -390,6 +390,6 @@ def dispatch_browser_call(function_name: str, args: dict) -> str:
390390
return f"Unknown browser function: {function_name}"
391391

392392
try:
393-
return handler(args)
393+
return await handler(args)
394394
except Exception as e:
395395
return f"Browser error: {e}"

0 commit comments

Comments
 (0)