Skip to content

Commit 77c7ff0

Browse files
committed
feat(browser): support headed mode
1 parent 0be63f1 commit 77c7ff0

6 files changed

Lines changed: 322 additions & 54 deletions

File tree

crates/rexos-tools/src/lib.rs

Lines changed: 116 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -143,8 +143,8 @@ impl Toolset {
143143
self.shell(&args.command, timeout_ms).await
144144
}
145145
"docker_exec" => {
146-
let args: DockerExecArgs = serde_json::from_str(arguments_json)
147-
.context("parse docker_exec arguments")?;
146+
let args: DockerExecArgs =
147+
serde_json::from_str(arguments_json).context("parse docker_exec arguments")?;
148148
self.docker_exec(&args.command).await
149149
}
150150
"process_start" => {
@@ -189,8 +189,8 @@ impl Toolset {
189189
self.web_search(&args.query, args.max_results).await
190190
}
191191
"a2a_discover" => {
192-
let args: A2aDiscoverArgs = serde_json::from_str(arguments_json)
193-
.context("parse a2a_discover arguments")?;
192+
let args: A2aDiscoverArgs =
193+
serde_json::from_str(arguments_json).context("parse a2a_discover arguments")?;
194194
self.a2a_discover(&args.url, args.allow_private).await
195195
}
196196
"a2a_send" => {
@@ -201,8 +201,13 @@ impl Toolset {
201201
.as_deref()
202202
.or(args.url.as_deref())
203203
.context("missing agent_url (or url) for a2a_send")?;
204-
self.a2a_send(url, &args.message, args.session_id.as_deref(), args.allow_private)
205-
.await
204+
self.a2a_send(
205+
url,
206+
&args.message,
207+
args.session_id.as_deref(),
208+
args.allow_private,
209+
)
210+
.await
206211
}
207212
"image_analyze" => {
208213
let args: ImageAnalyzeArgs = serde_json::from_str(arguments_json)
@@ -247,8 +252,13 @@ impl Toolset {
247252
"browser_navigate" => {
248253
let args: BrowserNavigateArgs = serde_json::from_str(arguments_json)
249254
.context("parse browser_navigate arguments")?;
250-
self.browser_navigate(&args.url, args.timeout_ms, args.allow_private)
251-
.await
255+
self.browser_navigate(
256+
&args.url,
257+
args.timeout_ms,
258+
args.allow_private,
259+
args.headless,
260+
)
261+
.await
252262
}
253263
"browser_close" => {
254264
let _args: serde_json::Value = serde_json::from_str(arguments_json)
@@ -630,10 +640,8 @@ impl Toolset {
630640
stderr: stderr_buf,
631641
};
632642

633-
mgr.processes.insert(
634-
process_id.clone(),
635-
Arc::new(tokio::sync::Mutex::new(entry)),
636-
);
643+
mgr.processes
644+
.insert(process_id.clone(), Arc::new(tokio::sync::Mutex::new(entry)));
637645

638646
Ok(serde_json::json!({
639647
"process_id": process_id,
@@ -704,10 +712,7 @@ impl Toolset {
704712

705713
let timeout = Duration::from_secs(5);
706714
let mut guard = entry.lock().await;
707-
let stdin = guard
708-
.stdin
709-
.as_mut()
710-
.context("process stdin is closed")?;
715+
let stdin = guard.stdin.as_mut().context("process stdin is closed")?;
711716

712717
tokio::time::timeout(timeout, stdin.write_all(data.as_bytes()))
713718
.await
@@ -1155,7 +1160,8 @@ impl Toolset {
11551160
bytes.extend_from_slice(&sample.to_le_bytes());
11561161
}
11571162

1158-
std::fs::write(&out_path, &bytes).with_context(|| format!("write {}", out_path.display()))?;
1163+
std::fs::write(&out_path, &bytes)
1164+
.with_context(|| format!("write {}", out_path.display()))?;
11591165

11601166
Ok(serde_json::json!({
11611167
"path": rel,
@@ -1216,7 +1222,8 @@ impl Toolset {
12161222
"<!DOCTYPE html>\n<html>\n<head><meta charset=\"utf-8\"><title>{safe_title}</title></head>\n<body>\n{sanitized}\n</body>\n</html>\n"
12171223
);
12181224

1219-
std::fs::write(&out_path, &full).with_context(|| format!("write {}", out_path.display()))?;
1225+
std::fs::write(&out_path, &full)
1226+
.with_context(|| format!("write {}", out_path.display()))?;
12201227

12211228
Ok(serde_json::json!({
12221229
"canvas_id": canvas_id,
@@ -1232,6 +1239,7 @@ impl Toolset {
12321239
url: &str,
12331240
_timeout_ms: Option<u64>,
12341241
allow_private: bool,
1242+
headless: Option<bool>,
12351243
) -> anyhow::Result<String> {
12361244
let url = reqwest::Url::parse(url).context("parse url")?;
12371245
match url.scheme() {
@@ -1255,7 +1263,15 @@ impl Toolset {
12551263

12561264
let mut guard = self.browser.lock().await;
12571265
if guard.is_none() {
1258-
*guard = Some(BrowserSession::spawn().await?);
1266+
let headless = headless.unwrap_or_else(browser_headless_default);
1267+
*guard = Some(BrowserSession::spawn(headless).await?);
1268+
} else if let Some(requested) = headless {
1269+
let session_headless = guard.as_ref().map(|s| s.headless).unwrap_or(true);
1270+
if session_headless != requested {
1271+
bail!(
1272+
"browser session already started with headless={session_headless}; call browser_close before starting a new session with headless={requested}"
1273+
);
1274+
}
12591275
}
12601276

12611277
let session = guard.as_mut().expect("set above");
@@ -1473,6 +1489,7 @@ impl BridgeResponse {
14731489
}
14741490

14751491
struct BrowserSession {
1492+
headless: bool,
14761493
child: tokio::process::Child,
14771494
stdin: tokio::process::ChildStdin,
14781495
stdout: BufReader<tokio::process::ChildStdout>,
@@ -1485,21 +1502,18 @@ impl std::fmt::Debug for BrowserSession {
14851502
}
14861503

14871504
impl BrowserSession {
1488-
async fn spawn() -> anyhow::Result<Self> {
1505+
async fn spawn(headless: bool) -> anyhow::Result<Self> {
14891506
let python = browser_python_exe();
14901507
let script_path = browser_bridge_script_path()?;
14911508

14921509
let mut cmd = tokio::process::Command::new(python);
14931510
cmd.arg("-u").arg(script_path);
1494-
cmd.args([
1495-
"--headless",
1496-
"--width",
1497-
"1280",
1498-
"--height",
1499-
"720",
1500-
"--timeout",
1501-
"30",
1502-
]);
1511+
if headless {
1512+
cmd.arg("--headless");
1513+
} else {
1514+
cmd.arg("--no-headless");
1515+
}
1516+
cmd.args(["--width", "1280", "--height", "720", "--timeout", "30"]);
15031517
cmd.stdin(Stdio::piped())
15041518
.stdout(Stdio::piped())
15051519
.stderr(Stdio::null());
@@ -1510,6 +1524,7 @@ impl BrowserSession {
15101524
let stdout = child.stdout.take().context("capture bridge stdout")?;
15111525

15121526
let mut session = Self {
1527+
headless,
15131528
child,
15141529
stdin,
15151530
stdout: BufReader::new(stdout),
@@ -1568,6 +1583,17 @@ impl Drop for BrowserSession {
15681583
}
15691584
}
15701585

1586+
/// Parses a boolean-style flag value such as the one stored in
/// `REXOS_BROWSER_HEADLESS`.
///
/// Leading/trailing whitespace is ignored and matching is ASCII
/// case-insensitive.
///
/// Returns `Some(false)` for `0`, `false`, `no`, `off`; `Some(true)` for
/// `1`, `true`, `yes`, `on`; and `None` for anything else (including an
/// empty string), so the caller decides what the fallback is.
fn parse_headless_flag(raw: &str) -> Option<bool> {
    const FALSY: [&str; 4] = ["0", "false", "no", "off"];
    const TRUTHY: [&str; 4] = ["1", "true", "yes", "on"];

    let value = raw.trim();
    // `eq_ignore_ascii_case` compares in place; no lowercase copy is allocated.
    if FALSY.iter().any(|word| value.eq_ignore_ascii_case(word)) {
        Some(false)
    } else if TRUTHY.iter().any(|word| value.eq_ignore_ascii_case(word)) {
        Some(true)
    } else {
        None
    }
}

/// Returns the headless setting to use when a caller did not request one.
///
/// Reads the `REXOS_BROWSER_HEADLESS` environment variable and interprets it
/// with [`parse_headless_flag`]. The result is `true` (headless) when the
/// variable is unset, is not valid Unicode, or holds an unrecognized value.
fn browser_headless_default() -> bool {
    std::env::var("REXOS_BROWSER_HEADLESS")
        .ok()
        .and_then(|value| parse_headless_flag(&value))
        // Unknown or missing configuration keeps the headless default.
        .unwrap_or(true)
}
1596+
15711597
fn browser_python_exe() -> String {
15721598
if let Ok(v) = std::env::var("REXOS_BROWSER_PYTHON") {
15731599
if !v.trim().is_empty() {
@@ -1793,6 +1819,8 @@ struct BrowserNavigateArgs {
17931819
timeout_ms: Option<u64>,
17941820
#[serde(default)]
17951821
allow_private: bool,
1822+
#[serde(default)]
1823+
headless: Option<bool>,
17961824
}
17971825

17981826
#[derive(Debug, serde::Deserialize)]
@@ -1879,8 +1907,8 @@ fn contains_event_handler_attr(lower: &str) -> bool {
18791907

18801908
if i > 0 {
18811909
let prev = bytes[i - 1];
1882-
let ok_boundary = prev.is_ascii_whitespace()
1883-
|| matches!(prev, b'<' | b'"' | b'\'' | b'/' | b'=');
1910+
let ok_boundary =
1911+
prev.is_ascii_whitespace() || matches!(prev, b'<' | b'"' | b'\'' | b'/' | b'=');
18841912
if !ok_boundary {
18851913
continue;
18861914
}
@@ -1917,16 +1945,8 @@ fn sanitize_canvas_html(html: &str, max_bytes: usize) -> anyhow::Result<String>
19171945
let lower = html.to_ascii_lowercase();
19181946

19191947
for tag in [
1920-
"<script",
1921-
"</script",
1922-
"<iframe",
1923-
"</iframe",
1924-
"<object",
1925-
"</object",
1926-
"<embed",
1927-
"</embed",
1928-
"<applet",
1929-
"</applet",
1948+
"<script", "</script", "<iframe", "</iframe", "<object", "</object", "<embed", "</embed",
1949+
"<applet", "</applet",
19301950
] {
19311951
if lower.contains(tag) {
19321952
bail!("forbidden html tag detected: {tag}");
@@ -2905,7 +2925,9 @@ fn compat_tool_defs() -> Vec<ToolDefinition> {
29052925
kind: "function".to_string(),
29062926
function: ToolFunctionDefinition {
29072927
name: "hand_list".to_string(),
2908-
description: "List available Hands (curated autonomous packages) and their activation status.".to_string(),
2928+
description:
2929+
"List available Hands (curated autonomous packages) and their activation status."
2930+
.to_string(),
29092931
parameters: json!({
29102932
"type": "object",
29112933
"properties": {},
@@ -3222,7 +3244,8 @@ fn browser_navigate_def() -> ToolDefinition {
32223244
"properties": {
32233245
"url": { "type": "string", "description": "HTTP(S) URL to open." },
32243246
"timeout_ms": { "type": "integer", "description": "Timeout in milliseconds (default 30000).", "minimum": 1 },
3225-
"allow_private": { "type": "boolean", "description": "Allow loopback/private IPs (default false)." }
3247+
"allow_private": { "type": "boolean", "description": "Allow loopback/private IPs (default false)." },
3248+
"headless": { "type": "boolean", "description": "Run the browser in headless mode (default true). Set false to show a GUI window." }
32263249
},
32273250
"required": ["url"],
32283251
"additionalProperties": false
@@ -3275,8 +3298,7 @@ fn browser_press_key_def() -> ToolDefinition {
32753298
kind: "function".to_string(),
32763299
function: ToolFunctionDefinition {
32773300
name: "browser_press_key".to_string(),
3278-
description: "Press a key in the browser (optionally on a target element)."
3279-
.to_string(),
3301+
description: "Press a key in the browser (optionally on a target element).".to_string(),
32803302
parameters: serde_json::json!({
32813303
"type": "object",
32823304
"properties": {
@@ -3650,7 +3672,10 @@ mod tests {
36503672

36513673
let bytes = std::fs::read(workspace.join("out.wav")).unwrap();
36523674
assert!(bytes.starts_with(b"RIFF"), "missing RIFF header");
3653-
assert!(bytes.windows(4).any(|w| w == b"WAVE"), "missing WAVE header");
3675+
assert!(
3676+
bytes.windows(4).any(|w| w == b"WAVE"),
3677+
"missing WAVE header"
3678+
);
36543679
}
36553680

36563681
#[tokio::test]
@@ -3905,10 +3930,7 @@ mod tests {
39053930
let _ = tools
39063931
.call(
39073932
"process_write",
3908-
&format!(
3909-
r#"{{ "process_id": "{}", "data": "hi" }}"#,
3910-
process_id
3911-
),
3933+
&format!(r#"{{ "process_id": "{}", "data": "hi" }}"#, process_id),
39123934
)
39133935
.await
39143936
.unwrap();
@@ -3972,7 +3994,10 @@ mod tests {
39723994
.get("saved_to")
39733995
.and_then(|v| v.as_str())
39743996
.expect("saved_to");
3975-
assert!(saved_to.ends_with(".html"), "unexpected saved_to: {saved_to}");
3997+
assert!(
3998+
saved_to.ends_with(".html"),
3999+
"unexpected saved_to: {saved_to}"
4000+
);
39764001

39774002
let html = std::fs::read_to_string(workspace.join(saved_to)).unwrap();
39784003
assert!(html.contains("<h1>Hello</h1>"), "{html}");
@@ -4226,7 +4251,10 @@ mod tests {
42264251
assert_eq!(v["key"], "Enter");
42274252

42284253
let out = tools
4229-
.call("browser_wait_for", r#"{ "text": "hello", "timeout_ms": 1 }"#)
4254+
.call(
4255+
"browser_wait_for",
4256+
r#"{ "text": "hello", "timeout_ms": 1 }"#,
4257+
)
42304258
.await
42314259
.unwrap();
42324260
let v: serde_json::Value = serde_json::from_str(&out).unwrap();
@@ -4251,6 +4279,39 @@ mod tests {
42514279
assert_eq!(out.trim(), "ok");
42524280
}
42534281

4282+
// Checks that an explicit `"headless": false` argument to `browser_navigate`
// shows up as `"headless": false` in the JSON returned by the tool call.
// The bridge process is replaced by the script from `stub_bridge_script()`.
#[tokio::test]
4283+
async fn browser_navigate_honors_headless_flag() {
4284+
// Held for the whole test; presumably serializes tests that change process
// environment variables (confirm against ENV_LOCK's other users).
let _lock = ENV_LOCK.lock().unwrap();
4285+
4286+
// Scratch directory holding both the workspace and the stub bridge script.
let tmp = tempfile::tempdir().unwrap();
4287+
let workspace = tmp.path().join("ws");
4288+
std::fs::create_dir_all(&workspace).unwrap();
4289+
4290+
let bridge_path = tmp.path().join("bridge.py");
4291+
std::fs::write(&bridge_path, stub_bridge_script()).unwrap();
4292+
4293+
// Interpreter name differs by platform: `python` on Windows, `python3` elsewhere.
let python = if cfg!(windows) { "python" } else { "python3" };
4294+
// Point the browser session at the chosen interpreter and the stub script.
// NOTE(review): the guards presumably restore the previous values on drop — confirm in EnvVarGuard.
let _python_guard = EnvVarGuard::set("REXOS_BROWSER_PYTHON", python);
4295+
let _bridge_guard = EnvVarGuard::set("REXOS_BROWSER_BRIDGE_PATH", bridge_path.as_os_str());
4296+
4297+
let tools = Toolset::new(workspace).unwrap();
4298+
4299+
// The URL is a loopback address, so `allow_private` is set to true;
// `headless: false` is the value under test.
let out = tools
4300+
.call(
4301+
"browser_navigate",
4302+
r#"{ "url": "http://127.0.0.1:1/", "allow_private": true, "headless": false }"#,
4303+
)
4304+
.await
4305+
.unwrap();
4306+
4307+
// The tool output is JSON; its top-level "headless" field must be false.
let v: serde_json::Value = serde_json::from_str(&out).unwrap();
4308+
assert_eq!(
4309+
v.get("headless").and_then(|v| v.as_bool()),
4310+
Some(false),
4311+
"{v}"
4312+
);
4313+
}
4314+
42544315
struct EnvVarGuard {
42554316
key: &'static str,
42564317
previous: Option<OsString>,
@@ -4286,7 +4347,8 @@ parser.add_argument("--no-headless", dest="headless", action="store_false")
42864347
parser.add_argument("--width", type=int, default=1280)
42874348
parser.add_argument("--height", type=int, default=720)
42884349
parser.add_argument("--timeout", type=int, default=30)
4289-
parser.parse_args()
4350+
args = parser.parse_args()
4351+
headless = bool(args.headless)
42904352
42914353
sys.stdout.write(json.dumps({"success": True, "data": {"status": "ready"}}) + "\n")
42924354
sys.stdout.flush()
@@ -4301,7 +4363,7 @@ for line in sys.stdin:
43014363
action = cmd.get("action", "")
43024364
if action == "Navigate":
43034365
current_url = cmd.get("url", "")
4304-
resp = {"success": True, "data": {"title": "Stub", "url": current_url}}
4366+
resp = {"success": True, "data": {"title": "Stub", "url": current_url, "headless": headless}}
43054367
elif action == "ReadPage":
43064368
resp = {"success": True, "data": {"title": "Stub", "url": current_url, "content": "hello"}}
43074369
elif action == "Screenshot":

0 commit comments

Comments
 (0)