Skip to content

Commit 6f6b175

Browse files
authored
chore: sync upstream browser-use target (#183)
1 parent fb762f6 commit 6f6b175

11 files changed

Lines changed: 320 additions & 30 deletions

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ translation of the Python internals.
88
Current frozen upstream target:
99

1010
```text
11-
browser-use/browser-use@157779338afdcc03023010ec3c24ad63d820453c
11+
browser-use/browser-use@834269609082d187ca0250de2c06d93799dac92d
1212
```
1313

1414
The detailed support matrix lives in [docs/RELEASE.md](docs/RELEASE.md). The

crates/browser-use-core/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ pub(crate) use urls::{
109109
pub(crate) use usage::TokenUsageTracker;
110110

111111
/// Version of the upstream browser-use source that this crate initially targets.
112-
pub const INITIAL_UPSTREAM_COMMIT: &str = "157779338afdcc03023010ec3c24ad63d820453c";
112+
pub const INITIAL_UPSTREAM_COMMIT: &str = "834269609082d187ca0250de2c06d93799dac92d";
113113

114114
#[cfg(test)]
115115
mod tests;

crates/browser-use-core/src/prompt.rs

Lines changed: 59 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ use browser_use_llm::{ChatMessage, ChatRequest, ContentPart, ImageDetailLevel, M
3434
use serde_json::Value;
3535
use std::collections::{BTreeMap, BTreeSet};
3636

37+
/// Base64 payload that `is_placeholder_4px_screenshot` compares screenshots against; a
/// screenshot whose payload equals this value is not attached to the step prompt.
// NOTE(review): the name indicates this is a 4px placeholder PNG — confirm against the
// upstream source that produces it.
const PLACEHOLDER_4PX_SCREENSHOT: &str = "iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAIAAAAmkwkpAAAAFElEQVR4nGP8//8/AwwwMSAB3BwAlm4DBfIlvvkAAAAASUVORK5CYII=";
38+
3739
mod history;
3840
mod schema;
3941

@@ -279,22 +281,22 @@ pub fn build_step_request_with_file_system(
279281
let agent_history = render_previous_results(history, settings.max_history_items);
280282
let page_stats = render_page_stats(state);
281283
let agent_state =
282-
render_agent_state_description(task, &page_stats, history, state, settings, file_system);
284+
render_agent_state_description(&page_stats, history, state, settings, file_system);
283285
let read_state = render_read_state_description(history)
284286
.map(|description| format!("\n<read_state>\n{description}\n</read_state>\n"))
285287
.unwrap_or_default();
288+
let step_meta = render_step_meta_description(history);
286289
let sensitive_values = collect_sensitive_data_values(&settings.sensitive_data);
287290
let user_text = redact_sensitive_string(
288291
&format!(
289-
"<agent_history>\n{agent_history}\n</agent_history>\n\n<agent_state>\n{agent_state}\n</agent_state>\n<browser_state>\n{state_json}\n</browser_state>{read_state}"
292+
"{}<agent_history>\n{agent_history}\n</agent_history>\n\n<agent_state>\n{agent_state}\n</agent_state>\n<browser_state>\n{state_json}\n</browser_state>{read_state}{step_meta}",
293+
render_user_request_description(task)
290294
),
291295
&sensitive_values,
292296
);
293297
let mut user_content = vec![ContentPart::Text { text: user_text }];
294298
user_content.extend(settings.sample_images.iter().cloned());
295-
if settings.use_vision.accepts_prompt_image()
296-
&& let Some(screenshot) = state.screenshot.as_deref()
297-
{
299+
if let Some(screenshot) = prompt_visible_screenshot(state, settings) {
298300
user_content.push(ContentPart::ImageUrl {
299301
image_url: prompt_screenshot_data_url(screenshot, settings.llm_screenshot_size),
300302
detail: Some(settings.vision_detail_level),
@@ -559,6 +561,39 @@ fn render_judge_trajectory(history: &AgentHistory) -> String {
559561
.join("\n\n")
560562
}
561563

564+
/// Wraps `task` in a `<user_request>` element, each tag on its own line, followed by a
/// blank line so the next prompt section starts on a fresh line.
fn render_user_request_description(task: &str) -> String {
565+
format!("<user_request>\n{task}\n</user_request>\n\n")
566+
}
567+
568+
/// Renders the `<step_info>` element for the upcoming step.
///
/// The step number is one more than the value returned by `latest_history_step_number`,
/// or `1` when that helper returns `None`. The date after `Today:` is produced by
/// `utc_date_string` from `now_seconds()`.
// NOTE(review): `now_seconds()` is presumably the current Unix time in seconds — confirm,
// and confirm that the `as i64` cast cannot lose information for its return type.
fn render_step_meta_description(history: &AgentHistory) -> String {
569+
let next_step = latest_history_step_number(history).unwrap_or(0) + 1;
570+
format!(
571+
"<step_info>Step {next_step}\nToday:{}</step_info>\n",
572+
utc_date_string(now_seconds() as i64)
573+
)
574+
}
575+
576+
/// Formats a Unix timestamp (seconds) as a zero-padded `YYYY-MM-DD` date.
///
/// The timestamp is floored to whole days with `div_euclid`, so negative timestamps map
/// to the day that contains them, and the day count is converted by `civil_from_days`.
fn utc_date_string(unix_seconds: i64) -> String {
577+
let days = unix_seconds.div_euclid(86_400);
578+
let (year, month, day) = civil_from_days(days);
579+
format!("{year:04}-{month:02}-{day:02}")
580+
}
581+
582+
/// Converts a day count relative to 1970-01-01 into a proleptic Gregorian
/// `(year, month, day)` triple.
///
/// This is Howard Hinnant's civil-from-days algorithm. The day count is shifted so that
/// day zero is 0000-03-01; years then start in March, which puts the leap day at the end
/// of the year and makes every 400-year era exactly 146 097 days long.
///
/// Negative inputs (dates before the Unix epoch, including dates before year 0) are
/// supported. `month` is in `1..=12` and `day` in `1..=31`. Years that do not fit in
/// `i32` are truncated by the final cast.
fn civil_from_days(days_since_unix_epoch: i64) -> (i32, u32, u32) {
    const DAYS_PER_ERA: i64 = 146_097; // days in one 400-year Gregorian cycle
    const EPOCH_SHIFT: i64 = 719_468; // days from 0000-03-01 to 1970-01-01

    let z = days_since_unix_epoch + EPOCH_SHIFT;
    // Euclidean division already floors for a positive divisor. The reference C++ biases
    // negative values by `-146_096` only because its `/` truncates toward zero; applying
    // that bias on top of `div_euclid` would push the era one too low for `z <= -2`.
    let era = z.div_euclid(DAYS_PER_ERA);
    let doe = z.rem_euclid(DAYS_PER_ERA); // day of era, [0, 146_096]
    let yoe = (doe - doe / 1460 + doe / 36_524 - doe / 146_096) / 365; // year of era, [0, 399]
    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); // day of March-based year, [0, 365]
    let mp = (5 * doy + 2) / 153; // March-based month index, [0, 11]
    let day = doy - (153 * mp + 2) / 5 + 1;
    let month = if mp < 10 { mp + 3 } else { mp - 9 };
    // January and February belong to the following civil year.
    let year = yoe + era * 400 + i64::from(month <= 2);
    (year as i32, month as u32, day as u32)
}
596+
562597
fn truncate_judge_text(text: &str) -> String {
563598
const MAX_CHARS: usize = 40_000;
564599
if text.chars().count() <= MAX_CHARS {
@@ -649,6 +684,24 @@ fn screenshot_base64_payload(screenshot: &str) -> &str {
649684
}
650685
}
651686

687+
/// Returns the screenshot that should be attached to the step prompt, if any.
///
/// Yields `None` when `settings.use_vision` does not accept a prompt image, when
/// `is_new_tab_page` reports the current URL as a new-tab page, when the state carries no
/// screenshot, when the screenshot is empty after trimming whitespace, or when it matches
/// the placeholder recognised by `is_placeholder_4px_screenshot`. Otherwise the borrowed
/// screenshot string is returned unchanged.
fn prompt_visible_screenshot<'a>(
688+
state: &'a BrowserStateSummary,
689+
settings: &AgentSettings,
690+
) -> Option<&'a str> {
691+
if !settings.use_vision.accepts_prompt_image() || is_new_tab_page(&state.url) {
692+
return None;
693+
}
694+
state
695+
.screenshot
696+
.as_deref()
697+
.filter(|screenshot| !screenshot.trim().is_empty())
698+
.filter(|screenshot| !is_placeholder_4px_screenshot(screenshot))
699+
}
700+
701+
/// Reports whether `screenshot` is the known placeholder image.
///
/// The comparison is made on the payload returned by `screenshot_base64_payload`, trimmed
/// of surrounding whitespace, against `PLACEHOLDER_4PX_SCREENSHOT`.
fn is_placeholder_4px_screenshot(screenshot: &str) -> bool {
702+
screenshot_base64_payload(screenshot).trim() == PLACEHOLDER_4PX_SCREENSHOT
703+
}
704+
652705
fn append_latest_action_result_images(
653706
content: &mut Vec<ContentPart>,
654707
history: &AgentHistory,
@@ -774,14 +827,13 @@ fn fallback_page_stats(state: &BrowserStateSummary) -> browser_use_dom::DomPageS
774827
}
775828

776829
fn render_agent_state_description(
777-
task: &str,
778830
page_stats: &str,
779831
history: &AgentHistory,
780832
state: &BrowserStateSummary,
781833
settings: &AgentSettings,
782834
file_system: Option<&ManagedFileSystem>,
783835
) -> String {
784-
let mut description = format!("Task:\n{task}\n\nPage stats:\n{page_stats}");
836+
let mut description = format!("Page stats:\n{page_stats}");
785837
if let Some(file_system) = file_system {
786838
let todo_contents = file_system.get_todo_contents();
787839
let todo_contents = if todo_contents.is_empty() {

crates/browser-use-core/src/tests.rs

Lines changed: 191 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ impl Drop for CurrentDirGuard {
4545
fn target_commit_is_pinned() {
4646
assert_eq!(
4747
INITIAL_UPSTREAM_COMMIT,
48-
"157779338afdcc03023010ec3c24ad63d820453c"
48+
"834269609082d187ca0250de2c06d93799dac92d"
4949
);
5050
}
5151

@@ -4551,6 +4551,11 @@ fn request_text(request: &ChatRequest) -> String {
45514551
.join("\n")
45524552
}
45534553

4554+
/// Test helper: returns the byte offset of the first occurrence of `needle` in `text`.
///
/// # Panics
///
/// Panics with a message naming `needle` when it does not occur in `text`, so an ordering
/// assertion fails with a readable reason instead of an opaque `unwrap` error.
fn text_pos(text: &str, needle: &str) -> usize {
4555+
text.find(needle)
4556+
.unwrap_or_else(|| panic!("{needle} not found in text"))
4557+
}
4558+
45544559
#[async_trait]
45554560
impl ChatModel for QueueModel {
45564561
fn provider(&self) -> &str {
@@ -4642,6 +4647,54 @@ async fn agent_history_records_token_usage_summary_and_costs() {
46424647
assert!((usage.total_cost - 0.0000536).abs() < 0.0000000001);
46434648
}
46444649

4650+
/// Runs a single-step agent against a `QueueModel` named `bu-latest` with cost calculation
/// enabled and the judge disabled, then pins the prompt, cached-prompt, completion and
/// total costs reported in the history's usage summary for a fixed `ChatUsage`.
// NOTE(review): per the test name the expected figures correspond to `bu-2.0` pricing;
// the price table itself is not visible here — confirm against the pricing source.
#[tokio::test]
4651+
async fn agent_cost_pricing_maps_bu_latest_to_bu_2_0_like_upstream() {
4652+
let done_output = serde_json::json!({
4653+
"current_state": {
4654+
"thinking": "done"
4655+
},
4656+
"action": [
4657+
{
4658+
"done": {
4659+
"text": "finished",
4660+
"success": true
4661+
}
4662+
}
4663+
]
4664+
});
4665+
let usage = ChatUsage {
4666+
prompt_tokens: 100,
4667+
prompt_cached_tokens: Some(40),
4668+
prompt_cache_creation_tokens: None,
4669+
prompt_image_tokens: None,
4670+
completion_tokens: 20,
4671+
total_tokens: 120,
4672+
};
4673+
let settings = AgentSettings {
4674+
calculate_cost: true,
4675+
use_judge: false,
4676+
..AgentSettings::default()
4677+
};
4678+
let mut agent = Agent::with_settings(
4679+
"finish",
4680+
settings,
4681+
QueueModel::with_model_outputs_and_usages(
4682+
"bu-latest",
4683+
vec![done_output],
4684+
vec![Some(usage)],
4685+
),
4686+
MockSession::new(),
4687+
);
4688+
4689+
let history = agent.run(1).await.expect("run");
4690+
let usage = history.usage.as_ref().expect("usage summary");
4691+
4692+
assert!((usage.total_prompt_cost - 0.0000384).abs() < 0.0000000001);
4693+
assert!((usage.total_prompt_cached_cost - 0.0000024).abs() < 0.0000000001);
4694+
assert!((usage.total_completion_cost - 0.00007).abs() < 0.0000000001);
4695+
assert!((usage.total_cost - 0.0001108).abs() < 0.0000000001);
4696+
}
4697+
46454698
#[tokio::test]
46464699
async fn agent_extract_action_uses_llm_result_sections() {
46474700
let agent_output = serde_json::json!({
@@ -5873,6 +5926,7 @@ async fn agent_step_saves_conversation_transcript() {
58735926
]
58745927
});
58755928
let mut state = blank_state();
5929+
state.url = "https://example.test/app".to_owned();
58765930
state.screenshot = Some("abc123".to_owned());
58775931
let settings = AgentSettings {
58785932
save_conversation_path: Some(transcript_dir.display().to_string()),
@@ -8111,6 +8165,90 @@ fn step_request_includes_previous_results() {
81118165
assert!(request_text.contains("Avoid repeating the same action sequence"));
81128166
}
81138167

8168+
/// Pins the section order of the step prompt text: `<user_request>` comes before
/// `<agent_history>`, followed by `<agent_state>`, `<browser_state>` and finally
/// `<step_info>`, which must be the very end of the text.
///
/// With the only history item recorded at step 3 the step info must read `Step 4`. The
/// `<agent_state>` section must contain neither the task text nor the `<user_request>` or
/// `<step_info>` tags.
#[test]
8169+
fn step_request_places_user_request_before_history_and_step_meta_at_suffix() {
8170+
let mut state = blank_state();
8171+
state.url = "https://example.test/app".to_owned();
8172+
let history = AgentHistory {
8173+
items: vec![AgentHistoryItem {
8174+
model_output: None,
8175+
result: vec![ActionResult::extracted("first step")],
8176+
state: blank_state(),
8177+
metadata: Some(StepMetadata {
8178+
step_start_time: 1.0,
8179+
step_end_time: 2.0,
8180+
step_number: 3,
8181+
step_interval: None,
8182+
}),
8183+
}],
8184+
..AgentHistory::default()
8185+
};
8186+
8187+
let request = build_step_request(
8188+
"Do the cacheable thing",
8189+
&state,
8190+
&history,
8191+
&AgentSettings::default(),
8192+
)
8193+
.expect("step request");
8194+
let user_text = request_text(&request);
8195+
8196+
assert!(text_pos(&user_text, "<user_request>") < text_pos(&user_text, "<agent_history>"));
8197+
assert!(text_pos(&user_text, "</agent_history>") < text_pos(&user_text, "<agent_state>"));
8198+
assert!(text_pos(&user_text, "</agent_state>") < text_pos(&user_text, "<browser_state>"));
8199+
assert!(text_pos(&user_text, "</browser_state>") < text_pos(&user_text, "<step_info>"));
8200+
assert!(user_text.ends_with("</step_info>\n"));
8201+
assert!(user_text.contains("<step_info>Step 4\nToday:"));
8202+
8203+
let agent_state =
8204+
&user_text[text_pos(&user_text, "<agent_state>")..text_pos(&user_text, "</agent_state>")];
8205+
assert!(!agent_state.contains("<user_request>"));
8206+
assert!(!agent_state.contains("<step_info>"));
8207+
assert!(!agent_state.contains("Do the cacheable thing"));
8208+
}
8209+
8210+
/// Builds two step requests that differ only in the recorded step number of the single
/// history item (3 vs 4) and asserts that the text before `<step_info>` is identical in
/// both, while the text from `<step_info>` onwards differs.
// NOTE(review): presumably this guards a cache-friendly prompt prefix — confirm intent
// with the upstream change this sync mirrors.
#[test]
8211+
fn step_request_prefix_before_step_info_is_stable_across_steps() {
8212+
let mut state = blank_state();
8213+
state.url = "https://example.test/app".to_owned();
8214+
let history_with_step = |step_number| AgentHistory {
8215+
items: vec![AgentHistoryItem {
8216+
model_output: None,
8217+
result: vec![ActionResult::extracted("result")],
8218+
state: blank_state(),
8219+
metadata: Some(StepMetadata {
8220+
step_start_time: 1.0,
8221+
step_end_time: 2.0,
8222+
step_number,
8223+
step_interval: None,
8224+
}),
8225+
}],
8226+
..AgentHistory::default()
8227+
};
8228+
let first = request_text(
8229+
&build_step_request(
8230+
"Keep the stable prefix",
8231+
&state,
8232+
&history_with_step(3),
8233+
&AgentSettings::default(),
8234+
)
8235+
.expect("first request"),
8236+
);
8237+
let second = request_text(
8238+
&build_step_request(
8239+
"Keep the stable prefix",
8240+
&state,
8241+
&history_with_step(4),
8242+
&AgentSettings::default(),
8243+
)
8244+
.expect("second request"),
8245+
);
8246+
8247+
let prefix_end = text_pos(&first, "<step_info>");
8248+
assert_eq!(&first[..prefix_end], &second[..prefix_end]);
8249+
assert_ne!(&first[prefix_end..], &second[prefix_end..]);
8250+
}
8251+
81148252
#[test]
81158253
fn step_request_includes_loading_page_stats_hint_like_upstream() {
81168254
let mut state = blank_state();
@@ -8607,6 +8745,7 @@ fn step_request_redacts_sensitive_values_from_state_and_history() {
86078745
#[test]
86088746
fn step_request_attaches_screenshot_as_image_part() {
86098747
let mut state = blank_state();
8748+
state.url = "https://example.test/app".to_owned();
86108749
state.screenshot = Some("abc123".to_owned());
86118750

86128751
let request = build_step_request(
@@ -8639,6 +8778,7 @@ fn step_request_attaches_screenshot_as_image_part() {
86398778
fn step_request_resizes_screenshot_for_llm_prompt_only() {
86408779
let original_screenshot = test_png_base64(240, 160);
86418780
let mut state = blank_state();
8781+
state.url = "https://example.test/app".to_owned();
86428782
state.screenshot = Some(original_screenshot.clone());
86438783
let settings = AgentSettings {
86448784
llm_screenshot_size: Some(LlmScreenshotSize::new(120, 100).expect("valid size")),
@@ -8669,6 +8809,54 @@ fn step_request_resizes_screenshot_for_llm_prompt_only() {
86698809
);
86708810
}
86718811

8812+
/// A screenshot captured while the state URL is `chrome://new-tab-page/` must not reach
/// the prompt: the user message has exactly one content part and the screenshot data
/// (`abc123`) does not appear in the request text.
#[test]
8813+
fn step_request_omits_screenshot_for_new_tab_pages() {
8814+
let mut state = blank_state();
8815+
state.url = "chrome://new-tab-page/".to_owned();
8816+
state.screenshot = Some("abc123".to_owned());
8817+
8818+
let request = build_step_request(
8819+
"inspect new tab",
8820+
&state,
8821+
&AgentHistory::default(),
8822+
&AgentSettings::default(),
8823+
)
8824+
.expect("step request");
8825+
let user_message = request
8826+
.messages
8827+
.iter()
8828+
.find(|message| message.role == MessageRole::User)
8829+
.expect("user message");
8830+
8831+
assert_eq!(user_message.content.len(), 1);
8832+
assert!(!request_text(&request).contains("abc123"));
8833+
}
8834+
8835+
/// A screenshot whose data equals the placeholder base64 payload must not be attached,
/// even on an ordinary `https` page: the user message has exactly one content part.
#[test]
8836+
fn step_request_omits_placeholder_4px_screenshot() {
8837+
let mut state = blank_state();
8838+
state.url = "https://example.test/app".to_owned();
8839+
state.screenshot = Some(
8840+
"iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAIAAAAmkwkpAAAAFElEQVR4nGP8//8/AwwwMSAB3BwAlm4DBfIlvvkAAAAASUVORK5CYII="
8841+
.to_owned(),
8842+
);
8843+
8844+
let request = build_step_request(
8845+
"inspect placeholder",
8846+
&state,
8847+
&AgentHistory::default(),
8848+
&AgentSettings::default(),
8849+
)
8850+
.expect("step request");
8851+
let user_message = request
8852+
.messages
8853+
.iter()
8854+
.find(|message| message.role == MessageRole::User)
8855+
.expect("user message");
8856+
8857+
assert_eq!(user_message.content.len(), 1);
8858+
}
8859+
86728860
#[test]
86738861
fn step_request_attaches_latest_action_result_images_as_image_parts() {
86748862
let history = AgentHistory {
@@ -8748,6 +8936,7 @@ fn step_request_attaches_latest_action_result_images_as_image_parts() {
87488936
#[test]
87498937
fn step_request_inserts_sample_images_before_runtime_images() {
87508938
let mut state = blank_state();
8939+
state.url = "https://example.test/app".to_owned();
87518940
state.screenshot = Some("screen-data".to_owned());
87528941
let history = AgentHistory {
87538942
items: vec![AgentHistoryItem {
@@ -8920,6 +9109,7 @@ fn step_request_omits_screenshot_when_vision_disabled() {
89209109
#[test]
89219110
fn step_request_includes_screenshot_when_auto_vision_state_has_one() {
89229111
let mut state = blank_state();
9112+
state.url = "https://example.test/app".to_owned();
89239113
state.screenshot = Some("abc123".to_owned());
89249114
let settings = AgentSettings {
89259115
use_vision: VisionMode::Auto,

0 commit comments

Comments
 (0)