Skip to content

Commit 2e2302b

Browse files
committed
add web_fetch tool for fetching URL content as readable text
1 parent 52ce8a0 commit 2e2302b

5 files changed

Lines changed: 205 additions & 0 deletions

File tree

src/tools/mod.rs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -584,6 +584,58 @@ impl ToolExecutor {
584584
let result = self.bash_executor.execute(command)?;
585585
Ok(result)
586586
}
587+
ToolName::WebFetch => {
    // Fetch `input["url"]` with an HTTP GET, convert the response body to
    // plain text via `utils::html_to_text`, and return it prefixed with the
    // source URL. Output longer than `max_chars` bytes is truncated.
    let url = input["url"].as_str().ok_or_else(|| {
        SofosError::ToolExecution("Missing 'url' parameter".to_string())
    })?;

    // NOTE(review): only the scheme is validated here; any host (including
    // loopback or private addresses) will be requested — confirm that is
    // acceptable for this tool.
    if !url.starts_with("http://") && !url.starts_with("https://") {
        return Err(SofosError::ToolExecution(
            "URL must start with http:// or https://".to_string(),
        ));
    }

    let client = reqwest::Client::builder()
        .timeout(std::time::Duration::from_secs(30))
        .build()
        .map_err(|e| SofosError::ToolExecution(format!("HTTP client error: {}", e)))?;

    let response = client
        .get(url)
        .header("User-Agent", "Sofos/1.0")
        .send()
        .await
        .map_err(|e| SofosError::ToolExecution(format!("Fetch failed: {}", e)))?;

    // Non-2xx responses are reported as tool errors rather than returned as content.
    let status = response.status();
    if !status.is_success() {
        return Err(SofosError::ToolExecution(format!(
            "HTTP {} for {}",
            status, url
        )));
    }

    let body = response
        .text()
        .await
        .map_err(|e| SofosError::ToolExecution(format!("Read body failed: {}", e)))?;

    let text = utils::html_to_text(&body);

    let max_chars = 64_000;
    let truncated = if text.len() > max_chars {
        // `max_chars` is compared against a byte length, so it is a byte
        // offset. Slicing a `str` in the middle of a multi-byte UTF-8
        // sequence panics, so back off to the nearest char boundary first.
        // Offset 0 is always a boundary, so the loop terminates.
        let mut cut = max_chars;
        while !text.is_char_boundary(cut) {
            cut -= 1;
        }
        format!(
            "{}\n\n[TRUNCATED: showing first ~{} chars of {}]",
            &text[..cut],
            max_chars,
            text.len()
        )
    } else {
        text
    };

    Ok(format!("Content from {}:\n\n{}", url, truncated))
}
587639
ToolName::WebSearch => Err(SofosError::ToolExecution(
588640
"web_search is handled server-side by the API and should not be executed locally"
589641
.to_string(),

src/tools/tool_name.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ pub enum ToolName {
1616
EditFile,
1717
GlobFiles,
1818
MorphEditFile,
19+
WebFetch,
1920
WebSearch,
2021
}
2122

@@ -35,6 +36,7 @@ impl ToolName {
3536
ToolName::EditFile => "edit_file",
3637
ToolName::GlobFiles => "glob_files",
3738
ToolName::MorphEditFile => "morph_edit_file",
39+
ToolName::WebFetch => "web_fetch",
3840
ToolName::WebSearch => "web_search",
3941
}
4042
}
@@ -54,6 +56,7 @@ impl ToolName {
5456
"edit_file" => Ok(ToolName::EditFile),
5557
"glob_files" => Ok(ToolName::GlobFiles),
5658
"morph_edit_file" => Ok(ToolName::MorphEditFile),
59+
"web_fetch" => Ok(ToolName::WebFetch),
5760
"web_search" => Ok(ToolName::WebSearch),
5861
_ => Err(SofosError::ToolExecution(format!("Unknown tool: {}", s))),
5962
}

src/tools/types.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,24 @@ fn glob_files_tool() -> Tool {
250250
}
251251
}
252252

253+
fn web_fetch_tool() -> Tool {
254+
Tool::Regular {
255+
name: "web_fetch".to_string(),
256+
description: "Fetch a URL and return its content as readable text. Use this to read documentation pages, API references, or any web content. For searching the web by query, use web_search instead.".to_string(),
257+
input_schema: json!({
258+
"type": "object",
259+
"properties": {
260+
"url": {
261+
"type": "string",
262+
"description": "The URL to fetch (e.g., 'https://docs.rs/serde/latest/serde/')"
263+
}
264+
},
265+
"required": ["url"]
266+
}),
267+
cache_control: None,
268+
}
269+
}
270+
253271
fn morph_edit_file_tool() -> Tool {
254272
Tool::Regular {
255273
name: "morph_edit_file".to_string(),
@@ -290,6 +308,7 @@ pub fn get_all_tools() -> Vec<Tool> {
290308
move_file_tool(),
291309
copy_file_tool(),
292310
execute_bash_tool(),
311+
web_fetch_tool(),
293312
anthropic_web_search_tool(),
294313
openai_web_search_tool(),
295314
]
@@ -309,6 +328,7 @@ pub fn get_all_tools_with_morph() -> Vec<Tool> {
309328
copy_file_tool(),
310329
execute_bash_tool(),
311330
morph_edit_file_tool(),
331+
web_fetch_tool(),
312332
anthropic_web_search_tool(),
313333
openai_web_search_tool(),
314334
]
@@ -319,6 +339,7 @@ pub fn get_read_only_tools() -> Vec<Tool> {
319339
list_directory_tool(),
320340
read_file_tool(),
321341
glob_files_tool(),
342+
web_fetch_tool(),
322343
// Anthropic web search tool
323344
anthropic_web_search_tool(),
324345
// OpenAI web search tool

src/tools/utils.rs

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,3 +70,127 @@ pub fn confirm_destructive(prompt: &str) -> crate::error::Result<bool> {
7070
pub fn confirm_permission(prompt: &str) -> crate::error::Result<bool> {
7171
confirm_action_enhanced(prompt, ConfirmationType::Permission)
7272
}
73+
74+
/// Strip HTML tags and convert common entities to produce readable plain text.
///
/// Behavior:
/// - Everything between `<` and the next `>` is dropped.
/// - Text inside `<script>` / `<style>` elements is dropped.
/// - A fixed set of block-level tags (`br`, `p`, `div`, `li`, `h1`-`h4`, `tr`)
///   starts a new line unless the output already ends with one.
/// - `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&#39;`, `&apos;` and `&nbsp;` are
///   decoded; any other `&` is copied through unchanged.
/// - Runs of whitespace collapse to the first whitespace character of the run
///   (a newline stays a newline, anything else becomes a space).
/// - The result is trimmed of leading and trailing whitespace.
///
/// Tag and entity names are matched ASCII-case-insensitively against the
/// original input. Matching is done on the input itself rather than on a
/// lowercased copy, because Unicode lowercasing can change the number of
/// characters and would shift every position after such a character.
pub fn html_to_text(html: &str) -> String {
    // Tag prefixes that force a line break in the output.
    const BLOCK_TAG_PREFIXES: [&str; 12] = [
        "<br", "<p", "</p", "<div", "</div", "<li", "<h1", "<h2", "<h3", "<h4", "<tr", "</tr",
    ];

    // Recognized entities and the character each one decodes to.
    const ENTITIES: [(&str, char); 7] = [
        ("&amp;", '&'),
        ("&lt;", '<'),
        ("&gt;", '>'),
        ("&quot;", '"'),
        ("&#39;", '\''),
        ("&apos;", '\''),
        ("&nbsp;", ' '),
    ];

    // True when `chars[at..]` begins with `prefix`, ignoring ASCII case.
    fn has_prefix_at(chars: &[char], at: usize, prefix: &str) -> bool {
        let mut rest = chars[at..].iter();
        prefix
            .chars()
            .all(|p| matches!(rest.next(), Some(c) if c.eq_ignore_ascii_case(&p)))
    }

    let chars: Vec<char> = html.chars().collect();
    let len = chars.len();

    let mut out = String::with_capacity(html.len() / 2);
    let mut in_tag = false;
    let mut in_script = false;
    let mut in_style = false;
    let mut last_was_whitespace = false;
    let mut i = 0;

    while i < len {
        let ch = chars[i];

        // Inside a tag: discard everything up to and including the closing '>'.
        if in_tag {
            if ch == '>' {
                in_tag = false;
            }
            i += 1;
            continue;
        }

        if ch == '<' {
            // Track whether we are entering or leaving a script/style element.
            if has_prefix_at(&chars, i, "<script") {
                in_script = true;
            } else if has_prefix_at(&chars, i, "</script") {
                in_script = false;
            } else if has_prefix_at(&chars, i, "<style") {
                in_style = true;
            } else if has_prefix_at(&chars, i, "</style") {
                in_style = false;
            }

            // Block-level tags start a new line, but never a second one in a row.
            let is_block = BLOCK_TAG_PREFIXES
                .iter()
                .any(|prefix| has_prefix_at(&chars, i, prefix));
            if is_block && !out.ends_with('\n') {
                out.push('\n');
                last_was_whitespace = true;
            }

            in_tag = true;
            i += 1;
            continue;
        }

        // Script and style bodies are not readable text.
        if in_script || in_style {
            i += 1;
            continue;
        }

        // Handle HTML entities.
        if ch == '&' {
            match ENTITIES
                .iter()
                .find(|(name, _)| has_prefix_at(&chars, i, name))
            {
                Some((name, decoded)) => {
                    out.push(*decoded);
                    // Entity names are ASCII, so byte length equals char count.
                    i += name.len();
                }
                None => {
                    out.push('&');
                    i += 1;
                }
            }
            // A decoded entity (even `&nbsp;`) counts as content, so the next
            // whitespace character is still emitted.
            last_was_whitespace = false;
            continue;
        }

        if ch.is_whitespace() {
            if !last_was_whitespace {
                out.push(if ch == '\n' { '\n' } else { ' ' });
                last_was_whitespace = true;
            }
        } else {
            out.push(ch);
            last_was_whitespace = false;
        }
        i += 1;
    }

    // Collapse runs of 3+ newlines into 2.
    let mut result = String::with_capacity(out.len());
    let mut consecutive_newlines = 0;
    for ch in out.chars() {
        if ch == '\n' {
            consecutive_newlines += 1;
            if consecutive_newlines <= 2 {
                result.push(ch);
            }
        } else {
            consecutive_newlines = 0;
            result.push(ch);
        }
    }

    result.trim().to_string()
}

src/ui/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -603,6 +603,11 @@ impl UI {
603603
format!("Found {} items in {}", item_count, path.bright_cyan())
604604
}
605605
}
606+
"web_fetch" => {
607+
let url = tool_input.get("url").and_then(|v| v.as_str()).unwrap_or("");
608+
let char_count = output.len();
609+
format!("Fetched {} ({} chars)", url.bright_cyan(), char_count)
610+
}
606611
"morph_edit_file" => output.to_string(),
607612
_ => output.to_string(),
608613
}

0 commit comments

Comments
 (0)