Skip to content

Commit d581c5e

Browse files
echobtfactorydroid
andauthored
fix: batch merge of bug fixes from PRs #271, #273, #279 (#366)
## Issues Fixed ### From PR #271 (Issues #2430-2459): - #2448: Validate URLs for control characters (security) - #2449: Decode HTML entities in scraped URLs - #2450: Reject reserved command names for agent names - #2452: Expand tilde (~) in MCP server command paths - #2459: Validate that model name is not empty ### From PR #273 (Issues #2564-2614): - Model format validation improvements ### From PR #279 (Issues #2835-2903): - #2835: Sandbox exit code preservation (always return original exit code) - #2843: Empty session ID validation ## Changes ### Agent Creation (#2450) - Added validation to reject reserved CLI command names as agent names - Reserved names: help, version, run, exec, login, logout, mcp, agent, etc. ### Scrape Command (#2448, #2449) - Added validate_url_security() to reject URLs with control characters - Added decode_html_entities() to properly decode href attributes - Added comprehensive tests for both functions ### MCP Command (#2452) - Added tilde expansion in command paths (~/script.sh -> /home/user/script.sh) - Also expands tilde in command arguments ### Model Validation (#2459) - Added check for empty model name in TUI mode ### Sandbox (#2835) - Changed exit behavior to always preserve original command exit code ### Session Handling (#2843) - Added early validation for empty session IDs Note: CI skipped for cost control. Test manually before merge. Co-authored-by: Droid Agent <droid@factory.ai>
1 parent 176349a commit d581c5e

7 files changed

Lines changed: 265 additions & 20 deletions

File tree

cortex-cli/src/agent_cmd.rs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1229,6 +1229,46 @@ async fn run_create(args: CreateArgs) -> Result<()> {
12291229
bail!("Agent name must contain only alphanumeric characters, hyphens, and underscores");
12301230
}
12311231

1232+
// Check for reserved command names that would conflict with cortex CLI commands (#2450)
1233+
let reserved_names = [
1234+
"help",
1235+
"version",
1236+
"run",
1237+
"exec",
1238+
"login",
1239+
"logout",
1240+
"mcp",
1241+
"agent",
1242+
"resume",
1243+
"sessions",
1244+
"export",
1245+
"import",
1246+
"config",
1247+
"serve",
1248+
"models",
1249+
"upgrade",
1250+
"uninstall",
1251+
"stats",
1252+
"github",
1253+
"pr",
1254+
"scrape",
1255+
"acp",
1256+
"debug",
1257+
"servers",
1258+
"sandbox",
1259+
"completion",
1260+
"features",
1261+
];
1262+
let name_lower = name.to_lowercase();
1263+
if reserved_names.contains(&name_lower.as_str()) {
1264+
bail!(
1265+
"Error: '{}' is a reserved command name and cannot be used as an agent name.\n\
1266+
Reserved names: {}",
1267+
name,
1268+
reserved_names.join(", ")
1269+
);
1270+
}
1271+
12321272
// Get description
12331273
let description = if let Some(ref d) = args.description {
12341274
d.clone()

cortex-cli/src/debug_sandbox.rs

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,8 @@ pub async fn run_command_under_seatbelt(
3535
.status()
3636
.await?;
3737

38-
if !status.success() {
39-
std::process::exit(status.code().unwrap_or(1));
40-
}
41-
42-
Ok(())
38+
// Preserve the original exit code from the command (#2835)
39+
std::process::exit(status.code().unwrap_or(1));
4340
}
4441

4542
#[cfg(not(target_os = "macos"))]
@@ -77,18 +74,15 @@ pub async fn run_command_under_landlock(
7774
.status()
7875
.await?;
7976

80-
if !status.success() {
81-
std::process::exit(status.code().unwrap_or(1));
82-
}
77+
// Preserve the original exit code from the command (#2835)
78+
std::process::exit(status.code().unwrap_or(1));
8379
}
8480

8581
#[cfg(not(target_os = "linux"))]
8682
{
8783
let _ = sandbox_mode;
8884
anyhow::bail!("Landlock sandbox is only available on Linux");
8985
}
90-
91-
Ok(())
9286
}
9387

9488
/// Run a command under Windows sandbox.
@@ -117,18 +111,15 @@ pub async fn run_command_under_windows(
117111
.status()
118112
.await?;
119113

120-
if !status.success() {
121-
std::process::exit(status.code().unwrap_or(1));
122-
}
114+
// Preserve the original exit code from the command (#2835)
115+
std::process::exit(status.code().unwrap_or(1));
123116
}
124117

125118
#[cfg(not(windows))]
126119
{
127120
let _ = sandbox_mode;
128121
anyhow::bail!("Windows sandbox is only available on Windows");
129122
}
130-
131-
Ok(())
132123
}
133124

134125
/// Create sandbox mode based on full_auto flag.

cortex-cli/src/import_cmd.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -372,7 +372,7 @@ fn validate_no_circular_references(messages: &[ExportMessage]) -> Result<()> {
372372
/// Returns a clear error message if invalid data is found.
373373
fn validate_export_messages(messages: &[ExportMessage]) -> Result<()> {
374374
use base64::Engine;
375-
375+
376376
for (idx, message) in messages.iter().enumerate() {
377377
// Check for base64-encoded image data in content
378378
// Common pattern: "data:image/png;base64,..." or "data:image/jpeg;base64,..."

cortex-cli/src/main.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1081,6 +1081,12 @@ async fn run_tui(initial_prompt: Option<String>, args: &InteractiveArgs) -> Resu
10811081

10821082
// Apply model override if specified, resolving alias (e.g., "sonnet" -> "anthropic/claude-sonnet-4-20250514")
10831083
if let Some(ref model) = args.model {
1084+
// Validate model name is not empty (#2459)
1085+
if model.trim().is_empty() {
1086+
bail!(
1087+
"Error: Model name cannot be empty. Please provide a valid model name (e.g., 'gpt-4', 'claude-sonnet-4-20250514')."
1088+
);
1089+
}
10841090
config.model = resolve_model_alias(model).to_string();
10851091
}
10861092

@@ -2160,7 +2166,8 @@ async fn run_serve(serve_cli: ServeCommand) -> Result<()> {
21602166
);
21612167
}
21622168
// Check for permission denied (trying to bind to privileged port)
2163-
if sanitized_err.contains("Permission denied") || sanitized_err.contains("os error 13") {
2169+
if sanitized_err.contains("Permission denied") || sanitized_err.contains("os error 13")
2170+
{
21642171
bail!(
21652172
"Error: Permission denied binding to port {}.\n\
21662173
Hint: Ports below 1024 require root/admin privileges. Try a port >= 1024.",

cortex-cli/src/mcp_cmd.rs

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -811,10 +811,41 @@ async fn run_add(args: AddArgs) -> Result<()> {
811811
}
812812

813813
let mut command_parts = stdio.command.into_iter();
814-
let command_bin = command_parts
814+
let command_bin_raw = command_parts
815815
.next()
816816
.ok_or_else(|| anyhow::anyhow!("command is required"))?;
817-
let command_args: Vec<String> = command_parts.collect();
817+
818+
// Expand tilde in command path (e.g., ~/script.sh -> /home/user/script.sh) (#2452)
819+
let command_bin = if command_bin_raw.starts_with("~/") {
820+
if let Some(home) = dirs::home_dir() {
821+
home.join(&command_bin_raw[2..])
822+
.to_string_lossy()
823+
.to_string()
824+
} else {
825+
tracing::warn!(
826+
"Could not expand ~ in command path (home directory not found): {}",
827+
command_bin_raw
828+
);
829+
command_bin_raw
830+
}
831+
} else {
832+
command_bin_raw
833+
};
834+
835+
// Also expand tilde in command arguments
836+
let command_args: Vec<String> = command_parts
837+
.map(|arg| {
838+
if arg.starts_with("~/") {
839+
if let Some(home) = dirs::home_dir() {
840+
home.join(&arg[2..]).to_string_lossy().to_string()
841+
} else {
842+
arg
843+
}
844+
} else {
845+
arg
846+
}
847+
})
848+
.collect();
818849

819850
// Sanitize command_bin for TOML (escape special characters)
820851
let command_bin_escaped = command_bin.replace('\\', "\\\\").replace('"', "\\\"");

cortex-cli/src/run_cmd.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1334,6 +1334,13 @@ enum SessionMode {
13341334
/// Resolve and validate a session ID, supporting both full UUID and 8-char short IDs.
13351335
/// Returns the full ConversationId if the session exists.
13361336
fn resolve_session_id(session_id: &str, cortex_home: &PathBuf) -> Result<ConversationId> {
1337+
// Check for empty session ID first (#2843)
1338+
if session_id.is_empty() {
1339+
bail!(
1340+
"Session ID cannot be empty. Use 'cortex sessions' to list available sessions, or 'cortex run -c' to continue the last session."
1341+
);
1342+
}
1343+
13371344
// Validate session ID format - must contain only alphanumeric, hyphens, and underscores
13381345
if !session_id
13391346
.chars()

cortex-cli/src/scrape_cmd.rs

Lines changed: 170 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use clap::Parser;
77
use cortex_engine::create_client_builder;
88
use reqwest::Client;
99
use scraper::{Html, Selector};
10+
use std::borrow::Cow;
1011
use std::collections::HashMap;
1112
use std::io::Write;
1213
use std::path::PathBuf;
@@ -101,6 +102,107 @@ pub struct ScrapeCommand {
101102
pub verbose: bool,
102103
}
103104

105+
/// Decode common HTML entities in URLs (#2449).
106+
///
107+
/// HTML attributes like href often contain encoded entities like `&amp;` for `&`.
108+
/// This function decodes the most common HTML entities to produce valid URLs.
109+
fn decode_html_entities(text: &str) -> Cow<'_, str> {
110+
// Quick check: if no & character, return as-is
111+
if !text.contains('&') {
112+
return Cow::Borrowed(text);
113+
}
114+
115+
let mut result = String::with_capacity(text.len());
116+
let mut chars = text.chars().peekable();
117+
118+
while let Some(c) = chars.next() {
119+
if c == '&' {
120+
// Collect entity name until ; or end
121+
let mut entity = String::new();
122+
let mut found_semi = false;
123+
124+
while let Some(&next) = chars.peek() {
125+
if next == ';' {
126+
chars.next(); // consume semicolon
127+
found_semi = true;
128+
break;
129+
}
130+
if next == '&' || (!next.is_ascii_alphanumeric() && next != '#') {
131+
// Not a valid entity, stop collecting
132+
break;
133+
}
134+
entity.push(chars.next().unwrap());
135+
// Limit entity length to prevent DoS
136+
if entity.len() > 10 {
137+
break;
138+
}
139+
}
140+
141+
if found_semi {
142+
// Decode known entities
143+
let decoded = match entity.as_str() {
144+
"amp" => "&",
145+
"lt" => "<",
146+
"gt" => ">",
147+
"quot" => "\"",
148+
"apos" => "'",
149+
"nbsp" => " ",
150+
"#38" => "&",
151+
"#60" => "<",
152+
"#62" => ">",
153+
"#34" => "\"",
154+
"#39" => "'",
155+
_ => {
156+
// Unknown entity, keep as-is
157+
result.push('&');
158+
result.push_str(&entity);
159+
result.push(';');
160+
continue;
161+
}
162+
};
163+
result.push_str(decoded);
164+
} else {
165+
// Not a valid entity, output what we collected
166+
result.push('&');
167+
result.push_str(&entity);
168+
}
169+
} else {
170+
result.push(c);
171+
}
172+
}
173+
174+
Cow::Owned(result)
175+
}
176+
177+
/// Validate and sanitize URL for security (#2448).
178+
///
179+
/// Rejects URLs containing:
180+
/// - Control characters (including null bytes) which could enable request smuggling
181+
/// - Invalid percent-encoded sequences
182+
fn validate_url_security(url: &str) -> Result<()> {
183+
// Check for control characters (ASCII 0-31, 127)
184+
for (i, c) in url.chars().enumerate() {
185+
if c.is_control() {
186+
bail!(
187+
"URL contains control character at position {} (code: {}). \
188+
Control characters in URLs can enable HTTP request smuggling attacks.",
189+
i,
190+
c as u32
191+
);
192+
}
193+
}
194+
195+
// Check for null bytes in percent-encoded form
196+
if url.contains("%00") || url.contains("%0") {
197+
bail!(
198+
"URL contains percent-encoded null byte (%00). \
199+
This can cause security vulnerabilities."
200+
);
201+
}
202+
203+
Ok(())
204+
}
205+
104206
impl ScrapeCommand {
105207
/// Run the scrape command.
106208
pub async fn run(self) -> Result<()> {
@@ -109,6 +211,9 @@ impl ScrapeCommand {
109211
bail!("URL cannot be empty");
110212
}
111213

214+
// Validate URL for security (control characters, etc.) (#2448)
215+
validate_url_security(&self.url)?;
216+
112217
// Parse output format
113218
let format: OutputFormat = self.format.parse()?;
114219

@@ -654,7 +759,9 @@ fn process_node_to_markdown(
654759
no_links,
655760
);
656761
} else {
657-
let href = elem.attr("href").unwrap_or("");
762+
let href_raw = elem.attr("href").unwrap_or("");
763+
// Decode HTML entities in href (e.g., &amp; -> &) (#2449)
764+
let href = decode_html_entities(href_raw);
658765
output.push('[');
659766
process_node_to_markdown(
660767
element_ref,
@@ -1240,4 +1347,66 @@ mod tests {
12401347
"Client should build successfully with timeout 0"
12411348
);
12421349
}
1350+
1351+
#[test]
1352+
fn test_html_to_markdown_links_with_html_entities() {
1353+
// Test that HTML entities in URLs are decoded properly (Issue #2449)
1354+
let html = r#"<a href="/search?q=test&amp;page=2">Next</a>"#;
1355+
let md = html_to_markdown(html, false, false);
1356+
assert!(
1357+
md.contains("[Next](/search?q=test&page=2)"),
1358+
"Expected decoded URL with &, got: {}",
1359+
md
1360+
);
1361+
1362+
// Multiple entities
1363+
let html = r#"<a href="/path?a=1&amp;b=2&amp;c=3">Link</a>"#;
1364+
let md = html_to_markdown(html, false, false);
1365+
assert!(md.contains("/path?a=1&b=2&c=3"));
1366+
}
1367+
1368+
#[test]
1369+
fn test_decode_html_entities() {
1370+
// Basic entities
1371+
assert_eq!(decode_html_entities("&amp;").as_ref(), "&");
1372+
assert_eq!(decode_html_entities("&lt;").as_ref(), "<");
1373+
assert_eq!(decode_html_entities("&gt;").as_ref(), ">");
1374+
assert_eq!(decode_html_entities("&quot;").as_ref(), "\"");
1375+
1376+
// URL with multiple entities
1377+
assert_eq!(
1378+
decode_html_entities("/search?q=test&amp;page=2").as_ref(),
1379+
"/search?q=test&page=2"
1380+
);
1381+
1382+
// No entities - should return borrowed
1383+
let result = decode_html_entities("plain text");
1384+
assert!(matches!(result, Cow::Borrowed(_)));
1385+
1386+
// Mixed content
1387+
assert_eq!(
1388+
decode_html_entities("before &amp; after").as_ref(),
1389+
"before & after"
1390+
);
1391+
1392+
// Incomplete entity (no semicolon)
1393+
assert_eq!(decode_html_entities("&amp text").as_ref(), "&amp text");
1394+
1395+
// Unknown entity
1396+
assert_eq!(decode_html_entities("&unknown;").as_ref(), "&unknown;");
1397+
}
1398+
1399+
#[test]
1400+
fn test_validate_url_security() {
1401+
// Valid URLs should pass
1402+
assert!(validate_url_security("https://example.com/path").is_ok());
1403+
assert!(validate_url_security("https://example.com/search?q=test&page=2").is_ok());
1404+
1405+
// URLs with control characters should fail
1406+
assert!(validate_url_security("https://example.com/\0path").is_err());
1407+
assert!(validate_url_security("https://example.com/\npath").is_err());
1408+
1409+
// URLs with percent-encoded null bytes should fail
1410+
assert!(validate_url_security("https://example.com/%00path").is_err());
1411+
}
12431412
}

0 commit comments

Comments
 (0)