Skip to content

Commit 2b220de

Browse files
committed
Walk all sub-commands of a compound shell for permission and volatile-args checks
1 parent db4264c commit 2b220de

2 files changed

Lines changed: 302 additions & 18 deletions

File tree

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,14 @@ All notable changes to Sofos are documented in this file.
44

55
## [Unreleased]
66

7+
### Changed
8+
9+
- **Permission check walks every sub-command of a compound shell.** A `for f in *.rs; do echo $f; sed -n '1,320p' $f | nl -ba; done` pipeline used to hit `Ask` because `extract_base_command` only saw the structural keyword `for`, even though every step (`echo`, `sed`, `nl`) is on the built-in read-only allow-list. `check_command_permission` now splits on `;`, `\n`, `|`, `||`, `&&` (quote-aware; `2>&1` preserved), strips shell-control prefixes (`do` / `done` / `then` / `else` / `elif` / `fi` / `case` / `esac` / `{` / `}` / `(`, `while` / `until` / `if` heads, `for VAR in WORDS` headers, `# comment` segments), and auto-allows when every resulting base is in `allowed_commands`. Volatile-args detection (`sed -n '1,N'p`, `head -n N`, `grep -A N`, `awk 'NR==N'`) uses the same splitter, so the Yes/No-only prompt fires for volatile sub-commands buried inside a `for` loop or `&&` chain — not just inside a `|` pipeline.
10+
11+
### Security
12+
13+
- **`cat foo && rm bar` no longer slips past as Allowed.** `extract_base_command` returned `cat`, which is on the built-in allow-list, so the smuggled `rm` was never looked up against the forbidden set. The new compound-aware check denies the whole pipeline as soon as any base is in `forbidden_commands` (`rm`, `chmod`, `sudo`, …). The `$(...)` / backtick substitution blind spot is unchanged — a command smuggled inside a substitution is still invisible to the permission system, both before and after this change.
14+
715
## [0.2.6] - 2026-05-01
816

917
### Fixed

src/tools/permissions.rs

Lines changed: 294 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,24 @@ use std::path::PathBuf;
99
const LOCAL_CONFIG_FILE: &str = ".sofos/config.local.toml";
1010
const GLOBAL_CONFIG_FILE: &str = ".sofos/config.toml";
1111

12+
/// POSIX shell control keywords that punctuate compound commands but
13+
/// don't introduce one of their own. Stripped from segment heads so the
14+
/// next non-keyword token is the base command we actually want to inspect.
15+
const COMPOUND_KEYWORDS: &[&str] = &[
16+
"do", "done", "then", "else", "elif", "fi", "case", "esac", "{", "}", "(",
17+
];
18+
19+
/// Loop / conditional headers whose tail *is* a real command —
20+
/// `while CMD; do ...`, `until CMD; do ...`, `if CMD; then ...`. The
21+
/// keyword itself is skipped, and the rest of the segment is parsed
22+
/// like any other command.
23+
const COMPOUND_HEADERS_WITH_BODY: &[&str] = &["while", "until", "if"];
24+
25+
/// Loop headers whose segment is a word list, *not* a command —
26+
/// `for VAR in WORDS`. Matching one means the segment carries no
27+
/// base command at all and should yield no entry.
28+
const COMPOUND_HEADERS_NO_BODY: &[&str] = &["for"];
29+
1230
/// Match POSIX-shell `KEY=value` assignment tokens: the key starts with
1331
/// a letter or underscore, is alphanumeric+`_` throughout, and is
1432
/// followed by `=`. Used by `extract_base_command` to skip leading
@@ -745,14 +763,37 @@ impl PermissionManager {
745763
return Ok(CommandPermission::Denied);
746764
}
747765

748-
if self.allowed_commands.contains(base_command) {
749-
return Ok(CommandPermission::Allowed);
750-
}
766+
// Walk every sub-command in a compound shell (`for ...; do ...; done`,
767+
// `cmd1 && cmd2`, `cmd1; cmd2 | cmd3`) so the verdict reflects the
768+
// whole pipeline, not just the first token. Two reasons:
769+
//
770+
// 1. `for f in *; do echo $f; sed -n '1,N'p $f; done` leads with the
771+
// structural keyword `for`, which isn't in `allowed_commands` —
772+
// the old single-token lookup forced a prompt even though every
773+
// real step is read-only. Bases pulled from the compound let the
774+
// same shell auto-allow.
775+
// 2. `cat foo && rm bar` used to slip past as Allowed because `cat`
776+
// is on the allow-list; the smuggled `rm` was never seen. Any
777+
// forbidden base anywhere in the pipeline now wins.
778+
//
779+
// The splitter does NOT descend into `$(...)` / backticks, so a
780+
// command smuggled there is still seen as part of the parent
781+
// command's args (same blind spot as before this change).
782+
let compound_bases = Self::enumerate_compound_bases(command);
783+
let bases: Vec<&str> = if compound_bases.is_empty() {
784+
vec![base_command]
785+
} else {
786+
compound_bases.iter().map(String::as_str).collect()
787+
};
751788

752-
if self.forbidden_commands.contains(base_command) {
789+
if bases.iter().any(|&b| self.forbidden_commands.contains(b)) {
753790
return Ok(CommandPermission::Denied);
754791
}
755792

793+
if bases.iter().all(|&b| self.allowed_commands.contains(b)) {
794+
return Ok(CommandPermission::Allowed);
795+
}
796+
756797
Ok(CommandPermission::Ask)
757798
}
758799

@@ -788,9 +829,110 @@ impl PermissionManager {
788829
Ok((confirmed, remember))
789830
}
790831

832+
/// Quote-aware split of a compound command on shell separators
833+
/// (`;`, `\n`, `|`, `||`, `&&`). Lone `&` is preserved so that
834+
/// `2>&1` stays glued to its preceding token. Quoted regions are
835+
/// kept whole so `echo 'a; b'` doesn't split mid-string.
836+
///
837+
/// Does NOT descend into `$(...)` command substitution or backtick
838+
/// substitution — a `rm` smuggled inside `echo $(rm bad)` is
839+
/// invisible to this splitter (and to the rest of the permission
840+
/// system, both before and after this change). Closing that hole
841+
/// would require yielding the inner sub-commands as separate
842+
/// segments and is intentionally out of scope here.
843+
fn split_compound_command(command: &str) -> Vec<String> {
844+
let mut segments: Vec<String> = Vec::new();
845+
let mut current = String::new();
846+
let mut chars = command.chars().peekable();
847+
let mut quote: Option<char> = None;
848+
849+
while let Some(c) = chars.next() {
850+
if let Some(q) = quote {
851+
current.push(c);
852+
if c == q {
853+
quote = None;
854+
}
855+
continue;
856+
}
857+
match c {
858+
'\'' | '"' => {
859+
quote = Some(c);
860+
current.push(c);
861+
}
862+
';' | '\n' => Self::push_segment(&mut segments, &mut current),
863+
'|' => {
864+
if chars.peek() == Some(&'|') {
865+
chars.next();
866+
}
867+
Self::push_segment(&mut segments, &mut current);
868+
}
869+
'&' if chars.peek() == Some(&'&') => {
870+
chars.next();
871+
Self::push_segment(&mut segments, &mut current);
872+
}
873+
_ => current.push(c),
874+
}
875+
}
876+
Self::push_segment(&mut segments, &mut current);
877+
segments
878+
}
879+
880+
fn push_segment(segments: &mut Vec<String>, current: &mut String) {
881+
let trimmed = current.trim();
882+
if !trimmed.is_empty() {
883+
segments.push(trimmed.to_string());
884+
}
885+
current.clear();
886+
}
887+
888+
/// Strip leading shell-control prefixes from a segment and return
889+
/// the next "real" command (base + args). Returns `None` for
890+
/// segments that carry no command of their own — `for VAR in WORDS`
891+
/// loop headers, bare `done` / `fi` / `esac` closers, or shell
892+
/// comments (`# anything`).
893+
fn extract_segment_base_with_args(segment: &str) -> Option<(&str, Vec<&str>)> {
894+
let mut tokens = segment.split_whitespace().peekable();
895+
while let Some(&tok) = tokens.peek() {
896+
if COMPOUND_HEADERS_NO_BODY.contains(&tok) {
897+
return None;
898+
}
899+
if is_env_assignment(tok)
900+
|| COMPOUND_KEYWORDS.contains(&tok)
901+
|| COMPOUND_HEADERS_WITH_BODY.contains(&tok)
902+
{
903+
tokens.next();
904+
continue;
905+
}
906+
break;
907+
}
908+
let base = tokens.next()?;
909+
// A bare `#` (or token starting with `#`) at the head of a
910+
// segment marks the rest as a comment — `ls; # tail msg`
911+
// splits to `["ls", "# tail msg"]`, and the second segment is
912+
// entirely commentary, not a command. Treating it as a base
913+
// would force a needless Ask prompt for what the shell ignores.
914+
if base.starts_with('#') {
915+
return None;
916+
}
917+
let args: Vec<&str> = tokens.collect();
918+
Some((base, args))
919+
}
920+
921+
/// Base-command names of every sub-command in a compound shell.
922+
/// Empty for a single command with no separators (callers fall back
923+
/// to `extract_base_command` in that case).
924+
fn enumerate_compound_bases(command: &str) -> Vec<String> {
925+
Self::split_compound_command(command)
926+
.iter()
927+
.filter_map(|seg| {
928+
Self::extract_segment_base_with_args(seg).map(|(base, _)| base.to_string())
929+
})
930+
.collect()
931+
}
932+
791933
/// Heuristic: does `command` carry line-number / line-range args that
792-
/// make the exact string un-rememberable? Checks each pipe segment
793-
/// for:
934+
/// make the exact string un-rememberable? Walks every sub-command of
935+
/// a compound shell looking for:
794936
///
795937
/// - sed numeric addresses: `sed -n '10,20p'`, `sed '5d'`, `sed 1,5q`
796938
/// - head/tail numeric counts: `head -50`, `head -n 50`, `tail +20`
@@ -800,22 +942,15 @@ impl PermissionManager {
800942
/// False positives just downgrade the prompt to Yes/No, so the
801943
/// heuristic prefers simple matches over exhaustive parsing.
802944
fn command_has_volatile_line_args(command: &str) -> bool {
803-
command
804-
.split('|')
805-
.any(|segment| Self::segment_has_volatile_line_args(segment.trim()))
945+
Self::split_compound_command(command)
946+
.iter()
947+
.any(|segment| Self::segment_has_volatile_line_args(segment))
806948
}
807949

808950
fn segment_has_volatile_line_args(segment: &str) -> bool {
809-
let mut tokens = segment.split_whitespace();
810-
// Skip leading env-var assignments the same way extract_base_command does.
811-
let base = loop {
812-
match tokens.next() {
813-
Some(t) if is_env_assignment(t) => continue,
814-
Some(t) => break t,
815-
None => return false,
816-
}
951+
let Some((base, args)) = Self::extract_segment_base_with_args(segment) else {
952+
return false;
817953
};
818-
let args: Vec<&str> = tokens.collect();
819954
match base {
820955
"sed" => Self::sed_has_numeric_address(&args),
821956
"head" | "tail" => Self::head_tail_has_numeric_count(&args),
@@ -2052,4 +2187,145 @@ ask = []
20522187
"ls -la src/"
20532188
));
20542189
}
2190+
2191+
/// Volatile sed/head/tail buried inside a `for ...; do ...; done`
2192+
/// loop or behind `;` / `&&` should still be detected — the old
2193+
/// `split('|')` pass missed everything that wasn't pipe-separated.
2194+
#[test]
2195+
fn test_volatile_line_args_inside_compound_shell() {
2196+
assert!(PermissionManager::command_has_volatile_line_args(
2197+
"for f in src/*.rs; do sed -n '1,320p' \"$f\" | nl -ba; done"
2198+
));
2199+
assert!(PermissionManager::command_has_volatile_line_args(
2200+
"cat README.md && head -n 50 CHANGELOG.md"
2201+
));
2202+
assert!(PermissionManager::command_has_volatile_line_args(
2203+
"echo start; tail -n 20 build.log"
2204+
));
2205+
assert!(!PermissionManager::command_has_volatile_line_args(
2206+
"for f in *.rs; do echo \"$f\"; cat \"$f\"; done"
2207+
));
2208+
}
2209+
2210+
/// `;` and `&&` inside quotes must NOT split a segment — `echo 'a; b'`
2211+
/// is still a single `echo` command.
2212+
#[test]
2213+
fn test_split_compound_command_respects_quotes() {
2214+
let segs = PermissionManager::split_compound_command("echo 'a; b' && ls");
2215+
assert_eq!(segs, vec!["echo 'a; b'", "ls"]);
2216+
2217+
let segs = PermissionManager::split_compound_command("echo \"x | y\" | wc -l");
2218+
assert_eq!(segs, vec!["echo \"x | y\"", "wc -l"]);
2219+
}
2220+
2221+
/// Lone `&` is part of `2>&1`, not a separator. The pre-existing
2222+
/// `2>&1` allowance in `is_safe_command_structure` would be undone
2223+
/// if our splitter chopped commands at every `&`.
2224+
#[test]
2225+
fn test_split_compound_command_keeps_stderr_redirect() {
2226+
let segs = PermissionManager::split_compound_command("cargo test 2>&1 | tee out.log");
2227+
assert_eq!(segs, vec!["cargo test 2>&1", "tee out.log"]);
2228+
}
2229+
2230+
/// The user-reported regression: a `for ...; do echo; sed; nl; done`
2231+
/// pipeline of read-only tools should resolve as Allowed without a
2232+
/// prompt, not get stuck on the `for` keyword.
2233+
#[test]
2234+
fn compound_for_loop_of_allowed_commands_is_allowed() {
2235+
let temp_dir = TempDir::new().unwrap();
2236+
let mut manager = PermissionManager::new(temp_dir.path().to_path_buf()).unwrap();
2237+
2238+
assert_eq!(
2239+
manager
2240+
.check_command_permission(
2241+
"for f in src/recipient/*.rs src/recipient/native/*.rs; \
2242+
do echo '===== '\"$f\"' ====='; sed -n '1,320p' \"$f\" | nl -ba; done"
2243+
)
2244+
.unwrap(),
2245+
CommandPermission::Allowed
2246+
);
2247+
}
2248+
2249+
/// A forbidden base anywhere inside a compound shell wins over an
2250+
/// allowed leader. Closes the `cat foo && rm bar` smuggling hole.
2251+
#[test]
2252+
fn compound_with_forbidden_base_is_denied() {
2253+
let temp_dir = TempDir::new().unwrap();
2254+
let mut manager = PermissionManager::new(temp_dir.path().to_path_buf()).unwrap();
2255+
2256+
assert_eq!(
2257+
manager
2258+
.check_command_permission("cat foo && rm bar")
2259+
.unwrap(),
2260+
CommandPermission::Denied
2261+
);
2262+
assert_eq!(
2263+
manager
2264+
.check_command_permission("for f in *.rs; do rm \"$f\"; done")
2265+
.unwrap(),
2266+
CommandPermission::Denied
2267+
);
2268+
assert_eq!(
2269+
manager.check_command_permission("ls; sudo whoami").unwrap(),
2270+
CommandPermission::Denied
2271+
);
2272+
}
2273+
2274+
/// Compound shells that include an unknown tool stay at Ask — only
2275+
/// fully-allowed pipelines auto-pass.
2276+
#[test]
2277+
fn compound_with_unknown_base_asks() {
2278+
let temp_dir = TempDir::new().unwrap();
2279+
let mut manager = PermissionManager::new(temp_dir.path().to_path_buf()).unwrap();
2280+
2281+
assert_eq!(
2282+
manager
2283+
.check_command_permission("cat foo && some_custom_tool bar")
2284+
.unwrap(),
2285+
CommandPermission::Ask
2286+
);
2287+
assert_eq!(
2288+
manager
2289+
.check_command_permission("for f in *; do unknown_tool \"$f\"; done")
2290+
.unwrap(),
2291+
CommandPermission::Ask
2292+
);
2293+
}
2294+
2295+
/// `ls; # trailing comment` is shell-equivalent to plain `ls` —
2296+
/// the `#` segment is commentary, not a command, and the verdict
2297+
/// must not regress to Ask just because we now look past the head.
2298+
#[test]
2299+
fn trailing_shell_comment_does_not_force_ask() {
2300+
let temp_dir = TempDir::new().unwrap();
2301+
let mut manager = PermissionManager::new(temp_dir.path().to_path_buf()).unwrap();
2302+
2303+
assert_eq!(
2304+
manager
2305+
.check_command_permission("ls -la; # quick listing")
2306+
.unwrap(),
2307+
CommandPermission::Allowed
2308+
);
2309+
}
2310+
2311+
/// `while CMD; do CMD; done` and `if CMD; then CMD; fi` should
2312+
/// likewise be inspected sub-command by sub-command.
2313+
#[test]
2314+
fn while_and_if_compounds_are_inspected() {
2315+
let temp_dir = TempDir::new().unwrap();
2316+
let mut manager = PermissionManager::new(temp_dir.path().to_path_buf()).unwrap();
2317+
2318+
assert_eq!(
2319+
manager
2320+
.check_command_permission("if grep -q foo file.txt; then echo found; fi")
2321+
.unwrap(),
2322+
CommandPermission::Allowed
2323+
);
2324+
assert_eq!(
2325+
manager
2326+
.check_command_permission("if true; then rm file; fi")
2327+
.unwrap(),
2328+
CommandPermission::Denied
2329+
);
2330+
}
20552331
}

0 commit comments

Comments
 (0)