Skip to content

Commit cfe2a64

Browse files
committed
review comments
Signed-off-by: Akihiko Kuroda <akihikokuroda2020@gmail.com>
1 parent aa295a3 commit cfe2a64

3 files changed

Lines changed: 67 additions & 18 deletions

File tree

docs/examples/tools/shell_example.py

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,10 @@ def example_3_llm_with_forced_tool_use(m: MelleaSession) -> None:
8484
8585
This mirrors the Python interpreter pattern: ask the LLM to generate
8686
a bash command, force it to use the tool, then execute the command.
87+
88+
Requirements:
89+
- Ollama running locally (or compatible LLM configured)
90+
- Run: ollama serve
8791
"""
8892
print("=== Example 3: LLM-Generated Bash Commands with Forced Tool Use ===")
8993

@@ -99,15 +103,32 @@ def example_3_llm_with_forced_tool_use(m: MelleaSession) -> None:
99103
if result.tool_calls is None:
100104
raise ValueError("Expected tool_calls but got None")
101105

106+
if "unsafe_local_bash_executor" not in result.tool_calls:
107+
available_tools = list(result.tool_calls.keys())
108+
raise ValueError(
109+
f"Expected tool 'unsafe_local_bash_executor' in tool_calls, "
110+
f"but got: {available_tools}"
111+
)
112+
102113
# Extract the bash command the LLM generated
103-
command = result.tool_calls["unsafe_local_bash_executor"].args["command"]
114+
tool_call = result.tool_calls["unsafe_local_bash_executor"]
115+
if "command" not in tool_call.args:
116+
raise ValueError(
117+
f"Expected 'command' argument in tool call args, "
118+
f"but got: {list(tool_call.args.keys())}"
119+
)
120+
121+
command = tool_call.args["command"]
104122
print(f"LLM generated bash command:\n {command}\n")
105123

106124
# Execute the command
107-
exec_result = result.tool_calls["unsafe_local_bash_executor"].call_func()
125+
exec_result = tool_call.call_func()
108126

109127
print("Execution result:")
110128
print(f" Success: {exec_result.success}")
129+
print(f" Skipped: {exec_result.skipped}")
130+
if exec_result.skip_message:
131+
print(f" Skip reason: {exec_result.skip_message}")
111132
print(f" Output: {exec_result.stdout}")
112133
if exec_result.stderr:
113134
print(f" Error: {exec_result.stderr}")
@@ -202,10 +223,15 @@ def example_5_error_handling() -> None:
202223
example_1_direct_execution()
203224
example_2_wrapped_as_tool()
204225

205-
# Example 3 requires a running Mellea session with LLM (Ollama recommended)
206-
# Uncomment to run with LLM:
207-
# m = start_session()
208-
# example_3_llm_with_forced_tool_use(m)
226+
# Example 3: Run with LLM-based tool calling (requires Ollama or compatible LLM)
227+
# Uncomment these lines to test LLM-generated commands:
228+
# try:
229+
# m = start_session()
230+
# example_3_llm_with_forced_tool_use(m)
231+
# except Exception as e:
232+
# print(f"Example 3 skipped: {e!s}")
233+
# print(" Requires: Ollama running locally or compatible LLM configured")
234+
# print(" See: https://docs.ollama.ai/")
209235

210236
example_3_with_working_dir()
211237
example_4_safety_features()

mellea/stdlib/tools/shell.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@
99
``working_dir`` and ``allowed_paths``. The top-level ``bash_executor`` (recommended
1010
for production) and ``unsafe_local_bash_executor`` (development-only) functions are
1111
ready to be wrapped as ``MelleaTool`` instances for ReACT or other agentic loops.
12+
13+
Security note: The denylist covers inline code execution (e.g., bash -c, python -e) and
14+
dangerous commands in argv. However, it does not prevent execution of pre-existing
15+
script files (e.g., bash script.sh, python script.py), which can execute arbitrary
16+
code from the file. For untrusted inputs, ensure that script files are either absent
17+
or come from a trusted source.
1218
"""
1319

1420
import shlex
@@ -138,10 +144,18 @@ def _is_dangerous_command(argv: list[str]) -> tuple[bool, str]:
138144
)
139145

140146
# Check if any argument is a dangerous command (e.g., env sudo, timeout sudo)
141-
# Only check arguments that are not paths (don't contain / or are not flag values)
147+
# Only check positional arguments that are not paths or flag values.
148+
# Known value-taking flags that consume the next argument (space-separated only):
149+
flag_value_flags = {
150+
"-c", "--config", "-f", "--file", "-o", "--output",
151+
"-i", "--input", "-d", "--dir", "-p", "--path",
152+
"-t", "--timeout", "-w", "--wait",
153+
}
142154
for i, arg in enumerate(argv[1:], start=1):
143-
# Skip if this looks like a flag value (argument to a preceding flag)
144-
if i > 1 and argv[i - 1].startswith("-"):
155+
# Skip if this argument is the value for a preceding flag (space-separated)
156+
# E.g., in "timeout -t 10 sudo", skip "10" (it's the value for -t)
157+
# But don't skip "sudo" when the flag uses = notation (e.g., --kill-after=1)
158+
if i > 1 and argv[i - 1] in flag_value_flags:
145159
continue
146160
# Skip if argument contains / (it's a path, not a command name)
147161
if "/" in arg:
@@ -640,11 +654,11 @@ def execute(self, command: str) -> ExecutionResult:
640654
)
641655

642656
sandbox_workdir = self.working_dir or "/sandbox"
643-
shell_command = " ".join(shlex.quote(arg) for arg in argv)
657+
# Pass argv as a list (not shell string) to avoid re-parse and unnecessary quoting
644658
python_wrapper = (
645659
"import subprocess\n"
646660
"import sys\n"
647-
f"result = subprocess.run({shell_command!r}, shell=True, cwd={sandbox_workdir!r}, "
661+
f"result = subprocess.run({argv!r}, shell=False, cwd={sandbox_workdir!r}, "
648662
"capture_output=True, text=True)\n"
649663
"sys.stdout.write(result.stdout)\n"
650664
"sys.stderr.write(result.stderr)\n"

test/stdlib/tools/test_shell.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,17 @@ def test_timeout_with_sudo_rejected(self) -> None:
150150
assert result.skip_message is not None
151151
assert "not allowed" in result.skip_message.lower()
152152

153+
def test_timeout_with_flag_value_and_sudo_rejected(self) -> None:
154+
"""timeout with --kill-after=value and sudo should be rejected (checks value-taking flags)."""
155+
env = StaticBashEnvironment()
156+
# Regression test: ensure sudo is detected despite --kill-after=1 consuming the value
157+
result = env.execute("timeout --kill-after=1 sudo whoami")
158+
159+
assert result.skipped is True
160+
assert result.success is False
161+
assert result.skip_message is not None
162+
assert "not allowed" in result.skip_message.lower()
163+
153164
def test_python_c_arbitrary_code_rejected(self) -> None:
154165
"""python -c with arbitrary code should be rejected."""
155166
env = StaticBashEnvironment()
@@ -363,18 +374,16 @@ class TestWorkingDirRestriction:
363374
"""Tests for working directory restrictions."""
364375

365376
def test_working_dir_restriction_blocks_outside_writes(self) -> None:
366-
"""Writing outside working_dir should be rejected."""
377+
"""Writing outside working_dir should be rejected by working_dir check."""
367378
env = StaticBashEnvironment(working_dir="/home/user/project")
368-
result = env.execute("touch /var/log/test.log")
379+
# Use a safe path that is not in DANGEROUS_PATHS (so working_dir check fires first)
380+
result = env.execute("touch /home/other/file.txt")
369381

370382
assert result.skipped is True
371383
assert result.success is False
372-
# Could be blocked by either path restriction or working dir restriction
373384
assert result.skip_message is not None
374-
assert (
375-
"not allowed" in result.skip_message.lower()
376-
or "outside" in result.skip_message.lower()
377-
)
385+
# Must be rejected by working_dir check, not dangerous-path check
386+
assert "outside" in result.skip_message.lower()
378387

379388
def test_working_dir_allows_inside_writes(self) -> None:
380389
"""Writing inside working_dir should be allowed."""

0 commit comments

Comments
 (0)