Skip to content

Commit 55072bb

Browse files
committed
Version to 1.15.1. Clean and extend agent tools. Small fix to discovery error handling.
1 parent 8d99473 commit 55072bb

9 files changed

Lines changed: 259 additions & 43 deletions

File tree

dreadnode/agent/tools/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
)
1717

1818
if t.TYPE_CHECKING:
19-
from dreadnode.agent.tools import fs, planning, reporting, tasking
19+
from dreadnode.agent.tools import execute, fs, memory, planning, reporting, tasking
2020

2121
__all__ = [
2222
"AnyTool",
@@ -28,15 +28,17 @@
2828
"ToolMode",
2929
"Toolset",
3030
"discover_tools_on_obj",
31+
"execute",
3132
"fs",
33+
"memory",
3234
"planning",
3335
"reporting",
3436
"tasking",
3537
"tool",
3638
"tool_method",
3739
]
3840

39-
__lazy_submodules__: list[str] = ["fs", "planning", "reporting", "tasking"]
41+
__lazy_submodules__: list[str] = ["fs", "planning", "reporting", "tasking", "execute", "memory"]
4042
__lazy_components__: dict[str, str] = {}
4143

4244

dreadnode/agent/tools/execute.py

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
import asyncio
2+
import contextlib
3+
import sys
4+
5+
from loguru import logger
6+
7+
from dreadnode.agent.tools.base import tool
8+
9+
10+
@tool(catch=True)
async def command(
    cmd: list[str],
    *,
    timeout: int = 120,
    cwd: str | None = None,
    env: dict[str, str] | None = None,
) -> str:
    """
    Execute a command as a subprocess (no shell is involved).

    Use this tool to run system utilities and command-line programs (e.g., `ls`, `cat`, `grep`). \
    It is designed for straightforward, single-shot operations and returns the standard output \
    followed by the standard error of the command.

    ## Best Practices
    - Argument Format: The command and its arguments *must* be provided as a \
    list of strings (e.g., `["ls", "-la", "/tmp"]`), not as a single string.
    - No Shell Syntax: Does not use a shell. Features like pipes (`|`), \
    redirection (`>`), and variable expansion (`$VAR`) are not supported.
    - Error on Failure: The tool will raise a `RuntimeError` if the command returns a non-zero exit code.

    Args:
        cmd: The command to execute, provided as a list of strings.
        timeout: Maximum time in seconds to allow for command execution.
        cwd: The working directory in which to execute the command.
        env: Optional environment for the command. When provided, it replaces \
    the inherited environment rather than extending it.

    Returns:
        The decoded stdout followed by the decoded stderr.

    Raises:
        TimeoutError: If the command does not finish within `timeout` seconds.
        RuntimeError: If the command exits with a non-zero return code.
    """
    # Built before the `try` so every handler below can safely reference it.
    command_str = " ".join(cmd)
    logger.debug(f"Executing '{command_str}'")

    # Upper bound (seconds) on waiting for a killed process to be reaped.
    reap_timeout = 5
    proc: asyncio.subprocess.Process | None = None
    try:
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            env=env,
            cwd=cwd,
        )
        stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout)
    except asyncio.TimeoutError as e:
        logger.warning(f"Command '{command_str}' timed out after {timeout} seconds.")
        if proc is not None:
            # Kill and then reap the child so it does not linger as a zombie.
            # The wait is bounded in case something else keeps the pipes open.
            with contextlib.suppress(OSError, asyncio.TimeoutError):
                proc.kill()
                await asyncio.wait_for(proc.wait(), timeout=reap_timeout)
        raise TimeoutError(f"Command timed out after {timeout} seconds") from e
    except Exception as e:
        logger.error(f"Error executing '{command_str}': {e}")
        raise

    # Lenient decoding: tools may emit bytes that are not valid UTF-8, and that
    # should not turn a successful run into a UnicodeDecodeError.
    output = stdout.decode(errors="replace") + stderr.decode(errors="replace")

    if proc.returncode != 0:
        logger.error(f"Command '{command_str}' failed with return code {proc.returncode}: {output}")
        raise RuntimeError(f"Command failed ({proc.returncode}): {output}")

    logger.debug(f"Command '{command_str}':\n{output}")
    return output
64+
65+
66+
@tool(catch=True)
async def python(code: str, *, timeout: int = 120) -> str:
    """
    Execute Python code.

    This tool is ideal for tasks that require custom logic like loops and conditionals, \
    or for parsing and transforming the output from other tools. Use it to implement a \
    sequence of actions, perform file I/O, or create functionality not covered by other \
    available tools.

    ## Best Practices
    - Capture Output: Your script *must* print results to standard output (`print(...)`) to be captured.
    - Self-Contained: Import all required standard libraries (e.g., `os`, `json`) within the script.
    - Handle Errors: Write robust code. Unhandled exceptions in your script will cause the tool to fail.
    - String-Based I/O: Ensure all printed output can be represented as a string. Use formats like JSON (`json.dumps`) for complex data.

    Args:
        code: The Python code to execute as a string.
        timeout: Maximum time in seconds to allow for code execution.

    Returns:
        The decoded stdout followed by the decoded stderr of the script.

    Raises:
        TimeoutError: If the script does not finish within `timeout` seconds.
        RuntimeError: If the interpreter exits with a non-zero return code.
    """
    logger.debug(f"Executing python:\n{code}")

    # Upper bound (seconds) on waiting for a killed interpreter to be reaped.
    reap_timeout = 5
    proc: asyncio.subprocess.Process | None = None
    try:
        # "-" makes the interpreter read the program from stdin.
        proc = await asyncio.create_subprocess_exec(
            sys.executable,
            "-",
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, stderr = await asyncio.wait_for(
            proc.communicate(input=code.encode("utf-8")), timeout=timeout
        )
    except asyncio.TimeoutError as e:
        logger.warning(f"Python execution timed out after {timeout} seconds.")
        if proc is not None:
            # Kill and then reap the child so it does not linger as a zombie.
            # The wait is bounded in case something else keeps the pipes open.
            with contextlib.suppress(OSError, asyncio.TimeoutError):
                proc.kill()
                await asyncio.wait_for(proc.wait(), timeout=reap_timeout)
        raise TimeoutError(f"Execution timed out after {timeout} seconds") from e
    except Exception as e:
        logger.error(f"Error executing code in Python: {e}")
        raise

    # Undecodable bytes are dropped rather than failing the whole call.
    output = stdout.decode(errors="ignore") + stderr.decode(errors="ignore")

    if proc.returncode != 0:
        logger.error(f"Execution failed with return code {proc.returncode}:\n{output}")
        raise RuntimeError(f"Execution failed ({proc.returncode}):\n{output}")

    logger.debug(f"Execution successful. Output:\n{output}")
    return output

dreadnode/agent/tools/memory.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import typing as t
2+
3+
from pydantic import PrivateAttr
4+
5+
from dreadnode.agent.tools import Toolset, tool_method
6+
7+
8+
class Memory(Toolset):
    """
    Provides a stateful, in-memory key-value store for the toolset's lifetime.

    This toolset allows the agent to save, retrieve, and manage data, enabling it to
    remember information across multiple steps and tool calls.
    """

    # Backing string-to-string store, held as a pydantic private attribute.
    _memory: dict[str, str] = PrivateAttr(default_factory=dict)

    @tool_method
    def save_memory(
        self,
        key: t.Annotated[str, "The unique key to store the value under."],
        value: t.Annotated[str, "The string value to store in memory."],
    ) -> str:
        """Saves a value to memory with the specified key, overwriting any existing value."""
        self._memory[key] = value
        return f"Value saved to memory key: '{key}'"

    @tool_method(catch=True)
    def retrieve_memory(self, key: t.Annotated[str, "The key of the value to retrieve."]) -> str:
        """Retrieves a value from memory using the specified key."""
        try:
            return self._memory[key]
        except KeyError:
            # Still a KeyError, but with a message that explains the failure
            # instead of only echoing the key (mirrors `clear_memory`'s wording).
            # NOTE(review): presumably `catch=True` surfaces this message to the
            # agent - confirm against `tool_method`.
            raise KeyError(f"Key '{key}' not found in memory.") from None

    @tool_method
    def list_memory_keys(self) -> list[str]:
        """Lists all keys currently stored in memory."""
        return list(self._memory)

    @tool_method(catch=True)
    def clear_memory(
        self,
        key: t.Annotated[
            str | None, "The specific key to clear. If not provided, all memory is cleared."
        ] = None,
    ) -> str:
        """
        Clears a specific key from memory, or clears all memory if no key is provided.
        """
        if key is None:
            self._memory.clear()
            return "All memory has been cleared."

        if key not in self._memory:
            return f"Key '{key}' not found in memory. Nothing to clear."

        del self._memory[key]
        return f"Cleared memory for key: '{key}'"

dreadnode/agent/tools/planning.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,14 @@ class TodoItem(BaseModel):
2222
)
2323

2424

25-
@tool
25+
@tool(catch=True)
2626
def update_todo(todos: t.Annotated[list[TodoItem], "The full, updated list of todo items."]) -> str:
2727
"""
2828
Use this tool to create and manage a structured task list for your current session.
2929
This helps you track progress, organize complex tasks, and demonstrate thoroughness to the user.
3030
It also helps the user understand the progress of the task and overall progress of their requests.
3131
3232
## When to Use This Tool
33-
Use this tool proactively in these scenarios:
3433
3534
1. Complex multi-step tasks - When a task requires 3 or more distinct steps or actions
3635
2. Non-trivial and complex tasks - Tasks that require careful planning or multiple operations
@@ -42,7 +41,6 @@ def update_todo(todos: t.Annotated[list[TodoItem], "The full, updated list of to
4241
4342
## When NOT to Use This Tool
4443
45-
Skip using this tool when:
4644
1. There is only a single, straightforward task
4745
2. The task is trivial and tracking it provides no organizational benefit
4846
3. The task can be completed in less than 3 trivial steps
@@ -111,3 +109,28 @@ def update_todo(todos: t.Annotated[list[TodoItem], "The full, updated list of to
111109
f"{status_counts['in_progress']} in progress, "
112110
f"{status_counts['pending']} pending."
113111
)
112+
113+
114+
@tool
def think(thought: str) -> None:
    """
    Records a thought, reflection, or plan to document your reasoning process.

    This tool acts as your internal monologue, allowing you to articulate your strategy. Use it to:
    - Break down a complex problem into smaller steps.
    - Formulate a multi-step plan before you act.
    - Interpret the results of another tool's output.
    - Document a change in strategy (self-correction).

    A clear chain of thought is essential for explaining your actions.

    ## Best Practices
    - Do Not Substitute for Action: After thinking, you must call the appropriate \
    tool to execute your plan. This tool performs no action on its own.
    - Do Not Repeat Information: Never use this to repeat the output of other tools. \
    Use it to state your *conclusion* or *next step* based on that output.

    Args:
        thought: A clear, concise statement of your thought process or plan.
    """
    # The thought is only logged; nothing is returned to the caller.
    logger.info(f"Agent thought: {thought}")

dreadnode/agent/tools/reporting.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,21 @@
44
from dreadnode.data_types import Markdown
55

66

7-
@tool
7+
@tool(catch=True)
88
async def highlight_for_review(title: str, interest_level: str, justification: str) -> str:
99
"""
10-
Flags a potential area of interest for a human operator to review.
11-
12-
This is your primary tool for surfacing leads. Use it when you discover something
13-
anomalous, high-value, or potentially vulnerable that warrants human attention.
14-
15-
`interest_level` should be one of:
16-
- "high": Urgent. Potential for immediate impact (e.g., exposed login, sensitive keywords).
17-
- "medium": Interesting. Warrants follow-up (e.g., dev subdomain, unusual tech stack).
18-
- "low": Informational. Good context but not an immediate priority (e.g., interesting directory found).
19-
20-
`justification` should be a structured technical markdown explanation of *why* this is
21-
interesting and what the potential next steps for a human could be.
10+
Flag a finding for human review. Use this to surface leads that warrant further investigation.
11+
12+
This tool is essential for escalating findings that appear anomalous, valuable, or potentially
13+
vulnerable. It creates a "lead" for a human operator to pick up.
14+
15+
Args:
16+
title: A brief, descriptive summary of the finding.
17+
interest_level: The priority of the finding. Must be one of:
18+
- "high": Urgent. Potential for immediate impact or exploitation. (exposed credentials, pre-authentication vulnerability).
19+
- "medium": Noteworthy. Suggests a potential weakness or area for deeper investigation. (debug endpoint, verbose error messages, PII exposure).
20+
- "low": Informational. Provides useful context but is not an immediate risk. (software version disclosure, interesting file path).
21+
justification: A technical, markdown-formatted explanation. Detail *why* the finding is interesting, what its potential impact is, and suggest next steps for a human analyst.
2222
"""
2323
from dreadnode import log_metric, log_output, tag
2424

@@ -32,4 +32,4 @@ async def highlight_for_review(title: str, interest_level: str, justification: s
3232
log_output("markdown", Markdown(f"# {title} ({interest_level})\n\n{justification}"))
3333
log_metric("count", 1, mode="count")
3434

35-
return "Area of interest has been highlighted for human review."
35+
return "Highlighted."

dreadnode/agent/tools/tasking.py

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,44 +7,52 @@
77
@tool
88
async def finish_task(success: bool, summary: str) -> None: # noqa: ARG001, FBT001
99
"""
10-
Mark your task as complete with a success/failure status and markdown summary of actions taken.
10+
Concludes the task by reporting a final status and a comprehensive summary.
1111
12-
## When to Use This Tool
13-
This tool should be called under the following circumstances:
14-
1. **All TODOs are complete**: If you are managing todos, every task in your TODO list has been marked as 'completed'.
15-
2. **No more actions**: You have no further actions to take and have addressed all aspects of the user's request.
16-
3. **Irrecoverable failure**: You have encountered an error that you cannot resolve, and there are no further steps you can take.
17-
4. **Final Summary**: You are ready to provide a comprehensive summary of all actions taken.
18-
19-
## When NOT to Use This Tool
20-
Do not use this tool if:
21-
2. **You are in the middle of a multi-step process**: The overall task is not yet finished.
22-
3. **A recoverable error has occurred**: You should first attempt to fix the error through all available means.
23-
4. **You are waiting for user feedback**: The task is paused, not finished.
12+
This is the **final tool** to call when your planned sequence of actions is complete, \
13+
regardless of whether the outcome was successful. Use it when you have no more \
14+
steps to take and are ready to present a final report.
2415
2516
## Best Practices
26-
* **Final Step**: This should be the absolute last tool you call. Once invoked, your task is considered finished.
27-
* **Honest Status**: Accurately report the success or failure of the overall task. If any part of the task failed or was not completed, `success` should be `False`.
28-
* **Comprehensive Summary**: The `summary` should be a complete and detailed markdown-formatted report of everything you did, including steps taken, tools used, and the final outcome. This is your final report to the user.
17+
- Honest Status: The `success` flag must accurately reflect the final outcome. \
18+
If any part of the task failed or objectives were not met, it must be `False`.
19+
- Comprehensive Summary: The `summary` is your final report. It must be a complete, \
20+
markdown-formatted document detailing all actions taken, tools used, and the results.
21+
22+
Args:
23+
success: True if the task's objectives were fully met, False otherwise.
24+
summary: A complete markdown-formatted report of all actions and outcomes.
2925
"""
3026
from dreadnode import log_metric
3127

3228
log_func = logger.success if success else logger.warning
3329
log_func(f"Agent finished the task (success={success})")
34-
3530
log_metric("task_success", success)
3631

3732
raise Finish if success else Fail("Agent marked the task as failed.")
3833

3934

4035
@tool
41-
async def give_up_on_task(reason: str) -> None: # noqa: ARG001
36+
async def give_up_on_task(reason: str) -> None:
4237
"""
43-
Give up on your task.
38+
Aborts the task when you are irrecoverably stuck and cannot make progress.
39+
40+
This tool is a last resort and should only be used when you have exhausted all \
41+
possible strategies and alternative approaches. It signals that you were unable \
42+
to complete your assigned process.
43+
44+
## Best Practices
45+
- Do Not Use for a Failed Outcome: If the `finish_task` tool is available, use it to report failures. \
46+
This tool is strictly for when you cannot *finish* your work.
47+
- Provide a Clear Justification: The `reason` must clearly explain why you are stuck. \
48+
Detail the final obstacle you could not overcome and the approaches you already tried.
49+
50+
Args:
51+
reason: A concise explanation of why you are unable to continue the task.
4452
"""
4553
from dreadnode import log_metric
4654

47-
logger.info("Agent gave up on the task")
55+
logger.warning(f"Agent gave up on the task: {reason}")
4856
log_metric("task_give_up", 1)
4957

5058
raise Fail("Agent gave up on the task.")

0 commit comments

Comments
 (0)