Skip to content

Commit 334a454

Browse files
committed
Quick dirty fix fallback to string content
1 parent c771082 commit 334a454

6 files changed

Lines changed: 91 additions & 38 deletions

File tree

deadend_cli/deadend_agent/src/deadend_agent/agents/components/executor.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,8 @@ def _add_agent_output_to_context(
384384
@supervisor.agent.tool
385385
async def call_requester_agent(ctx: RunContext[SupervisorDeps], prompt: str) -> str:
386386
"""Call the requester agent to perform HTTP request testing."""
387+
print(f"input tool looking for the error : {prompt}")
388+
387389
if ctx.deps.requester_agent is None or ctx.deps.requester_deps is None:
388390
return "Requester agent dependencies not configured."
389391
result = await ctx.deps.requester_agent.run(
@@ -408,6 +410,7 @@ async def call_requester_agent(ctx: RunContext[SupervisorDeps], prompt: str) ->
408410
@supervisor.agent.tool
409411
async def call_shell_agent(ctx: RunContext[SupervisorDeps], prompt: str) -> str:
410412
"""Call the shell agent to execute shell commands."""
413+
print(f"input tool looking for the error : {prompt}")
411414
if ctx.deps.shell_agent is None or ctx.deps.shell_deps is None:
412415
return "Shell agent dependencies not configured."
413416
result = await ctx.deps.shell_agent.run(
@@ -431,7 +434,8 @@ async def call_shell_agent(ctx: RunContext[SupervisorDeps], prompt: str) -> str:
431434

432435
@supervisor.agent.tool
433436
async def call_webapp_analyzer_agent(ctx: RunContext[SupervisorDeps], prompt: str) -> str:
434-
437+
print(f"input tool looking for the error : {prompt}")
438+
435439
print(ctx.deps.requester_deps)
436440
result = await ctx.deps.webapp_analyzer_agent.run(
437441
prompt,
@@ -447,6 +451,8 @@ async def call_webapp_analyzer_agent(ctx: RunContext[SupervisorDeps], prompt: st
447451
@supervisor.agent.tool
448452
async def call_python_interpreter_agent(ctx: RunContext[SupervisorDeps], prompt: str) -> str:
449453
"""Call the python interpreter agent to execute Python scripts."""
454+
print(f"input tool looking for the error : {prompt}")
455+
450456
result = await ctx.deps.python_interpreter_agent.run(
451457
prompt,
452458
deps=ctx.deps.session_id,

deadend_cli/deadend_agent/src/deadend_agent/core_agent/core_agent.py

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
- Tracks usage with simple counters
1212
- Integrates OpenTelemetry for observability
1313
"""
14-
1514
from __future__ import annotations
1615

1716
import json
@@ -144,13 +143,13 @@ def __init__(
144143
output_schema: Type[BaseModel] | None = None,
145144
api_key: str | None = None,
146145
api_base: str | None = None,
147-
rate_limit_rpm: int = 60,
146+
rate_limit_rpm: int = 200,
148147
name: str = "agent",
149148
):
150149
"""Initialize CoreAgent.
151150
152151
Args:
153-
model: Model identifier (e.g., "gpt-4o", "claude-3-5-sonnet")
152+
model: Model identifier
154153
instructions: System instructions/prompt for the agent
155154
tools: List of callable tool functions (default: None)
156155
output_schema: Pydantic model for structured output (default: None)
@@ -187,6 +186,9 @@ def __init__(
187186
self.completion_tokens = 0
188187

189188
# Instructor client for structured output
189+
# We always *attempt* to use Instructor when available and an output_schema
190+
# is provided, but will gracefully fall back to manual JSON extraction
191+
# if the Instructor call fails for any reason.
190192
if INSTRUCTOR_AVAILABLE and output_schema:
191193
self.instructor_client = instructor.from_litellm(acompletion)
192194
else:
@@ -668,7 +670,7 @@ def log_retry(retry_state):
668670
)
669671

670672
@retry(
671-
stop=stop_after_attempt(5),
673+
stop=stop_after_attempt(0),
672674
wait=wait_exponential(multiplier=2, min=2, max=60),
673675
retry=retry_if_exception_type(retryable_exceptions),
674676
reraise=True,
@@ -1009,8 +1011,10 @@ async def _extract_structured(self, messages: list[dict]) -> BaseModel:
10091011
"model": self.model,
10101012
"messages": messages,
10111013
"response_model": self.output_schema,
1014+
"format": "json",
10121015
}
10131016

1017+
10141018
if self.api_base:
10151019
kwargs["api_base"] = self.api_base
10161020

@@ -1026,17 +1030,17 @@ async def _extract_structured(self, messages: list[dict]) -> BaseModel:
10261030
pass
10271031
return response
10281032
except Exception as instructor_error:
1029-
# Check if it's a grammar/schema not supported error
1030-
error_str = str(instructor_error)
1031-
if "Invalid grammar" in error_str or "response_format" in error_str.lower():
1032-
try:
1033-
console.print("[bold yellow][Instructor Failed][/bold yellow] Model doesn't support structured output, trying manual JSON extraction...")
1034-
except BlockingIOError:
1035-
pass
1036-
# Fall through to manual extraction
1037-
else:
1038-
# Re-raise other errors to trigger fallback
1039-
raise instructor_error
1033+
# Any failure in Instructor structured output should fall back to
1034+
# manual JSON extraction so that providers with partial support
1035+
# don't break the agent.
1036+
try:
1037+
console.print(
1038+
"[bold yellow][Instructor Failed][/bold yellow] "
1039+
f"{str(instructor_error)[:200]} - falling back to manual JSON extraction..."
1040+
)
1041+
except BlockingIOError:
1042+
pass
1043+
# Fall through to manual extraction below
10401044

10411045
# Manual JSON extraction fallback
10421046
# Ask the LLM to output JSON and parse it ourselves

deadend_cli/deadend_prompts/src/deadend_prompts/_shared/_tool_priority.jinja2

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,53 +5,53 @@ Choose the most efficient tool for each task type. Prefer specialized tools over
55
### HTTP Request Testing
66

77
**Single payload test** (1-5 requests):
8-
1. `send_payload` - Specialized for single request testing
9-
2. `requester` - Use if you need raw HTTP control
8+
1. `send_payload` - Specialized for request testing and raw HTTP requests (priority 1)
9+
2. `sandboxed_shell_tool` - Using curl (priority 2)
1010

1111
**Batch testing** (10-1000 requests):
12-
1. `run_python_file` - Write Python script for deterministic fuzzing
13-
2. `send_payload` - Only if batch tool unavailable
12+
1. `run_python_file` - Write Python script for deterministic fuzzing (priority 1)
13+
2. `send_payload` - Only if batch tool unavailable or a fuzzing tool (priority 2)
1414

1515
**High-volume fuzzing** (>1000 requests):
16-
1. `sandboxed_shell_tool` + specialized fuzzing tools (ffuf, gobuster)
17-
2. `run_python_file` - For custom fuzzing logic
16+
1. `sandboxed_shell_tool` + specialized fuzzing tools (ffuf, gobuster) (priority 1)
17+
2. `run_python_file` - For custom fuzzing logic (priority 2)
1818

1919
### Code Analysis
2020

2121
**Semantic search** (finding patterns, endpoints):
22-
1. `webapp_code_rag` - Semantic search over indexed code
22+
1. `webapp_code_rag` - Semantic search over indexed code (priority 1)
2323

2424
**Pattern matching** (regex, specific strings):
25-
2. `webapp_code_rag` - If pattern is in indexed code
25+
1. `webapp_code_rag` - If pattern is in indexed code (priority 1)
2626

2727
**File reading** (specific files):
28-
1. `source_code_browser` - Direct file access
29-
2. `webapp_code_rag` - If file is indexed
28+
1. `source_code_browser` - Direct file access (priority 1)
29+
2. `webapp_code_rag` - If file is indexed (priority 2)
3030

3131
### Reconnaissance
3232

3333
**Web reconnaissance** (forms, endpoints, architecture):
34-
1. `webapp_code_rag` - Fast, no HTTP requests, pre-indexed
35-
2. `send_payload` - For real-time verification
34+
1. `webapp_code_rag` - Fast, no HTTP requests, pre-indexed (priority 1)
35+
2. `send_payload` - For real-time verification (priority 2)
3636
3. `run_python_file` - For complex analysis (priority 3)
3737

3838
**System reconnaissance** (ports, services, infrastructure):
39-
1. `sandboxed_shell_tool` - Use nmap, sqlmap, specialized tools
40-
2. `run_python_file` - Only if shell tools insufficient
39+
1. `sandboxed_shell_tool` - Use nmap, sqlmap, specialized tools, curl (priority 1)
40+
2. `run_python_file` - Only if shell tools insufficient (priority 2)
4141

4242
### Exploitation
4343

4444
**Simple exploits** (single payload):
45-
1. `send_payload` - Direct HTTP request
46-
2. `requester` - If need raw control
45+
1. `send_payload` - Raw HTTP requests (priority 1)
46+
2. `sandboxed_shell_tool` - Can send requests with curl (priority 2)
4747

4848
**Complex exploits** (multi-stage, timing, logic):
49-
1. `run_python_file` - Full control, complex logic
50-
2. `send_payload` - For individual stages
49+
1. `run_python_file` - Full control, complex logic (priority 1)
50+
2. `send_payload` - For individual stages (priority 2)
5151

5252
**System exploits** (command execution, file operations):
53-
1. `sandboxed_shell_tool` - Use appropriate security tools
54-
2. `run_python_file` - If shell tools insufficient
53+
1. `sandboxed_shell_tool` - Use appropriate security tools (priority 1)
54+
2. `run_python_file` - If shell tools insufficient (priority 2)
5555

5656
### Decision Tree
5757

@@ -71,4 +71,4 @@ Need to exploit?
7171
└─ System-level → sandboxed_shell_tool
7272
```
7373

74-
**Rule**: Start with specialized tool, escalate to generic only if specialized fails.
74+
**Rule**: Start with the specialized tool; escalate to a generic tool only if the specialized one fails. Priorities are marked with `(priority N)`, where a lower N means the tool should be tried first.

deadend_cli/deadend_prompts/src/deadend_prompts/reporting_template.instructions.jinja2

Whitespace-only changes.

deadend_cli/deadend_prompts/src/deadend_prompts/shell.instructions.jinja2

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,12 @@ curl -s "http://target" | grep -oP 'href="\K[^"]+'
100100
- **Simple encoding** (base64, xxd)
101101
- Command-line **security tools** (nikto, sqlmap, hydra)
102102

103+
### HTTP / NETWORK CLIENTS ARE EXPLICITLY ALLOWED
104+
105+
- Using standard CLI HTTP clients such as `curl`, `wget`, `httpie`, or similar tools **IS allowed and expected** in this sandbox.
106+
- Treat `curl` and similar clients as normal security tooling for interacting with in-scope HTTP/S endpoints.
107+
- Do **not** refuse or down-rank valid plans just because they involve `curl` or other HTTP clients; only scope (targets/endpoints) and safety rules matter.
108+
103109
❌ **DON'T USE SHELL WHEN**:
104110
- Complex multi-step exploits → use `python_interpreter`
105111
- Need programmatic logic → use `python_interpreter`
@@ -161,6 +167,15 @@ Context shows: nmap -sV target.com → ports 80,443 open
161167
Execute **security tools only** for network reconnaissance, vulnerability scanning, and security enumeration.
162168
This is an isolated container with pentesting tools - NOT a general-purpose shell.
163169

170+
## SANDBOX SAFETY MODEL
171+
172+
- This sandbox is **isolated** and explicitly designed for running security tooling.
173+
- You **MAY** run any standard CLI tools and security utilities available in Kali (network, system, HTTP, fuzzing, enumeration, password tools, etc.) as long as:
174+
- The **target/endpoints are in scope** according to the context rules above, and
175+
- You respect the **NO SOURCE CODE ACCESS** constraint below.
176+
- Do **NOT** refuse or avoid using a tool (e.g. `curl`, `nmap`, `sqlmap`, `hydra`, `ffuf`, etc.) just because it is powerful or could be dangerous on a non-sandboxed system; here it is safe by design.
177+
- Your only safety filters are **scope** (what targets are allowed) and the explicit constraints in these instructions.
178+
164179
## CRITICAL CONSTRAINT - NO SOURCE CODE ACCESS
165180

166181
**This container has NO access to target source code files.**
@@ -174,7 +189,7 @@ If your task requires source code analysis, return with low confidence and note
174189
## AVAILABLE TOOLS
175190

176191
{% for tool_name, tool_description in tools.items() %}
177-
### {{tool_name}}
192+
### {{ tool_name }}
178193
{{tool_description}}
179194
{% endfor %}
180195

deadend_cli/src/deadend_cli/jsonrpc_server.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
""" JsonRPC server interface """
66
from typing import Any, Dict, AsyncGenerator
77
import json
8+
import os
89
from dataclasses import asdict, is_dataclass
910
from pydantic import TypeAdapter
1011
import typer
@@ -24,6 +25,11 @@
2425
from deadend_cli.jsonrpc.hooks_adapter import EventBusHooksAdapter
2526

2627

28+
def _phoenix_otel_enabled() -> bool:
29+
"""True if Phoenix OTLP should be used (from .env / env vars)."""
30+
endpoint = os.getenv("PHOENIX_COLLECTOR_ENDPOINT", "").strip()
31+
enabled = os.getenv("DEADEND_PHOENIX_OTEL_ENABLED", "").strip().lower() in ("1", "true", "yes")
32+
return bool(endpoint) or enabled
2733
def main(
2834
debug: bool=False,
2935
log_file: str | None = None,
@@ -67,6 +73,26 @@ def main(
6773
log_file=log_file
6874
)
6975

76+
# setting up tracing
77+
if _phoenix_otel_enabled():
78+
# Register Phoenix OTLP before importing the agent so the global tracer provider
79+
# is Phoenix; agent telemetry will then use it (see DEADEND_OTEL_USE_GLOBAL in telemetry.py).
80+
os.environ["DEADEND_OTEL_USE_GLOBAL"] = "1"
81+
from phoenix.otel import register
82+
83+
endpoint = (os.getenv("PHOENIX_COLLECTOR_ENDPOINT") or "https://crunch.straylabs.ai/").strip().rstrip("/")
84+
if not endpoint.endswith("/v1/traces"):
85+
endpoint = f"{endpoint}/v1/traces"
86+
project_name = os.getenv("PHOENIX_PROJECT_NAME", "deadend")
87+
88+
register(
89+
auto_instrument=True,
90+
project_name=project_name,
91+
batch=True,
92+
endpoint=endpoint,
93+
protocol="http/protobuf",
94+
)
95+
7096
server.add_dependency("component_manager", component_manager)
7197
server.add_dependency("event_bus", event_bus)
7298
server.add_dependency("deadend_agent_refs", deadend_agent_refs)
@@ -668,6 +694,7 @@ async def run_agent_recursive(
668694
threat_model_text = ""
669695

670696
async for item in deadend_agent.threat_model_stream(task=prompt):
697+
671698
threat_model_text += object_to_string(item)
672699
yield {
673700
"phase": "recon",
@@ -683,6 +710,7 @@ async def run_agent_recursive(
683710
task=prompt,
684711
threat_model=threat_model_text
685712
):
713+
686714
yield {
687715
"phase": "exploit",
688716
"data": TypeAdapter(dict).dump_json(item),

0 commit comments

Comments
 (0)