Skip to content

Commit dd0ff0f

Browse files
Support lean MCP reports; move server & docs (kevoreilly#2926)
* Support lean MCP reports; move server & docs Introduce a lightweight "lean" CAPE report flow and update MCP server location. Added mcp/filters.py (lean_search_filters) and mcp/__init__.py to expose configurable projection filters. Renamed web/mcp_server.py -> mcp/server.py and added get_lean_cape_report/_apply_lean_report plus lean flags to search_task, extended_search and task report handling to return reduced payloads for LLM/agent consumption. Adjusted docs (usage/mcp.rst) to reference the new module path and use Poetry (poetry run python mcp/server.py). Fixed perform_search usage in lib/cuckoo/common/web_utils.py to honor an optional projection argument and to compute ES _source fields from projection when provided. Updated web/apiv2/views.py to import and apply the lean projection in search and report endpoints. * Update server.py * Update mcp/server.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> * sync * Update web_utils.py * Update web_utils.py * Update web.conf.default --------- Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
1 parent 0ef1457 commit dd0ff0f

7 files changed

Lines changed: 114 additions & 16 deletions

File tree

conf/default/web.conf.default

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ vnc_host = localhost
197197
# You might need to add your server IP to ALLOWED_HOSTS in web/web/settings.py if it is not ["*"]
198198
# vnc or rdp
199199
guest_protocol = vnc
200-
# TIP: For KVM/QEMU, using 'qxl' or 'virtio' video drivers in your VM XML
200+
# TIP: For KVM/QEMU, using 'qxl' or 'virtio' video drivers in your VM XML
201201
# definition provides much better VNC performance than 'vga' or 'cirrus'.
202202
guacd_recording_path = /opt/CAPEv2/storage/guacrecordings
203203
guest_width = 1280
@@ -215,7 +215,7 @@ rdp_enable_menu_animations = no
215215
# VNC Performance Optimizations
216216
# Color depth: 8, 16, 24, 32. 16 is a great balance for performance.
217217
vnc_color_depth = 16
218-
# Cursor: 'local' renders the mouse on your browser (feels instant).
218+
# Cursor: 'local' renders the mouse on your browser (feels instant).
219219
# 'remote' waits for the server (feels laggy).
220220
vnc_cursor = local
221221
# Audio (enable only if needed, consumes bandwidth)
@@ -248,4 +248,4 @@ enabled = no
248248
enabled = no
249249

250250
[audit_framework]
251-
enabled = no
251+
enabled = no

docs/book/src/usage/mcp.rst

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ Scenario B: Remote / Shared Server (SSE)
118118

119119
In this mode, a single MCP server instance runs continuously and accepts connections from multiple clients over the network.
120120

121-
0. **Execution:** Start the server using ``python3 web/mcp_server.py --transport sse``.
121+
0. **Execution:** Start the server using ``poetry run python mcp/server.py --transport sse``.
122122
1. **Configuration:** Start the server **without** a ``CAPE_API_TOKEN`` environment variable.
123123
2. **Strict Mode:** Ensure ``token_auth_enabled = yes`` is set in ``conf/api.conf``.
124124
3. **Usage:** Users **must** provide their API token in the ``token`` argument for every tool call (e.g., ``submit_file(..., token="MyKey")``).
@@ -142,7 +142,7 @@ Standard execution (Stdio)
142142

143143
.. code-block:: bash
144144
145-
CAPE_API_URL=http://your-cape-ip:8000/apiv2 CAPE_API_TOKEN=your_token python3 web/mcp_server.py
145+
CAPE_API_URL=http://your-cape-ip:8000/apiv2 CAPE_API_TOKEN=your_token poetry run python mcp/server.py
146146
147147
Remote / SSE execution
148148
~~~~~~~~~~~~~~~~~~~~~~
@@ -151,7 +151,7 @@ To run the server as a persistent service accessible over the network:
151151

152152
.. code-block:: bash
153153
154-
python3 web/mcp_server.py --transport sse --port 9004
154+
poetry run python mcp/server.py --transport sse --port 9004
155155
156156
Deployment behind Nginx
157157
~~~~~~~~~~~~~~~~~~~~~~~
@@ -192,7 +192,7 @@ Add the following to your ``claude_desktop_config.json``:
192192
"mcpServers": {
193193
"cape": {
194194
"command": "poetry",
195-
"args": ["run", "python", "/opt/CAPEv2/web/mcp_server.py"],
195+
"args": ["run", "python", "/opt/CAPEv2/mcp/server.py"],
196196
"env": {
197197
"CAPE_API_URL": "http://127.0.0.1:8000/apiv2",
198198
"CAPE_API_TOKEN": "YOUR_API_TOKEN_HERE",
@@ -209,7 +209,7 @@ You can add the server using the CLI command:
209209

210210
.. code-block:: bash
211211
212-
gemini mcp add cape poetry run python /opt/CAPEv2/web/mcp_server.py \
212+
gemini mcp add cape poetry run python /opt/CAPEv2/mcp/server.py \
213213
-e CAPE_API_URL=http://127.0.0.1:8000/apiv2 \
214214
-e CAPE_API_TOKEN=YOUR_API_TOKEN_HERE \
215215
-e CAPE_ALLOWED_SUBMISSION_DIR=/home/user/samples
@@ -222,7 +222,7 @@ Or manually add it to your ``~/.gemini/settings.json``:
222222
"mcpServers": {
223223
"cape": {
224224
"command": "poetry",
225-
"args": ["run", "python", "/opt/CAPEv2/web/mcp_server.py"],
225+
"args": ["run", "python", "/opt/CAPEv2/mcp/server.py"],
226226
"env": {
227227
"CAPE_API_URL": "http://127.0.0.1:8000/apiv2",
228228
"CAPE_API_TOKEN": "YOUR_API_TOKEN_HERE",
@@ -243,7 +243,7 @@ Open **Agent Panel** -> **...** -> **MCP Servers** -> **Manage MCP Servers** ->
243243
"mcpServers": {
244244
"cape": {
245245
"command": "poetry",
246-
"args": ["run", "python", "/opt/CAPEv2/web/mcp_server.py"],
246+
"args": ["run", "python", "/opt/CAPEv2/mcp/server.py"],
247247
"env": {
248248
"CAPE_API_URL": "http://127.0.0.1:8000/apiv2",
249249
"CAPE_API_TOKEN": "YOUR_API_TOKEN_HERE",

lib/cuckoo/common/web_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1422,7 +1422,7 @@ def perform_search(
14221422
# Stage 8: Make the task doc the new root
14231423
{"$replaceRoot": {"newRoot": "$task_doc"}},
14241424
# Stage 9: Add your custom projection
1425-
{"$project": perform_search_filters},
1425+
{"$project": projection or perform_search_filters},
14261426
]
14271427
retval = list(mongo_aggregate(FILES_COLL, pipeline))
14281428
if not retval:

mcp/__init__.py

Whitespace-only changes.

mcp/filters.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Configuration for MCP server search filters
2+
# You can modify this dictionary to include or exclude specific fields in the lean report
3+
# Ingested by agents to give a quick overview
4+
5+
lean_search_filters = {
6+
"info": 1,
7+
"virustotal_summary": 1,
8+
"detections.family": 1,
9+
"malfamily": 1,
10+
"malfamily_tag": 1,
11+
"malscore": 1,
12+
"network.pcap_sha256": 1,
13+
"network.domains.domain": 1,
14+
"network.http.uri": 1,
15+
"signatures.name": 1,
16+
"signatures.description": 1,
17+
"signatures.severity": 1,
18+
"CAPE": 1,
19+
"behavior.summary.mutexes": 1,
20+
"behavior.summary.executed_commands": 1,
21+
"mlist_cnt": 1,
22+
"f_mlist_cnt": 1,
23+
"target.file.clamav": 1,
24+
"target.file.sha256": 1,
25+
"suri_tls_cnt": 1,
26+
"suri_alert_cnt": 1,
27+
"suri_http_cnt": 1,
28+
"suri_file_cnt": 1,
29+
"trid": 1,
30+
"_id": 0,
31+
}

web/mcp_server.py renamed to mcp/server.py

Lines changed: 69 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
api_config = Config("api")
3131

3232
# Configuration from Environment or Config File
33-
# Run with: CAPE_API_URL=http://127.0.0.1:8000/apiv2 CAPE_API_TOKEN=your_token python3 web/mcp_server.py
33+
# Run with: CAPE_API_URL=http://127.0.0.1:8000/apiv2 CAPE_API_TOKEN=your_token poetry run python mcp/server.py
3434
API_URL = os.environ.get("CAPE_API_URL")
3535
if not API_URL:
3636
# Try to get from api.conf [api] url
@@ -344,26 +344,69 @@ async def submit_static(
344344

345345
# --- Task Management & Search ---
346346

347+
def _as_dict(value):
    """Return *value* when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _field_values(items, key):
    """Collect ``item[key]`` from every dict in *items* that has *key*.

    Anything that is not a list yields an empty result, and entries that are
    not dicts (or lack *key*) are skipped instead of raising.
    """
    if not isinstance(items, list):
        return []
    return [item[key] for item in items if isinstance(item, dict) and key in item]


def _first_family(detections):
    """Return the first non-empty ``family`` found in *detections*, or None.

    Accepts either a single dict or a list of dicts; other shapes give None.
    """
    candidates = detections if isinstance(detections, list) else [detections]
    for entry in candidates:
        if isinstance(entry, dict) and entry.get("family"):
            return entry["family"]
    return None


def get_lean_cape_report(raw_cape_json, min_severity=3):
    """Reduce a CAPE report dict to a compact summary for LLM/agent use.

    Args:
        raw_cape_json: Report (or projected search record) as a dict. Any
            other type is treated as an empty report.
        min_severity: Lowest signature ``severity`` that is kept.

    Returns:
        A dict with the keys ``score``, ``family``, ``extracted_configs``,
        ``high_severity_signatures``, ``network`` (``domains``, ``http_uris``)
        and ``indicators`` (``mutexes``, ``commands``). Missing or malformed
        sections fall back to ``0``, ``"Unknown"`` or empty lists rather than
        raising.
    """
    report = _as_dict(raw_cape_json)
    info = _as_dict(report.get("info"))
    network = _as_dict(report.get("network"))
    summary = _as_dict(_as_dict(report.get("behavior")).get("summary"))

    # Prefer info.score; fall back to the top-level "malscore" field, which is
    # what the lean search projection selects.
    score = info.get("score")
    if score is None:
        score = report.get("malscore")
    if score is None:
        score = 0

    family = report.get("malfamily") or _first_family(report.get("detections")) or "Unknown"

    signatures = report.get("signatures")
    high_severity = []
    for sig in signatures if isinstance(signatures, list) else []:
        if not isinstance(sig, dict):
            continue
        severity = sig.get("severity", 0)
        # A missing/None/non-numeric severity cannot be compared; treat as low.
        if isinstance(severity, (int, float)) and severity >= min_severity:
            high_severity.append({"name": sig.get("name", ""), "desc": sig.get("description", "")})

    return {
        "score": score,
        "family": family,
        "extracted_configs": report.get("CAPE", []),
        "high_severity_signatures": high_severity,
        "network": {
            "domains": _field_values(network.get("domains"), "domain"),
            "http_uris": _field_values(network.get("http"), "uri"),
        },
        "indicators": {
            "mutexes": summary.get("mutexes", []),
            "commands": summary.get("executed_commands", []),
        },
    }
367+
368+
def _apply_lean_report(result):
    """Run ``get_lean_cape_report`` over whatever report payload *result* holds.

    * list: every element is converted and a new list is returned.
    * dict with ``error`` exactly ``False`` and a ``data`` key: ``data`` is
      converted in place (element-wise for a list, directly for a dict) and
      the same dict is returned.
    * dict containing ``info``: treated as a bare report and converted.
    * anything else: returned unchanged.
    """
    if isinstance(result, list):
        return [get_lean_cape_report(entry) for entry in result]

    if not isinstance(result, dict):
        return result

    if result.get("error") is False and "data" in result:
        payload = result["data"]
        if isinstance(payload, list):
            result["data"] = [get_lean_cape_report(entry) for entry in payload]
        elif isinstance(payload, dict):
            result["data"] = get_lean_cape_report(payload)
        return result

    if "info" in result:
        return get_lean_cape_report(result)

    return result
380+
347381
@mcp_tool("tasksearch")
async def search_task(hash_value: str, lean: bool = True, token: str = "") -> str:
    """Search for tasks by MD5, SHA1, or SHA256.

    Args:
        hash_value: Hexadecimal digest. A length of 40 selects ``sha1``, 64
            selects ``sha256``; any other length is sent as ``md5``.
        lean: When true, the response is passed through ``_apply_lean_report``.
        token: Forwarded to ``_request``.

    Returns:
        The response (or a validation error) serialised as indented JSON.
    """
    # fullmatch, not match(r"^...$"): "$" also matches just before a trailing
    # newline, which would let "abc\n" through into the request path.
    if not re.fullmatch(r"[a-fA-F0-9]+", hash_value):
        return json.dumps({"error": True, "message": "Invalid hash value provided. Only hexadecimal characters are allowed."}, indent=2)

    algo = "md5"
    if len(hash_value) == 40:
        algo = "sha1"
    elif len(hash_value) == 64:
        algo = "sha256"

    result = await _request("GET", f"tasks/search/{algo}/{hash_value}/", token=token)
    if lean:
        result = _apply_lean_report(result)
    return json.dumps(result, indent=2)
358397

359398
@mcp_tool("extendedtasksearch")
async def extended_search(option: str, argument: str, lean: bool = True, token: str = "") -> str:
    """
    Search tasks using extended options.
    Options include: id, name, type, string, ssdeep, crc32, file, command, resolvedapi, key, mutex, domain, ip, signature, signame, etc.

    With ``lean`` set, a ``lean`` flag is added to the request body and the
    response is passed through ``_apply_lean_report`` before being serialised.
    """
    payload = {"option": option, "argument": argument}
    if lean:
        payload["lean"] = True

    response = await _request("POST", "tasks/extendedsearch/", token=token, data=payload)
    return json.dumps(_apply_lean_report(response) if lean else response, indent=2)
368411

369412
@mcp_tool("extendedtasksearch")
@@ -430,7 +473,25 @@ async def get_statistics(days: int = 7, token: str = "") -> str:
430473

431474
@mcp_tool("taskreport")
async def get_task_report(task_id: int, format: str = "json", token: str = "") -> str:
    """Get the analysis report for a task (json, lite, maec, metadata, lean).

    Args:
        task_id: Identifier of the task whose report is requested.
        format: One of ``json``, ``lite``, ``maec``, ``metadata`` or ``lean``.
            ``lean`` is served through an extended search by task id with the
            ``lean`` flag set, then reduced with ``_apply_lean_report``.
        token: Forwarded to ``_request``.

    Returns:
        The report (or an error object) serialised as indented JSON.
    """
    # A tuple rather than a set so the error message lists the formats in a
    # stable order.
    allowed_formats = ("json", "lite", "maec", "metadata", "lean")
    if format not in allowed_formats:
        return json.dumps({"error": True, "message": f"Invalid format provided. Allowed formats: {', '.join(allowed_formats)}"}, indent=2)

    if format == "lean":
        data = {"option": "id", "argument": str(task_id), "lean": True}
        result = await _request("POST", "tasks/extendedsearch/", token=token, data=data)

        # Extract the single task report from the search results
        if isinstance(result, dict) and not result.get("error") and isinstance(result.get("data"), list):
            if result["data"]:
                result["data"] = result["data"][0]
            else:
                result = {"error": True, "message": "Task report not found via lean search."}

        result = _apply_lean_report(result)
        return json.dumps(result, indent=2)

    result = await _request("GET", f"tasks/get/report/{task_id}/{format}/", token=token)
    return json.dumps(result, indent=2)
436497

@@ -516,11 +577,15 @@ async def download_task_fullmemory(task_id: int, destination: str, token: str =
516577
@mcp_tool("fileview")
async def view_file(hash_value: str, hash_type: str = "sha256", token: str = "") -> str:
    """View information about a file in the database.

    Args:
        hash_value: Hexadecimal digest identifying the file.
        hash_type: Path segment naming the digest type (default ``sha256``).
        token: Forwarded to ``_request``.

    Returns:
        A JSON error string when ``hash_value`` is not purely hexadecimal,
        otherwise whatever ``_request`` returns.
    """
    # fullmatch, not match(r"^...$"): "$" also matches just before a trailing
    # newline, which would let "abc\n" through into the request path.
    if not re.fullmatch(r"[a-fA-F0-9]+", hash_value):
        return json.dumps({"error": True, "message": "Invalid hash value provided. Only hexadecimal characters are allowed."}, indent=2)
    # NOTE(review): hash_type is interpolated into the path unvalidated, and the
    # success path is not json.dumps'ed like the sibling tools - confirm both.
    return await _request("GET", f"files/view/{hash_type}/{hash_value}/", token=token)
520583

521584
@mcp_tool("sampledl")
async def download_sample(hash_value: str, destination: str, hash_type: str = "sha256", token: str = "") -> str:
    """Download a sample from the database.

    Args:
        hash_value: Hexadecimal digest identifying the sample; also used to
            build the ``<hash_value>.bin`` name passed to ``_download_file``.
        destination: Passed through to ``_download_file``.
        hash_type: Path segment naming the digest type (default ``sha256``).
        token: Forwarded to ``_download_file``.

    Returns:
        A JSON error string when ``hash_value`` is not purely hexadecimal,
        otherwise whatever ``_download_file`` returns.
    """
    # fullmatch, not match(r"^...$"): "$" also matches just before a trailing
    # newline, which would leak into both the request path and the file name.
    if not re.fullmatch(r"[a-fA-F0-9]+", hash_value):
        return json.dumps({"error": True, "message": "Invalid hash value provided. Only hexadecimal characters are allowed."}, indent=2)
    return await _download_file(f"files/get/{hash_type}/{hash_value}/", destination, f"{hash_value}.bin", token=token)
525590

526591
@mcp_tool("machinelist")

web/apiv2/views.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
statistics,
5454
validate_task,
5555
)
56+
from mcp.filters import lean_search_filters
5657
from lib.cuckoo.core.database import Database, _Database
5758
from lib.cuckoo.core.data.task import (
5859
TASK_RECOVERED,
@@ -756,7 +757,8 @@ def ext_tasks_search(request):
756757
value = tmp_value
757758
del tmp_value
758759
try:
759-
records = perform_search(term, value, user_id=request.user.id, privs=request.user.is_staff, web=False)
760+
projection = lean_search_filters if request.data.get("lean") else None
761+
records = perform_search(term, value, user_id=request.user.id, privs=request.user.is_staff, web=False, projection=projection)
760762
except ValueError:
761763
if not term:
762764
resp = {"error": True, "error_value": "No option provided."}

0 commit comments

Comments
 (0)