Support lean MCP reports; move server & docs (kevoreilly#2926)

doomedraven · gemini-code-assist[bot] · web-flow · commit dd0ff0ff8155 · 2026-02-25T09:49:18.000+01:00
* Support lean MCP reports; move server & docs

Introduce a lightweight "lean" CAPE report flow and update MCP server location. Added mcp/filters.py (lean_search_filters) and mcp/__init__.py to expose configurable projection filters. Renamed web/mcp_server.py -> mcp/server.py and added get_lean_cape_report/_apply_lean_report plus lean flags to search_task, extended_search and task report handling to return reduced payloads for LLM/agent consumption. Adjusted docs (usage/mcp.rst) to reference the new module path and use Poetry (poetry run python mcp/server.py). Fixed perform_search usage in lib/cuckoo/common/web_utils.py to honor an optional projection argument and to compute ES _source fields from projection when provided. Updated web/apiv2/views.py to import and apply the lean projection in search and report endpoints.

* Update server.py

* Update mcp/server.py

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>

* sync

* Update web_utils.py

* Update web_utils.py

* Update web.conf.default

---------

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
diff --git a/conf/default/web.conf.default b/conf/default/web.conf.default
@@ -197,7 +197,7 @@ vnc_host = localhost
 # You might need to add your server IP to ALLOWED_HOSTS in web/web/settings.py if it not ["*""]
 # vnc or rdp
 guest_protocol = vnc
-# TIP: For KVM/QEMU, using 'qxl' or 'virtio' video drivers in your VM XML 
+# TIP: For KVM/QEMU, using 'qxl' or 'virtio' video drivers in your VM XML
 # definition provides much better VNC performance than 'vga' or 'cirrus'.
 guacd_recording_path = /opt/CAPEv2/storage/guacrecordings
 guest_width = 1280
@@ -215,7 +215,7 @@ rdp_enable_menu_animations = no
 # VNC Performance Optimizations
 # Color depth: 8, 16, 24, 32. 16 is a great balance for performance.
 vnc_color_depth = 16
-# Cursor: 'local' renders the mouse on your browser (feels instant). 
+# Cursor: 'local' renders the mouse on your browser (feels instant).
 # 'remote' waits for the server (feels laggy).
 vnc_cursor = local
 # Audio (enable only if needed, consumes bandwidth)
@@ -248,4 +248,4 @@ enabled = no
 enabled = no
 
 [audit_framework]
-enabled = no
+enabled = no
diff --git a/docs/book/src/usage/mcp.rst b/docs/book/src/usage/mcp.rst
@@ -118,7 +118,7 @@ Scenario B: Remote / Shared Server (SSE)
 
 In this mode, a single MCP server instance runs continuously and accepts connections from multiple clients over the network.
 
-0.  **Execution:** Start the server using ``python3 web/mcp_server.py --transport sse``.
+0.  **Execution:** Start the server using ``poetry run python mcp/server.py --transport sse``.
 1.  **Configuration:** Start the server **without** a ``CAPE_API_TOKEN`` environment variable.
 2.  **Strict Mode:** Ensure ``token_auth_enabled = yes`` is set in ``conf/api.conf``.
 3.  **Usage:** Users **must** provide their API token in the ``token`` argument for every tool call (e.g., ``submit_file(..., token="MyKey")``).
@@ -142,7 +142,7 @@ Standard execution (Stdio)
 
 .. code-block:: bash
 
-    CAPE_API_URL=http://your-cape-ip:8000/apiv2 CAPE_API_TOKEN=your_token python3 web/mcp_server.py
+    CAPE_API_URL=http://your-cape-ip:8000/apiv2 CAPE_API_TOKEN=your_token poetry run python mcp/server.py
 
 Remote / SSE execution
 ~~~~~~~~~~~~~~~~~~~~~~
@@ -151,7 +151,7 @@ To run the server as a persistent service accessible over the network:
 
 .. code-block:: bash
 
-    python3 web/mcp_server.py --transport sse --port 9004
+    poetry run python mcp/server.py --transport sse --port 9004
 
 Deployment behind Nginx
 ~~~~~~~~~~~~~~~~~~~~~~~
@@ -192,7 +192,7 @@ Add the following to your ``claude_desktop_config.json``:
       "mcpServers": {
         "cape": {
           "command": "poetry",
-          "args": ["run", "python", "/opt/CAPEv2/web/mcp_server.py"],
+          "args": ["run", "python", "/opt/CAPEv2/mcp/server.py"],
           "env": {
             "CAPE_API_URL": "http://127.0.0.1:8000/apiv2",
             "CAPE_API_TOKEN": "YOUR_API_TOKEN_HERE",
@@ -209,7 +209,7 @@ You can add the server using the CLI command:
 
 .. code-block:: bash
 
-    gemini mcp add cape poetry run python /opt/CAPEv2/web/mcp_server.py \
+    gemini mcp add cape poetry run python /opt/CAPEv2/mcp/server.py \
       -e CAPE_API_URL=http://127.0.0.1:8000/apiv2 \
       -e CAPE_API_TOKEN=YOUR_API_TOKEN_HERE \
       -e CAPE_ALLOWED_SUBMISSION_DIR=/home/user/samples
@@ -222,7 +222,7 @@ Or manually add it to your ``~/.gemini/settings.json``:
       "mcpServers": {
         "cape": {
           "command": "poetry",
-          "args": ["run", "python", "/opt/CAPEv2/web/mcp_server.py"],
+          "args": ["run", "python", "/opt/CAPEv2/mcp/server.py"],
           "env": {
             "CAPE_API_URL": "http://127.0.0.1:8000/apiv2",
             "CAPE_API_TOKEN": "YOUR_API_TOKEN_HERE",
@@ -243,7 +243,7 @@ Open **Agent Panel** -> **...** -> **MCP Servers** -> **Manage MCP Servers** ->
       "mcpServers": {
         "cape": {
           "command": "poetry",
-          "args": ["run", "python", "/opt/CAPEv2/web/mcp_server.py"],
+          "args": ["run", "python", "/opt/CAPEv2/mcp/server.py"],
           "env": {
             "CAPE_API_URL": "http://127.0.0.1:8000/apiv2",
             "CAPE_API_TOKEN": "YOUR_API_TOKEN_HERE",
diff --git a/lib/cuckoo/common/web_utils.py b/lib/cuckoo/common/web_utils.py
@@ -1422,7 +1422,7 @@ def perform_search(
                 # Stage 8: Make the task doc the new root
                 {"$replaceRoot": {"newRoot": "$task_doc"}},
                 # Stage 9: Add your custom projection
-                {"$project": perform_search_filters},
+                {"$project": projection or perform_search_filters},
             ]
             retval = list(mongo_aggregate(FILES_COLL, pipeline))
             if not retval:
diff --git a/mcp/__init__.py b/mcp/__init__.py
diff --git a/mcp/filters.py b/mcp/filters.py
@@ -0,0 +1,31 @@
+# Configuration for MCP server search filters
+# You can modify this dictionary to include or exclude specific fields in the lean report
+# Ingested by agents to give a quick overview
+
+lean_search_filters = {
+    "info": 1,
+    "virustotal_summary": 1,
+    "detections.family": 1,
+    "malfamily": 1,
+    "malfamily_tag": 1,
+    "malscore": 1,
+    "network.pcap_sha256": 1,
+    "network.domains.domain": 1,
+    "network.http.uri": 1,
+    "signatures.name": 1,
+    "signatures.description": 1,
+    "signatures.severity": 1,
+    "CAPE": 1,
+    "behavior.summary.mutexes": 1,
+    "behavior.summary.executed_commands": 1,
+    "mlist_cnt": 1,
+    "f_mlist_cnt": 1,
+    "target.file.clamav": 1,
+    "target.file.sha256": 1,
+    "suri_tls_cnt": 1,
+    "suri_alert_cnt": 1,
+    "suri_http_cnt": 1,
+    "suri_file_cnt": 1,
+    "trid": 1,
+    "_id": 0,
+}
diff --git a/mcp/server.py b/mcp/server.py
@@ -30,7 +30,7 @@
 api_config = Config("api")
 
 # Configuration from Environment or Config File
-# Run with: CAPE_API_URL=http://127.0.0.1:8000/apiv2 CAPE_API_TOKEN=your_token python3 web/mcp_server.py
+# Run with: CAPE_API_URL=http://127.0.0.1:8000/apiv2 CAPE_API_TOKEN=your_token poetry run python mcp/server.py
 API_URL = os.environ.get("CAPE_API_URL")
 if not API_URL:
     # Try to get from api.conf [api] url
@@ -344,26 +344,69 @@ async def submit_static(
 
 # --- Task Management & Search ---
 
+def get_lean_cape_report(raw_cape_json):
+    """Filters a 50MB CAPE report down to a 500-token LLM payload."""
+    return {
+        "score": raw_cape_json.get("info", {}).get("score", 0),
+        "family": raw_cape_json.get("malfamily") or raw_cape_json.get("detections", {}).get("family") or "Unknown",
+        "extracted_configs": raw_cape_json.get("CAPE", []),
+        "high_severity_signatures": [
+            {"name": sig["name"], "desc": sig["description"]}
+            for sig in raw_cape_json.get("signatures", [])
+            if isinstance(sig, dict) and sig.get("severity", 0) >= 3
+        ],
+        "network": {
+            "domains": [d["domain"] for d in raw_cape_json.get("network", {}).get("domains", [])] if isinstance(raw_cape_json.get("network", {}).get("domains"), list) else [],
+            "http_uris": [h["uri"] for h in raw_cape_json.get("network", {}).get("http", [])] if isinstance(raw_cape_json.get("network", {}).get("http"), list) else [],
+        },
+        "indicators": {
+            "mutexes": raw_cape_json.get("behavior", {}).get("summary", {}).get("mutexes", []) if isinstance(raw_cape_json.get("behavior", {}).get("summary"), dict) else [],
+            "commands": raw_cape_json.get("behavior", {}).get("summary", {}).get("executed_commands", []) if isinstance(raw_cape_json.get("behavior", {}).get("summary"), dict) else []
+        }
+    }
+
+def _apply_lean_report(result):
+    if isinstance(result, dict):
+        if result.get("error") is False and "data" in result:
+            if isinstance(result["data"], list):
+                result["data"] = [get_lean_cape_report(item) for item in result["data"]]
+            elif isinstance(result["data"], dict):
+                result["data"] = get_lean_cape_report(result["data"])
+        elif "info" in result:
+             return get_lean_cape_report(result)
+    elif isinstance(result, list):
+        return [get_lean_cape_report(item) for item in result]
+    return result
+
 @mcp_tool("tasksearch")
-async def search_task(hash_value: str, token: str = "") -> str:
+async def search_task(hash_value: str, lean: bool = True, token: str = "") -> str:
     """Search for tasks by MD5, SHA1, or SHA256."""
+    if not re.match(r"^[a-fA-F0-9]+$", hash_value):
+        return json.dumps({"error": True, "message": "Invalid hash value provided. Only hexadecimal characters are allowed."}, indent=2)
+
     algo = "md5"
     if len(hash_value) == 40:
         algo = "sha1"
     elif len(hash_value) == 64:
         algo = "sha256"
 
     result = await _request("GET", f"tasks/search/{algo}/{hash_value}/", token=token)
+    if lean:
+        result = _apply_lean_report(result)
     return json.dumps(result, indent=2)
 
 @mcp_tool("extendedtasksearch")
-async def extended_search(option: str, argument: str, token: str = "") -> str:
+async def extended_search(option: str, argument: str, lean: bool = True, token: str = "") -> str:
     """
     Search tasks using extended options.
     Options include: id, name, type, string, ssdeep, crc32, file, command, resolvedapi, key, mutex, domain, ip, signature, signame, etc.
     """
     data = {"option": option, "argument": argument}
+    if lean:
+        data["lean"] = True
     result = await _request("POST", "tasks/extendedsearch/", token=token, data=data)
+    if lean:
+        result = _apply_lean_report(result)
     return json.dumps(result, indent=2)
 
 @mcp_tool("extendedtasksearch")
@@ -430,7 +473,25 @@ async def get_statistics(days: int = 7, token: str = "") -> str:
 
 @mcp_tool("taskreport")
 async def get_task_report(task_id: int, format: str = "json", token: str = "") -> str:
-    """Get the analysis report for a task (json, lite, maec, metadata)."""
+    """Get the analysis report for a task (json, lite, maec, metadata, lean)."""
+    allowed_formats = {"json", "lite", "maec", "metadata", "lean"}
+    if format not in allowed_formats:
+        return json.dumps({"error": True, "message": f"Invalid format provided. Allowed formats: {', '.join(allowed_formats)}"}, indent=2)
+
+    if format == "lean":
+        data = {"option": "id", "argument": str(task_id), "lean": True}
+        result = await _request("POST", "tasks/extendedsearch/", token=token, data=data)
+
+        # Extract the single task report from the search results
+        if isinstance(result, dict) and not result.get("error") and isinstance(result.get("data"), list):
+            if len(result["data"]) > 0:
+                result["data"] = result["data"][0]
+            else:
+                result = {"error": True, "message": "Task report not found via lean search."}
+
+        result = _apply_lean_report(result)
+        return json.dumps(result, indent=2)
+
     result = await _request("GET", f"tasks/get/report/{task_id}/{format}/", token=token)
     return json.dumps(result, indent=2)
 
@@ -516,11 +577,15 @@ async def download_task_fullmemory(task_id: int, destination: str, token: str =
 @mcp_tool("fileview")
 async def view_file(hash_value: str, hash_type: str = "sha256", token: str = "") -> str:
     """View information about a file in the database."""
+    if not re.match(r"^[a-fA-F0-9]+$", hash_value):
+        return json.dumps({"error": True, "message": "Invalid hash value provided. Only hexadecimal characters are allowed."}, indent=2)
     return await _request("GET", f"files/view/{hash_type}/{hash_value}/", token=token)
 
 @mcp_tool("sampledl")
 async def download_sample(hash_value: str, destination: str, hash_type: str = "sha256", token: str = "") -> str:
     """Download a sample from the database."""
+    if not re.match(r"^[a-fA-F0-9]+$", hash_value):
+        return json.dumps({"error": True, "message": "Invalid hash value provided. Only hexadecimal characters are allowed."}, indent=2)
     return await _download_file(f"files/get/{hash_type}/{hash_value}/", destination, f"{hash_value}.bin", token=token)
 
 @mcp_tool("machinelist")
diff --git a/web/apiv2/views.py b/web/apiv2/views.py
@@ -53,6 +53,7 @@
     statistics,
     validate_task,
 )
+from mcp.filters import lean_search_filters
 from lib.cuckoo.core.database import Database, _Database
 from lib.cuckoo.core.data.task import (
     TASK_RECOVERED,
@@ -756,7 +757,8 @@ def ext_tasks_search(request):
             value = tmp_value
             del tmp_value
         try:
-            records = perform_search(term, value, user_id=request.user.id, privs=request.user.is_staff, web=False)
+            projection = lean_search_filters if request.data.get("lean") else None
+            records = perform_search(term, value, user_id=request.user.id, privs=request.user.is_staff, web=False, projection=projection)
         except ValueError:
             if not term:
                 resp = {"error": True, "error_value": "No option provided."}

Original file line number	Diff line number	Diff line change
`@@ -1422,7 +1422,7 @@ def perform_search(`
`1422`	`1422`	`# Stage 8: Make the task doc the new root`
`1423`	`1423`	`{"$replaceRoot": {"newRoot": "$task_doc"}},`
`1424`	`1424`	`# Stage 9: Add your custom projection`
`1425`		`- {"$project": perform_search_filters},`
	`1425`	`+ {"$project": projection or perform_search_filters},`
`1426`	`1426`	`]`
`1427`	`1427`	`retval = list(mongo_aggregate(FILES_COLL, pipeline))`
`1428`	`1428`	`if not retval:`