Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion lib/cuckoo/common/network_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,10 +548,11 @@ def winhttp_finalize_sessions(state):
sessions_by_domain_keys[dom].add(key)

if sessions_by_domain:
sessions_list = [{"host": dom, "events": evts} for dom, evts in sessions_by_domain.items()]
out.append({
"process_id": p.get("process_id"),
"process_name": p.get("process_name", ""),
"sessions": sessions_by_domain,
"sessions": sessions_list,
})

return out
30 changes: 30 additions & 0 deletions modules/processing/CAPE.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,14 @@ def process_file(self, file_path, append_file, metadata: dict, *, category: str,

file_info["options_hash"] = options_hash

# GravityRAT is infector so it will produce a lot of files. we don't need them
if category == "dropped" and any("GravityRAT" in i.get("name", "") for i in file_info.get("cape_yara", [])):
# delete file and continue
log.info("GravityRAT detected, removing file: %s", file_path)
with suppress(OSError):
os.remove(file_path)
return

if category in ("static", "file"):
file_info["name"] = Path(self.task["target"]).name

Expand Down Expand Up @@ -440,6 +448,28 @@ def run(self):
self.process_file(
self.file_path, False, meta.get(self.file_path, {}), category=self.task["category"], duplicated=duplicated
)
if "target" not in self.results:
target_restored = False
try:
db_analysis = mongo_find_one("analysis", {"info.id": int(self.task["id"])}, {"target": 1, "_id": 0})
if db_analysis and "target" in db_analysis:
self.results["target"] = db_analysis["target"]
target_restored = True
log.info("Restored missing target info from MongoDB analysis collection")
except Exception as e:
log.error("Failed to restore target info from MongoDB: %s", e)

if not target_restored:
json_path = os.environ.get("CAPE_REPORT") or os.path.join(self.reports_path, "report.json")
if path_exists(json_path):
try:
with open(json_path, "r", encoding="utf-8") as f:
report_data = json.load(f)
if "target" in report_data:
self.results["target"] = report_data["target"]
log.info("Restored missing target info from existing report.json")
except Exception as e:
log.error("Failed to restore target info from existing report: %s", e)

for folder in ("CAPE_path", "procdump_path", "dropped_path", "package_files"):
category = folder.replace("_path", "").replace("_files", "")
Expand Down
16 changes: 10 additions & 6 deletions modules/processing/behavior.py
Original file line number Diff line number Diff line change
Expand Up @@ -1350,14 +1350,15 @@ def run(self):
# BSON/JSON keys must be strings.
# Let's convert tuple keys to string representation "ip:port"

endpoint_map_str = {}
for (ip, port), entries in self.endpoint_map.items():
endpoint_map_str[f"{ip}:{port}"] = entries
endpoint_map_list = [{"ip_port": f"{ip}:{port}", "pinfo": entries} for (ip, port), entries in self.endpoint_map.items()]

http_host_map_list = [{"host": k, "pinfo": v} for k, v in self.http_host_map.items()]
dns_intents_list = [{"domain": k, "intents": v} for k, v in self.dns_intents.items()]

return {
"endpoint_map": endpoint_map_str,
"http_host_map": self.http_host_map,
"dns_intents": self.dns_intents,
"endpoint_map": endpoint_map_list,
"http_host_map": http_host_map_list,
"dns_intents": dns_intents_list,
"http_requests": self.http_requests,
"winhttp_sessions": winhttp_finalize_sessions(self._winhttp_state),
}
Expand Down Expand Up @@ -1468,6 +1469,9 @@ def run(self):
instance.event_apicall(call, process)
except Exception:
log.exception('Failure in partial behavior "%s"', instance.key)
# Reset the iterator so reporting modules can read the calls again
with suppress(AttributeError):
process["calls"].reset()

for instance in instances:
try:
Expand Down
54 changes: 43 additions & 11 deletions modules/processing/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,6 @@
log = logging.getLogger(__name__)




try:
import re2 as re
except ImportError:
Expand Down Expand Up @@ -1116,21 +1114,55 @@ def _import_ja3_fprints(self):

def _load_network_map(self) -> Dict:
with suppress(Exception):
return self.results.get("behavior", {}).get("network_map") or {}
behavior_net_map = self.results.get("behavior", {}).get("network_map") or {}
if not behavior_net_map:
return {}

# Create a separate dictionary to avoid modifying self.results in place
net_map = behavior_net_map.copy()

raw_http_host_map = net_map.get("http_host_map", {})
if isinstance(raw_http_host_map, list):
net_map["http_host_map"] = {item["host"]: item["pinfo"] for item in raw_http_host_map}

raw_dns_intents = net_map.get("dns_intents", {})
if isinstance(raw_dns_intents, list):
net_map["dns_intents"] = {item["domain"]: item["intents"] for item in raw_dns_intents}

# We need to deep copy winhttp_sessions if we are modifying its internal dicts
raw_winhttp = net_map.get("winhttp_sessions", [])
new_winhttp = []
for p in raw_winhttp:
new_p = dict(p)
raw_sessions = p.get("sessions", {})
if isinstance(raw_sessions, list):
new_p["sessions"] = {item["host"]: item["events"] for item in raw_sessions}
new_winhttp.append(new_p)
net_map["winhttp_sessions"] = new_winhttp

return net_map
return {}

def _reconstruct_endpoint_map(self, raw_map: Dict[str, List[Dict]]) -> Dict[tuple, List[Dict]]:
def _reconstruct_endpoint_map(self, raw_map) -> Dict[tuple, List[Dict]]:
"""
Convert JSON-friendly "ip:port" keys back to (ip, int(port)) tuples.
"""
endpoint_map = {}
for key, val in raw_map.items():
try:
ip, port_str = key.rsplit(":", 1)
port = int(port_str)
endpoint_map[(ip, port)] = val
except (ValueError, IndexError):
continue
if isinstance(raw_map, list):
for item in raw_map:
try:
ip, port_str = item["ip_port"].rsplit(":", 1)
endpoint_map[(ip, int(port_str))] = item["pinfo"]
except (ValueError, IndexError, KeyError):
continue
elif isinstance(raw_map, dict):
for key, val in raw_map.items():
try:
ip, port_str = key.rsplit(":", 1)
port = int(port_str)
endpoint_map[(ip, port)] = val
except (ValueError, IndexError):
continue
return endpoint_map

def _pick_best(self, candidates: List[Dict]) -> Optional[Dict]:
Expand Down
49 changes: 40 additions & 9 deletions modules/reporting/mongodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@

import gc
import logging

from contextlib import suppress
from lib.cuckoo.common.iocs import dump_iocs
from lib.cuckoo.common.abstracts import Report
from lib.cuckoo.common.exceptions import CuckooDependencyError, CuckooReportError
from modules.reporting.report_doc import ensure_valid_utf8, get_json_document, insert_calls
from lib.cuckoo.common.config import Config

try:
from pymongo.errors import InvalidDocument, OperationFailure
Expand All @@ -22,6 +24,7 @@
MEGABYTE = 0x100000

log = logging.getLogger(__name__)
reporting_conf = Config("reporting")


class MongoDB(Report):
Expand Down Expand Up @@ -76,14 +79,18 @@ def loop_saver(self, report):
if "_id" in keys:
keys.remove("_id")

# We insert the info section first to get an _id
obj_id = mongo_insert_one("analysis", {"info": report["info"]}).inserted_id
keys.remove("info")

for key in keys:
try:
mongo_update_one("analysis", {"_id": obj_id}, {"$set": {key: report[key]}}, bypass_document_validation=True)
# We include info here so that mongo hooks (like normalize_files) can get the task_id
mongo_update_one("analysis", {"_id": obj_id}, {"$set": {key: report[key], "info": report["info"]}}, bypass_document_validation=True)
except InvalidDocument:
log.warning("Investigate your key: %s", key)
except Exception as e:
log.error("Failed to update key %s in loop_saver: %s", key, e)

def run(self, results):
"""Writes report.
Expand All @@ -108,28 +115,49 @@ def run(self, results):
# the original dictionary and possibly compromise the following
# reporting modules.
report = get_json_document(results, self.analysis_path)
if not report or "info" not in report:
log.error("Failed to get JSON document or 'info' key is missing for Task")
return

mongo_delete_data(int(report["info"]["id"]))
log.debug("Deleted previous MongoDB data for Task %s", report["info"]["id"])
local_task_id = int(report["info"].get("id", 0))
if not local_task_id:
log.error("Task ID is missing in report['info']")
return

# trick for distributed api
if results.get("info", {}).get("options", {}).get("main_task_id", ""):
report["info"]["id"] = int(results["info"]["options"]["main_task_id"])
main_task_id = results.get("info", {}).get("options", {}).get("main_task_id")
if main_task_id:
with suppress(ValueError, TypeError):
report["info"]["id"] = int(main_task_id)

if "network" not in report:
report["network"] = {}

if "behavior" not in report or not isinstance(report["behavior"], dict):
report["behavior"] = {"processes": [], "processtree": [], "summary": {}}

# Delete old data just before inserting new one to avoid "missing report" window
# or data loss if insertion fails during preparation (e.g. OOM)
ids_to_delete = {local_task_id, int(report["info"]["id"])}
log.debug("Deleting previous MongoDB data for Task IDs: %s", ids_to_delete)
mongo_delete_data(list(ids_to_delete))

new_processes = insert_calls(report, mongodb=True)
# Store the results in the report.
report["behavior"] = dict(report["behavior"])
report["behavior"]["processes"] = new_processes

# Store iocs as file
if reporting_conf.mongodb.dump_iocs:
dump_iocs(report, local_task_id)

ensure_valid_utf8(report)
gc.collect()

# Store the report and retrieve its object id.
try:
log.debug("Inserting new MongoDB report for Task %s", report["info"]["id"])
mongo_insert_one("analysis", report)

except OperationFailure as e:
# Check for error codes indicating the BSON object was too large
# (10334 BSONObjectTooLarge) or the maximum nested object depth was
Expand All @@ -145,8 +173,6 @@ def run(self, results):
log.error("Deleting behavior process tree parent from results: %s", str(e))
del report["behavior"]["processtree"][0]
mongo_insert_one("analysis", report)
else:
raise CuckooReportError("Failed inserting report in Mongo") from e
except InvalidDocument as e:
if str(e).startswith("cannot encode object") or "must not contain" in str(e):
self.loop_saver(report)
Expand Down Expand Up @@ -193,3 +219,8 @@ def run(self, results):
except Exception as e:
log.error("Failed to delete child key: %s", e)
error_saved = False

if error_saved:
log.error("Failed to insert report into MongoDB even after attempting to fix large documents for Task %s", report["info"]["id"])
except Exception as e:
log.exception("Failed to store report in MongoDB for Task %s: %s", report["info"]["id"], e)
Loading