33# See the file 'docs/LICENSE' for copying permission.
44
55import copy
6+ import datetime
67import logging
78import os
89import re
@@ -49,6 +50,8 @@ def ensure_valid_utf8(obj):
4950 v .encode ()
5051 except UnicodeEncodeError :
5152 obj [k ] = "" .join (str (ord (_ )) for _ in v ).encode ()
53+ elif isinstance (v , datetime .datetime ):
54+ obj [k ] = v .strftime ("%Y-%m-%d %H:%M:%S" )
5255 else :
5356 ensure_valid_utf8 (v )
5457
@@ -57,13 +60,47 @@ def get_json_document(results, analysis_path):
5760 # Create a copy of the dictionary. This is done in order to not modify
5861 # the original dictionary and possibly
5962 # compromise the following reporting modules.
60- try :
61- report = copy .deepcopy (results )
62- except AttributeError :
63- if "memory" in results :
64- del results ["memory" ]
65- log .error ("Deleting Volatility results" )
66- report = copy .deepcopy (results )
63+ # We use a shallow copy of the top level and common sub-dicts to avoid
64+ # the extremely expensive deepcopy which often causes OOM on large reports.
65+ report = results .copy ()
66+
67+ # Manually copy sections that are often modified by reporting modules
68+ for section in (
69+ "info" ,
70+ "behavior" ,
71+ "network" ,
72+ "suricata" ,
73+ "target" ,
74+ "CAPE" ,
75+ "static" ,
76+ "procdump" ,
77+ "dropped" ,
78+ "strings" ,
79+ "signatures" ,
80+ "statistics" ,
81+ "memory" ,
82+ ):
83+ if section in report :
84+ try :
85+ if isinstance (report [section ], dict ):
86+ report [section ] = report [section ].copy ()
87+ elif isinstance (report [section ], list ):
88+ report [section ] = list (report [section ])
89+ except Exception as e :
90+ log .warning ("Failed to copy section %s: %s" , section , e )
91+ if section == "memory" :
92+ log .error ("Deleting 'memory' key from report due to copy failure" )
93+ del report ["memory" ]
94+
95+ # Deeper copy for behavior processes to avoid modifying metadata
96+ if "behavior" in report and isinstance (report .get ("behavior" ), dict ):
97+ if "processes" in report ["behavior" ]:
98+ report ["behavior" ]["processes" ] = [p .copy () for p in report ["behavior" ]["processes" ]]
99+ if "processtree" in report ["behavior" ]:
100+ try :
101+ report ["behavior" ]["processtree" ] = copy .deepcopy (report ["behavior" ]["processtree" ])
102+ except Exception as e :
103+ log .warning ("Failed to deepcopy processtree: %s" , e )
67104
68105 if "network" not in report :
69106 report ["network" ] = {}
@@ -125,7 +162,7 @@ def insert_calls(report, elastic_db=None, mongodb=False):
125162 chunk_id = None
126163 # If the chunk size is CHUNK_CALL_SIZE or if the loop is completed then store the chunk in DB.
127164 if len (chunk ) == CHUNK_CALL_SIZE :
128- to_insert = {"pid" : process ["process_id" ], "calls" : chunk , "task_id" : report ["info" ]["id" ]}
165+ to_insert = {"pid" : process ["process_id" ], "calls" : chunk , "task_id" : report ["info" ]["id" ]}
129166 with suppress (Exception ):
130167 chunk_id = mongo_insert_one ("calls" , to_insert ).inserted_id
131168 if chunk_id :
0 commit comments