11import os
22import logging
3+ import tempfile
4+ import zipfile
35from lib .cuckoo .common .constants import CUCKOO_ROOT
46from lib .cuckoo .common .abstracts import Report
57from lib .cuckoo .common .exceptions import CuckooReportError
@@ -40,7 +42,6 @@ def run(self, results):
4042 )
4143 return
4244
43- # Read configuration options from gcs.conf
4445 # Read configuration options from gcs.conf and validate them
4546 bucket_name = self .options .get ("bucket_name" )
4647 if not bucket_name :
@@ -66,8 +67,7 @@ def run(self, results):
6667 exclude_dirs_str = self .options .get ("exclude_dirs" , "" )
6768 exclude_files_str = self .options .get ("exclude_files" , "" )
6869
69- # --- NEW: Parse the exclusion strings into sets for efficient lookups ---
70- # The `if item.strip()` ensures we don't have empty strings from trailing commas
70+ # Parse the exclusion strings into sets for efficient lookups
7171 exclude_dirs = {item .strip () for item in exclude_dirs_str .split ("," ) if item .strip ()}
7272 exclude_files = {item .strip () for item in exclude_files_str .split ("," ) if item .strip ()}
7373
@@ -76,6 +76,9 @@ def run(self, results):
7676 if exclude_files :
7777 log .debug ("GCS reporting will exclude files: %s" , exclude_files )
7878
79+ # Get the upload mode, defaulting to 'file' for backward compatibility
80+ mode = self .options .get ("mode" , "file" )
81+
7982 try :
8083 # --- Authentication ---
8184 log .debug ("Authenticating with Google Cloud Storage..." )
@@ -87,39 +90,64 @@ def run(self, results):
8790 "The specified GCS bucket '%s' does not exist or you don't have permission to access it." , bucket_name
8891 )
8992
90- # --- File Upload ---
91- # Use the analysis ID as a "folder" in the bucket
9293 analysis_id = results .get ("info" , {}).get ("id" )
9394 if not analysis_id :
9495 raise CuckooReportError ("Could not get analysis ID from results." )
9596
96- log .debug ("Uploading files for analysis ID %d to GCS bucket '%s'" , analysis_id , bucket_name )
97-
98- # self.analysis_path is the path to the analysis results directory
99- # e.g., /opt/cape/storage/analyses/123/
10097 source_directory = self .analysis_path
10198
102- for root , dirs , files in os .walk (source_directory ):
103- # We modify 'dirs' in-place to prevent os.walk from descending into them.
104- # This is the most efficient way to skip entire directory trees.
105- dirs [:] = [d for d in dirs if d not in exclude_dirs ]
106-
107- for filename in files :
108- # --- NEW: File Exclusion Logic ---
109- if filename in exclude_files :
110- log .debug ("Skipping excluded file: %s" , os .path .join (root , filename ))
111- continue # Skip to the next file
112-
113- local_path = os .path .join (root , filename )
114- relative_path = os .path .relpath (local_path , source_directory )
115- blob_name = f"{ analysis_id } /{ relative_path } "
116-
117- log .debug ("Uploading '%s' to '%s'" , local_path , blob_name )
118-
119- blob = bucket .blob (blob_name )
120- blob .upload_from_filename (local_path )
121-
122- log .info ("Successfully uploaded files for analysis %d to GCS." , analysis_id )
99+ if mode == "zip" :
100+ self .upload_zip_archive (bucket , analysis_id , source_directory , exclude_dirs , exclude_files )
101+ elif mode == "file" :
102+ self .upload_files_individually (bucket , analysis_id , source_directory , exclude_dirs , exclude_files )
103+ else :
104+ raise CuckooReportError ("Invalid GCS upload mode specified: %s. Must be 'file' or 'zip'." , mode )
123105
124106 except Exception as e :
125- raise CuckooReportError ("Failed to upload report to GCS: %s" , str (e ))
107+ raise CuckooReportError (f"Failed to upload report to GCS: { e } " ) from e
108+
def _iter_files_to_upload(self, source_directory, exclude_dirs, exclude_files):
    """Walk ``source_directory`` and yield every file that is not excluded.

    Directories whose name appears in ``exclude_dirs`` are removed from the
    walk, and files whose name appears in ``exclude_files`` are skipped with
    a debug log entry.

    Yields:
        ``(local_path, relative_path)`` tuples, where ``relative_path`` is
        ``local_path`` expressed relative to ``source_directory``.
    """
    for current_dir, subdirs, names in os.walk(source_directory):
        # Slice-assign so the list os.walk holds is mutated in place; this
        # keeps the walk from descending into excluded directories.
        subdirs[:] = [entry for entry in subdirs if entry not in exclude_dirs]
        for name in names:
            full_path = os.path.join(current_dir, name)
            if name not in exclude_files:
                yield full_path, os.path.relpath(full_path, source_directory)
            else:
                log.debug("Skipping excluded file: %s", full_path)
123+
def upload_zip_archive(self, bucket, analysis_id, source_directory, exclude_dirs, exclude_files):
    """Compress the analysis directory into one zip file and upload it.

    The archive is built in a temporary file, uploaded to ``bucket`` under
    the blob name ``<analysis_id>.zip``, and the temporary file is removed
    afterwards whether or not archiving or uploading succeeded.

    Args:
        bucket: GCS bucket object; ``bucket.name`` and ``bucket.blob()`` are used.
        analysis_id: Analysis identifier used to name the uploaded archive.
        source_directory: Root directory whose files are archived.
        exclude_dirs: Directory names to leave out of the archive.
        exclude_files: File names to leave out of the archive.
    """
    log.debug("Compressing and uploading files for analysis ID %d to GCS bucket '%s'", analysis_id, bucket.name)
    blob_name = "%s.zip" % analysis_id

    # delete=False because the file is closed before the upload reads it by
    # name; removal is handled by the finally clause below.
    tmp_zip_file = tempfile.NamedTemporaryFile(delete=False, suffix=".zip")
    tmp_zip_file_name = tmp_zip_file.name
    try:
        # Archive creation sits inside the try so a failure while zipping
        # does not leave the temporary file behind.
        with tmp_zip_file:
            with zipfile.ZipFile(tmp_zip_file, "w", zipfile.ZIP_DEFLATED) as archive:
                for local_path, relative_path in self._iter_files_to_upload(
                    source_directory, exclude_dirs, exclude_files
                ):
                    archive.write(local_path, relative_path)

        # The temporary file is closed (and therefore flushed) at this point.
        log.debug("Uploading '%s' to '%s'", tmp_zip_file_name, blob_name)
        blob = bucket.blob(blob_name)
        blob.upload_from_filename(tmp_zip_file_name)
    finally:
        os.unlink(tmp_zip_file_name)
    log.info("Successfully uploaded archive for analysis %d to GCS.", analysis_id)
143+
def upload_files_individually(self, bucket, analysis_id, source_directory, exclude_dirs, exclude_files):
    """Upload each non-excluded analysis file to the bucket as its own blob.

    Every file is stored under ``<analysis_id>/<path relative to
    source_directory>``.

    Args:
        bucket: GCS bucket object; ``bucket.name`` and ``bucket.blob()`` are used.
        analysis_id: Analysis identifier used as the blob name prefix.
        source_directory: Root directory whose files are uploaded.
        exclude_dirs: Directory names to skip.
        exclude_files: File names to skip.
    """
    log.debug("Uploading files for analysis ID %d to GCS bucket '%s'", analysis_id, bucket.name)

    pending = self._iter_files_to_upload(source_directory, exclude_dirs, exclude_files)
    for src_path, rel_path in pending:
        destination = f"{analysis_id}/{rel_path}"
        log.debug("Uploading '%s' to '%s'", src_path, destination)
        bucket.blob(destination).upload_from_filename(src_path)

    log.info("Successfully uploaded files for analysis %d to GCS.", analysis_id)
0 commit comments