1414logger = get_logger (__name__ )
1515
1616
17- def _validate_data_path (file_path = None , directory_path = None , data = None ):
17+ def _validate_data_path (file_path = None , directory_path = None , include_pattern = None , exclude_pattern = None , data = None ):
1818 if file_path and directory_path :
1919 raise ValueError ('Can not specify file path and directory path at the same time' )
2020 if file_path and data :
@@ -28,6 +28,51 @@ def _validate_data_path(file_path=None, directory_path=None, data=None):
2828 raise ValueError (f'invalid directory path:{ directory_path } ' )
2929 if file_path and not os .path .isfile (file_path ):
3030 raise ValueError (f'invalid file path:{ file_path } ' )
31+ if not directory_path and include_pattern :
32+ raise ValueError ('--include-pattern need to be used together with --directory-path' )
33+ if not directory_path and exclude_pattern :
34+ raise ValueError ('--exclude-pattern need to be used together with --directory-path' )
35+ if include_pattern and exclude_pattern :
36+ raise ValueError ('--include-pattern and --exclude-pattern are mutually exclusive' )
37+
38+
def _is_file_name_in_patterns(filename, patterns):
    """Check whether a file name matches any of the given glob patterns.

    Matching is delegated to ``fnmatch.fnmatch``, so the usual shell-style
    wildcards (``*``, ``?``, ``[seq]``) apply.

    :param filename: file name to test.
    :param patterns: a single glob pattern string, or an iterable of glob
        pattern strings.
    :return: ``None`` when ``filename`` or ``patterns`` is empty, ``True`` when
        at least one pattern matches, ``False`` otherwise.
    """
    if not filename or not patterns:
        return None
    import fnmatch
    # A bare string would otherwise be iterated character by character, so a
    # pattern such as '*.py' would degrade to '*' and match every file name.
    if isinstance(patterns, str):
        patterns = [patterns]
    return any(fnmatch.fnmatch(filename, pattern) for pattern in patterns)
47+
48+
def _check_file_include_and_exclude_pattern(filename, include_pattern=None, exclude_pattern=None):
    """Decide whether a file passes the include/exclude filters.

    :param filename: file name to test.
    :param include_pattern: optional patterns; when given, the file must match
        at least one of them.
    :param exclude_pattern: optional patterns; when given, the file must not
        match any of them.
    :return: ``True`` if the file satisfies every supplied filter (or no filter
        was supplied), ``False`` otherwise.
    """
    # An include filter is present and the file matches none of its patterns.
    if include_pattern and not _is_file_name_in_patterns(filename, include_pattern):
        return False
    # An exclude filter is present and the file matches one of its patterns.
    if exclude_pattern and _is_file_name_in_patterns(filename, exclude_pattern):
        return False
    return True
56+
57+
def _get_files_from_directory(directory_path, recursive=None, include_pattern=None, exclude_pattern=None):
    """Collect the files under a directory that pass the include/exclude filters.

    :param directory_path: directory to list.
    :param recursive: when truthy, walk the whole tree with ``os.walk``;
        otherwise only look at the directory's immediate entries.
    :param include_pattern: optional patterns forwarded to the filter check.
    :param exclude_pattern: optional patterns forwarded to the filter check.
    :return: list of paths, each built by joining the containing directory
        with the file name.
    """
    def _accepted(name):
        # Filters are applied to the bare entry name, not to the joined path.
        return _check_file_include_and_exclude_pattern(name,
                                                       include_pattern=include_pattern,
                                                       exclude_pattern=exclude_pattern)

    if recursive:
        return [
            os.path.join(parent, name)
            for parent, _, names in os.walk(directory_path)
            for name in names
            if _accepted(name)
        ]

    matches = []
    for name in os.listdir(directory_path):
        if not _accepted(name):
            continue
        candidate = os.path.join(directory_path, name)
        # os.listdir also returns sub-directories; keep regular files only.
        if os.path.isfile(candidate):
            matches.append(candidate)
    return matches
3176
3277
3378def _load_built_in_regex_patterns ():
@@ -88,21 +133,17 @@ def _scan_secrets_for_string(data, custom_pattern=None):
88133 return secrets
89134
90135
91- def scan_secrets (file_path = None , directory_path = None , recursive = False , data = None ,
136+ def scan_secrets (file_path = None , directory_path = None , recursive = False ,
137+ include_pattern = None , exclude_pattern = None , data = None ,
92138 save_scan_result = None , scan_result_path = None , custom_pattern = None ):
93- _validate_data_path (file_path = file_path , directory_path = directory_path , data = data )
139+ _validate_data_path (file_path = file_path , directory_path = directory_path ,
140+ include_pattern = include_pattern , exclude_pattern = exclude_pattern , data = data )
94141 target_files = []
95142 scan_results = {}
96143 if directory_path :
97144 directory_path = os .path .abspath (directory_path )
98- if recursive :
99- for root , _ , files in os .walk (directory_path ):
100- target_files .extend (os .path .join (root , file ) for file in files )
101- else :
102- for file in os .listdir (directory_path ):
103- file = os .path .join (directory_path , file )
104- if os .path .isfile (file ):
105- target_files .append (file )
145+ target_files = _get_files_from_directory (directory_path , recursive = recursive ,
146+ include_pattern = include_pattern , exclude_pattern = exclude_pattern )
106147 if file_path :
107148 file_path = os .path .abspath (file_path )
108149 target_files .append (file_path )
@@ -114,7 +155,7 @@ def scan_secrets(file_path=None, directory_path=None, recursive=False, data=None
114155 elif target_files :
115156 for target_file in target_files :
116157 logger .debug ('start scanning secrets for %s' , target_file )
117- with open (target_file ) as f :
158+ with open (target_file , encoding = 'utf8' ) as f :
118159 data = f .read ()
119160 if not data :
120161 continue
@@ -140,41 +181,37 @@ def scan_secrets(file_path=None, directory_path=None, recursive=False, data=None
140181 file_folder = os .path .join (get_azdev_config_dir (), 'scan_results' )
141182 if not os .path .exists (file_folder ):
142183 os .mkdir (file_folder , 0o755 )
143- file_name = file_path or directory_path or datetime .now ().strftime ('%Y%m%d%H%M%S' )
144- result_file_name = 'scan_result_' + file_name .replace ('.' , '_' ) + '.json'
184+ result_file_name = 'scan_result_' + datetime .now ().strftime ('%Y%m%d%H%M%S' ) + '.json'
145185 scan_result_path = os .path .join (file_folder , result_file_name )
146186
147- with open (scan_result_path , 'w' ) as f :
187+ with open (scan_result_path , 'w' , encoding = 'utf8' ) as f :
148188 json .dump (scan_results , f )
149189 logger .debug ('store scanning results in %s' , scan_result_path )
150190 return {'secrets_detected' : True , 'scan_result_path' : os .path .abspath (scan_result_path )}
151191
152192
153193def _get_scan_results_from_saved_file (saved_scan_result_path ,
154- file_path = None , directory_path = None , recursive = False , data = None ):
194+ file_path = None , directory_path = None , recursive = False ,
195+ include_pattern = None , exclude_pattern = None , data = None ):
155196 scan_results = {}
156197 if not os .path .isfile (saved_scan_result_path ):
157198 raise ValueError (f'invalid saved scan result path:{ saved_scan_result_path } ' )
158- with open (saved_scan_result_path ) as f :
199+ with open (saved_scan_result_path , encoding = 'utf8' ) as f :
159200 saved_scan_results = json .load (f )
160201 # filter saved scan results to keep those related with specified file(s)
161- _validate_data_path (file_path = file_path , directory_path = directory_path , data = data )
202+ _validate_data_path (file_path = file_path , directory_path = directory_path ,
203+ include_pattern = include_pattern , exclude_pattern = exclude_pattern , data = data )
162204 if file_path :
163205 file_path = os .path .abspath (file_path )
164206 if file_path in saved_scan_results :
165207 scan_results [file_path ] = saved_scan_results [file_path ]
166208 elif directory_path :
167- if recursive :
168- for root , _ , files in os .walk (directory_path ):
169- for file in files :
170- file_full = os .path .join (root , file )
171- if file_full in saved_scan_results :
172- scan_results [file_full ] = saved_scan_results [file_full ]
173- else :
174- for file in os .listdir (directory_path ):
175- file_full = os .path .join (directory_path , file )
176- if file_full in saved_scan_results :
177- scan_results [file_full ] = saved_scan_results [file_full ]
209+ directory_path = os .path .abspath (directory_path )
210+ target_files = _get_files_from_directory (directory_path , recursive = recursive ,
211+ include_pattern = include_pattern , exclude_pattern = exclude_pattern )
212+ for target_file in target_files :
213+ if target_file in saved_scan_results :
214+ scan_results [target_file ] = saved_scan_results [target_file ]
178215 else :
179216 scan_results ['raw_data' ] = saved_scan_results ['raw_data' ]
180217
@@ -193,19 +230,26 @@ def _mask_secret_for_string(data, secret, redaction_type=None):
193230 return data
194231
195232
196- def mask_secrets (file_path = None , directory_path = None , recursive = False , data = None ,
233+ def mask_secrets (file_path = None , directory_path = None , recursive = False ,
234+ include_pattern = None , exclude_pattern = None , data = None ,
197235 save_scan_result = None , scan_result_path = None , custom_pattern = None ,
198236 saved_scan_result_path = None , redaction_type = 'FIXED_VALUE' , yes = None ):
199237 scan_results = {}
200238 if saved_scan_result_path :
201- scan_results = _get_scan_results_from_saved_file (saved_scan_result_path , file_path = file_path ,
202- directory_path = directory_path , recursive = recursive , data = data )
239+ scan_results = _get_scan_results_from_saved_file (saved_scan_result_path ,
240+ file_path = file_path ,
241+ directory_path = directory_path ,
242+ recursive = recursive ,
243+ include_pattern = include_pattern ,
244+ exclude_pattern = exclude_pattern ,
245+ data = data )
203246 else :
204- scan_response = scan_secrets (file_path = file_path , directory_path = directory_path , recursive = recursive , data = data ,
247+ scan_response = scan_secrets (file_path = file_path , directory_path = directory_path , recursive = recursive ,
248+ include_pattern = include_pattern , exclude_pattern = exclude_pattern , data = data ,
205249 save_scan_result = save_scan_result , scan_result_path = scan_result_path ,
206250 custom_pattern = custom_pattern )
207251 if save_scan_result and scan_response ['scan_result_path' ]:
208- with open (scan_response ['scan_result_path' ]) as f :
252+ with open (scan_response ['scan_result_path' ], encoding = 'utf8' ) as f :
209253 scan_results = json .load (f )
210254 elif not save_scan_result :
211255 scan_results = scan_response ['scan_results' ]
@@ -235,13 +279,13 @@ def mask_secrets(file_path=None, directory_path=None, recursive=False, data=None
235279 return mask_result
236280
237281 for scan_file_path , secrets in scan_results .items ():
238- with open (scan_file_path , 'r' ) as f :
282+ with open (scan_file_path , 'r' , encoding = 'utf8' ) as f :
239283 content = f .read ()
240284 if not content :
241285 continue
242286 for secret in secrets :
243287 content = _mask_secret_for_string (content , secret , redaction_type )
244- with open (scan_file_path , 'w' ) as f :
288+ with open (scan_file_path , 'w' , encoding = 'utf8' ) as f :
245289 f .write (content )
246290 mask_result ['mask' ] = True
247291 return mask_result
0 commit comments