Skip to content

Commit 917ca54

Browse files
azdev scan/mask: Add --include-pattern and --exclude-pattern (#465)
* add scan/mask command * code implementation * refine code and add tests * pylint * flake8 * address comments * Add --include-pattern and --exclude-pattern * tox * Apply suggestions from code review Co-authored-by: ZelinWang <zelinwang@microsoft.com> --------- Co-authored-by: ZelinWang <zelinwang@microsoft.com>
1 parent 8738442 commit 917ca54

6 files changed

Lines changed: 108 additions & 39 deletions

File tree

HISTORY.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22
33
Release History
44
===============
5+
0.1.75
6+
++++++
7+
* `azdev scan/mask`: Add `--include-pattern` and `--exclude-pattern` to support filtering files within a directory
8+
59
0.1.74
610
++++++
711
* `azdev scan/mask`: New commands for scanning and masking secrets for files or string

azdev/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@
44
# license information.
55
# -----------------------------------------------------------------------------
66

7-
__VERSION__ = '0.1.74'
7+
__VERSION__ = '0.1.75'

azdev/help.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,9 @@
193193
- name: Recursively scan secrets for a directory and save results to specific file
194194
text: |
195195
azdev scan --directory-path /path/to/my/folder --recursive --scan-result-path /path/to/scan_result.json
196+
- name: Scan secrets for all json files and yaml files within a directory
197+
text: |
198+
azdev scan --directory-path /path/to/my/folder --include-pattern *.yaml *.json
196199
"""
197200

198201
helps['mask'] = """

azdev/operations/secret.py

Lines changed: 80 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
logger = get_logger(__name__)
1515

1616

17-
def _validate_data_path(file_path=None, directory_path=None, data=None):
17+
def _validate_data_path(file_path=None, directory_path=None, include_pattern=None, exclude_pattern=None, data=None):
1818
if file_path and directory_path:
1919
raise ValueError('Can not specify file path and directory path at the same time')
2020
if file_path and data:
@@ -28,6 +28,51 @@ def _validate_data_path(file_path=None, directory_path=None, data=None):
2828
raise ValueError(f'invalid directory path:{directory_path}')
2929
if file_path and not os.path.isfile(file_path):
3030
raise ValueError(f'invalid file path:{file_path}')
31+
if not directory_path and include_pattern:
32+
raise ValueError('--include-pattern need to be used together with --directory-path')
33+
if not directory_path and exclude_pattern:
34+
raise ValueError('--exclude-pattern need to be used together with --directory-path')
35+
if include_pattern and exclude_pattern:
36+
raise ValueError('--include-pattern and --exclude-pattern are mutually exclusive')
37+
38+
39+
def _is_file_name_in_patterns(filename, patterns):
40+
if not filename or not patterns:
41+
return None
42+
import fnmatch
43+
for pattern in patterns:
44+
if fnmatch.fnmatch(filename, pattern):
45+
return True
46+
return False
47+
48+
49+
def _check_file_include_and_exclude_pattern(filename, include_pattern=None, exclude_pattern=None):
    """Decide whether ``filename`` passes the include/exclude filters.

    :param filename: name of the file to test.
    :param include_pattern: optional patterns; when given, the file must match one of them.
    :param exclude_pattern: optional patterns; when given, the file must match none of them.
    :return: ``True`` when the file satisfies every filter that was supplied.
    """
    # Rejected: an include filter exists and the file is not covered by it.
    if include_pattern and not _is_file_name_in_patterns(filename, include_pattern):
        return False
    # Rejected: an exclude filter exists and the file is covered by it.
    if exclude_pattern and _is_file_name_in_patterns(filename, exclude_pattern):
        return False
    return True
56+
57+
58+
def _get_files_from_directory(directory_path, recursive=None, include_pattern=None, exclude_pattern=None):
    """Collect the paths of files under ``directory_path`` that pass the filters.

    :param directory_path: directory to list.
    :param recursive: when truthy, walk the whole tree; otherwise only look at
                      the direct entries of ``directory_path``.
    :param include_pattern: optional patterns forwarded to the include/exclude check.
    :param exclude_pattern: optional patterns forwarded to the include/exclude check.
    :return: list of paths built by joining the containing directory and the file name.
    """
    def _passes_filters(name):
        return _check_file_include_and_exclude_pattern(name,
                                                       include_pattern=include_pattern,
                                                       exclude_pattern=exclude_pattern)

    selected = []
    if recursive:
        for root, _, names in os.walk(directory_path):
            selected.extend(os.path.join(root, name) for name in names if _passes_filters(name))
        return selected

    # os.listdir also returns sub-directories, so each surviving entry is
    # checked with os.path.isfile after the name filter.
    candidates = (os.path.join(directory_path, name)
                  for name in os.listdir(directory_path)
                  if _passes_filters(name))
    selected.extend(path for path in candidates if os.path.isfile(path))
    return selected
3176

3277

3378
def _load_built_in_regex_patterns():
@@ -88,21 +133,17 @@ def _scan_secrets_for_string(data, custom_pattern=None):
88133
return secrets
89134

90135

91-
def scan_secrets(file_path=None, directory_path=None, recursive=False, data=None,
136+
def scan_secrets(file_path=None, directory_path=None, recursive=False,
137+
include_pattern=None, exclude_pattern=None, data=None,
92138
save_scan_result=None, scan_result_path=None, custom_pattern=None):
93-
_validate_data_path(file_path=file_path, directory_path=directory_path, data=data)
139+
_validate_data_path(file_path=file_path, directory_path=directory_path,
140+
include_pattern=include_pattern, exclude_pattern=exclude_pattern, data=data)
94141
target_files = []
95142
scan_results = {}
96143
if directory_path:
97144
directory_path = os.path.abspath(directory_path)
98-
if recursive:
99-
for root, _, files in os.walk(directory_path):
100-
target_files.extend(os.path.join(root, file) for file in files)
101-
else:
102-
for file in os.listdir(directory_path):
103-
file = os.path.join(directory_path, file)
104-
if os.path.isfile(file):
105-
target_files.append(file)
145+
target_files = _get_files_from_directory(directory_path, recursive=recursive,
146+
include_pattern=include_pattern, exclude_pattern=exclude_pattern)
106147
if file_path:
107148
file_path = os.path.abspath(file_path)
108149
target_files.append(file_path)
@@ -114,7 +155,7 @@ def scan_secrets(file_path=None, directory_path=None, recursive=False, data=None
114155
elif target_files:
115156
for target_file in target_files:
116157
logger.debug('start scanning secrets for %s', target_file)
117-
with open(target_file) as f:
158+
with open(target_file, encoding='utf8') as f:
118159
data = f.read()
119160
if not data:
120161
continue
@@ -140,41 +181,37 @@ def scan_secrets(file_path=None, directory_path=None, recursive=False, data=None
140181
file_folder = os.path.join(get_azdev_config_dir(), 'scan_results')
141182
if not os.path.exists(file_folder):
142183
os.mkdir(file_folder, 0o755)
143-
file_name = file_path or directory_path or datetime.now().strftime('%Y%m%d%H%M%S')
144-
result_file_name = 'scan_result_' + file_name.replace('.', '_') + '.json'
184+
result_file_name = 'scan_result_' + datetime.now().strftime('%Y%m%d%H%M%S') + '.json'
145185
scan_result_path = os.path.join(file_folder, result_file_name)
146186

147-
with open(scan_result_path, 'w') as f:
187+
with open(scan_result_path, 'w', encoding='utf8') as f:
148188
json.dump(scan_results, f)
149189
logger.debug('store scanning results in %s', scan_result_path)
150190
return {'secrets_detected': True, 'scan_result_path': os.path.abspath(scan_result_path)}
151191

152192

153193
def _get_scan_results_from_saved_file(saved_scan_result_path,
154-
file_path=None, directory_path=None, recursive=False, data=None):
194+
file_path=None, directory_path=None, recursive=False,
195+
include_pattern=None, exclude_pattern=None, data=None):
155196
scan_results = {}
156197
if not os.path.isfile(saved_scan_result_path):
157198
raise ValueError(f'invalid saved scan result path:{saved_scan_result_path}')
158-
with open(saved_scan_result_path) as f:
199+
with open(saved_scan_result_path, encoding='utf8') as f:
159200
saved_scan_results = json.load(f)
160201
# filter saved scan results to keep those related with specified file(s)
161-
_validate_data_path(file_path=file_path, directory_path=directory_path, data=data)
202+
_validate_data_path(file_path=file_path, directory_path=directory_path,
203+
include_pattern=include_pattern, exclude_pattern=exclude_pattern, data=data)
162204
if file_path:
163205
file_path = os.path.abspath(file_path)
164206
if file_path in saved_scan_results:
165207
scan_results[file_path] = saved_scan_results[file_path]
166208
elif directory_path:
167-
if recursive:
168-
for root, _, files in os.walk(directory_path):
169-
for file in files:
170-
file_full = os.path.join(root, file)
171-
if file_full in saved_scan_results:
172-
scan_results[file_full] = saved_scan_results[file_full]
173-
else:
174-
for file in os.listdir(directory_path):
175-
file_full = os.path.join(directory_path, file)
176-
if file_full in saved_scan_results:
177-
scan_results[file_full] = saved_scan_results[file_full]
209+
directory_path = os.path.abspath(directory_path)
210+
target_files = _get_files_from_directory(directory_path, recursive=recursive,
211+
include_pattern=include_pattern, exclude_pattern=exclude_pattern)
212+
for target_file in target_files:
213+
if target_file in saved_scan_results:
214+
scan_results[target_file] = saved_scan_results[target_file]
178215
else:
179216
scan_results['raw_data'] = saved_scan_results['raw_data']
180217

@@ -193,19 +230,26 @@ def _mask_secret_for_string(data, secret, redaction_type=None):
193230
return data
194231

195232

196-
def mask_secrets(file_path=None, directory_path=None, recursive=False, data=None,
233+
def mask_secrets(file_path=None, directory_path=None, recursive=False,
234+
include_pattern=None, exclude_pattern=None, data=None,
197235
save_scan_result=None, scan_result_path=None, custom_pattern=None,
198236
saved_scan_result_path=None, redaction_type='FIXED_VALUE', yes=None):
199237
scan_results = {}
200238
if saved_scan_result_path:
201-
scan_results = _get_scan_results_from_saved_file(saved_scan_result_path, file_path=file_path,
202-
directory_path=directory_path, recursive=recursive, data=data)
239+
scan_results = _get_scan_results_from_saved_file(saved_scan_result_path,
240+
file_path=file_path,
241+
directory_path=directory_path,
242+
recursive=recursive,
243+
include_pattern=include_pattern,
244+
exclude_pattern=exclude_pattern,
245+
data=data)
203246
else:
204-
scan_response = scan_secrets(file_path=file_path, directory_path=directory_path, recursive=recursive, data=data,
247+
scan_response = scan_secrets(file_path=file_path, directory_path=directory_path, recursive=recursive,
248+
include_pattern=include_pattern, exclude_pattern=exclude_pattern, data=data,
205249
save_scan_result=save_scan_result, scan_result_path=scan_result_path,
206250
custom_pattern=custom_pattern)
207251
if save_scan_result and scan_response['scan_result_path']:
208-
with open(scan_response['scan_result_path']) as f:
252+
with open(scan_response['scan_result_path'], encoding='utf8') as f:
209253
scan_results = json.load(f)
210254
elif not save_scan_result:
211255
scan_results = scan_response['scan_results']
@@ -235,13 +279,13 @@ def mask_secrets(file_path=None, directory_path=None, recursive=False, data=None
235279
return mask_result
236280

237281
for scan_file_path, secrets in scan_results.items():
238-
with open(scan_file_path, 'r') as f:
282+
with open(scan_file_path, 'r', encoding='utf8') as f:
239283
content = f.read()
240284
if not content:
241285
continue
242286
for secret in secrets:
243287
content = _mask_secret_for_string(content, secret, redaction_type)
244-
with open(scan_file_path, 'w') as f:
288+
with open(scan_file_path, 'w', encoding='utf8') as f:
245289
f.write(content)
246290
mask_result['mask'] = True
247291
return mask_result

azdev/operations/tests/test_scan_and_mask.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,16 @@ def test_scan_directory(self):
136136
self.assertEqual(len(result['scan_results'][info_json_file]), 1)
137137
self.assertEqual(result['scan_results'][info_json_file][0]['secret_name'], 'EmailAddress')
138138

139+
result = scan_secrets(directory_path=file_folder, recursive=True, include_pattern=['*.json'], custom_pattern=json.dumps(custom_pattern))
140+
self.assertTrue(result['secrets_detected'])
141+
self.assertNotIn(email_string_file, result['scan_results'])
142+
self.assertIn(info_json_file, result['scan_results'])
143+
144+
result = scan_secrets(directory_path=file_folder, recursive=True, exclude_pattern=['*.json'], custom_pattern=json.dumps(custom_pattern))
145+
self.assertTrue(result['secrets_detected'])
146+
self.assertIn(email_string_file, result['scan_results'])
147+
self.assertNotIn(info_json_file, result['scan_results'])
148+
139149
def test_mask(self):
140150
test_data = "This is a test string with email fooabc@gmail.com and sas sv=2022-11-02&sr=c&sig=a9Y5mpQgKUiiPzHFNdDm53Na6UndTrNMCsRZd6b2oV4%3D"
141151
result = mask_secrets(data=test_data, yes=True)

azdev/params.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,14 +109,22 @@ def load_arguments(self, _):
109109
help='Path of the folder you want to scan secrets for')
110110
c.argument('recursive', options_list=['--recursive', '-r'],
111111
help='Scan the directory recursively')
112+
c.argument('include_pattern', options_list=['--include-pattern', '--include'], nargs='*',
113+
help="Space separated patterns used for files you want to include within the directory. "
114+
"The supported patterns are '*', '?', '[seq]', and '[!seq]'. "
115+
"For more information, please refer to https://docs.python.org/3/library/fnmatch.html")
116+
c.argument('exclude_pattern', options_list=['--exclude-pattern', '--exclude'], nargs='*',
117+
help="Space separated patterns used for files you want to exclude within the directory. "
118+
"The supported patterns are '*', '?', '[seq]', and '[!seq]'. "
119+
"For more information, please refer to https://docs.python.org/3/library/fnmatch.html")
112120
c.argument('data', help='Raw string you want to scan secrets for')
113-
c.argument('save_scan_result', options_list=['--save-scan-result', '--save'], type=bool,
121+
c.argument('save_scan_result', options_list=['--save-scan-result', '--save'], action='store_true',
114122
help='Whether to save scan result to file or not')
115123
c.argument('scan_result_path', options_list=['--scan-result-path', '--result'],
116124
help='Path for the file you want to save the result in. '
117125
'If specified, --save-scan-result will be True anyway. '
118126
'If not speficied but set --save-scan-result to True, '
119-
'the file will be saved as `scan_result_xxx.json` in your `.azdev` directory ')
127+
'the file will be saved as `scan_result_YYYYmmddHHMMSS.json` in your `.azdev` directory ')
120128
c.argument('custom_pattern',
121129
help='Additional patterns you want to apply or built-in patterns you want to exclude '
122130
'for scanning. Can be json string or path to the json file.')

0 commit comments

Comments
 (0)