Skip to content

Commit 19e0e7d

Browse files
committed
{CI} Fix merge-base diff in linter, style, and scan jobs
1 parent f3ce3d7 commit 19e0e7d

File tree

4 files changed

+140
-51
lines changed

4 files changed

+140
-51
lines changed

azure-pipelines.yml

Lines changed: 6 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -190,19 +190,10 @@ jobs:
190190
#!/usr/bin/env bash
191191
set -ev
192192
source ./env/bin/activate
193-
git fetch origin --depth=1 $(System.PullRequest.TargetBranch)
194-
declare -A secret_files
195-
for FILE in `git diff --name-only --diff-filter=AM origin/$(System.PullRequest.TargetBranch)` ; do
196-
detected=$(azdev scan -f $FILE --continue-on-failure| python -c "import sys, json; print(json.load(sys.stdin)['secrets_detected'])")
197-
if [ $detected == 'True' ]; then
198-
printf "\033[0;31mSecrets detected from %s, Please remove or replace it. You can run 'azdev scan'/'azdev mask' locally to fix.\033[0m\n" "$FILE"
199-
secret_files+=$FILE
200-
fi
201-
done
202-
if [ "${#secret_files[@]}" -gt 0 ]; then
203-
exit 1
204-
fi
193+
python scripts/ci/azdev_scan.py
205194
displayName: "azdev scan ( High Confidence ) on Modified Extensions"
195+
env:
196+
ADO_PULL_REQUEST_TARGET_BRANCH: $(System.PullRequest.TargetBranch)
206197
207198
- job: AzdevScanProModifiedExtensionsMedium
208199
displayName: "azdev scan ( Medium Confidence ) on Modified Extensions"
@@ -221,19 +212,10 @@ jobs:
221212
#!/usr/bin/env bash
222213
set -ev
223214
source ./env/bin/activate
224-
git fetch origin --depth=1 $(System.PullRequest.TargetBranch)
225-
declare -A secret_files
226-
for FILE in `git diff --name-only --diff-filter=AM origin/$(System.PullRequest.TargetBranch)` ; do
227-
detected=$(azdev scan --confidence-level MEDIUM -f $FILE --continue-on-failure| python -c "import sys, json; print(json.load(sys.stdin)['secrets_detected'])")
228-
if [ $detected == 'True' ]; then
229-
printf "\033[0;31mSecrets detected from %s, Please remove or replace it. You can run 'azdev scan --confidence-level MEDIUM'/'azdev mask --confidence-level MEDIUM' locally to fix.\033[0m\n" "$FILE"
230-
secret_files+=$FILE
231-
fi
232-
done
233-
if [ "${#secret_files[@]}" -gt 0 ]; then
234-
exit 1
235-
fi
215+
python scripts/ci/azdev_scan.py --confidence-level MEDIUM
236216
displayName: "azdev scan ( Medium Confidence ) on Modified Extensions"
217+
env:
218+
ADO_PULL_REQUEST_TARGET_BRANCH: $(System.PullRequest.TargetBranch)
237219
238220
#- job: IndexRefDocVerify
239221
# displayName: "Verify Ref Docs"

scripts/ci/azdev_linter_style.py

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
This script is used to run azdev linter and azdev style on extensions.
88
99
It only works on ADO by default. To run it locally,
10-
please update the target branch/commit to find diff in function find_modified_files_against_master_branch()
10+
please update the target branch in find_modified_files_against_master_branch() in util.py.
1111
"""
1212
import json
1313
import logging
@@ -18,7 +18,7 @@
1818

1919
import service_name
2020
from packaging.version import Version
21-
from util import get_ext_metadata
21+
from util import get_ext_metadata, find_modified_files_against_master_branch
2222

2323
logger = logging.getLogger(__name__)
2424
logger.setLevel(logging.DEBUG)
@@ -119,30 +119,6 @@ def check_extension_name(self):
119119
f"Please fix the name in setup.py!")
120120

121121

122-
def find_modified_files_against_master_branch():
    """
    Find modified files from src/ only.
    A: Added, C: Copied, M: Modified, R: Renamed, T: File type changed.
    Deleted files don't count in diff.

    The target branch name is read from the ADO_PULL_REQUEST_TARGET_BRANCH
    environment variable and prefixed with ``origin/``.

    :return: list of non-empty path strings printed by ``git diff --name-only``;
        an empty list when the environment variable is not set.
    :raises subprocess.CalledProcessError: if the git command exits non-zero.
    """
    target_branch = os.environ.get('ADO_PULL_REQUEST_TARGET_BRANCH')
    if not target_branch:
        # Without this guard, 'origin/' + None raises an opaque TypeError.
        logger.warning('ADO_PULL_REQUEST_TARGET_BRANCH is not available, skip diff.')
        return []
    ado_pr_target_branch = 'origin/' + target_branch

    separator_line()
    logger.info('pull request target branch: %s', ado_pr_target_branch)

    # Build argv as a list so the ref is always passed as a single argument,
    # instead of relying on whitespace-splitting a formatted string.
    cmd = ['git', '--no-pager', 'diff', '--name-only', '--diff-filter=ACMRT',
           ado_pr_target_branch, '--', 'src/']
    files = check_output(cmd).decode('utf-8').split('\n')
    # git terminates the listing with a newline, which leaves an empty entry.
    files = [f for f in files if f]

    if files:
        logger.info('modified files:')
        separator_line()
        for f in files:
            logger.info(f)

    return files
144-
145-
146122
def contain_index_json(files):
    """Report whether ``src/index.json`` is among *files*."""
    index_path = 'src/index.json'
    return index_path in files
148124

scripts/ci/azdev_scan.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
# --------------------------------------------------------------------------------------------
2+
# Copyright (c) Microsoft Corporation. All rights reserved.
3+
# Licensed under the MIT License. See License.txt in the project root for license information.
4+
# --------------------------------------------------------------------------------------------
5+
6+
"""
7+
This script is used to run azdev scan on modified extensions in PR pipelines.
8+
9+
It reuses find_modified_files_against_master_branch() from util.py to get an
10+
accurate list of files changed in the PR (via merge-base), then runs
11+
azdev scan on each file.
12+
"""
13+
import json
14+
import logging
15+
import sys
16+
from subprocess import CalledProcessError, check_output
17+
18+
from util import find_modified_files_against_master_branch
19+
20+
logger = logging.getLogger(__name__)
21+
logger.setLevel(logging.DEBUG)
22+
ch = logging.StreamHandler()
23+
ch.setLevel(logging.DEBUG)
24+
logger.addHandler(ch)
25+
26+
27+
def run_scan(modified_files, confidence_level=None):
    """Run azdev scan on each modified file and report secrets.

    Files with detected secrets and files whose scan could not be completed
    are tracked separately so the final summary does not report a tooling
    failure as a leaked secret. Either condition still fails the job.

    :param modified_files: iterable of file paths, each passed to
        ``azdev scan -f``.
    :param confidence_level: optional value forwarded as
        ``--confidence-level``; when falsy the flag is omitted.
    :return: None when every file was scanned and no secrets were reported.
    :raises SystemExit: with exit code 1 when secrets were detected in, or
        the scan failed for, at least one file.
    """
    confidence_flag = []
    confidence_msg = ''
    if confidence_level:
        confidence_flag = ['--confidence-level', confidence_level]
        confidence_msg = ' --confidence-level {}'.format(confidence_level)

    secret_files = []
    failed_files = []
    for f in modified_files:
        cmd = ['azdev', 'scan', '-f', f, '--continue-on-failure'] + confidence_flag
        logger.info('Scanning: %s', f)

        try:
            output = check_output(cmd).decode('utf-8', errors='replace')
        except CalledProcessError as e:
            logger.error('azdev scan failed for %s: %s', f, e)
            failed_files.append(f)
            continue

        try:
            result = json.loads(output)
        except ValueError as e:
            # json.JSONDecodeError is a ValueError subclass.
            logger.error('Failed to parse azdev scan output for %s: %s', f, e)
            failed_files.append(f)
            continue

        if not isinstance(result, dict):
            # Valid JSON that is not an object (list, null, ...) has no
            # 'secrets_detected' field to read; treat it as a failed scan
            # rather than crashing on .get().
            logger.error('Failed to parse azdev scan output for %s: %s',
                         f, 'expected a JSON object')
            failed_files.append(f)
            continue

        if result.get('secrets_detected') is True:
            logger.error(
                '\033[0;31mSecrets detected from %s, Please remove or replace it. '
                'You can run \'azdev scan%s\'/\'azdev mask%s\' locally to fix.\033[0m',
                f, confidence_msg, confidence_msg
            )
            secret_files.append(f)

    if secret_files:
        logger.error('Secrets detected in %d file(s)', len(secret_files))
    if failed_files:
        logger.error('azdev scan could not be completed for %d file(s)', len(failed_files))

    # Fail closed: an unscanned file is not known to be clean.
    if secret_files or failed_files:
        sys.exit(1)

    logger.info('-' * 100)
    logger.info('No secrets detected in any modified files.')
    logger.info('-' * 100)
63+
64+
65+
def main():
    """Parse the command line and scan the files modified by the pull request.

    Logs a message and does nothing further when no modified files are found.
    """
    import argparse

    arg_parser = argparse.ArgumentParser(description='azdev scan on modified extensions')
    arg_parser.add_argument(
        '--confidence-level',
        type=str,
        default=None,
        help='Confidence level for azdev scan (e.g., MEDIUM). '
             'Default: HIGH (azdev scan default).',
    )
    options = arg_parser.parse_args()

    changed = find_modified_files_against_master_branch()
    if changed:
        run_scan(changed, confidence_level=options.confidence_level)
    else:
        logger.info('No modified files found, skipping scan.')
81+
82+
83+
if __name__ == '__main__':
84+
main()

scripts/ci/util.py

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import json
1111
import zipfile
1212

13-
from subprocess import check_output
13+
from subprocess import check_call, check_output
1414

1515
logger = logging.getLogger(__name__)
1616

@@ -163,3 +163,50 @@ def diff_code(start, end):
163163
f'end: {end}, '
164164
f'diff_ref: {diff_ref}.')
165165
return diff_ref
166+
167+
168+
def find_modified_files_against_master_branch():
    """
    Find modified files from src/ only, using merge-base for accurate PR diff.
    A: Added, C: Copied, M: Modified, R: Renamed, T: File type changed.
    Deleted files don't count in diff.

    The target branch is read from the ADO_PULL_REQUEST_TARGET_BRANCH
    environment variable. A leading ``refs/remotes/origin/``, ``refs/heads/``
    or ``origin/`` is stripped before the branch is fetched from ``origin``
    and compared as ``origin/<branch>``.

    :return: list of path strings reported by ``git diff --name-only`` for
        ``src/``; an empty list when the environment variable is unset, empty,
        or still holds the unexpanded ``$(System.PullRequest.TargetBranch)``
        macro.
    :raises subprocess.CalledProcessError: if any git command exits non-zero.
    """
    ado_pr_target_branch = os.environ.get('ADO_PULL_REQUEST_TARGET_BRANCH')
    if not ado_pr_target_branch or ado_pr_target_branch == '$(System.PullRequest.TargetBranch)':
        logger.warning('ADO_PULL_REQUEST_TARGET_BRANCH is not available, skip diff.')
        return []

    # Reduce whichever ref spelling was supplied to the bare branch name.
    # Only the first matching prefix is removed.
    normalized_branch = ado_pr_target_branch
    for prefix in ('refs/remotes/origin/', 'refs/heads/', 'origin/'):
        if normalized_branch.startswith(prefix):
            normalized_branch = normalized_branch[len(prefix):]
            break

    ado_pr_target_branch = 'origin/{}'.format(normalized_branch)

    logger.info('-' * 100)
    logger.info('pull request target branch: %s', ado_pr_target_branch)

    # Ensure target ref exists and has enough history for merge-base.
    check_call([
        'git', 'fetch', 'origin', '--deepen=50',
        'refs/heads/{}:refs/remotes/origin/{}'.format(normalized_branch, normalized_branch)
    ])

    merge_base = check_output([
        'git', 'merge-base', 'HEAD', ado_pr_target_branch
    ]).decode('utf-8').strip()

    logger.info('merge base: %s', merge_base)

    # -z makes git emit each path verbatim, NUL-terminated. Without it, paths
    # git considers unusual are C-quoted and escaped, and would then not match
    # the real file on disk.
    cmd = ['git', '--no-pager', 'diff', '--name-only', '-z', '--diff-filter=ACMRT',
           merge_base, '--', 'src/']
    raw_listing = check_output(cmd)
    # os.fsdecode keeps paths that are not valid UTF-8 round-trippable instead
    # of raising UnicodeDecodeError on them.
    files = [os.fsdecode(path) for path in raw_listing.split(b'\0') if path]

    if files:
        logger.info('modified files:')
        logger.info('-' * 100)
        for f in files:
            logger.info(f)

    return files

0 commit comments

Comments
 (0)