Skip to content

Commit e7117ce

Browse files
committed
{CI} Fix merge-base diff in linter, style, and scan jobs
1 parent f3ce3d7 commit e7117ce

File tree

4 files changed

+164
-51
lines changed

4 files changed

+164
-51
lines changed

azure-pipelines.yml

Lines changed: 6 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -190,19 +190,10 @@ jobs:
190190
#!/usr/bin/env bash
191191
set -ev
192192
source ./env/bin/activate
193-
git fetch origin --depth=1 $(System.PullRequest.TargetBranch)
194-
declare -A secret_files
195-
for FILE in `git diff --name-only --diff-filter=AM origin/$(System.PullRequest.TargetBranch)` ; do
196-
detected=$(azdev scan -f $FILE --continue-on-failure| python -c "import sys, json; print(json.load(sys.stdin)['secrets_detected'])")
197-
if [ $detected == 'True' ]; then
198-
printf "\033[0;31mSecrets detected from %s, Please remove or replace it. You can run 'azdev scan'/'azdev mask' locally to fix.\033[0m\n" "$FILE"
199-
secret_files+=$FILE
200-
fi
201-
done
202-
if [ "${#secret_files[@]}" -gt 0 ]; then
203-
exit 1
204-
fi
193+
python scripts/ci/azdev_scan.py
205194
displayName: "azdev scan ( High Confidence ) on Modified Extensions"
195+
env:
196+
ADO_PULL_REQUEST_TARGET_BRANCH: $(System.PullRequest.TargetBranch)
206197
207198
- job: AzdevScanProModifiedExtensionsMedium
208199
displayName: "azdev scan ( Medium Confidence ) on Modified Extensions"
@@ -221,19 +212,10 @@ jobs:
221212
#!/usr/bin/env bash
222213
set -ev
223214
source ./env/bin/activate
224-
git fetch origin --depth=1 $(System.PullRequest.TargetBranch)
225-
declare -A secret_files
226-
for FILE in `git diff --name-only --diff-filter=AM origin/$(System.PullRequest.TargetBranch)` ; do
227-
detected=$(azdev scan --confidence-level MEDIUM -f $FILE --continue-on-failure| python -c "import sys, json; print(json.load(sys.stdin)['secrets_detected'])")
228-
if [ $detected == 'True' ]; then
229-
printf "\033[0;31mSecrets detected from %s, Please remove or replace it. You can run 'azdev scan --confidence-level MEDIUM'/'azdev mask --confidence-level MEDIUM' locally to fix.\033[0m\n" "$FILE"
230-
secret_files+=$FILE
231-
fi
232-
done
233-
if [ "${#secret_files[@]}" -gt 0 ]; then
234-
exit 1
235-
fi
215+
python scripts/ci/azdev_scan.py --confidence-level MEDIUM
236216
displayName: "azdev scan ( Medium Confidence ) on Modified Extensions"
217+
env:
218+
ADO_PULL_REQUEST_TARGET_BRANCH: $(System.PullRequest.TargetBranch)
237219
238220
#- job: IndexRefDocVerify
239221
# displayName: "Verify Ref Docs"

scripts/ci/azdev_linter_style.py

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
This script is used to run azdev linter and azdev style on extensions.
88
99
It only works on ADO by default. If you want to run it locally,
10-
please update the target branch/commit to find diff in function find_modified_files_against_master_branch()
10+
please set the ADO_PULL_REQUEST_TARGET_BRANCH environment variable, which find_modified_files_against_master_branch() in util.py reads.
1111
"""
1212
import json
1313
import logging
@@ -18,7 +18,7 @@
1818

1919
import service_name
2020
from packaging.version import Version
21-
from util import get_ext_metadata
21+
from util import get_ext_metadata, find_modified_files_against_master_branch
2222

2323
logger = logging.getLogger(__name__)
2424
logger.setLevel(logging.DEBUG)
@@ -119,30 +119,6 @@ def check_extension_name(self):
119119
f"Please fix the name in setup.py!")
120120

121121

122-
def find_modified_files_against_master_branch():
123-
"""
124-
Find modified files from src/ only.
125-
A: Added, C: Copied, M: Modified, R: Renamed, T: File type changed.
126-
Deleted files don't count in diff.
127-
"""
128-
ado_pr_target_branch = 'origin/' + os.environ.get('ADO_PULL_REQUEST_TARGET_BRANCH')
129-
130-
separator_line()
131-
logger.info('pull request target branch: %s', ado_pr_target_branch)
132-
133-
cmd = 'git --no-pager diff --name-only --diff-filter=ACMRT {} -- src/'.format(ado_pr_target_branch)
134-
files = check_output(cmd.split()).decode('utf-8').split('\n')
135-
files = [f for f in files if len(f) > 0]
136-
137-
if files:
138-
logger.info('modified files:')
139-
separator_line()
140-
for f in files:
141-
logger.info(f)
142-
143-
return files
144-
145-
146122
def contain_index_json(files):
147123
return 'src/index.json' in files
148124

scripts/ci/azdev_scan.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
# --------------------------------------------------------------------------------------------
2+
# Copyright (c) Microsoft Corporation. All rights reserved.
3+
# Licensed under the MIT License. See License.txt in the project root for license information.
4+
# --------------------------------------------------------------------------------------------
5+
6+
"""
7+
This script is used to run azdev scan on modified extensions in PR pipelines.
8+
9+
It reuses find_modified_files_against_master_branch() from util.py to get an
10+
accurate list of files changed in the PR (via merge-base), then runs
11+
azdev scan on each file.
12+
"""
13+
import json
14+
import logging
15+
import sys
16+
from subprocess import CalledProcessError, check_output
17+
18+
from util import find_modified_files_against_master_branch
19+
20+
logger = logging.getLogger(__name__)
21+
logger.setLevel(logging.DEBUG)
22+
ch = logging.StreamHandler()
23+
ch.setLevel(logging.DEBUG)
24+
logger.addHandler(ch)
25+
26+
27+
def run_scan(modified_files, confidence_level=None):
    """Run azdev scan on each modified file and report secrets.

    Each file is scanned with ``azdev scan -f <file> --continue-on-failure``
    and the command's stdout is parsed as JSON. A file is reported as
    containing secrets only when the ``secrets_detected`` field is exactly
    ``True``.

    The scan fails closed: a file is recorded as *failed* when the command
    cannot be started or exits non-zero, when its output is not valid JSON,
    or when the parsed output has no ``secrets_detected`` field.

    Args:
        modified_files: iterable of file paths to scan.
        confidence_level: optional value forwarded as ``--confidence-level``.
            When falsy the flag is omitted and azdev's own default applies.

    Returns:
        None when every file was scanned and none reported secrets.

    Raises:
        SystemExit: with code 1 if any file reported secrets or failed to scan.
    """
    confidence_flag = []
    confidence_msg = ''
    if confidence_level:
        confidence_flag = ['--confidence-level', confidence_level]
        confidence_msg = ' --confidence-level {}'.format(confidence_level)

    secret_files = []
    failed_files = []
    for f in modified_files:
        cmd = ['azdev', 'scan', '-f', f, '--continue-on-failure'] + confidence_flag
        logger.info('Scanning: %s', f)

        try:
            output = check_output(cmd).decode('utf-8', errors='replace')
        except (CalledProcessError, OSError) as e:
            # OSError covers a missing or non-executable azdev binary, so a
            # launch failure is reported per file instead of aborting the loop.
            logger.error('azdev scan failed for %s: %s', f, e)
            failed_files.append(f)
            continue

        try:
            # Index (rather than .get) so that a missing field is a failure:
            # KeyError -> field absent, TypeError -> JSON value is not an object.
            detected = json.loads(output)['secrets_detected']
        except (json.JSONDecodeError, KeyError, TypeError) as e:
            logger.error('Failed to parse azdev scan output for %s: %s', f, e)
            failed_files.append(f)
            continue

        if detected is True:
            logger.error(
                '\033[0;31mSecrets detected from %s, Please remove or replace it. '
                'You can run \'azdev scan%s\'/\'azdev mask%s\' locally to fix.\033[0m',
                f, confidence_msg, confidence_msg
            )
            secret_files.append(f)

    if secret_files:
        logger.error('Secrets detected in %d file(s): %s', len(secret_files), secret_files)
    if failed_files:
        logger.error('Scan failed for %d file(s): %s', len(failed_files), failed_files)
    if secret_files or failed_files:
        sys.exit(1)

    logger.info('-' * 100)
    logger.info('No secrets detected in any modified files.')
    logger.info('-' * 100)
70+
71+
72+
def main():
    """Entry point: parse CLI options, then scan the files changed in the PR.

    Accepts an optional ``--confidence-level`` argument that is passed through
    to run_scan(). When find_modified_files_against_master_branch() returns no
    files, an informational message is logged and no scan is run.
    """
    import argparse

    arg_parser = argparse.ArgumentParser(description='azdev scan on modified extensions')
    arg_parser.add_argument(
        '--confidence-level',
        type=str,
        default=None,
        help='Confidence level for azdev scan (e.g., MEDIUM). '
             'Default: HIGH (azdev scan default).',
    )
    options = arg_parser.parse_args()

    changed_files = find_modified_files_against_master_branch()
    if changed_files:
        run_scan(changed_files, confidence_level=options.confidence_level)
    else:
        logger.info('No modified files found, skipping scan.')
88+
89+
90+
if __name__ == '__main__':
91+
main()

scripts/ci/util.py

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import json
1111
import zipfile
1212

13-
from subprocess import check_output
13+
from subprocess import check_call, check_output
1414

1515
logger = logging.getLogger(__name__)
1616

@@ -163,3 +163,67 @@ def diff_code(start, end):
163163
f'end: {end}, '
164164
f'diff_ref: {diff_ref}.')
165165
return diff_ref
166+
167+
168+
def _merge_base_with_head(target_ref):
    """Return the commit SHA printed by ``git merge-base HEAD <target_ref>``.

    Raises:
        CalledProcessError: if ``git merge-base`` exits with a non-zero status.
    """
    return check_output(['git', 'merge-base', 'HEAD', target_ref]).decode('utf-8').strip()


def find_modified_files_against_master_branch():
    """
    Find modified files from src/ only, using merge-base for accurate PR diff.
    A: Added, C: Copied, M: Modified, R: Renamed, T: File type changed.
    Deleted files don't count in diff.

    The target branch is read from the ADO_PULL_REQUEST_TARGET_BRANCH
    environment variable. Any leading ``refs/remotes/origin/``, ``refs/heads/``
    or ``origin/`` prefixes are stripped, and the branch is then fetched from
    ``origin`` and addressed as ``origin/<branch>``.

    Returns:
        list[str]: paths under src/ that differ between the merge base and
        HEAD, or an empty list when the target branch is not available.

    Raises:
        CalledProcessError: if a git command fails, including when
            ``git merge-base`` fails and the clone is not shallow (or still
            fails after the ``--unshallow`` fallback).
    """
    # Imported locally because only check_call/check_output are imported from
    # subprocess at module level.
    from subprocess import CalledProcessError

    # NOTE(review): `logger` is this module's logging.getLogger(__name__); no
    # handler or level setup is visible here, so confirm the INFO records
    # below are actually emitted when this is called from another script.
    ado_pr_target_branch = os.environ.get('ADO_PULL_REQUEST_TARGET_BRANCH')
    # The literal macro text is also treated as "unset" -- presumably what the
    # pipeline passes when System.PullRequest.TargetBranch is undefined
    # (TODO confirm against Azure Pipelines behavior for non-PR builds).
    if not ado_pr_target_branch or ado_pr_target_branch == '$(System.PullRequest.TargetBranch)':
        logger.warning('ADO_PULL_REQUEST_TARGET_BRANCH is not available, skip diff.')
        return []

    normalized_branch = re.sub(
        r'^(?:refs/remotes/origin/|refs/heads/|origin/)+', '', ado_pr_target_branch
    )
    ado_pr_target_branch = 'origin/{}'.format(normalized_branch)
    # Explicit refspec so that refs/remotes/origin/<branch> is created/updated.
    refspec = 'refs/heads/{0}:refs/remotes/origin/{0}'.format(normalized_branch)

    logger.info('-' * 100)
    logger.info('pull request target branch: %s', ado_pr_target_branch)

    # Ensure target ref exists and has enough history for merge-base.
    # Only use --deepen when the repo is a shallow clone.
    # The check is relative to the current directory, as is the 'src/'
    # pathspec below, so this must run from the repository root.
    is_shallow = os.path.isfile(os.path.join('.git', 'shallow'))
    fetch_cmd = ['git', 'fetch', 'origin']
    if is_shallow:
        fetch_cmd.append('--deepen=50')
    fetch_cmd.append(refspec)
    check_call(fetch_cmd)

    try:
        merge_base = _merge_base_with_head(ado_pr_target_branch)
    except CalledProcessError:
        # Only a failing git command triggers the fallback; anything else is
        # a programming error and should surface unchanged.
        if not is_shallow:
            raise
        logger.warning('merge-base failed after --deepen=50, falling back to --unshallow')
        check_call(['git', 'fetch', 'origin', '--unshallow', refspec])
        merge_base = _merge_base_with_head(ado_pr_target_branch)

    logger.info('merge base: %s', merge_base)

    # -z emits NUL-terminated, unquoted paths. Without it git C-quotes paths
    # containing unusual characters, and the returned names would not match
    # the files on disk.
    cmd = ['git', '--no-pager', 'diff', '--name-only', '-z', '--diff-filter=ACMRT',
           merge_base, 'HEAD', '--', 'src/']
    files = [f for f in check_output(cmd).decode('utf-8').split('\0') if f]

    if files:
        logger.info('modified files:')
        logger.info('-' * 100)
        for f in files:
            logger.info(f)

    return files

0 commit comments

Comments
 (0)