Skip to content

Commit 1698a74

Browse files
authored
CM-17814 - SCA commit range (#50)
1 parent 18961ee commit 1698a74

4 files changed

Lines changed: 232 additions & 43 deletions

File tree

cli/code_scanner.py

Lines changed: 143 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -85,24 +85,26 @@ def scan_repository_commit_history(context: click.Context, path: str, commit_ran
8585

8686
def scan_commit_range(context: click.Context, path: str, commit_range: str):
    """Scan the changes introduced by the commits of ``commit_range`` in the repository at ``path``.

    SCA scans are delegated to ``scan_sca_commit_range``. For every other supported scan type,
    each commit in the range is diffed against its first parent (or ``NULL_TREE`` when it has no
    parent) and every diff entry is turned into a ``Document`` holding the decoded patch text.

    :param context: click context; ``context.obj["scan_type"]`` selects the scan flow.
    :param path: path of the git repository.
    :param commit_range: revision range handed to ``Repo.iter_commits``.
    :raises click.ClickException: when the scan type is not in ``COMMIT_RANGE_SCAN_SUPPORTED_SCAN_TYPES``.
    :return: whatever the delegated scan function returns.
    """
    scan_type = context.obj["scan_type"]
    if scan_type not in COMMIT_RANGE_SCAN_SUPPORTED_SCAN_TYPES:
        raise click.ClickException(f"Commit range scanning for {str.upper(scan_type)} is not supported")

    if scan_type == SCA_SCAN_TYPE:
        return scan_sca_commit_range(context, path, commit_range)

    documents_to_scan = []
    # Pre-bind so the debug log below does not raise UnboundLocalError when the range yields no commits.
    commit_id = None
    for commit in Repo(path).iter_commits(rev=commit_range):
        commit_id = commit.hexsha
        parent = commit.parents[0] if commit.parents else NULL_TREE
        diff = commit.diff(parent, create_patch=True, R=True)
        for blob in diff:
            doc = Document(get_path_by_os(os.path.join(path, get_diff_file_path(blob))),
                           blob.diff.decode('utf-8', errors='replace'), True, commit_id)
            documents_to_scan.append(doc)

    documents_to_scan = exclude_irrelevant_documents_to_scan(context, documents_to_scan)
    # commit_id is the last commit yielded by the iteration (None for an empty range).
    logger.debug('Found all relevant files in commit %s',
                 {'path': path, 'commit_range': commit_range, 'commit_id': commit_id})
    return scan_documents(context, documents_to_scan, is_git_diff=True, is_commit_range=True)
106108

107109

108110
@click.command()
@@ -137,6 +139,17 @@ def pre_commit_scan(context: click.Context, ignored_args: List[str]):
137139
return scan_documents(context, documents_to_scan, is_git_diff=True)
138140

139141

142+
def scan_sca_commit_range(context: click.Context, path: str, commit_range: str):
    """Run an SCA scan comparing the two boundary revisions of ``commit_range``.

    Resolves the range into a "from" and a "to" revision, collects the files modified between
    them as they look in each revision, filters out irrelevant documents, lets the SCA helper
    add ecosystem-related project files, and finally submits both document sets for scanning.

    :param context: click context forwarded to the filtering and scanning helpers.
    :param path: path of the git repository.
    :param commit_range: revision range to resolve via ``parse_commit_range``.
    :return: the result of ``scan_commit_range_documents``.
    """
    from_commit_rev, to_commit_rev = parse_commit_range(commit_range, path)
    from_commit_documents, to_commit_documents = \
        get_commit_range_modified_documents(path, from_commit_rev, to_commit_rev)
    # exclude_irrelevant_documents_to_scan returns the filtered list (scan_commit_range relies on its
    # return value); the result must be kept, otherwise the exclusion has no effect.
    from_commit_documents = exclude_irrelevant_documents_to_scan(context, from_commit_documents)
    to_commit_documents = exclude_irrelevant_documents_to_scan(context, to_commit_documents)
    sca_code_scanner.perform_pre_commit_range_scan_actions(path, from_commit_documents, from_commit_rev,
                                                           to_commit_documents, to_commit_rev)
    return scan_commit_range_documents(context, from_commit_documents, to_commit_documents)
151+
152+
140153
def scan_disk_files(context: click.Context, paths: List[str]):
141154
scan_type = context.obj['scan_type']
142155
is_git_diff = False
@@ -150,8 +163,8 @@ def scan_disk_files(context: click.Context, paths: List[str]):
150163
return scan_documents(context, documents, is_git_diff=is_git_diff)
151164

152165

153-
def scan_documents(context: click.Context, documents_to_scan: List[Document],
154-
is_git_diff: bool = False, is_commit_range: bool = False, scan_parameters: dict = None):
166+
def scan_documents(context: click.Context, documents_to_scan: List[Document], is_git_diff: bool = False,
167+
is_commit_range: bool = False, scan_parameters: dict = None):
155168
cycode_client = context.obj["client"]
156169
scan_type = context.obj["scan_type"]
157170
severity_threshold = context.obj["severity_threshold"]
@@ -166,17 +179,9 @@ def scan_documents(context: click.Context, documents_to_scan: List[Document],
166179
zipped_documents = zip_documents_to_scan(scan_type, zipped_documents, documents_to_scan)
167180
scan_result = perform_scan(cycode_client, zipped_documents, scan_type, scan_id, is_git_diff, is_commit_range,
168181
scan_parameters)
169-
document_detections_list = enrich_scan_result(scan_result, documents_to_scan)
170-
relevant_document_detections_list = exclude_irrelevant_scan_results(document_detections_list, scan_type,
171-
scan_command_type, severity_threshold)
172-
context.obj['report_url'] = scan_result.report_url
173-
print_results(context, relevant_document_detections_list)
174-
175-
context.obj['issue_detected'] = len(relevant_document_detections_list) > 0
176-
all_detections_count = sum(
177-
[len(document_detections.detections) for document_detections in document_detections_list])
178-
output_detections_count = sum(
179-
[len(document_detections.detections) for document_detections in relevant_document_detections_list])
182+
all_detections_count, output_detections_count = \
183+
handle_scan_result(context, scan_result, scan_command_type, scan_type, severity_threshold,
184+
documents_to_scan)
180185
scan_completed = True
181186
except Exception as e:
182187
_handle_exception(context, e)
@@ -191,10 +196,62 @@ def scan_documents(context: click.Context, documents_to_scan: List[Document],
191196
all_detections_count, len(documents_to_scan), zip_file_size, scan_command_type, error_message)
192197

193198

199+
def scan_commit_range_documents(context: click.Context, from_documents_to_scan: List[Document],
                                to_documents_to_scan: List[Document], scan_parameters: dict = None):
    """Zip both document sets, run the commit range scan and report its status.

    Any exception raised while zipping, scanning or handling the result is passed to
    ``_handle_exception`` and recorded as the error message of the reported scan status.

    :param context: click context providing the client, scan type and severity threshold.
    :param from_documents_to_scan: documents as of the "from" revision.
    :param to_documents_to_scan: documents as of the "to" revision; detections are matched against these.
    :param scan_parameters: optional parameters forwarded to the scan request.
    """
    cycode_client = context.obj["client"]
    scan_type = context.obj["scan_type"]
    severity_threshold = context.obj["severity_threshold"]
    scan_command_type = context.info_name
    error_message = None
    total_count, reported_count = 0, 0
    scan_id = _get_scan_id(context)
    from_zip = InMemoryZip()
    to_zip = InMemoryZip()

    try:
        from_zip = zip_documents_to_scan(scan_type, from_zip, from_documents_to_scan)
        to_zip = zip_documents_to_scan(scan_type, to_zip, to_documents_to_scan)
        scan_result = perform_commit_range_scan_async(cycode_client, from_zip, to_zip, scan_type, scan_parameters)
        total_count, reported_count = handle_scan_result(context, scan_result, scan_command_type, scan_type,
                                                         severity_threshold, to_documents_to_scan)
    except Exception as e:
        _handle_exception(context, e)
        error_message = str(e)
        scan_completed = False
    else:
        scan_completed = True

    # Combined size of both archives, reported with the scan status.
    zip_file_size = getsizeof(from_zip.in_memory_zip) + getsizeof(to_zip.in_memory_zip)
    scan_id_text = str(scan_id)
    logger.debug('Finished scan process, %s',
                 {'all_violations_count': total_count, 'relevant_violations_count': reported_count,
                  'scan_id': scan_id_text, 'zip_file_size': zip_file_size})
    _report_scan_status(context, scan_type, scan_id_text, scan_completed, reported_count, total_count,
                        len(to_documents_to_scan), zip_file_size, scan_command_type, error_message)
235+
236+
237+
def handle_scan_result(context, scan_result, scan_command_type, scan_type, severity_threshold, to_documents_to_scan):
    """Enrich, filter and print a scan result, then return its detection counts.

    Stores the report URL and an ``issue_detected`` flag on ``context.obj`` as a side effect.

    :return: tuple of (count of all detections, count of detections left after filtering).
    """
    enriched = enrich_scan_result(scan_result, to_documents_to_scan)
    relevant = exclude_irrelevant_scan_results(enriched, scan_type, scan_command_type, severity_threshold)
    context.obj['report_url'] = scan_result.report_url
    print_results(context, relevant)
    context.obj['issue_detected'] = len(relevant) > 0

    def count_detections(document_detections_list):
        # Total number of detections across all per-document entries.
        return sum(len(entry.detections) for entry in document_detections_list)

    return count_detections(enriched), count_detections(relevant)
249+
250+
194251
def perform_pre_scan_documents_actions(context: click.Context, scan_type: str, documents_to_scan: List[Document],
                                       is_git_diff: bool = False):
    """Run scan-type specific preparation on ``documents_to_scan`` before they are zipped.

    Only SCA scans need preparation: the SCA helper is asked to add dependency tree documents.
    """
    if scan_type != SCA_SCAN_TYPE:
        return
    sca_code_scanner.add_dependencies_tree_document(context, documents_to_scan, is_git_diff)
198255

199256

200257
def zip_documents_to_scan(scan_type: str, zip: InMemoryZip, documents: List[Document]):
@@ -238,14 +295,28 @@ def perform_scan_async(cycode_client, zipped_documents: InMemoryZip, scan_type:
238295
scan_parameters: dict) -> ZippedFileScanResult:
239296
scan_async_result = cycode_client.zipped_file_scan_async(zipped_documents, scan_type, scan_parameters)
240297
logger.debug("scan request has been triggered successfully, scan id: %s", scan_async_result.scan_id)
241-
polling_timeout = configuration_manager.get_scan_polling_timeout_in_seconds()
242298

299+
return poll_scan_results(cycode_client, scan_async_result.scan_id)
300+
301+
302+
def perform_commit_range_scan_async(cycode_client, from_commit_zipped_documents: InMemoryZip,
                                    to_commit_zipped_documents: InMemoryZip, scan_type: str,
                                    scan_parameters: dict) -> ZippedFileScanResult:
    """Trigger an asynchronous scan of two zipped document sets and poll for its result.

    :return: the result produced by ``poll_scan_results`` for the triggered scan id.
    """
    scan_init_response = cycode_client.multiple_zipped_file_scan_async(
        from_commit_zipped_documents, to_commit_zipped_documents, scan_type, scan_parameters)
    triggered_scan_id = scan_init_response.scan_id
    logger.debug("scan request has been triggered successfully, scan id: %s", triggered_scan_id)
    return poll_scan_results(cycode_client, triggered_scan_id)
310+
311+
312+
def poll_scan_results(cycode_client, scan_id: str):
313+
polling_timeout = configuration_manager.get_scan_polling_timeout_in_seconds()
243314
end_polling_time = time.time() + polling_timeout
244315
while time.time() < end_polling_time:
245316
logger.debug("scan in progress")
246-
scan_details = cycode_client.get_scan_details(scan_async_result.scan_id)
317+
scan_details = cycode_client.get_scan_details(scan_id)
247318
if scan_details.scan_status == SCAN_STATUS_COMPLETED:
248-
return _get_scan_result(cycode_client, scan_async_result, scan_details)
319+
return _get_scan_result(cycode_client, scan_id, scan_details)
249320
if scan_details.scan_status == SCAN_STATUS_ERROR:
250321
raise ScanAsyncError(f'error occurred while trying to scan zip file. {scan_details.message}')
251322
time.sleep(SCAN_POLLING_WAIT_INTERVAL_IN_SECONDS)
@@ -367,6 +438,26 @@ def exclude_detections_by_exclusions_configuration(scan_type: str, detections) -
367438
return [detection for detection in detections if not _should_exclude_detection(detection, exclusions)]
368439

369440

441+
def get_commit_range_modified_documents(path: str, from_commit_rev: str, to_commit_rev: str) -> (
        List[Document], List[Document]):
    """Collect the files that differ between two revisions, as they look in each revision.

    Files whose change type is ``COMMIT_DIFF_DELETED_FILE_CHANGE_TYPE`` are skipped. A file that
    cannot be read from the "from" revision (for example one that was added inside the range)
    contributes a document to the "to" list only.

    :param path: path of the git repository.
    :param from_commit_rev: revision used as the base of the diff.
    :param to_commit_rev: revision the base is diffed against.
    :return: tuple of (documents at ``from_commit_rev``, documents at ``to_commit_rev``).
    """
    # Local import: GitPython is already a dependency of this module (Repo), and this keeps the
    # block self-contained regardless of the module-level import list.
    from git import GitCommandError

    from_commit_documents = []
    to_commit_documents = []
    repo = Repo(path)
    diff = repo.commit(from_commit_rev).diff(to_commit_rev)
    modified_files_diff = [change for change in diff if change.change_type != COMMIT_DIFF_DELETED_FILE_CHANGE_TYPE]
    for blob in modified_files_diff:
        diff_file_path = get_diff_file_path(blob)
        file_path = get_path_by_os(diff_file_path)

        try:
            file_content = get_file_content_from_commit(repo, from_commit_rev, diff_file_path)
        except GitCommandError:
            # Only deletions are filtered above, so a file added in the range reaches this point
            # even though it does not exist at from_commit_rev; skip the "from" side instead of
            # aborting the whole scan.
            logger.debug('File could not be read from the from-commit, skipping it, %s',
                         {'path': diff_file_path, 'from_commit_rev': from_commit_rev})
        else:
            from_commit_documents.append(Document(file_path, file_content))

        file_content = get_file_content_from_commit(repo, to_commit_rev, diff_file_path)
        to_commit_documents.append(Document(file_path, file_content))

    return from_commit_documents, to_commit_documents
459+
460+
370461
def _should_exclude_detection(detection, exclusions: Dict) -> bool:
371462
exclusions_by_value = exclusions.get(EXCLUSIONS_BY_VALUE_SECTION_NAME, [])
372463
if _is_detection_sha_configured_in_exclusions(detection, exclusions_by_value):
@@ -577,16 +668,15 @@ def _does_severity_match_severity_threshold(severity: str, severity_threshold: s
577668
return detection_severity_value >= Severity.try_get_value(severity_threshold)
578669

579670

580-
def _get_scan_result(cycode_client, scan_id: str, scan_details: ScanDetailsResponse) -> ZippedFileScanResult:
    """Build the scan result for a finished scan.

    When the scan details report no detections, an empty result is returned right away.
    Otherwise the function waits for the detections to be created, fetches them and groups
    them per file.

    :param cycode_client: client used to fetch the detections.
    :param scan_id: id of the scan whose result is built.
    :param scan_details: details of the scan (metadata and detections count are read).
    """
    result = ZippedFileScanResult(did_detect=False, detections_per_file=[], scan_id=scan_id,
                                  report_url=_try_get_report_url(scan_details.metadata))
    if scan_details.detections_count:
        wait_for_detections_creation(cycode_client, scan_id, scan_details.detections_count)
        fetched_detections = cycode_client.get_scan_detections(scan_id)
        result.detections_per_file = _map_detections_per_file(fetched_detections)
        result.did_detect = True
    return result
@@ -633,3 +723,18 @@ def _map_detections_per_file(detections) -> List[DetectionsPerFile]:
633723

634724
return [DetectionsPerFile(file_name=file_name, detections=file_detections)
635725
for file_name, file_detections in detections_per_files.items()]
726+
727+
728+
def parse_commit_range(commit_range: str, path: str) -> (str, str):
    """Resolve ``commit_range`` into its two boundary commit ids.

    The commits of the range are iterated in the order ``Repo.iter_commits`` yields them: the
    first one yielded becomes the "to" revision and the last one the "from" revision.

    :param commit_range: revision range handed to ``Repo.iter_commits``.
    :param path: path of the git repository.
    :return: tuple of (from revision, to revision); both are ``None`` when the range has no commits.
    """
    commit_ids = [commit.hexsha for commit in Repo(path).iter_commits(rev=commit_range)]
    if not commit_ids:
        return None, None
    return commit_ids[-1], commit_ids[0]
737+
738+
739+
def get_file_content_from_commit(repo: Repo, commit: str, file_path: str) -> str:
    """Return the output of ``git show <commit>:<file_path>`` executed through ``repo``."""
    object_spec = f'{commit}:{file_path}'
    return repo.git.show(object_spec)

cli/consts.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,24 @@
2727
'pipfile', 'pipfile.lock', 'requirements.txt', 'setup.py'
2828
]
2929

30+
PROJECT_FILES_BY_ECOSYSTEM_MAP = {
31+
"crates": ["Cargo.lock", "Cargo.toml"],
32+
"composer": ["composer.json", "composer.lock"],
33+
"go": ["go.sum", "go.mod", "Gopkg.lock"],
34+
"maven_pom": ["pom.xml"],
35+
"maven_gradle": ["build.gradle", "build.gradle.kts", "gradle.lockfile"],
36+
"npm": ["package.json", "package-lock.json", "yarn.lock", "npm-shrinkwrap.json", ".npmrc"],
37+
"nuget": ["packages.config", "project.assets.json", "packages.lock.json", "nuget.config"],
38+
"ruby_gems": ["Gemfile", "Gemfile.lock"],
39+
"sbt": ["build.sbt", "build.scala", "build.sbt.lock"],
40+
"pypi_poetry": ["pyproject.toml", "poetry.lock"],
41+
"pypi_pipenv": ["Pipfile", "Pipfile.lock"],
42+
"pypi_requirements": ["requirements.txt"],
43+
"pypi_setup": ["setup.py"]
44+
}
45+
46+
COMMIT_RANGE_SCAN_SUPPORTED_SCAN_TYPES = [SECRET_SCAN_TYPE, SCA_SCAN_TYPE]
47+
3048
DEFAULT_CYCODE_API_URL = "https://api.cycode.com"
3149
DEFAULT_CYCODE_APP_URL = "https://app.cycode.com"
3250

@@ -68,3 +86,5 @@
6886
# scan statuses
6987
SCAN_STATUS_COMPLETED = 'Completed'
7088
SCAN_STATUS_ERROR = 'Error'
89+
90+
COMMIT_DIFF_DELETED_FILE_CHANGE_TYPE = 'D'

cli/helpers/sca_code_scanner.py

Lines changed: 54 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,65 @@
11
import click
22
from typing import List, Optional
3+
from git import Repo, GitCommandError
34
from cli.utils.shell_executor import shell
45
from cli.models import Document
56
from cli.utils.path_utils import get_file_dir, join_paths
67
from cyclient import logger
8+
from cli.consts import *
79

810
BUILD_GRADLE_FILE_NAME = 'build.gradle'
911
BUILD_GRADLE_KTS_FILE_NAME = 'build.gradle.kts'
1012
BUILD_GRADLE_DEP_TREE_FILE_NAME = 'gradle-dependencies-generated.txt'
1113
BUILD_GRADLE_DEP_TREE_TIMEOUT = 180
1214

1315

14-
def run_pre_scan_actions(context: click.Context, documents_to_scan: List[Document], is_git_diff: bool = False):
16+
def perform_pre_commit_range_scan_actions(path: str, from_commit_documents: List[Document],
                                          from_commit_rev: str, to_commit_documents: List[Document],
                                          to_commit_rev: str) -> None:
    """Extend both document lists in place with ecosystem-related project files.

    The "from" list is processed first, then the "to" list, each against its own revision.
    """
    repo = Repo(path)
    revision_inputs = (
        (from_commit_documents, from_commit_rev),
        (to_commit_documents, to_commit_rev),
    )
    for commit_documents, commit_rev in revision_inputs:
        add_ecosystem_related_files_if_exists(commit_documents, repo, commit_rev)
22+
23+
24+
def add_ecosystem_related_files_if_exists(documents: List[Document], repo: Repo, commit_rev: str):
    """Extend ``documents`` in place with the sibling project files of every recognised document.

    For each document whose ecosystem can be resolved, the other project files of that ecosystem
    are looked up at ``commit_rev`` and collected. Documents with an unknown ecosystem are
    skipped with a debug log.

    :param documents: documents to inspect; the collected files are appended to this list.
    :param repo: repository the related files are read from.
    :param commit_rev: revision the related files are read at.
    """
    documents_to_add: List[Document] = []
    for doc in documents:
        ecosystem = get_project_file_ecosystem(doc)
        if ecosystem is None:
            logger.debug("failed to resolve project file ecosystem: %s", doc.path)
            continue
        # Accumulate across all documents (assigning here would keep only the last document's
        # files). The already-collected files are passed along as known documents so a file
        # shared by two documents of the same directory is not added twice.
        documents_to_add.extend(
            get_ecosystem_project_files_if_exists(documents + documents_to_add, commit_rev, doc, ecosystem, repo))

    documents.extend(documents_to_add)
34+
35+
36+
def get_ecosystem_project_files_if_exists(documents, commit_rev, doc, ecosystem, repo):
    """Return the project files of ``ecosystem`` that sit next to ``doc`` at ``commit_rev``.

    Candidates already present in ``documents`` are skipped, and so are candidates for which
    ``git show`` raises ``GitCommandError``.

    :return: list of new ``Document`` objects, one per project file that could be read.
    """
    found_documents: List[Document] = []
    for project_file_name in PROJECT_FILES_BY_ECOSYSTEM_MAP.get(ecosystem):
        candidate_path = join_paths(get_file_dir(doc.path), project_file_name)
        if is_project_file_exists_in_documents(documents, candidate_path):
            continue
        try:
            candidate_content = repo.git.show(f'{commit_rev}:{candidate_path}')
        except GitCommandError:
            # The candidate could not be read at this revision; nothing to add for it.
            continue
        found_documents.append(Document(candidate_path, candidate_content))
    return found_documents
47+
48+
49+
def is_project_file_exists_in_documents(documents: List[Document], file: str):
    """Return True when a (truthy) document in ``documents`` has ``file`` as its path."""
    for candidate in documents:
        if file == candidate.path and candidate:
            return True
    return False
51+
52+
53+
def get_project_file_ecosystem(document: Document):
    """Return the first ecosystem one of whose project file names ends ``document.path``.

    Ecosystems are checked in the iteration order of ``PROJECT_FILES_BY_ECOSYSTEM_MAP``.

    :return: the ecosystem key, or ``None`` when no project file name matches.
    """
    document_path = document.path
    for ecosystem, project_files in PROJECT_FILES_BY_ECOSYSTEM_MAP.items():
        # str.endswith accepts a tuple and is True when any of the suffixes matches.
        if document_path.endswith(tuple(project_files)):
            return ecosystem
    return None
59+
60+
61+
def add_dependencies_tree_document(context: click.Context, documents_to_scan: List[Document],
62+
is_git_diff: bool = False) -> None:
1563
is_monitor_action = context.obj.get('monitor')
1664
project_path = context.params.get('path')
1765
documents_to_add: List[Document] = []
@@ -23,10 +71,11 @@ def run_pre_scan_actions(context: click.Context, documents_to_scan: List[Documen
2371
logger.warning('Error occurred while trying to generate gradle dependencies tree. %s',
2472
{'filename': document.path})
2573
documents_to_add.append(
26-
Document(build_dep_tree_path(document.path), '', is_git_diff))
74+
Document(build_dep_tree_path(document.path, BUILD_GRADLE_DEP_TREE_FILE_NAME), '', is_git_diff))
2775
else:
2876
documents_to_add.append(
29-
Document(build_dep_tree_path(document.path), gradle_dependencies_tree, is_git_diff))
77+
Document(build_dep_tree_path(document.path, BUILD_GRADLE_DEP_TREE_FILE_NAME),
78+
gradle_dependencies_tree, is_git_diff))
3079

3180
documents_to_scan.extend(documents_to_add)
3281

@@ -47,8 +96,8 @@ def try_generate_dependencies_tree(filename: str) -> Optional[str]:
4796
return gradle_dependencies
4897

4998

50-
def build_dep_tree_path(path: str, generated_file_name: str) -> str:
    """Return the path of ``generated_file_name`` placed in the same directory as ``path``."""
    containing_dir = get_file_dir(path)
    return join_paths(containing_dir, generated_file_name)
52101

53102

54103
def is_gradle_project(document: Document) -> bool:

0 commit comments

Comments
 (0)