Skip to content

Commit fd8f13e

Browse files
authored
CM-17604 - Support SCA pre-commit hook (#51)
* Support SCA pre-commit hook * Avoid failures when a file does not exist in a specific commit * Avoid calling BE if there are no documents to scan * Avoid failures when a file does not exist in the file system * Refactoring and typing * Use --scan-type instead of -t * Refactoring
1 parent 1698a74 commit fd8f13e

5 files changed

Lines changed: 111 additions & 37 deletions

File tree

.pre-commit-hooks.yaml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,9 @@
22
name: Cycode pre commit defender
33
language: python
44
entry: cycode
5-
args: ['scan', 'pre_commit']
5+
args: [ 'scan', 'pre_commit' ]
6+
- id: cycode-sca
7+
name: Cycode SCA pre commit defender
8+
language: python
9+
entry: cycode
10+
args: [ 'scan', '--scan_type', 'sca', 'pre_commit' ]

cli/code_scanner.py

Lines changed: 62 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,15 @@
55
from platform import platform
66
from uuid import uuid4, UUID
77
from typing import Optional
8-
from git import Repo, NULL_TREE, InvalidGitRepositoryError
8+
from git import Repo, NULL_TREE, InvalidGitRepositoryError, GitCommandError
99
from sys import getsizeof
1010
from cli.printers import ResultsPrinter
1111
from cli.models import Document, DocumentDetections, Severity
1212
from cli.ci_integrations import get_commit_range
1313
from cli.consts import *
1414
from cli.config import configuration_manager
15-
from cli.utils.path_utils import is_sub_path, is_binary_file, get_file_size, get_relevant_files_in_path, get_path_by_os
15+
from cli.utils.path_utils import is_sub_path, is_binary_file, get_file_size, get_relevant_files_in_path, \
16+
get_path_by_os, get_file_content
1617
from cli.utils.string_utils import get_content_size, is_binary_content
1718
from cli.user_settings.config_file_manager import ConfigFileManager
1819
from cli.zip_file import InMemoryZip
@@ -132,19 +133,31 @@ def scan_path(context: click.Context, path):
132133
@click.pass_context
133134
def pre_commit_scan(context: click.Context, ignored_args: List[str]):
134135
""" Use this command to scan the content that was not committed yet """
136+
scan_type = context.obj['scan_type']
137+
if scan_type == SCA_SCAN_TYPE:
138+
return scan_sca_pre_commit(context)
139+
135140
diff_files = Repo(os.getcwd()).index.diff("HEAD", create_patch=True, R=True)
136141
documents_to_scan = [Document(get_path_by_os(get_diff_file_path(file)), get_diff_file_content(file))
137142
for file in diff_files]
138143
documents_to_scan = exclude_irrelevant_documents_to_scan(context, documents_to_scan)
139144
return scan_documents(context, documents_to_scan, is_git_diff=True)
140145

141146

147+
def scan_sca_pre_commit(context):
    """Run the SCA flavour of the pre-commit scan.

    The HEAD-side and pre-committed-side documents returned by
    ``get_pre_commit_modified_documents`` are each filtered through
    ``exclude_irrelevant_documents_to_scan``, passed to the SCA pre-hook
    actions, and finally scanned as a commit range.

    :param context: click context forwarded to the filtering and scanning helpers
    :return: whatever ``scan_commit_range_documents`` returns
    """
    # Both sides go through the same filter, HEAD side first.
    head_docs, pending_docs = (
        exclude_irrelevant_documents_to_scan(context, docs)
        for docs in get_pre_commit_modified_documents()
    )
    sca_code_scanner.perform_pre_hook_range_scan_actions(head_docs, pending_docs)
    return scan_commit_range_documents(context, head_docs, pending_docs)
153+
154+
142155
def scan_sca_commit_range(context: click.Context, path: str, commit_range: str):
    """Run an SCA scan over the documents modified inside ``commit_range``.

    :param context: click context forwarded to the filtering and scanning helpers
    :param path: repository path used to resolve the range and read the commits
    :param commit_range: commit range expression parsed by ``parse_commit_range``
    :return: whatever ``scan_commit_range_documents`` returns
    """
    from_rev, to_rev = parse_commit_range(commit_range, path)
    # Keep the filtered lists: the filter returns its result instead of editing in place.
    from_docs, to_docs = (
        exclude_irrelevant_documents_to_scan(context, docs)
        for docs in get_commit_range_modified_documents(path, from_rev, to_rev)
    )
    sca_code_scanner.perform_pre_commit_range_scan_actions(path, from_docs, from_rev, to_docs, to_rev)
    return scan_commit_range_documents(context, from_docs, to_docs)
@@ -210,11 +223,14 @@ def scan_commit_range_documents(context: click.Context, from_documents_to_scan:
210223
to_commit_zipped_documents = InMemoryZip()
211224

212225
try:
213-
from_commit_zipped_documents = zip_documents_to_scan(scan_type, from_commit_zipped_documents,
214-
from_documents_to_scan)
215-
to_commit_zipped_documents = zip_documents_to_scan(scan_type, to_commit_zipped_documents, to_documents_to_scan)
216-
scan_result = perform_commit_range_scan_async(cycode_client, from_commit_zipped_documents,
217-
to_commit_zipped_documents, scan_type, scan_parameters)
226+
scan_result = init_default_scan_result(str(scan_id))
227+
if should_scan_documents(from_documents_to_scan, to_documents_to_scan):
228+
from_commit_zipped_documents = zip_documents_to_scan(scan_type, from_commit_zipped_documents,
229+
from_documents_to_scan)
230+
to_commit_zipped_documents = zip_documents_to_scan(scan_type, to_commit_zipped_documents,
231+
to_documents_to_scan)
232+
scan_result = perform_commit_range_scan_async(cycode_client, from_commit_zipped_documents,
233+
to_commit_zipped_documents, scan_type, scan_parameters)
218234
all_detections_count, output_detections_count = \
219235
handle_scan_result(context, scan_result, scan_command_type, scan_type, severity_threshold,
220236
to_documents_to_scan)
@@ -234,6 +250,10 @@ def scan_commit_range_documents(context: click.Context, from_documents_to_scan:
234250
error_message)
235251

236252

253+
def should_scan_documents(from_documents_to_scan: List[Document], to_documents_to_scan: List[Document]) -> bool:
    """Tell whether at least one of the two document lists has something to scan.

    :param from_documents_to_scan: documents of the "from" side
    :param to_documents_to_scan: documents of the "to" side
    :return: True when either list is non-empty, False when both are empty
    """
    return any(len(docs) > 0 for docs in (from_documents_to_scan, to_documents_to_scan))
255+
256+
237257
def handle_scan_result(context, scan_result, scan_command_type, scan_type, severity_threshold, to_documents_to_scan):
238258
document_detections_list = enrich_scan_result(scan_result, to_documents_to_scan)
239259
relevant_document_detections_list = exclude_irrelevant_scan_results(document_detections_list, scan_type,
@@ -438,6 +458,26 @@ def exclude_detections_by_exclusions_configuration(scan_type: str, detections) -
438458
return [detection for detection in detections if not _should_exclude_detection(detection, exclusions)]
439459

440460

461+
def get_pre_commit_modified_documents() -> (List[Document], List[Document]):
    """Collect both sides of every file in the index-vs-HEAD diff of the current repository.

    For each diffed file the HEAD side is read from the ``HEAD`` commit and the
    pre-committed side is read from the file system. A side whose content
    cannot be obtained is left out of its list.

    :return: ``(git_head_documents, pre_committed_documents)``
    """
    repo = Repo(os.getcwd())
    diff_files = repo.index.diff(GIT_HEAD_COMMIT_REV, create_patch=True, R=True)
    git_head_documents = []
    pre_committed_documents = []
    for file in diff_files:
        diff_file_path = get_diff_file_path(file)
        file_path = get_path_by_os(diff_file_path)

        # None means the path could not be read from HEAD (e.g. a newly added file).
        head_content = sca_code_scanner.get_file_content_from_commit(repo, GIT_HEAD_COMMIT_REV, diff_file_path)
        if head_content is not None:
            git_head_documents.append(Document(file_path, head_content))

        if os.path.exists(file_path):
            # The file can vanish between the existence check and the read, in which
            # case get_file_content returns None; never build a Document without content.
            current_content = get_file_content(file_path)
            if current_content is not None:
                pre_committed_documents.append(Document(file_path, current_content))

    return git_head_documents, pre_committed_documents
479+
480+
441481
def get_commit_range_modified_documents(path: str, from_commit_rev: str, to_commit_rev: str) -> (
442482
List[Document], List[Document]):
443483
from_commit_documents = []
@@ -449,11 +489,13 @@ def get_commit_range_modified_documents(path: str, from_commit_rev: str, to_comm
449489
diff_file_path = get_diff_file_path(blob)
450490
file_path = get_path_by_os(diff_file_path)
451491

452-
file_content = get_file_content_from_commit(repo, from_commit_rev, diff_file_path)
453-
from_commit_documents.append(Document(file_path, file_content))
492+
file_content = sca_code_scanner.get_file_content_from_commit(repo, from_commit_rev, diff_file_path)
493+
if file_content is not None:
494+
from_commit_documents.append(Document(file_path, file_content))
454495

455-
file_content = get_file_content_from_commit(repo, to_commit_rev, diff_file_path)
456-
to_commit_documents.append(Document(file_path, file_content))
496+
file_content = sca_code_scanner.get_file_content_from_commit(repo, to_commit_rev, diff_file_path)
497+
if file_content is not None:
498+
to_commit_documents.append(Document(file_path, file_content))
457499

458500
return from_commit_documents, to_commit_documents
459501

@@ -669,9 +711,7 @@ def _does_severity_match_severity_threshold(severity: str, severity_threshold: s
669711

670712

671713
def _get_scan_result(cycode_client, scan_id: str, scan_details: ScanDetailsResponse) -> ZippedFileScanResult:
672-
scan_result = ZippedFileScanResult(did_detect=False, detections_per_file=[],
673-
scan_id=scan_id,
674-
report_url=_try_get_report_url(scan_details.metadata))
714+
scan_result = init_default_scan_result(scan_id, scan_details.metadata)
675715
if not scan_details.detections_count:
676716
return scan_result
677717

@@ -682,6 +722,12 @@ def _get_scan_result(cycode_client, scan_id: str, scan_details: ScanDetailsRespo
682722
return scan_result
683723

684724

725+
def init_default_scan_result(scan_id: str, scan_metadata: str = None) -> ZippedFileScanResult:
    """Build a scan result that carries no detections.

    :param scan_id: identifier stored on the result
    :param scan_metadata: optional metadata handed to ``_try_get_report_url``
    :return: a ``ZippedFileScanResult`` with ``did_detect=False`` and an empty detections list
    """
    report_url = _try_get_report_url(scan_metadata)
    return ZippedFileScanResult(scan_id=scan_id, report_url=report_url,
                                did_detect=False, detections_per_file=[])
729+
730+
685731
def _try_get_report_url(metadata: str) -> Optional[str]:
686732
if metadata is None:
687733
return None
@@ -734,7 +780,3 @@ def parse_commit_range(commit_range: str, path: str) -> (str, str):
734780
from_commit_rev = commit.hexsha
735781

736782
return from_commit_rev, to_commit_rev
737-
738-
739-
def get_file_content_from_commit(repo: Repo, commit: str, file_path: str) -> str:
740-
return repo.git.show(f'{commit}:{file_path}')

cli/consts.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,6 @@
8787
SCAN_STATUS_COMPLETED = 'Completed'
8888
SCAN_STATUS_ERROR = 'Error'
8989

90+
# git consts
9091
COMMIT_DIFF_DELETED_FILE_CHANGE_TYPE = 'D'
92+
GIT_HEAD_COMMIT_REV = 'HEAD'

cli/helpers/sca_code_scanner.py

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1+
import os
12
import click
23
from typing import List, Optional
34
from git import Repo, GitCommandError
45
from cli.utils.shell_executor import shell
56
from cli.models import Document
6-
from cli.utils.path_utils import get_file_dir, join_paths
7+
from cli.utils.path_utils import get_file_dir, join_paths, get_file_content
78
from cyclient import logger
89
from cli.consts import *
910

@@ -21,36 +22,44 @@ def perform_pre_commit_range_scan_actions(path: str, from_commit_documents: List
2122
add_ecosystem_related_files_if_exists(to_commit_documents, repo, to_commit_rev)
2223

2324

24-
def add_ecosystem_related_files_if_exists(documents: List[Document], repo: Repo, commit_rev: str):
25-
documents_to_add: List[Document] = []
25+
def perform_pre_hook_range_scan_actions(git_head_documents: List[Document],
                                        pre_committed_documents: List[Document]) -> None:
    """Add ecosystem-related project files to both sides of a pre-commit scan.

    :param git_head_documents: HEAD-side documents; related files are looked up in the ``HEAD`` commit
    :param pre_committed_documents: pre-committed documents; related files are looked up without a repo,
        i.e. through the file system
    """
    add_ecosystem_related_files_if_exists(git_head_documents, repo=Repo(os.getcwd()),
                                          commit_rev=GIT_HEAD_COMMIT_REV)
    add_ecosystem_related_files_if_exists(pre_committed_documents)
30+
31+
32+
def add_ecosystem_related_files_if_exists(documents: List[Document], repo: Optional[Repo] = None,
                                          commit_rev: Optional[str] = None):
    """Extend ``documents`` in place with the related project files of each document's ecosystem.

    :param documents: list to inspect and to extend
    :param repo: optional repository forwarded to the related-files lookup
    :param commit_rev: optional commit revision forwarded to the related-files lookup
    """
    # NOTE: the list grows while it is being iterated, so documents added here are visited as well.
    for doc in documents:
        ecosystem = get_project_file_ecosystem(doc)
        if ecosystem is not None:
            documents.extend(
                get_doc_ecosystem_related_project_files(doc, documents, ecosystem, commit_rev, repo))
        else:
            logger.debug("failed to resolve project file ecosystem: %s", doc.path)
3241

33-
documents.extend(documents_to_add)
3442

35-
36-
def get_ecosystem_project_files_if_exists(documents, commit_rev, doc, ecosystem, repo):
43+
def get_doc_ecosystem_related_project_files(doc: Document, documents: List[Document], ecosystem: str,
                                            commit_rev: Optional[str], repo: Optional[Repo]) -> List[Document]:
    """Load the ecosystem project files that sit next to ``doc`` and are not yet in ``documents``.

    :param doc: document whose directory is searched
    :param documents: documents already collected; files present here are not loaded again
    :param ecosystem: key into ``PROJECT_FILES_BY_ECOSYSTEM_MAP``
    :param commit_rev: revision to read from when ``repo`` is given
    :param repo: when truthy, content is read from ``commit_rev`` in this repo; otherwise from the file system
    :return: new documents for every related file whose content could be read
    """
    related_documents: List[Document] = []
    for project_file_name in PROJECT_FILES_BY_ECOSYSTEM_MAP.get(ecosystem):
        candidate_path = join_paths(get_file_dir(doc.path), project_file_name)
        if is_project_file_exists_in_documents(documents, candidate_path):
            continue

        if repo:
            content = get_file_content_from_commit(repo, commit_rev, candidate_path)
        else:
            content = get_file_content(candidate_path)

        # A None content means the file is absent from the chosen source.
        if content is None:
            continue
        related_documents.append(Document(candidate_path, content))

    return related_documents
4756

4857

49-
def is_project_file_exists_in_documents(documents: List[Document], file: str):
58+
def is_project_file_exists_in_documents(documents: List[Document], file: str) -> bool:
    """Report whether some document in ``documents`` has a path equal to ``file``.

    :param documents: documents to search
    :param file: path to look for
    :return: True when at least one document's ``path`` equals ``file``, otherwise False
    """
    # Feed any() the comparison result; yielding the document itself would make the
    # answer depend on that object's truthiness rather than on the path match.
    return any(file == doc.path for doc in documents)
5160

5261

53-
def get_project_file_ecosystem(document: Document):
62+
def get_project_file_ecosystem(document: Document) -> Optional[str]:
5463
for ecosystem, project_files in PROJECT_FILES_BY_ECOSYSTEM_MAP.items():
5564
for project_file in project_files:
5665
if document.path.endswith(project_file):
@@ -80,7 +89,7 @@ def add_dependencies_tree_document(context: click.Context, documents_to_scan: Li
8089
documents_to_scan.extend(documents_to_add)
8190

8291

83-
def get_manifest_file_path(document, is_monitor_action, project_path):
92+
def get_manifest_file_path(document: Document, is_monitor_action: bool, project_path: str) -> str:
    """Return the manifest path for ``document``.

    :param document: document whose ``path`` is used
    :param is_monitor_action: when truthy, the path is joined onto ``project_path``
    :param project_path: base path used only for monitor actions
    :return: ``document.path``, prefixed with ``project_path`` for monitor actions
    """
    if not is_monitor_action:
        return document.path
    return join_paths(project_path, document.path)
8594

8695

@@ -102,3 +111,10 @@ def build_dep_tree_path(path: str, generated_file_name: str) -> str:
102111

103112
def is_gradle_project(document: Document) -> bool:
    """Tell whether the document's path ends with one of the Gradle build file names.

    :param document: document whose ``path`` is checked
    :return: True when the path ends with ``BUILD_GRADLE_FILE_NAME`` or ``BUILD_GRADLE_KTS_FILE_NAME``
    """
    gradle_build_file_names = (BUILD_GRADLE_FILE_NAME, BUILD_GRADLE_KTS_FILE_NAME)
    return document.path.endswith(gradle_build_file_names)
114+
115+
116+
def get_file_content_from_commit(repo: Repo, commit: str, file_path: str) -> Optional[str]:
    """Read ``file_path`` as it is stored in ``commit`` by running ``git show``.

    :param repo: repository whose git command interface is used
    :param commit: commit revision to read from
    :param file_path: path of the file inside the commit
    :return: the output of ``git show <commit>:<file_path>``, or None when git raises ``GitCommandError``
    """
    object_spec = f'{commit}:{file_path}'
    try:
        content = repo.git.show(object_spec)
    except GitCommandError:
        return None
    return content

cli/utils/path_utils.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Iterable, List
1+
from typing import Iterable, List, Optional
22
import pathspec
33
import os
44
from pathlib import Path
@@ -59,3 +59,12 @@ def get_file_dir(path: str) -> str:
5959

6060
def join_paths(path: str, filename: str) -> str:
    """Combine ``path`` and ``filename`` with ``os.path.join``.

    :param path: leading path component
    :param filename: component appended to ``path``
    :return: the joined path
    """
    joined = os.path.join(path, filename)
    return joined
62+
63+
64+
def get_file_content(file_path: str) -> Optional[str]:
    """Read a file as UTF-8 text.

    :param file_path: path of the file to read
    :return: the whole file content, or None when the file does not exist
    """
    try:
        source = open(file_path, "r", encoding="utf-8")
    except FileNotFoundError:
        return None
    with source:
        return source.read()

0 commit comments

Comments
 (0)