Skip to content

Commit d9ce12c

Browse files
CM-60184-Scans using presigned post url (#395)
1 parent 718521a commit d9ce12c

File tree

8 files changed

+228
-16
lines changed

8 files changed

+228
-16
lines changed

cycode/cli/apps/scan/code_scanner.py

Lines changed: 76 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,15 @@
2929
generate_unique_scan_id,
3030
is_cycodeignore_allowed_by_scan_config,
3131
set_issue_detected_by_scan_results,
32+
should_use_presigned_upload,
3233
)
3334
from cycode.cyclient.models import ZippedFileScanResult
3435
from cycode.logger import get_logger
3536

3637
if TYPE_CHECKING:
3738
from cycode.cli.files_collector.models.in_memory_zip import InMemoryZip
39+
from cycode.cli.printers.console_printer import ConsolePrinter
40+
from cycode.cli.utils.progress_bar import BaseProgressBar
3841
from cycode.cyclient.scan_client import ScanClient
3942

4043
start_scan_time = time.time()
@@ -106,7 +109,10 @@ def _should_use_sync_flow(command_scan_type: str, scan_type: str, sync_option: b
106109

107110

108111
def _get_scan_documents_thread_func(
109-
ctx: typer.Context, is_git_diff: bool, is_commit_range: bool, scan_parameters: dict
112+
ctx: typer.Context,
113+
is_git_diff: bool,
114+
is_commit_range: bool,
115+
scan_parameters: dict,
110116
) -> Callable[[list[Document]], tuple[str, CliError, LocalScanResult]]:
111117
cycode_client = ctx.obj['client']
112118
scan_type = ctx.obj['scan_type']
@@ -180,6 +186,36 @@ def _scan_batch_thread_func(batch: list[Document]) -> tuple[str, CliError, Local
180186
return _scan_batch_thread_func
181187

182188

189+
def _run_presigned_upload_scan(
    scan_batch_thread_func: Callable,
    scan_type: str,
    documents_to_scan: list[Document],
    progress_bar: 'BaseProgressBar',
    printer: 'ConsolePrinter',
) -> tuple[dict[str, 'CliError'], list['LocalScanResult']]:
    """Scan documents for a presigned-upload scan type, preferring one single ZIP.

    All documents are first zipped together purely as a size check. When the
    archive fits within the scan type's ZIP limit, the scan runs with batching
    disabled so everything goes out as one batch. Otherwise a warning is printed
    and the scan falls back to the regular batched flow.

    Args:
        scan_batch_thread_func: Callable that scans one batch of documents.
        scan_type: Scan type identifier.
        documents_to_scan: Every document that should be scanned.
        progress_bar: Progress bar forwarded to ``run_parallel_batched_scan``.
        printer: Printer used to emit the "too large" warning.

    Returns:
        Whatever ``run_parallel_batched_scan`` returns for the chosen mode.
    """
    try:
        # Preflight only: the resulting archive is discarded. ZipTooLargeError is
        # raised when it exceeds the scan type's limit.
        zip_documents(scan_type, documents_to_scan)
    except custom_exceptions.ZipTooLargeError:
        printer.print_warning(
            'The scan is too large to upload as a single file. This may result in corrupted scan results.'
        )
        skip_batching = False
    else:
        # It fits: skip batching and upload everything as one ZIP.
        skip_batching = True

    # The scan itself runs outside the ``try`` so that a ZipTooLargeError coming
    # from the scan can never trigger a second, batched scan of the same documents.
    return run_parallel_batched_scan(
        scan_batch_thread_func,
        scan_type,
        documents_to_scan,
        progress_bar=progress_bar,
        skip_batching=skip_batching,
    )
217+
218+
183219
def scan_documents(
184220
ctx: typer.Context,
185221
documents_to_scan: list[Document],
@@ -203,9 +239,15 @@ def scan_documents(
203239
return
204240

205241
scan_batch_thread_func = _get_scan_documents_thread_func(ctx, is_git_diff, is_commit_range, scan_parameters)
206-
errors, local_scan_results = run_parallel_batched_scan(
207-
scan_batch_thread_func, scan_type, documents_to_scan, progress_bar=progress_bar
208-
)
242+
243+
if should_use_presigned_upload(scan_type):
244+
errors, local_scan_results = _run_presigned_upload_scan(
245+
scan_batch_thread_func, scan_type, documents_to_scan, progress_bar, printer
246+
)
247+
else:
248+
errors, local_scan_results = run_parallel_batched_scan(
249+
scan_batch_thread_func, scan_type, documents_to_scan, progress_bar=progress_bar
250+
)
209251

210252
try_set_aggregation_report_url_if_needed(ctx, scan_parameters, ctx.obj['client'], scan_type)
211253

@@ -217,6 +259,31 @@ def scan_documents(
217259
print_local_scan_results(ctx, local_scan_results, errors)
218260

219261

262+
def _perform_scan_v4_async(
    cycode_client: 'ScanClient',
    zipped_documents: 'InMemoryZip',
    scan_type: str,
    scan_parameters: dict,
    is_git_diff: bool,
    is_commit_range: bool,
) -> ZippedFileScanResult:
    """Run an async scan by uploading the ZIP through a presigned POST link.

    Steps: request an upload link, upload the archive to it, trigger the scan
    by upload id, then poll until the scan results are available.

    Returns:
        The result of ``poll_scan_results`` for the triggered scan.
    """
    link = cycode_client.get_upload_link(scan_type)
    upload_id = link.upload_id
    logger.debug('Got upload link, %s', {'upload_id': upload_id})

    cycode_client.upload_to_presigned_post(link.url, link.presigned_post_fields, zipped_documents)
    logger.debug('Uploaded zip to presigned URL')

    scan_init = cycode_client.scan_repository_from_upload_id(
        scan_type, upload_id, scan_parameters, is_git_diff, is_commit_range
    )
    scan_id = scan_init.scan_id
    logger.debug(
        'Presigned upload scan request triggered, %s',
        {'scan_id': scan_id, 'upload_id': upload_id},
    )

    return poll_scan_results(cycode_client, scan_id, scan_type, scan_parameters)
285+
286+
220287
def _perform_scan_async(
221288
cycode_client: 'ScanClient',
222289
zipped_documents: 'InMemoryZip',
@@ -262,6 +329,11 @@ def _perform_scan(
262329
# it does not support commit range scans; should_use_sync_flow handles it
263330
return _perform_scan_sync(cycode_client, zipped_documents, scan_type, scan_parameters, is_git_diff)
264331

332+
if should_use_presigned_upload(scan_type):
333+
return _perform_scan_v4_async(
334+
cycode_client, zipped_documents, scan_type, scan_parameters, is_git_diff, is_commit_range
335+
)
336+
265337
return _perform_scan_async(cycode_client, zipped_documents, scan_type, scan_parameters, is_commit_range)
266338

267339

cycode/cli/apps/scan/commit_range_scanner.py

Lines changed: 51 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
generate_unique_scan_id,
4545
is_cycodeignore_allowed_by_scan_config,
4646
set_issue_detected_by_scan_results,
47+
should_use_presigned_upload,
4748
)
4849
from cycode.cyclient.models import ZippedFileScanResult
4950
from cycode.logger import get_logger
@@ -86,6 +87,38 @@ def _perform_commit_range_scan_async(
8687
return poll_scan_results(cycode_client, scan_async_result.scan_id, scan_type, scan_parameters, timeout)
8788

8889

90+
def _perform_commit_range_scan_v4_async(
    cycode_client: 'ScanClient',
    from_commit_zipped_documents: 'InMemoryZip',
    to_commit_zipped_documents: 'InMemoryZip',
    scan_type: str,
    scan_parameters: dict,
    timeout: Optional[int] = None,
) -> ZippedFileScanResult:
    """Run an async commit-range scan using two presigned uploads.

    The from-commit archive is uploaded first, then the to-commit archive, each
    through its own upload link. The scan is then triggered with both upload ids
    and polled until results are available.

    Returns:
        The result of ``poll_scan_results`` for the triggered scan.
    """
    # from-commit archive
    from_link = cycode_client.get_upload_link(scan_type)
    from_upload_id = from_link.upload_id
    logger.debug('Got from-commit upload link, %s', {'upload_id': from_upload_id})
    cycode_client.upload_to_presigned_post(
        from_link.url, from_link.presigned_post_fields, from_commit_zipped_documents
    )
    logger.debug('Uploaded from-commit zip')

    # to-commit archive
    to_link = cycode_client.get_upload_link(scan_type)
    to_upload_id = to_link.upload_id
    logger.debug('Got to-commit upload link, %s', {'upload_id': to_upload_id})
    cycode_client.upload_to_presigned_post(to_link.url, to_link.presigned_post_fields, to_commit_zipped_documents)
    logger.debug('Uploaded to-commit zip')

    scan_init = cycode_client.commit_range_scan_from_upload_ids(
        scan_type, from_upload_id, to_upload_id, scan_parameters
    )
    scan_id = scan_init.scan_id
    logger.debug('V4 commit range scan request triggered, %s', {'scan_id': scan_id})

    return poll_scan_results(cycode_client, scan_id, scan_type, scan_parameters, timeout)
120+
121+
89122
def _scan_commit_range_documents(
90123
ctx: typer.Context,
91124
from_documents_to_scan: list[Document],
@@ -118,14 +151,24 @@ def _scan_commit_range_documents(
118151
# for SAST it is files with diff between from_commit and to_commit
119152
to_commit_zipped_documents = zip_documents(scan_type, to_documents_to_scan)
120153

121-
scan_result = _perform_commit_range_scan_async(
122-
cycode_client,
123-
from_commit_zipped_documents,
124-
to_commit_zipped_documents,
125-
scan_type,
126-
scan_parameters,
127-
timeout,
128-
)
154+
if should_use_presigned_upload(scan_type):
155+
scan_result = _perform_commit_range_scan_v4_async(
156+
cycode_client,
157+
from_commit_zipped_documents,
158+
to_commit_zipped_documents,
159+
scan_type,
160+
scan_parameters,
161+
timeout,
162+
)
163+
else:
164+
scan_result = _perform_commit_range_scan_async(
165+
cycode_client,
166+
from_commit_zipped_documents,
167+
to_commit_zipped_documents,
168+
scan_type,
169+
scan_parameters,
170+
timeout,
171+
)
129172
enrich_scan_result_with_data_from_detection_rules(cycode_client, scan_result)
130173

131174
progress_bar.update(ScanProgressBarSection.SCAN)

cycode/cli/consts.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -192,15 +192,18 @@
192192
# 5MB in bytes (in decimal)
193193
FILE_MAX_SIZE_LIMIT_IN_BYTES = 5000000
194194

195+
PRESIGNED_LINK_UPLOADED_ZIP_MAX_SIZE_LIMIT_IN_BYTES = 5 * 1024 * 1024 * 1024 # 5 GB (S3 presigned POST limit)
196+
PRESIGNED_UPLOAD_SCAN_TYPES = {SAST_SCAN_TYPE}
197+
195198
DEFAULT_ZIP_MAX_SIZE_LIMIT_IN_BYTES = 20 * 1024 * 1024
196199
ZIP_MAX_SIZE_LIMIT_IN_BYTES = {
197200
SCA_SCAN_TYPE: 200 * 1024 * 1024,
198-
SAST_SCAN_TYPE: 50 * 1024 * 1024,
201+
SAST_SCAN_TYPE: PRESIGNED_LINK_UPLOADED_ZIP_MAX_SIZE_LIMIT_IN_BYTES,
199202
}
200203

201204
# scan in batches
202205
DEFAULT_SCAN_BATCH_MAX_SIZE_IN_BYTES = 9 * 1024 * 1024
203-
SCAN_BATCH_MAX_SIZE_IN_BYTES = {SAST_SCAN_TYPE: 50 * 1024 * 1024}
206+
SCAN_BATCH_MAX_SIZE_IN_BYTES = {SAST_SCAN_TYPE: PRESIGNED_LINK_UPLOADED_ZIP_MAX_SIZE_LIMIT_IN_BYTES}
204207
SCAN_BATCH_MAX_SIZE_IN_BYTES_ENV_VAR_NAME = 'SCAN_BATCH_MAX_SIZE_IN_BYTES'
205208

206209
DEFAULT_SCAN_BATCH_MAX_FILES_COUNT = 1000

cycode/cli/files_collector/zip_documents.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,11 @@ def _validate_zip_file_size(scan_type: str, zip_file_size: int) -> None:
1717
raise custom_exceptions.ZipTooLargeError(max_size_limit)
1818

1919

20-
def zip_documents(scan_type: str, documents: list[Document], zip_file: Optional[InMemoryZip] = None) -> InMemoryZip:
20+
def zip_documents(
21+
scan_type: str,
22+
documents: list[Document],
23+
zip_file: Optional[InMemoryZip] = None,
24+
) -> InMemoryZip:
2125
if zip_file is None:
2226
zip_file = InMemoryZip()
2327

cycode/cli/utils/scan_batch.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,13 @@ def run_parallel_batched_scan(
111111
scan_type: str,
112112
documents: list[Document],
113113
progress_bar: 'BaseProgressBar',
114+
skip_batching: bool = False,
114115
) -> tuple[dict[str, 'CliError'], list['LocalScanResult']]:
115116
# batching is disabled for SCA (requested by Mor) and whenever the caller sets skip_batching
116-
batches = [documents] if scan_type == consts.SCA_SCAN_TYPE else split_documents_into_batches(scan_type, documents)
117+
if scan_type == consts.SCA_SCAN_TYPE or skip_batching:
118+
batches = [documents]
119+
else:
120+
batches = split_documents_into_batches(scan_type, documents)
117121

118122
progress_bar.set_section_length(ScanProgressBarSection.SCAN, len(batches)) # * 3
119123
# TODO(MarshalX): we should multiply the count of batches in SCAN section because each batch has 3 steps:

cycode/cli/utils/scan_utils.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import typer
77

8+
from cycode.cli import consts
89
from cycode.cli.cli_types import SeverityOption
910

1011
if TYPE_CHECKING:
@@ -31,6 +32,10 @@ def is_cycodeignore_allowed_by_scan_config(ctx: typer.Context) -> bool:
3132
return scan_config.is_cycode_ignore_allowed if scan_config else True
3233

3334

35+
def should_use_presigned_upload(scan_type: str) -> bool:
    """Return True when ``scan_type`` is listed in ``consts.PRESIGNED_UPLOAD_SCAN_TYPES``."""
    presigned_scan_types = consts.PRESIGNED_UPLOAD_SCAN_TYPES
    return scan_type in presigned_scan_types
37+
38+
3439
def generate_unique_scan_id() -> UUID:
3540
if 'PYTEST_TEST_UNIQUE_ID' in os.environ:
3641
return UUID(os.environ['PYTEST_TEST_UNIQUE_ID'])

cycode/cyclient/models.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,26 @@ def build_dto(self, data: dict[str, Any], **_) -> 'ScanResult':
114114
return ScanResult(**data)
115115

116116

117+
@dataclass
class UploadLinkResponse:
    """Upload link data built by ``UploadLinkResponseSchema.build_dto``."""

    # Identifier of the upload; passed on when triggering a scan from the upload.
    upload_id: str
    # Target URL the archive is POSTed to.
    url: str
    # Form fields that are sent along with the file in the presigned POST.
    presigned_post_fields: dict[str, str]
122+
123+
124+
class UploadLinkResponseSchema(Schema):
    """Schema that loads an upload-link payload into an ``UploadLinkResponse``."""

    class Meta:
        # NOTE(review): presumably marshmallow's EXCLUDE, i.e. keys not declared
        # below are dropped instead of failing validation - confirm against imports.
        unknown = EXCLUDE

    upload_id = fields.String()
    url = fields.String()
    # String-to-string mapping of the form fields for the presigned POST.
    presigned_post_fields = fields.Dict(keys=fields.String(), values=fields.String())

    @post_load
    def build_dto(self, data: dict[str, Any], **_) -> 'UploadLinkResponse':
        """Build the ``UploadLinkResponse`` DTO from the loaded field values."""
        return UploadLinkResponse(**data)
135+
136+
117137
class ScanInitializationResponse(Schema):
118138
def __init__(self, scan_id: Optional[str] = None, err: Optional[str] = None) -> None:
119139
super().__init__()

cycode/cyclient/scan_client.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from typing import TYPE_CHECKING, Optional, Union
44
from uuid import UUID
55

6+
import requests
67
from requests import Response
78

89
from cycode.cli import consts
@@ -25,6 +26,7 @@ def __init__(
2526
self.scan_config = scan_config
2627

2728
self._SCAN_SERVICE_CLI_CONTROLLER_PATH = 'api/v1/cli-scan'
29+
self._SCAN_SERVICE_V4_CLI_CONTROLLER_PATH = 'api/v4/scans/cli'
2830
self._DETECTIONS_SERVICE_CLI_CONTROLLER_PATH = 'api/v1/detections/cli'
2931
self._POLICIES_SERVICE_CONTROLLER_PATH_V3 = 'api/v3/policies'
3032

@@ -56,6 +58,10 @@ def get_scan_aggregation_report_url(self, aggregation_id: str, scan_type: str) -
5658
)
5759
return models.ScanReportUrlResponseSchema().build_dto(response.json())
5860

61+
def get_scan_service_v4_url_path(self, scan_type: str) -> str:
    """Return ``<service name>/<v4 CLI controller path>`` for ``scan_type``."""
    service_name = self.scan_config.get_service_name(scan_type)
    controller_path = self._SCAN_SERVICE_V4_CLI_CONTROLLER_PATH
    return '/'.join((service_name, controller_path))
64+
5965
def get_zipped_file_scan_async_url_path(self, scan_type: str, should_use_sync_flow: bool = False) -> str:
6066
async_scan_type = self.scan_config.get_async_scan_type(scan_type)
6167
async_entity_type = self.scan_config.get_async_entity_type(scan_type)
@@ -123,6 +129,40 @@ def zipped_file_scan_async(
123129
)
124130
return models.ScanInitializationResponseSchema().load(response.json())
125131

132+
def get_upload_link(self, scan_type: str) -> models.UploadLinkResponse:
    """Fetch an upload link for ``scan_type`` from the v4 scan service.

    Issues a GET to ``<v4 path>/<async scan type>/upload-link`` and loads the
    JSON response through ``UploadLinkResponseSchema``.
    """
    async_scan_type = self.scan_config.get_async_scan_type(scan_type)
    controller_path = self.get_scan_service_v4_url_path(scan_type)
    response = self.scan_cycode_client.get(
        url_path=f'{controller_path}/{async_scan_type}/upload-link',
        hide_response_content_log=self._hide_response_log,
    )
    schema = models.UploadLinkResponseSchema()
    return schema.load(response.json())
137+
138+
def upload_to_presigned_post(self, url: str, fields: dict[str, str], zip_file: 'InMemoryZip') -> None:
    """POST the ZIP to a presigned URL as multipart form data.

    Every entry of ``fields`` is sent as a form part without a filename, and the
    archive bytes are added last under the ``file`` key.

    Raises:
        requests.HTTPError: If the response status is an error
            (via ``Response.raise_for_status``).
    """
    form_parts = {field_name: (None, field_value) for field_name, field_value in fields.items()}
    form_parts['file'] = (None, zip_file.read())

    # The request goes straight to AWS S3, so plain ``requests`` is used instead of
    # the Cycode client; only the Cycode client's timeout setting is reused.
    upload_response = requests.post(
        url,
        files=form_parts,
        timeout=self.scan_cycode_client.timeout,
    )
    upload_response.raise_for_status()
144+
145+
def scan_repository_from_upload_id(
    self,
    scan_type: str,
    upload_id: str,
    scan_parameters: dict,
    is_git_diff: bool = False,
    is_commit_range: bool = False,
) -> models.ScanInitializationResponse:
    """Trigger a repository scan for an archive that was already uploaded.

    POSTs to ``<v4 path>/<async scan type>/repository`` with the upload id, the
    two flags and the scan parameters serialized as a JSON string. The JSON
    response is loaded through ``ScanInitializationResponseSchema``.
    """
    async_scan_type = self.scan_config.get_async_scan_type(scan_type)
    controller_path = self.get_scan_service_v4_url_path(scan_type)

    request_body = {
        'upload_id': upload_id,
        'is_git_diff': is_git_diff,
        'is_commit_range': is_commit_range,
        'scan_parameters': json.dumps(scan_parameters),
    }
    response = self.scan_cycode_client.post(
        url_path=f'{controller_path}/{async_scan_type}/repository',
        body=request_body,
    )

    schema = models.ScanInitializationResponseSchema()
    return schema.load(response.json())
165+
126166
def commit_range_scan_async(
127167
self,
128168
from_commit_zip_file: InMemoryZip,
@@ -161,6 +201,27 @@ def commit_range_scan_async(
161201
)
162202
return models.ScanInitializationResponseSchema().load(response.json())
163203

204+
def commit_range_scan_from_upload_ids(
    self,
    scan_type: str,
    from_commit_upload_id: str,
    to_commit_upload_id: str,
    scan_parameters: dict,
    is_git_diff: bool = False,
) -> models.ScanInitializationResponse:
    """Trigger a commit-range scan for two archives that were already uploaded.

    POSTs to ``<v4 path>/<async scan type>/commit-range`` with both upload ids,
    the git-diff flag and the scan parameters serialized as a JSON string. The
    JSON response is loaded through ``ScanInitializationResponseSchema``.
    """
    async_scan_type = self.scan_config.get_async_scan_type(scan_type)
    controller_path = self.get_scan_service_v4_url_path(scan_type)

    request_body = {
        'from_commit_upload_id': from_commit_upload_id,
        'to_commit_upload_id': to_commit_upload_id,
        'is_git_diff': is_git_diff,
        'scan_parameters': json.dumps(scan_parameters),
    }
    response = self.scan_cycode_client.post(
        url_path=f'{controller_path}/{async_scan_type}/commit-range',
        body=request_body,
    )

    schema = models.ScanInitializationResponseSchema()
    return schema.load(response.json())
224+
164225
def get_scan_details_path(self, scan_type: str, scan_id: str) -> str:
    """Return the scan service URL path for ``scan_type`` with ``scan_id`` appended."""
    service_url_path = self.get_scan_service_url_path(scan_type)
    return f'{service_url_path}/{scan_id}'
166227

0 commit comments

Comments
 (0)