Skip to content

Commit ccb0750

Browse files
committed
use the stream option to make the large scan data request
Signed-off-by: tdruez <tdruez@aboutcode.org>
1 parent 92da66d commit ccb0750

3 files changed

Lines changed: 38 additions & 11 deletions

File tree

component_catalog/api.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,10 @@
1111
from django.db import transaction
1212
from django.forms.widgets import HiddenInput
1313
from django.http import FileResponse
14+
from django.http.response import StreamingHttpResponse
1415

1516
import django_filters
17+
import requests
1618
from packageurl.contrib import url2purl
1719
from packageurl.contrib.django.filters import PackageURLFilter
1820
from rest_framework import serializers
@@ -961,19 +963,29 @@ def scan_info(self, request, uuid):
961963

962964
@action(detail=True, name="Scan results")
963965
def scan_results(self, request, uuid):
964-
"""Return the scan results from ScanCode.io."""
966+
"""
967+
Stream scan results directly from ScanCode.io back to the client.
968+
969+
The response body is not loaded in memory but proxied chunk by chunk,
970+
making it suitable for large scan result payloads.
971+
"""
965972
package = self.get_object()
966973
dataspace = request.user.dataspace
967974
scancodeio = ScanCodeIO(dataspace)
968975
project_info = self._get_scancodeio_project_info(scancodeio, package)
969976

970977
project_uuid = project_info.get("uuid")
971978
scan_results_url = scancodeio.get_scan_action_url(project_uuid, "results")
972-
scan_results = scancodeio.fetch_scan_data(scan_results_url)
973-
if not scan_results:
979+
980+
try:
981+
scan_response = scancodeio.stream_scan_data(scan_results_url)
982+
except requests.RequestException:
974983
raise ScanFetchError()
975984

976-
return Response(scan_results)
985+
return StreamingHttpResponse(
986+
scan_response.iter_content(chunk_size=8192),
987+
content_type=scan_response.headers.get("Content-Type", "application/json"),
988+
)
977989

978990
@action(detail=True, name="Scan summary")
979991
def scan_summary(self, request, uuid):

component_catalog/tests/test_api.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from django.test import override_settings
1818
from django.urls import reverse
1919

20+
import requests
2021
from rest_framework import status
2122
from rest_framework.exceptions import ErrorDetail
2223
from rest_framework.test import APIClient
@@ -1523,26 +1524,32 @@ def test_api_package_viewset_scan_info_action(self, mock_is_available, mock_get_
15231524
self.assertEqual(project_info, response.data)
15241525

15251526
@mock.patch("dejacode_toolkit.scancodeio.ScanCodeIO.get_project_info")
1526-
@mock.patch("dejacode_toolkit.scancodeio.ScanCodeIO.fetch_scan_data")
1527+
@mock.patch("dejacode_toolkit.scancodeio.ScanCodeIO.stream_scan_data")
15271528
@mock.patch("dejacode_toolkit.scancodeio.ScanCodeIO.is_available")
15281529
def test_api_package_viewset_scan_results_action(
1529-
self, mock_is_available, mock_fetch_scan_data, mock_get_project_info
1530+
self, mock_is_available, mock_stream_scan_data, mock_get_project_info
15301531
):
15311532
self.client.login(username=self.base_user.username, password="secret")
15321533
action_url = reverse("api_v2:package-scan-results", args=[self.package1.uuid])
15331534
mock_is_available.return_value = True
15341535
mock_get_project_info.return_value = {"uuid": "abcdef"}
15351536

1536-
mock_fetch_scan_data.return_value = None
1537+
mock_stream_scan_data.side_effect = requests.RequestException
15371538
response = self.client.get(action_url)
15381539
self.assertEqual(400, response.status_code)
1539-
error = {'detail': ErrorDetail(string='Could not fetch scan data', code='error')}
1540+
error = {"detail": ErrorDetail(string="Could not fetch scan data", code="error")}
15401541
self.assertEqual(error, response.data)
15411542

1542-
mock_fetch_scan_data.return_value = {"results": ""}
1543+
mock_response = mock.Mock()
1544+
mock_response.iter_content.return_value = iter([b'{"results": ""}'])
1545+
mock_response.headers = {"Content-Type": "application/json"}
1546+
mock_stream_scan_data.side_effect = None
1547+
mock_stream_scan_data.return_value = mock_response
1548+
15431549
response = self.client.get(action_url)
15441550
self.assertEqual(200, response.status_code)
1545-
self.assertEqual({"results": ""}, response.data)
1551+
self.assertEqual(b'{"results": ""}', b"".join(response.streaming_content))
1552+
self.assertEqual("application/json", response.headers["Content-Type"])
15461553

15471554
@mock.patch("dejacode_toolkit.scancodeio.ScanCodeIO.get_project_info")
15481555
@mock.patch("dejacode_toolkit.scancodeio.ScanCodeIO.fetch_scan_data")

dejacode_toolkit/scancodeio.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,8 +146,16 @@ def fetch_scan_data(self, data_url):
146146
return self.request_get(url=data_url)
147147

148148
def stream_scan_data(self, data_url):
149+
"""
150+
Stream scan data from the given URL.
151+
152+
With stream=True, only headers are fetched initially, so raise_for_status()
153+
can fail fast on errors before any body content is downloaded.
154+
"""
149155
logger.debug(f"{self.label}: stream scan data data_url={data_url}")
150-
return self.session.get(url=data_url, stream=True)
156+
response = self.session.get(url=data_url, stream=True)
157+
response.raise_for_status()
158+
return response
151159

152160
def delete_scan(self, detail_url):
153161
logger.debug(f"{self.label}: delete scan detail_url={detail_url}")

0 commit comments

Comments
 (0)