From 4080646588f75e561abae92fa9bb8ef5f0e14667 Mon Sep 17 00:00:00 2001 From: tdruez Date: Wed, 11 Jun 2025 10:34:36 +0400 Subject: [PATCH 1/5] Upgrade packageurl-python to latest version #1383 Signed-off-by: tdruez --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index f061c6504b..0b91c3ea44 100644 --- a/setup.cfg +++ b/setup.cfg @@ -76,7 +76,7 @@ install_requires = extractcode[full]==31.0.0 commoncode==32.2.1 Beautifulsoup4[chardet]==4.13.3 - packageurl-python==0.16.0 + packageurl-python==0.17.1 # FetchCode fetchcode-container==1.2.3.210512; sys_platform == "linux" # Inspectors From b1d0f956f1f5d34927493c990205bba66653bac2 Mon Sep 17 00:00:00 2001 From: tdruez Date: Wed, 11 Jun 2025 10:50:01 +0400 Subject: [PATCH 2/5] Add make_mock_response to simplify setup in unit test #1383 Signed-off-by: tdruez --- scanpipe/tests/__init__.py | 10 ++++++++++ scanpipe/tests/pipes/test_fetch.py | 17 +++++------------ scanpipe/tests/test_commands.py | 29 +++++------------------------ scanpipe/tests/test_models.py | 5 ++--- scanpipe/tests/test_pipelines.py | 5 ++--- 5 files changed, 24 insertions(+), 42 deletions(-) diff --git a/scanpipe/tests/__init__.py b/scanpipe/tests/__init__.py index 502412750e..d40d28a43c 100644 --- a/scanpipe/tests/__init__.py +++ b/scanpipe/tests/__init__.py @@ -101,6 +101,16 @@ def make_message(project, **data): ) +def make_mock_response(url, content=b"\x00", status_code=200, headers=None): + """Return a mock HTTP response object for testing purposes.""" + response = mock.Mock() + response.url = url + response.content = content + response.status_code = status_code + response.headers = headers or {} + return response + + resource_data1 = { "path": "notice.NOTICE", "type": "file", diff --git a/scanpipe/tests/pipes/test_fetch.py b/scanpipe/tests/pipes/test_fetch.py index 15e32a3c20..3776ee51c7 100644 --- a/scanpipe/tests/pipes/test_fetch.py +++ b/scanpipe/tests/pipes/test_fetch.py @@ -29,6 +29,7 @@ from 
requests import auth as request_auth from scanpipe.pipes import fetch +from scanpipe.tests import make_mock_response class ScanPipeFetchPipesTest(TestCase): @@ -71,25 +72,19 @@ def test_scanpipe_pipes_fetch_get_fetcher(self): def test_scanpipe_pipes_fetch_http(self, mock_get): url = "https://example.com/filename.zip" - mock_get.return_value = mock.Mock( - content=b"\x00", headers={}, status_code=200, url=url - ) + mock_get.return_value = make_mock_response(url=url) downloaded_file = fetch.fetch_http(url) self.assertTrue(Path(downloaded_file.directory, "filename.zip").exists()) url_with_spaces = "https://example.com/space%20in%20name.zip" - mock_get.return_value = mock.Mock( - content=b"\x00", headers={}, status_code=200, url=url_with_spaces - ) + mock_get.return_value = make_mock_response(url=url_with_spaces) downloaded_file = fetch.fetch_http(url) self.assertTrue(Path(downloaded_file.directory, "space in name.zip").exists()) headers = { "content-disposition": 'attachment; filename="another_name.zip"', } - mock_get.return_value = mock.Mock( - content=b"\x00", headers=headers, status_code=200, url=url - ) + mock_get.return_value = make_mock_response(url=url, headers=headers) downloaded_file = fetch.fetch_http(url) self.assertTrue(Path(downloaded_file.directory, "another_name.zip").exists()) @@ -188,9 +183,7 @@ def test_scanpipe_pipes_fetch_fetch_urls(self, mock_get): "https://example.com/archive.tar.gz", ] - mock_get.return_value = mock.Mock( - content=b"\x00", headers={}, status_code=200, url="mocked_url" - ) + mock_get.return_value = make_mock_response(url="mocked_url") downloads, errors = fetch.fetch_urls(urls) self.assertEqual(2, len(downloads)) self.assertEqual(urls[0], downloads[0].uri) diff --git a/scanpipe/tests/test_commands.py b/scanpipe/tests/test_commands.py index 3f3d6d00b6..1b3cf8f730 100644 --- a/scanpipe/tests/test_commands.py +++ b/scanpipe/tests/test_commands.py @@ -48,6 +48,7 @@ from scanpipe.models import WebhookSubscription from scanpipe.pipes 
import flag from scanpipe.pipes import purldb +from scanpipe.tests import make_mock_response from scanpipe.tests import make_package from scanpipe.tests import make_project from scanpipe.tests import make_resource_file @@ -963,12 +964,7 @@ def test_scanpipe_management_command_purldb_scan_queue_worker( mock_get_latest_output.return_value = ( self.data / "scancode" / "is-npm-1.0.0_summary.json" ) - mock_download_get.return_value = mock.Mock( - content=b"\x00", - headers={}, - status_code=200, - url=download_url, - ) + mock_download_get.return_value = make_mock_response(url=download_url) self.assertFalse(WebhookSubscription.objects.exists()) @@ -1016,12 +1012,7 @@ def test_scanpipe_management_command_purldb_scan_queue_worker_failure( "status": f"updated scannable_uri {scannable_uri_uuid} " "scan_status to 'failed'" } - mock_download_get.return_value = mock.Mock( - content=b"\x00", - headers={}, - status_code=200, - url=download_url, - ) + mock_download_get.return_value = make_mock_response(url=download_url) options = [ "--max-loops", @@ -1075,18 +1066,8 @@ def test_scanpipe_management_command_purldb_scan_queue_worker_continue_after_fai ] mock_download_get.side_effect = [ - mock.Mock( - content=b"\x00", - headers={}, - status_code=200, - url=download_url1, - ), - mock.Mock( - content=b"\x00", - headers={}, - status_code=200, - url=download_url2, - ), + make_mock_response(url=download_url1), + make_mock_response(url=download_url2), ] mock_request_post.side_effect = [ diff --git a/scanpipe/tests/test_models.py b/scanpipe/tests/test_models.py index 98d7775309..a0eadf5cdd 100644 --- a/scanpipe/tests/test_models.py +++ b/scanpipe/tests/test_models.py @@ -78,6 +78,7 @@ from scanpipe.tests import license_policies_index from scanpipe.tests import make_dependency from scanpipe.tests import make_message +from scanpipe.tests import make_mock_response from scanpipe.tests import make_package from scanpipe.tests import make_project from scanpipe.tests import make_resource_directory 
@@ -1473,9 +1474,7 @@ def test_scanpipe_input_source_model_delete_file(self): @mock.patch("requests.sessions.Session.get") def test_scanpipe_input_source_model_fetch(self, mock_get): download_url = "https://download.url/file.zip" - mock_get.return_value = mock.Mock( - content=b"\x00", headers={}, status_code=200, url=download_url - ) + mock_get.return_value = make_mock_response(url=download_url) input_source = self.project1.add_input_source(download_url=download_url) destination = input_source.fetch() diff --git a/scanpipe/tests/test_pipelines.py b/scanpipe/tests/test_pipelines.py index 1f4f75d091..e3df763948 100644 --- a/scanpipe/tests/test_pipelines.py +++ b/scanpipe/tests/test_pipelines.py @@ -53,6 +53,7 @@ from scanpipe.pipes import scancode from scanpipe.pipes.input import copy_input from scanpipe.tests import FIXTURES_REGEN +from scanpipe.tests import make_mock_response from scanpipe.tests import make_package from scanpipe.tests import make_project from scanpipe.tests import package_data1 @@ -226,9 +227,7 @@ def test_scanpipe_pipeline_class_download_missing_inputs(self, mock_get): self.assertEqual("", run.log) download_url = "https://download.url/file.zip" - mock_get.return_value = mock.Mock( - content=b"\x00", headers={}, status_code=200, url=download_url - ) + mock_get.return_value = make_mock_response(url=download_url) input_source2 = project1.add_input_source(download_url=download_url) pipeline.download_missing_inputs() self.assertIn("Fetching input from https://download.url/file.zip", run.log) From c2c9bc18dca10cf5e0a95594085e16067bf98190 Mon Sep 17 00:00:00 2001 From: tdruez Date: Wed, 11 Jun 2025 10:55:46 +0400 Subject: [PATCH 3/5] Add support for fetching Package URLs (fetch_package_url) #1383 Signed-off-by: tdruez --- scanpipe/pipes/fetch.py | 16 ++++++++++++++++ scanpipe/tests/pipes/test_fetch.py | 20 ++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/scanpipe/pipes/fetch.py b/scanpipe/pipes/fetch.py index 127f120da0..7151ad7a17 
100644 --- a/scanpipe/pipes/fetch.py +++ b/scanpipe/pipes/fetch.py @@ -39,6 +39,8 @@ from commoncode import command from commoncode.hash import multi_checksums from commoncode.text import python_safe_name +from packageurl import PackageURL +from packageurl.contrib import purl2url from plugincode.location_provider import get_location from requests import auth as request_auth @@ -356,6 +358,17 @@ def fetch_git_repo(url, to=None): ) +def fetch_package_url(url): + # Ensure the provided Package URL is valid, or raise a ValueError. + PackageURL.from_string(url) + + # Resolve a Download URL using purl2url. + if download_url := purl2url.get_download_url(url): + return fetch_http(download_url) + + raise ValueError(f"Could not resolve a download URL for {url}.") + + SCHEME_TO_FETCHER_MAPPING = { "http": fetch_http, "https": fetch_http, @@ -371,6 +384,9 @@ def get_fetcher(url): if url.rstrip("/").endswith(".git"): return fetch_git_repo + if url.startswith("pkg:"): + return fetch_package_url + # Not using `urlparse(url).scheme` for the scheme as it converts to lower case. 
scheme = url.split("://")[0] diff --git a/scanpipe/tests/pipes/test_fetch.py b/scanpipe/tests/pipes/test_fetch.py index 3776ee51c7..a53b86d267 100644 --- a/scanpipe/tests/pipes/test_fetch.py +++ b/scanpipe/tests/pipes/test_fetch.py @@ -42,6 +42,7 @@ def test_scanpipe_pipes_fetch_get_fetcher(self): git_http_url = "https://github.com/aboutcode-org/scancode.io.git" self.assertEqual(fetch.fetch_git_repo, fetch.get_fetcher(git_http_url)) self.assertEqual(fetch.fetch_git_repo, fetch.get_fetcher(git_http_url + "/")) + self.assertEqual(fetch.fetch_package_url, fetch.get_fetcher("pkg:npm/d3@5.8.0")) with self.assertRaises(ValueError) as cm: fetch.get_fetcher("") @@ -88,6 +89,25 @@ def test_scanpipe_pipes_fetch_http(self, mock_get): downloaded_file = fetch.fetch_http(url) self.assertTrue(Path(downloaded_file.directory, "another_name.zip").exists()) + @mock.patch("requests.sessions.Session.get") + def test_scanpipe_pipes_fetch_package_url(self, mock_get): + package_url = "pkg:not_a_valid_purl" + with self.assertRaises(ValueError) as cm: + fetch.fetch_package_url(package_url) + expected = f"purl is missing the required type component: '{package_url}'." + self.assertEqual(expected, str(cm.exception)) + + package_url = "pkg:generic/name@version" + with self.assertRaises(ValueError) as cm: + fetch.fetch_package_url(package_url) + expected = f"Could not resolve a download URL for {package_url}." 
+ self.assertEqual(expected, str(cm.exception)) + + package_url = "pkg:npm/d3@5.8.0" + mock_get.return_value = make_mock_response(url="https://exa.com/filename.zip") + downloaded_file = fetch.fetch_package_url(package_url) + self.assertTrue(Path(downloaded_file.directory, "filename.zip").exists()) + @mock.patch("scanpipe.pipes.fetch.get_docker_image_platform") @mock.patch("scanpipe.pipes.fetch._get_skopeo_location") @mock.patch("scanpipe.pipes.fetch.run_command_safely") From 6a5fedf8c273fcbf4db71b57b689827b75218715 Mon Sep 17 00:00:00 2001 From: tdruez Date: Wed, 11 Jun 2025 10:56:14 +0400 Subject: [PATCH 4/5] Add Package URL placeholder in InputsBaseForm #1383 Signed-off-by: tdruez --- scanpipe/forms.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/scanpipe/forms.py b/scanpipe/forms.py index 30f472c20c..f9f46da67c 100644 --- a/scanpipe/forms.py +++ b/scanpipe/forms.py @@ -64,16 +64,17 @@ class InputsBaseForm(forms.Form): label="Download URLs", required=False, help_text=( - "Provide one or more URLs to download, one per line. " - "Files are fetched at the beginning of the pipeline run execution." + "Enter one or more download URLs, one per line. " + "Files will be fetched when the pipeline starts." 
), widget=forms.Textarea( attrs={ "class": "textarea is-dynamic", - "rows": 2, + "rows": 3, "placeholder": ( "https://domain.com/archive.zip\n" - "docker://docker-reference (e.g.: docker://postgres:13)" + "docker://docker-reference (e.g.: docker://postgres:13)\n" + "pkg:type/name@version" ), }, ), From ccac1e74997a05057660a87f4047bfbfea6f117d Mon Sep 17 00:00:00 2001 From: tdruez Date: Wed, 11 Jun 2025 11:00:34 +0400 Subject: [PATCH 5/5] Add CHANGELOG entry #1383 Signed-off-by: tdruez --- CHANGELOG.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 0267dd7bc0..86a9e5935a 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,6 +4,10 @@ Changelog v34.12.0 (unreleased) --------------------- +- Add support for using Package URL (purl) as project input. + This implementation is based on ``purl2url.get_download_url``. + https://github.com/aboutcode-org/scancode.io/issues/1383 + - Raise a ``MatchCodeIOException`` when the response from the MatchCode.io service is not valid in ``send_project_json_to_matchcode``. This generally means an issue on the MatchCode.io server side.