Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ Changelog
v34.12.0 (unreleased)
---------------------

- Add support for using a Package URL (purl) as a project input.
The download URL is resolved from the purl using ``purl2url.get_download_url``.
https://github.com/aboutcode-org/scancode.io/issues/1383

- Raise a ``MatchCodeIOException`` when the response from the MatchCode.io service is
not valid in ``send_project_json_to_matchcode``.
This generally means an issue on the MatchCode.io server side.
Expand Down
9 changes: 5 additions & 4 deletions scanpipe/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,16 +64,17 @@ class InputsBaseForm(forms.Form):
label="Download URLs",
required=False,
help_text=(
"Provide one or more URLs to download, one per line. "
"Files are fetched at the beginning of the pipeline run execution."
"Enter one or more download URLs, one per line. "
"Files will be fetched when the pipeline starts."
),
widget=forms.Textarea(
attrs={
"class": "textarea is-dynamic",
"rows": 2,
"rows": 3,
"placeholder": (
"https://domain.com/archive.zip\n"
"docker://docker-reference (e.g.: docker://postgres:13)"
"docker://docker-reference (e.g.: docker://postgres:13)\n"
"pkg://type/name@version"
),
},
),
Expand Down
16 changes: 16 additions & 0 deletions scanpipe/pipes/fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
from commoncode import command
from commoncode.hash import multi_checksums
from commoncode.text import python_safe_name
from packageurl import PackageURL
from packageurl.contrib import purl2url
from plugincode.location_provider import get_location
from requests import auth as request_auth

Expand Down Expand Up @@ -356,6 +358,17 @@ def fetch_git_repo(url, to=None):
)


def fetch_package_url(url):
    """
    Fetch the download referenced by the provided Package URL (purl) ``url``.

    The ``url`` is first parsed as a Package URL for validation, then a download
    URL is resolved with ``purl2url.get_download_url`` and handed to
    ``fetch_http``, whose result is returned.

    Raise a ValueError when the ``url`` is not a valid Package URL or when no
    download URL can be resolved for it.
    """
    # Parsing is done only for validation: invalid input raises a ValueError.
    PackageURL.from_string(url)

    download_url = purl2url.get_download_url(url)
    if not download_url:
        raise ValueError(f"Could not resolve a download URL for {url}.")

    return fetch_http(download_url)


SCHEME_TO_FETCHER_MAPPING = {
"http": fetch_http,
"https": fetch_http,
Expand All @@ -371,6 +384,9 @@ def get_fetcher(url):
if url.rstrip("/").endswith(".git"):
return fetch_git_repo

if url.startswith("pkg:"):
return fetch_package_url

# Not using `urlparse(url).scheme` for the scheme as it converts to lower case.
scheme = url.split("://")[0]

Expand Down
10 changes: 10 additions & 0 deletions scanpipe/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,16 @@ def make_message(project, **data):
)


def make_mock_response(url, content=b"\x00", status_code=200, headers=None):
    """
    Build a ``mock.Mock`` standing in for an HTTP response in tests.

    The returned mock exposes the ``url``, ``content``, ``status_code``, and
    ``headers`` attributes. ``headers`` falls back to a new empty dict when no
    (or an empty) value is provided.
    """
    return mock.Mock(
        url=url,
        content=content,
        status_code=status_code,
        headers=headers or {},
    )


resource_data1 = {
"path": "notice.NOTICE",
"type": "file",
Expand Down
37 changes: 25 additions & 12 deletions scanpipe/tests/pipes/test_fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from requests import auth as request_auth

from scanpipe.pipes import fetch
from scanpipe.tests import make_mock_response


class ScanPipeFetchPipesTest(TestCase):
Expand All @@ -41,6 +42,7 @@ def test_scanpipe_pipes_fetch_get_fetcher(self):
git_http_url = "https://github.com/aboutcode-org/scancode.io.git"
self.assertEqual(fetch.fetch_git_repo, fetch.get_fetcher(git_http_url))
self.assertEqual(fetch.fetch_git_repo, fetch.get_fetcher(git_http_url + "/"))
self.assertEqual(fetch.fetch_package_url, fetch.get_fetcher("pkg:npm/d3@5.8.0"))

with self.assertRaises(ValueError) as cm:
fetch.get_fetcher("")
Expand Down Expand Up @@ -71,28 +73,41 @@ def test_scanpipe_pipes_fetch_get_fetcher(self):
def test_scanpipe_pipes_fetch_http(self, mock_get):
url = "https://example.com/filename.zip"

mock_get.return_value = mock.Mock(
content=b"\x00", headers={}, status_code=200, url=url
)
mock_get.return_value = make_mock_response(url=url)
downloaded_file = fetch.fetch_http(url)
self.assertTrue(Path(downloaded_file.directory, "filename.zip").exists())

url_with_spaces = "https://example.com/space%20in%20name.zip"
mock_get.return_value = mock.Mock(
content=b"\x00", headers={}, status_code=200, url=url_with_spaces
)
mock_get.return_value = make_mock_response(url=url_with_spaces)
downloaded_file = fetch.fetch_http(url)
self.assertTrue(Path(downloaded_file.directory, "space in name.zip").exists())

headers = {
"content-disposition": 'attachment; filename="another_name.zip"',
}
mock_get.return_value = mock.Mock(
content=b"\x00", headers=headers, status_code=200, url=url
)
mock_get.return_value = make_mock_response(url=url, headers=headers)
downloaded_file = fetch.fetch_http(url)
self.assertTrue(Path(downloaded_file.directory, "another_name.zip").exists())

@mock.patch("requests.sessions.Session.get")
def test_scanpipe_pipes_fetch_package_url(self, mock_get):
    # Case 1: a string without a type component is rejected as an invalid purl.
    invalid_purl = "pkg:not_a_valid_purl"
    expected_message = (
        f"purl is missing the required type component: '{invalid_purl}'."
    )
    with self.assertRaises(ValueError) as raised:
        fetch.fetch_package_url(invalid_purl)
    self.assertEqual(expected_message, str(raised.exception))

    # Case 2: a valid purl for which no download URL can be resolved.
    unresolvable_purl = "pkg:generic/name@version"
    expected_message = f"Could not resolve a download URL for {unresolvable_purl}."
    with self.assertRaises(ValueError) as raised:
        fetch.fetch_package_url(unresolvable_purl)
    self.assertEqual(expected_message, str(raised.exception))

    # Case 3: a resolvable purl is downloaded through the patched session GET.
    resolvable_purl = "pkg:npm/d3@5.8.0"
    mock_get.return_value = make_mock_response(url="https://exa.com/filename.zip")
    download = fetch.fetch_package_url(resolvable_purl)
    fetched_file = Path(download.directory) / "filename.zip"
    self.assertTrue(fetched_file.exists())

@mock.patch("scanpipe.pipes.fetch.get_docker_image_platform")
@mock.patch("scanpipe.pipes.fetch._get_skopeo_location")
@mock.patch("scanpipe.pipes.fetch.run_command_safely")
Expand Down Expand Up @@ -188,9 +203,7 @@ def test_scanpipe_pipes_fetch_fetch_urls(self, mock_get):
"https://example.com/archive.tar.gz",
]

mock_get.return_value = mock.Mock(
content=b"\x00", headers={}, status_code=200, url="mocked_url"
)
mock_get.return_value = make_mock_response(url="mocked_url")
downloads, errors = fetch.fetch_urls(urls)
self.assertEqual(2, len(downloads))
self.assertEqual(urls[0], downloads[0].uri)
Expand Down
29 changes: 5 additions & 24 deletions scanpipe/tests/test_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
from scanpipe.models import WebhookSubscription
from scanpipe.pipes import flag
from scanpipe.pipes import purldb
from scanpipe.tests import make_mock_response
from scanpipe.tests import make_package
from scanpipe.tests import make_project
from scanpipe.tests import make_resource_file
Expand Down Expand Up @@ -963,12 +964,7 @@ def test_scanpipe_management_command_purldb_scan_queue_worker(
mock_get_latest_output.return_value = (
self.data / "scancode" / "is-npm-1.0.0_summary.json"
)
mock_download_get.return_value = mock.Mock(
content=b"\x00",
headers={},
status_code=200,
url=download_url,
)
mock_download_get.return_value = make_mock_response(url=download_url)

self.assertFalse(WebhookSubscription.objects.exists())

Expand Down Expand Up @@ -1016,12 +1012,7 @@ def test_scanpipe_management_command_purldb_scan_queue_worker_failure(
"status": f"updated scannable_uri {scannable_uri_uuid} "
"scan_status to 'failed'"
}
mock_download_get.return_value = mock.Mock(
content=b"\x00",
headers={},
status_code=200,
url=download_url,
)
mock_download_get.return_value = make_mock_response(url=download_url)

options = [
"--max-loops",
Expand Down Expand Up @@ -1075,18 +1066,8 @@ def test_scanpipe_management_command_purldb_scan_queue_worker_continue_after_fai
]

mock_download_get.side_effect = [
mock.Mock(
content=b"\x00",
headers={},
status_code=200,
url=download_url1,
),
mock.Mock(
content=b"\x00",
headers={},
status_code=200,
url=download_url2,
),
make_mock_response(url=download_url1),
make_mock_response(url=download_url2),
]

mock_request_post.side_effect = [
Expand Down
5 changes: 2 additions & 3 deletions scanpipe/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
from scanpipe.tests import license_policies_index
from scanpipe.tests import make_dependency
from scanpipe.tests import make_message
from scanpipe.tests import make_mock_response
from scanpipe.tests import make_package
from scanpipe.tests import make_project
from scanpipe.tests import make_resource_directory
Expand Down Expand Up @@ -1473,9 +1474,7 @@ def test_scanpipe_input_source_model_delete_file(self):
@mock.patch("requests.sessions.Session.get")
def test_scanpipe_input_source_model_fetch(self, mock_get):
download_url = "https://download.url/file.zip"
mock_get.return_value = mock.Mock(
content=b"\x00", headers={}, status_code=200, url=download_url
)
mock_get.return_value = make_mock_response(url=download_url)

input_source = self.project1.add_input_source(download_url=download_url)
destination = input_source.fetch()
Expand Down
5 changes: 2 additions & 3 deletions scanpipe/tests/test_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
from scanpipe.pipes import scancode
from scanpipe.pipes.input import copy_input
from scanpipe.tests import FIXTURES_REGEN
from scanpipe.tests import make_mock_response
from scanpipe.tests import make_package
from scanpipe.tests import make_project
from scanpipe.tests import package_data1
Expand Down Expand Up @@ -226,9 +227,7 @@ def test_scanpipe_pipeline_class_download_missing_inputs(self, mock_get):
self.assertEqual("", run.log)

download_url = "https://download.url/file.zip"
mock_get.return_value = mock.Mock(
content=b"\x00", headers={}, status_code=200, url=download_url
)
mock_get.return_value = make_mock_response(url=download_url)
input_source2 = project1.add_input_source(download_url=download_url)
pipeline.download_missing_inputs()
self.assertIn("Fetching input from https://download.url/file.zip", run.log)
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ install_requires =
extractcode[full]==31.0.0
commoncode==32.2.1
Beautifulsoup4[chardet]==4.13.3
packageurl-python==0.16.0
packageurl-python==0.17.1
# FetchCode
fetchcode-container==1.2.3.210512; sys_platform == "linux"
# Inspectors
Expand Down