From b4f7066cabb98e51827efec350380c0d2df84e4a Mon Sep 17 00:00:00 2001 From: tdruez Date: Wed, 16 Apr 2025 15:06:43 +0800 Subject: [PATCH 1/3] Add an UUID field on the DiscoveredDependency model #1651 Signed-off-by: tdruez --- .../0070_discovereddependency_uuid.py | 19 ++++++++++++ ...0071_discovereddependency_uuid_populate.py | 29 +++++++++++++++++++ .../0072_discovereddependency_uuid_unique.py | 19 ++++++++++++ scanpipe/models.py | 17 +++++++++-- 4 files changed, 81 insertions(+), 3 deletions(-) create mode 100644 scanpipe/migrations/0070_discovereddependency_uuid.py create mode 100644 scanpipe/migrations/0071_discovereddependency_uuid_populate.py create mode 100644 scanpipe/migrations/0072_discovereddependency_uuid_unique.py diff --git a/scanpipe/migrations/0070_discovereddependency_uuid.py b/scanpipe/migrations/0070_discovereddependency_uuid.py new file mode 100644 index 0000000000..55bf91e0f2 --- /dev/null +++ b/scanpipe/migrations/0070_discovereddependency_uuid.py @@ -0,0 +1,19 @@ +# Generated by Django 5.1.8 on 2025-04-16 06:49 + +import uuid +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('scanpipe', '0069_project_purl'), + ] + + operations = [ + migrations.AddField( + model_name='discovereddependency', + name='uuid', + field=models.UUIDField(null=True, editable=False, verbose_name='UUID'), + ), + ] diff --git a/scanpipe/migrations/0071_discovereddependency_uuid_populate.py b/scanpipe/migrations/0071_discovereddependency_uuid_populate.py new file mode 100644 index 0000000000..a8633f2fa9 --- /dev/null +++ b/scanpipe/migrations/0071_discovereddependency_uuid_populate.py @@ -0,0 +1,29 @@ +# Generated by Django 5.1.8 on 2025-04-16 06:57 + +import uuid +from django.db import migrations + + +def gen_uuid_bulk(apps, schema_editor): + DiscoveredDependency = apps.get_model("scanpipe", "DiscoveredDependency") + batch_size = 10000 + objs = [] + for obj in DiscoveredDependency.objects.filter(uuid__isnull=True).iterator(): + obj.uuid = uuid.uuid4() + objs.append(obj) + if len(objs) >= batch_size: + DiscoveredDependency.objects.bulk_update(objs, ['uuid']) + objs = [] + if objs: + DiscoveredDependency.objects.bulk_update(objs, ['uuid']) + + +class Migration(migrations.Migration): + + dependencies = [ + ('scanpipe', '0070_discovereddependency_uuid'), + ] + + operations = [ + migrations.RunPython(gen_uuid_bulk, reverse_code=migrations.RunPython.noop), + ] diff --git a/scanpipe/migrations/0072_discovereddependency_uuid_unique.py b/scanpipe/migrations/0072_discovereddependency_uuid_unique.py new file mode 100644 index 0000000000..18856b5243 --- /dev/null +++ b/scanpipe/migrations/0072_discovereddependency_uuid_unique.py @@ -0,0 +1,19 @@ +# Generated by Django 5.1.8 on 2025-04-16 07:00 + +import uuid +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('scanpipe', '0071_discovereddependency_uuid_populate'), + ] + + operations = [ + migrations.AlterField( + model_name='discovereddependency', + name='uuid', + field=models.UUIDField(default=uuid.uuid4, editable=False, unique=True, verbose_name='UUID'), + ), + ] diff --git a/scanpipe/models.py b/scanpipe/models.py index 388c8a4b2b..b7b4bdb529 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -129,6 +129,18 @@ def short_uuid(self): return str(self.uuid)[0:8] +class UUIDFieldMixin(models.Model): + uuid = models.UUIDField( + verbose_name=_("UUID"), + default=uuid.uuid4, + editable=False, + unique=True, + ) + + class Meta: + abstract = True + + class HashFieldsMixin(models.Model): """ The hash fields are not indexed by default, use the `indexes` in Meta as needed: @@ -3400,6 +3412,7 @@ class Meta: class DiscoveredPackage( ProjectRelatedModel, + UUIDFieldMixin, ExtraDataFieldMixin, SaveProjectMessageMixin, UpdateFromDataMixin, @@ -3421,9 +3434,6 @@ class DiscoveredPackage( license_expression_field = "declared_license_expression" - uuid = models.UUIDField( - verbose_name=_("UUID"), default=uuid.uuid4, unique=True, editable=False - ) codebase_resources = models.ManyToManyField( "CodebaseResource", related_name="discovered_packages" ) @@ -3769,6 +3779,7 @@ def only_package_url_fields(self, extra=None): class DiscoveredDependency( ProjectRelatedModel, + UUIDFieldMixin, SaveProjectMessageMixin, UpdateFromDataMixin, VulnerabilityMixin, From 6d85a9fe34eb78c7a6f54b2680f6e9bd6c0086cb Mon Sep 17 00:00:00 2001 From: tdruez Date: Wed, 16 Apr 2025 15:20:53 +0800 Subject: [PATCH 2/3] Use the UUID for the DiscoveredDependency spdx_id #1651 Signed-off-by: tdruez --- scanpipe/models.py | 5 +++- .../data/asgiref/asgiref-3.3.0.spdx.json | 24 +++++++++---------- .../data/asgiref/asgiref-3.3.0_fixtures.json | 4 ++++ scanpipe/tests/test_models.py | 12 +++++++++- 4 files changed, 31 insertions(+), 14 deletions(-) diff --git a/scanpipe/models.py b/scanpipe/models.py index b7b4bdb529..941298251b 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -4042,7 +4042,10 @@ def populate_dependency_uuid(cls, dependency_data): @property def spdx_id(self): - return f"SPDXRef-scancodeio-{self._meta.model_name}-{self.dependency_uid}" + # We cannot rely on `dependency_uid` for the SPDX ID because it may contain + # PURL components that are not SPDX-compliant. According to the spec, + # "SPDXID is a unique string containing letters, numbers, ., and/or -" + return f"SPDXRef-scancodeio-{self._meta.model_name}-{self.uuid}" def as_spdx(self): """Return this Dependency as an SPDX Package entry.""" diff --git a/scanpipe/tests/data/asgiref/asgiref-3.3.0.spdx.json b/scanpipe/tests/data/asgiref/asgiref-3.3.0.spdx.json index 0485436e89..3cf2421996 100644 --- a/scanpipe/tests/data/asgiref/asgiref-3.3.0.spdx.json +++ b/scanpipe/tests/data/asgiref/asgiref-3.3.0.spdx.json @@ -52,7 +52,7 @@ }, { "name": "pytest", - "SPDXID": "SPDXRef-scancodeio-discovereddependency-pkg:pypi/pytest?uuid=cfa26c80-95fc-4da3-a290-5e7403d0d9bc", + "SPDXID": "SPDXRef-scancodeio-discovereddependency-13818fb7-6094-4868-97ca-384a8fc8d16d", "downloadLocation": "NOASSERTION", "licenseConcluded": "NOASSERTION", "copyrightText": "NOASSERTION", @@ -68,7 +68,7 @@ }, { "name": "pytest", - "SPDXID": "SPDXRef-scancodeio-discovereddependency-pkg:pypi/pytest?uuid=bfafc414-739f-4747-bfb0-1b3ad03d62c7", + "SPDXID": "SPDXRef-scancodeio-discovereddependency-2f1d3742-0553-4c4f-8731-1ffbbc13827d", "downloadLocation": "NOASSERTION", "licenseConcluded": "NOASSERTION", "copyrightText": "NOASSERTION", @@ -84,7 +84,7 @@ }, { "name": "pytest-asyncio", - "SPDXID": "SPDXRef-scancodeio-discovereddependency-pkg:pypi/pytest-asyncio?uuid=68b8d3cb-eddb-4727-b6cb-707dde279301", + "SPDXID": "SPDXRef-scancodeio-discovereddependency-fd5a81e5-0739-406e-9189-7b8a3644ef0d", "downloadLocation": "NOASSERTION", "licenseConcluded": "NOASSERTION", "copyrightText": "NOASSERTION", @@ -100,7 +100,7 @@ }, { "name": "pytest-asyncio", - "SPDXID": "SPDXRef-scancodeio-discovereddependency-pkg:pypi/pytest-asyncio?uuid=570878e1-aa7c-46bc-9216-122b73b34f9b", + "SPDXID": "SPDXRef-scancodeio-discovereddependency-e175db55-d0f3-4224-b6d4-2b0ad553b865", "downloadLocation": "NOASSERTION", "licenseConcluded": "NOASSERTION", "copyrightText": "NOASSERTION", @@ -118,30 +118,30 @@ "documentDescribes": [ "SPDXRef-scancodeio-discoveredpackage-101147dd-f8a7-4ea3-87a1-01b9b0af5d4f", "SPDXRef-scancodeio-discoveredpackage-b5035991-5b4b-40be-b68b-1c9c528078cd", - "SPDXRef-scancodeio-discovereddependency-pkg:pypi/pytest?uuid=cfa26c80-95fc-4da3-a290-5e7403d0d9bc", - "SPDXRef-scancodeio-discovereddependency-pkg:pypi/pytest?uuid=bfafc414-739f-4747-bfb0-1b3ad03d62c7", - "SPDXRef-scancodeio-discovereddependency-pkg:pypi/pytest-asyncio?uuid=68b8d3cb-eddb-4727-b6cb-707dde279301", - "SPDXRef-scancodeio-discovereddependency-pkg:pypi/pytest-asyncio?uuid=570878e1-aa7c-46bc-9216-122b73b34f9b" + "SPDXRef-scancodeio-discovereddependency-13818fb7-6094-4868-97ca-384a8fc8d16d", + "SPDXRef-scancodeio-discovereddependency-2f1d3742-0553-4c4f-8731-1ffbbc13827d", + "SPDXRef-scancodeio-discovereddependency-fd5a81e5-0739-406e-9189-7b8a3644ef0d", + "SPDXRef-scancodeio-discovereddependency-e175db55-d0f3-4224-b6d4-2b0ad553b865" ], "files": [], "relationships": [ { - "spdxElementId": "SPDXRef-scancodeio-discovereddependency-pkg:pypi/pytest?uuid=cfa26c80-95fc-4da3-a290-5e7403d0d9bc", + "spdxElementId": "SPDXRef-scancodeio-discovereddependency-13818fb7-6094-4868-97ca-384a8fc8d16d", "relatedSpdxElement": "SPDXRef-scancodeio-discoveredpackage-101147dd-f8a7-4ea3-87a1-01b9b0af5d4f", "relationshipType": "DEPENDENCY_OF" }, { - "spdxElementId": "SPDXRef-scancodeio-discovereddependency-pkg:pypi/pytest?uuid=bfafc414-739f-4747-bfb0-1b3ad03d62c7", + "spdxElementId": "SPDXRef-scancodeio-discovereddependency-2f1d3742-0553-4c4f-8731-1ffbbc13827d", "relatedSpdxElement": "SPDXRef-scancodeio-discoveredpackage-b5035991-5b4b-40be-b68b-1c9c528078cd", "relationshipType": "DEPENDENCY_OF" }, { - "spdxElementId": "SPDXRef-scancodeio-discovereddependency-pkg:pypi/pytest-asyncio?uuid=68b8d3cb-eddb-4727-b6cb-707dde279301", + "spdxElementId": "SPDXRef-scancodeio-discovereddependency-fd5a81e5-0739-406e-9189-7b8a3644ef0d", "relatedSpdxElement": "SPDXRef-scancodeio-discoveredpackage-101147dd-f8a7-4ea3-87a1-01b9b0af5d4f", "relationshipType": "DEPENDENCY_OF" }, { - "spdxElementId": "SPDXRef-scancodeio-discovereddependency-pkg:pypi/pytest-asyncio?uuid=570878e1-aa7c-46bc-9216-122b73b34f9b", + "spdxElementId": "SPDXRef-scancodeio-discovereddependency-e175db55-d0f3-4224-b6d4-2b0ad553b865", "relatedSpdxElement": "SPDXRef-scancodeio-discoveredpackage-b5035991-5b4b-40be-b68b-1c9c528078cd", "relationshipType": "DEPENDENCY_OF" } diff --git a/scanpipe/tests/data/asgiref/asgiref-3.3.0_fixtures.json b/scanpipe/tests/data/asgiref/asgiref-3.3.0_fixtures.json index 99128c376d..5077315c08 100644 --- a/scanpipe/tests/data/asgiref/asgiref-3.3.0_fixtures.json +++ b/scanpipe/tests/data/asgiref/asgiref-3.3.0_fixtures.json @@ -1714,6 +1714,7 @@ "model": "scanpipe.discovereddependency", "pk": 1, "fields": { + "uuid": "13818fb7-6094-4868-97ca-384a8fc8d16d", "type": "pypi", "namespace": "", "name": "pytest", @@ -1739,6 +1740,7 @@ "model": "scanpipe.discovereddependency", "pk": 2, "fields": { + "uuid": "fd5a81e5-0739-406e-9189-7b8a3644ef0d", "type": "pypi", "namespace": "", "name": "pytest-asyncio", @@ -1764,6 +1766,7 @@ "model": "scanpipe.discovereddependency", "pk": 3, "fields": { + "uuid": "2f1d3742-0553-4c4f-8731-1ffbbc13827d", "type": "pypi", "namespace": "", "name": "pytest", @@ -1789,6 +1792,7 @@ "model": "scanpipe.discovereddependency", "pk": 4, "fields": { + "uuid": "e175db55-d0f3-4224-b6d4-2b0ad553b865", "type": "pypi", "namespace": "", "name": "pytest-asyncio", diff --git a/scanpipe/tests/test_models.py b/scanpipe/tests/test_models.py index dd9207a9cb..645fe38163 100644 --- a/scanpipe/tests/test_models.py +++ b/scanpipe/tests/test_models.py @@ -2429,6 +2429,11 @@ def test_scanpipe_discovered_package_model_compliance_alert(self): # Reset the index value scanpipe_app.license_policies_index = None + def test_scanpipe_discovered_package_model_spdx_id(self): + package1 = make_package(self.project1, "pkg:type/a") + expected = f"SPDXRef-scancodeio-discoveredpackage-{package1.uuid}" + self.assertEqual(expected, package1.spdx_id) + def test_scanpipe_model_create_user_creates_auth_token(self): basic_user = User.objects.create_user(username="basic_user") self.assertTrue(basic_user.auth_token.key) @@ -2492,7 +2497,7 @@ def test_scanpipe_discovered_dependency_model_many_to_many(self): self.assertEqual([], list(c.declared_dependencies.all())) self.assertEqual([b_c], list(c.resolved_from_dependencies.all())) - def test_scanpipe_discovered_dependency_model_is_vulnerable_property(self): + def test_scanpipe_discovered_package_model_is_vulnerable_property(self): package = DiscoveredPackage.create_from_data(self.project1, package_data1) self.assertFalse(package.is_vulnerable) package.update( @@ -2500,6 +2505,11 @@ def test_scanpipe_discovered_dependency_model_is_vulnerable_property(self): ) self.assertTrue(package.is_vulnerable) + def test_scanpipe_discovered_dependency_model_spdx_id(self): + dependency1 = make_dependency(self.project1) + expected = f"SPDXRef-scancodeio-discovereddependency-{dependency1.uuid}" + self.assertEqual(expected, dependency1.spdx_id) + def test_scanpipe_package_model_integrity_with_toolkit_package_model(self): scanpipe_only_fields = [ "id", From b7ad2451332d912433d528e0ffe573a8f7d98fa1 Mon Sep 17 00:00:00 2001 From: tdruez Date: Wed, 16 Apr 2025 16:57:56 +0800 Subject: [PATCH 3/3] Add changelog entry #1651 Signed-off-by: tdruez --- CHANGELOG.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 4406dba100..9935d14adf 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,13 @@ Changelog ========= +v34.10.2 (unreleased) +--------------------- + +- Add a ``UUID`` field on the DiscoveredDependency model. + Use the UUID for the DiscoveredDependency spdx_id for better SPDX compatibility. + https://github.com/aboutcode-org/scancode.io/issues/1651 + v34.10.1 (2025-03-26) ---------------------