Skip to content

Commit 51ccd1f

Browse files
committed
Avoid the IntegrityError for duplicated identifier fields values #303
Signed-off-by: tdruez <tdruez@nexb.com>
1 parent 212738a commit 51ccd1f

2 files changed

Lines changed: 49 additions & 15 deletions

File tree

component_catalog/models.py

Lines changed: 36 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
from django.core.exceptions import ObjectDoesNotExist
1818
from django.core.exceptions import ValidationError
1919
from django.core.validators import EMPTY_VALUES
20-
from django.db import IntegrityError
2120
from django.db import models
2221
from django.db.models import CharField
2322
from django.db.models import Count
@@ -2505,16 +2504,42 @@ def update_from_purldb(self, user):
25052504
package_data["release_date"] = release_date.split("T")[0]
25062505
package_data["license_expression"] = package_data.get("declared_license_expression")
25072506

2508-
try:
2509-
updated_fields = self.update_from_data(
2510-
user,
2511-
package_data,
2512-
override=False,
2513-
override_unknown=True,
2514-
)
2515-
except IntegrityError as e:
2516-
logger.error(f"[update_from_purldb] Skipping {self} due to IntegrityError: {e}")
2517-
return []
2507+
# Avoid raising an IntegrityError when the values in `package_data` for the
2508+
# identifier fields already exist on another Package instance.
2509+
#
2510+
# This situation can occur when a complete package (with both `purl` and
2511+
# `download_url`) already exists in the Dataspace, and `update_from_purldb` is
2512+
# called on a different package that has the same `purl` but no `download_url`.
2513+
#
2514+
# If we try to assign the same `download_url` to the second package, it would
2515+
# violate the unique constraints defined in the Package model (since the
2516+
# combination of fields must be unique).
2517+
unique_filters_lookups = {
2518+
field_name: package_data.get(field_name, "")
2519+
for field_name in self.get_identifier_fields()
2520+
}
2521+
unique_filters_qs = (
2522+
Package.objects.scope(self.dataspace)
2523+
.filter(**unique_filters_lookups)
2524+
.exclude(pk=self.pk)
2525+
)
2526+
if unique_filters_qs.exists():
2527+
# Remove the problematic "identifier_fields" values and the checksum values
2528+
hash_field_names = [field.name for field in HashFieldsMixin._meta.fields]
2529+
identifier_fields = self.get_identifier_fields()
2530+
for field_name in [*hash_field_names, *identifier_fields]:
2531+
package_data.pop(field_name, None)
2532+
2533+
# try:
2534+
updated_fields = self.update_from_data(
2535+
user,
2536+
package_data,
2537+
override=False,
2538+
override_unknown=True,
2539+
)
2540+
# except IntegrityError as e:
2541+
# logger.error(f"[update_from_purldb] Skipping {self} due to IntegrityError: {e}")
2542+
# return []
25182543

25192544
return updated_fields
25202545

component_catalog/tests/test_models.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2670,16 +2670,25 @@ def test_package_model_update_from_purldb_duplicate_exception(self, mock_get_pur
26702670
"name": "django",
26712671
"version": "3.0",
26722672
"download_url": download_url,
2673+
"description": "This value will be updated",
2674+
"md5": "This value is skipped",
2675+
"sha1": "This value is skipped",
26732676
}
2674-
26752677
mock_get_purldb_entries.return_value = [purldb_entry]
2678+
2679+
# 2 packages with the same "pkg:pypi/django@3.0" PURL:
2680+
# - 1 with a `download_url` value
2681+
# - 1 without a `download_url` value
26762682
make_package(self.dataspace, package_url=package_url, download_url=download_url)
26772683
package_no_download_url = make_package(self.dataspace, package_url=package_url)
26782684

2679-
# Updating the package with the download_url form purldb_entry would violates the
2680-
# unique constraint. This is handle properly by update_from_purldb.
2685+
# Updating the package with the `download_url` from the purldb_entry data
2686+
# would violate the unique constraint.
2687+
# This is handled properly by update_from_purldb.
26812688
updated_fields = package_no_download_url.update_from_purldb(self.user)
2682-
self.assertEqual([], updated_fields)
2689+
self.assertEqual(["description"], updated_fields)
2690+
package_no_download_url.refresh_from_db()
2691+
self.assertEqual(purldb_entry["description"], package_no_download_url.description)
26832692

26842693
def test_package_model_vulnerability_queryset_mixin(self):
26852694
package1 = make_package(self.dataspace, is_vulnerable=True)

0 commit comments

Comments
 (0)