|
17 | 17 | from django.core.exceptions import ObjectDoesNotExist |
18 | 18 | from django.core.exceptions import ValidationError |
19 | 19 | from django.core.validators import EMPTY_VALUES |
20 | | -from django.db import IntegrityError |
21 | 20 | from django.db import models |
22 | 21 | from django.db.models import CharField |
23 | 22 | from django.db.models import Count |
@@ -2505,16 +2504,42 @@ def update_from_purldb(self, user): |
2505 | 2504 | package_data["release_date"] = release_date.split("T")[0] |
2506 | 2505 | package_data["license_expression"] = package_data.get("declared_license_expression") |
2507 | 2506 |
|
2508 | | - try: |
2509 | | - updated_fields = self.update_from_data( |
2510 | | - user, |
2511 | | - package_data, |
2512 | | - override=False, |
2513 | | - override_unknown=True, |
2514 | | - ) |
2515 | | - except IntegrityError as e: |
2516 | | - logger.error(f"[update_from_purldb] Skipping {self} due to IntegrityError: {e}") |
2517 | | - return [] |
| 2507 | + # Avoid raising an IntegrityError when the values in `package_data` for the |
| 2508 | + # identifier fields already exist on another Package instance. |
| 2509 | + # |
| 2510 | + # This situation can occur when a complete package (with both `purl` and |
| 2511 | + # `download_url`) already exists in the Dataspace, and `update_from_purldb` is |
| 2512 | + # called on a different package that has the same `purl` but no `download_url`. |
| 2513 | + # |
| 2514 | + # If we try to assign the same `download_url` to the second package, it would |
| 2515 | + # violate the unique constraints defined in the Package model (since the |
| 2516 | + # combination of fields must be unique). |
| 2517 | + unique_filters_lookups = { |
| 2518 | + field_name: package_data.get(field_name, "") |
| 2519 | + for field_name in self.get_identifier_fields() |
| 2520 | + } |
| 2521 | + unique_filters_qs = ( |
| 2522 | + Package.objects.scope(self.dataspace) |
| 2523 | + .filter(**unique_filters_lookups) |
| 2524 | + .exclude(pk=self.pk) |
| 2525 | + ) |
| 2526 | + if unique_filters_qs.exists(): |
| 2527 | + # Remove the problematic "identifier_fields" values and the checksum values |
| 2528 | + hash_field_names = [field.name for field in HashFieldsMixin._meta.fields] |
| 2529 | + identifier_fields = self.get_identifier_fields() |
| 2530 | + for field_name in [*hash_field_names, *identifier_fields]: |
| 2531 | + package_data.pop(field_name, None) |
| 2532 | + |
| 2533 | + # try: |
| 2534 | + updated_fields = self.update_from_data( |
| 2535 | + user, |
| 2536 | + package_data, |
| 2537 | + override=False, |
| 2538 | + override_unknown=True, |
| 2539 | + ) |
| 2540 | + # except IntegrityError as e: |
| 2541 | + # logger.error(f"[update_from_purldb] Skipping {self} due to IntegrityError: {e}") |
| 2542 | + # return [] |
2518 | 2543 |
|
2519 | 2544 | return updated_fields |
2520 | 2545 |
|
|
0 commit comments