1818from django .core .exceptions import ValidationError
1919from django .core .validators import EMPTY_VALUES
2020from django .db import models
21+ from django .db .models import Case
2122from django .db .models import CharField
2223from django .db .models import Count
2324from django .db .models import Exists
25+ from django .db .models import F
2426from django .db .models import OuterRef
27+ from django .db .models import Value
28+ from django .db .models import When
2529from django .db .models .functions import Concat
2630from django .dispatch import receiver
2731from django .template .defaultfilters import filesizeformat
7276from dje .models import ReferenceNotesMixin
7377from dje .tasks import logger as tasks_logger
7478from dje .utils import is_purl_str
79+ from dje .utils import merge_common_non_empty_values
7580from dje .utils import set_fields_from_object
7681from dje .validators import generic_uri_validator
7782from dje .validators import validate_url_segment
@@ -1650,6 +1655,65 @@ def __str__(self):
# Names of the fields holding the Package URL (PURL) components, listed in the
# order they appear in a PURL string.
PACKAGE_URL_FIELDS = [
    "type",
    "namespace",
    "name",
    "version",
    "qualifiers",
    "subpath",
]
16511656
16521657
def get_plain_package_url_expression():
    """
    Build the Django expression computing the "PLAIN" Package URL (PURL).

    The computed value has the form `pkg:<type>[/<namespace>]/<name>[@<version>]`
    where the bracketed segments are only rendered when the related field is
    not an empty string.
    The expression evaluates to an empty string when the required `type` or
    `name` value is an empty string.
    """
    # Optional segments: rendered along with their separator only when non-empty.
    namespace_segment = Case(
        When(namespace="", then=Value("")),
        default=Concat(Value("/"), F("namespace")),
        output_field=CharField(),
    )
    version_segment = Case(
        When(version="", then=Value("")),
        default=Concat(Value("@"), F("version")),
        output_field=CharField(),
    )

    assembled_plain_purl = Concat(
        Value("pkg:"),
        F("type"),
        namespace_segment,
        Value("/"),
        F("name"),
        version_segment,
        output_field=CharField(),
    )

    # A PURL cannot be built without both a `type` and a `name`.
    return Case(
        When(type="", then=Value("")),
        When(name="", then=Value("")),
        default=assembled_plain_purl,
        output_field=CharField(),
    )
1687+
1688+
def get_package_url_expression():
    """
    Build the Django expression computing the "FULL" Package URL (PURL).

    The computed value is the "plain" PURL expression followed by the optional
    `?<qualifiers>` and `#<subpath>` segments, each rendered only when the
    related field is not an empty string.
    The expression evaluates to an empty string when the required `type` or
    `name` value is an empty string.
    """
    qualifiers_segment = Case(
        When(qualifiers="", then=Value("")),
        default=Concat(Value("?"), F("qualifiers")),
        output_field=CharField(),
    )
    subpath_segment = Case(
        When(subpath="", then=Value("")),
        default=Concat(Value("#"), F("subpath")),
        output_field=CharField(),
    )

    assembled_purl = Concat(
        get_plain_package_url_expression(),
        qualifiers_segment,
        subpath_segment,
        output_field=CharField(),
    )

    # The guard is repeated here: the plain expression is empty when `type` or
    # `name` is missing, but the qualifiers/subpath segments could still be
    # rendered after it.
    return Case(
        When(type="", then=Value("")),
        When(name="", then=Value("")),
        default=assembled_purl,
        output_field=CharField(),
    )
1715+
1716+
16531717class PackageQuerySet (PackageURLQuerySetMixin , VulnerabilityQuerySetMixin , DataspacedQuerySet ):
16541718 def has_package_url (self ):
16551719 """Return objects with Package URL defined."""
@@ -1665,6 +1729,26 @@ def annotate_sortable_identifier(self):
16651729 sortable_identifier = Concat (* PACKAGE_URL_FIELDS , "filename" , output_field = CharField ())
16661730 )
16671731
def annotate_plain_package_url(self):
    """
    Return this QuerySet annotated with a `plain_purl` value.

    The `plain_purl` annotation is the 'plain' Package URL (PURL), a
    simplified form limited to the core fields: `type`, `namespace`, `name`,
    and `version`. The qualifiers and subpath components are left out, which
    gives a normalized and minimal representation of the Package URL.
    """
    plain_purl_expression = get_plain_package_url_expression()
    return self.annotate(plain_purl=plain_purl_expression)
1742+
def annotate_package_url(self):
    """
    Return this QuerySet annotated with a `purl` value.

    The `purl` annotation is the fully-computed Package URL (PURL): the core
    fields (`type`, `namespace`, `name`, `version`) followed by the qualifiers
    and subpath components, when present.
    """
    purl_expression = get_package_url_expression()
    return self.annotate(purl=purl_expression)
1751+
16681752 def only_rendering_fields (self ):
16691753 """Minimum requirements to render a Package element in the UI."""
16701754 return self .only (
@@ -2454,6 +2538,7 @@ def get_purldb_entries(self, user, max_request_call=0, timeout=10):
24542538 if nothing was found.
24552539 """
24562540 payloads = []
2541+ purldb_entries = []
24572542
24582543 package_url = self .package_url
24592544 if package_url :
@@ -2468,24 +2553,69 @@ def get_purldb_entries(self, user, max_request_call=0, timeout=10):
24682553 if max_request_call and index >= max_request_call :
24692554 return
24702555
2471- if packages_data := purldb .find_packages (payload , timeout ):
2472- return packages_data
2556+ if purldb_entries := purldb .find_packages (payload , timeout ):
2557+ break
2558+
2559+ if not purldb_entries :
2560+ return []
2561+
2562+ # Cleanup the PurlDB entries:
2563+ # - Packages with different PURL are excluded.
2564+ if package_url :
2565+ purldb_entries = [entry for entry in purldb_entries if entry .get ("purl" ) == package_url ]
2566+
2567+ return purldb_entries
24732568
24742569 def update_from_purldb (self , user ):
24752570 """
2476- Find this Package in the PurlDB and update empty fields with PurlDB data
2477- when available.
2571+ Update this Package instance with data from PurlDB.
2572+
2573+ - Retrieves matching entries from PurlDB using the given user.
2574+ - If exactly one match is found, its data is used directly.
2575+ - If multiple entries are found, only values that are non-empty and
2576+ common across all entries are merged and used to update the Package.
24782577 """
24792578 purldb_entries = self .get_purldb_entries (user )
24802579 if not purldb_entries :
24812580 return
24822581
2483- package_data = purldb_entries [0 ]
2582+ purldb_entries_count = len (purldb_entries )
2583+ if purldb_entries_count == 1 :
2584+ package_data = purldb_entries [0 ]
2585+ else :
2586+ package_data = merge_common_non_empty_values (purldb_entries )
2587+
24842588 # The format from PURLDB is "2019-11-18T00:00:00Z"
24852589 if release_date := package_data .get ("release_date" ):
24862590 package_data ["release_date" ] = release_date .split ("T" )[0 ]
24872591 package_data ["license_expression" ] = package_data .get ("declared_license_expression" )
24882592
2593+ # Avoid raising an IntegrityError when the values in `package_data` for the
2594+ # identifier fields already exist on another Package instance.
2595+ #
2596+ # This situation can occur when a complete package (with both `purl` and
2597+ # `download_url`) already exists in the Dataspace, and `update_from_purldb` is
2598+ # called on a different package that has the same `purl` but no `download_url`.
2599+ #
2600+ # If we try to assign the same `download_url` to the second package, it would
2601+ # violate the unique constraints defined in the Package model (since the
2602+ # combination of fields must be unique).
2603+ unique_filters_lookups = {
2604+ field_name : package_data .get (field_name , "" )
2605+ for field_name in self .get_identifier_fields ()
2606+ }
2607+ unique_filters_qs = (
2608+ Package .objects .scope (self .dataspace )
2609+ .filter (** unique_filters_lookups )
2610+ .exclude (pk = self .pk )
2611+ )
2612+ if unique_filters_qs .exists ():
2613+ # Remove the problematic "identifier_fields" values and the checksum values
2614+ hash_field_names = [field .name for field in HashFieldsMixin ._meta .fields ]
2615+ identifier_fields = self .get_identifier_fields ()
2616+ for field_name in [* hash_field_names , * identifier_fields ]:
2617+ package_data .pop (field_name , None )
2618+
24892619 updated_fields = self .update_from_data (
24902620 user ,
24912621 package_data ,
@@ -2508,6 +2638,32 @@ def update_from_scan(self, user):
25082638 updated_fields = scancodeio .update_from_scan (package = self , user = user )
25092639 return updated_fields
25102640
def get_related_packages_qs(self):
    """
    Return the QuerySet of the packages belonging to the same "Package Set"
    as this package, or None when this package has no "plain" Package URL.

    A "Package Set" groups all the packages sharing the same "plain"
    Package URL (PURL), that is, identical values for the `type`, `namespace`,
    `name`, and `version` PURL components.
    The `qualifiers` and `subpath` components are not part of the comparison.

    The lookup is scoped to this package's dataspace, and the results are
    ordered by the PURL fields, then `filename` and `download_url`.
    """
    plain_purl = self.plain_package_url
    if plain_purl:
        dataspace_packages = self.__class__.objects.scope(self.dataspace)
        package_set = dataspace_packages.for_package_url(plain_purl, exact_match=True)
        ordering = [*PACKAGE_URL_FIELDS, "filename", "download_url"]
        return package_set.order_by(*ordering).distinct()

    # Without a plain PURL there is nothing to match related packages against.
    return None
2666+
25112667
25122668class PackageAssignedLicense (DataspacedModel ):
25132669 package = models .ForeignKey (
0 commit comments