Skip to content

Commit d3aee83

Browse files
committed
Merge main and fix conflicts
Signed-off-by: tdruez <tdruez@nexb.com>
2 parents 1df8116 + 31a41c3 commit d3aee83

102 files changed

Lines changed: 1732 additions & 691 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

CHANGELOG.rst

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,39 @@ Release notes
120120
simplicity, and readability.
121121
https://github.com/aboutcode-org/dejacode/issues/241
122122

123+
- Refine the way the PURL fragments are handled in searches.
124+
https://github.com/aboutcode-org/dejacode/issues/286
125+
126+
- Fix an issue with ``urlize_target_blank`` when the URL contains curly braces.
127+
128+
- Add the ability to download Product "Imports" input file.
129+
https://github.com/aboutcode-org/dejacode/issues/156
130+
131+
- Fix a logic issue in the ``ImportPackageFromScanCodeIO.import_package`` that occurs when
132+
multiple packages with the same PURL, but different download_url or filename,
133+
are present in the Dataspace.
134+
https://github.com/aboutcode-org/dejacode/issues/295
135+
136+
- Fix a logic issue in the ``ImportPackageFromScanCodeIO.import_dependencies`` to
137+
prevent the creation of duplicated "resolved" dependencies.
138+
https://github.com/aboutcode-org/dejacode/issues/297
139+
140+
- Display the filename/download_url in the Inventory tab.
141+
https://github.com/aboutcode-org/dejacode/issues/303
142+
143+
- Improve exception support in improve_packages_from_purldb task.
144+
In case of an exception, the error is properly logged on the Import instance.
145+
https://github.com/aboutcode-org/dejacode/issues/303
146+
147+
- Refine the ``update_from_purldb`` function to avoid any IntegrityError.
148+
Also, when multiple entries are returned from the PurlDB, only the common values are
149+
merged and kept for the data update.
150+
https://github.com/aboutcode-org/dejacode/issues/303
151+
152+
- Add a new "Package Set" tab to the Package details view.
153+
This tab displays related packages grouped by their normalized ("plain") Package URL.
154+
https://github.com/aboutcode-org/dejacode/issues/276
155+
123156
### Version 5.2.1
124157

125158
- Fix the models documentation navigation.

component_catalog/admin.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
from dje.templatetags.dje_tags import urlize_target_blank
7272
from dje.utils import CHANGELIST_LINK_TEMPLATE
7373
from dje.utils import get_instance_from_referer
74+
from dje.utils import is_purl_fragment
7475
from license_library.models import License
7576
from reporting.filters import ReportingQueryListFilter
7677

@@ -774,7 +775,17 @@ class PackageAdmin(
774775
"get_dataspace",
775776
)
776777
list_display_links = ("identifier",)
777-
search_fields = ("filename", "download_url", "project")
778+
search_fields = (
779+
"type",
780+
"namespace",
781+
"name",
782+
"version",
783+
"filename",
784+
"download_url",
785+
"sha1",
786+
"md5",
787+
"project",
788+
)
778789
ordering = ("-last_modified_date",)
779790
list_filter = (
780791
("component", HierarchyRelatedLookupListFilter),
@@ -912,6 +923,7 @@ def get_queryset(self, request):
912923
return (
913924
super()
914925
.get_queryset(request)
926+
.annotate_sortable_identifier()
915927
.select_related(
916928
"usage_policy",
917929
)
@@ -938,6 +950,16 @@ def get_urls(self):
938950

939951
return urls + super().get_urls()
940952

953+
def get_search_results(self, request, queryset, search_term):
    """
    Return the search results, trying a Package URL match first.

    When `search_term` is recognized by `is_purl_fragment` and
    `queryset.for_package_url(search_term)` matches at least one object, that
    QuerySet is returned along with a `False` "use distinct" flag.
    In every other case, the search is delegated to the parent implementation.
    """
    use_distinct = False

    if is_purl_fragment(search_term):
        purl_matches = queryset.for_package_url(search_term)
        # `exists()` runs a cheap existence query, whereas testing the QuerySet
        # truthiness evaluates it and loads every matching row only to check
        # that the result is not empty.
        if purl_matches.exists():
            return purl_matches, use_distinct

    return super().get_search_results(request, queryset, search_term)
962+
941963
def changeform_view(self, request, object_id=None, form_url="", extra_context=None):
942964
"""
943965
Add the `show_save_and_collect_data` in the context.
@@ -1053,6 +1075,10 @@ def inferred_url(self, obj):
10531075
return urlize_target_blank(inferred_url)
10541076
return ""
10551077

1078+
@admin.display(ordering="sortable_identifier")
def identifier(self, obj):
    """Return the `identifier` of `obj`; the column is ordered on `sortable_identifier`."""
    package_identifier = obj.identifier
    return package_identifier
1081+
10561082
def save_formset(self, request, form, formset, change):
10571083
"""
10581084
Update the completion_level on the related Component at the end of the saving process.

component_catalog/filters.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from dje.filters import HasRelationFilter
2323
from dje.filters import MatchOrderedSearchFilter
2424
from dje.filters import RelatedLookupListFilter
25+
from dje.utils import is_purl_fragment
2526
from dje.widgets import BootstrapSelectMultipleWidget
2627
from dje.widgets import DropDownRightWidget
2728
from dje.widgets import SortDropDownWidget
@@ -183,9 +184,9 @@ def filter(self, qs, value):
183184
if not value:
184185
return qs
185186

186-
is_purl = "/" in value
187-
if is_purl:
188-
return qs.for_package_url(value)
187+
if is_purl_fragment(value):
188+
if results := qs.for_package_url(value):
189+
return results
189190

190191
return super().filter(qs, value)
191192

component_catalog/models.py

Lines changed: 161 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,14 @@
1818
from django.core.exceptions import ValidationError
1919
from django.core.validators import EMPTY_VALUES
2020
from django.db import models
21+
from django.db.models import Case
2122
from django.db.models import CharField
2223
from django.db.models import Count
2324
from django.db.models import Exists
25+
from django.db.models import F
2426
from django.db.models import OuterRef
27+
from django.db.models import Value
28+
from django.db.models import When
2529
from django.db.models.functions import Concat
2630
from django.dispatch import receiver
2731
from django.template.defaultfilters import filesizeformat
@@ -72,6 +76,7 @@
7276
from dje.models import ReferenceNotesMixin
7377
from dje.tasks import logger as tasks_logger
7478
from dje.utils import is_purl_str
79+
from dje.utils import merge_common_non_empty_values
7580
from dje.utils import set_fields_from_object
7681
from dje.validators import generic_uri_validator
7782
from dje.validators import validate_url_segment
@@ -1650,6 +1655,65 @@ def __str__(self):
16501655
PACKAGE_URL_FIELDS = ["type", "namespace", "name", "version", "qualifiers", "subpath"]
16511656

16521657

1658+
def get_plain_package_url_expression():
    """
    Build the Django expression for the "PLAIN" Package URL (PURL).

    The resulting value has the form `pkg:<type>[/<namespace>]/<name>[@<version>]`,
    where the bracketed segments are only included when the field is not empty.
    The expression evaluates to an empty string when `type` or `name` is empty.
    """
    # Optional "/<namespace>" segment, skipped when the namespace is empty.
    namespace_segment = Case(
        When(namespace="", then=Value("")),
        default=Concat(Value("/"), F("namespace")),
        output_field=CharField(),
    )
    # Optional "@<version>" segment, skipped when the version is empty.
    version_segment = Case(
        When(version="", then=Value("")),
        default=Concat(Value("@"), F("version")),
        output_field=CharField(),
    )

    plain_purl = Concat(
        Value("pkg:"),
        F("type"),
        namespace_segment,
        Value("/"),
        F("name"),
        version_segment,
        output_field=CharField(),
    )

    # The required fields are checked first: no PURL without a type and a name.
    missing_required_field = (
        When(type="", then=Value("")),
        When(name="", then=Value("")),
    )
    return Case(
        *missing_required_field,
        default=plain_purl,
        output_field=CharField(),
    )
1687+
1688+
1689+
def get_package_url_expression():
    """
    Build the Django expression for the "FULL" Package URL (PURL).

    The value is the plain PURL expression followed by `?<qualifiers>` and
    `#<subpath>`, each one only included when the field is not empty.
    The expression evaluates to an empty string when `type` or `name` is empty.
    """

    def optional_segment(separator, field_name):
        """Return `separator` + field value, or an empty string for an empty field."""
        return Case(
            When(**{field_name: ""}, then=Value("")),
            default=Concat(Value(separator), F(field_name)),
            output_field=CharField(),
        )

    full_purl = Concat(
        get_plain_package_url_expression(),
        optional_segment("?", "qualifiers"),
        optional_segment("#", "subpath"),
        output_field=CharField(),
    )

    # Checked again at this level: without it, the qualifiers and subpath would
    # still be appended to the empty plain PURL of an incomplete package.
    missing_required_field = (
        When(type="", then=Value("")),
        When(name="", then=Value("")),
    )
    return Case(
        *missing_required_field,
        default=full_purl,
        output_field=CharField(),
    )
1715+
1716+
16531717
class PackageQuerySet(PackageURLQuerySetMixin, VulnerabilityQuerySetMixin, DataspacedQuerySet):
16541718
def has_package_url(self):
16551719
"""Return objects with Package URL defined."""
@@ -1665,6 +1729,26 @@ def annotate_sortable_identifier(self):
16651729
sortable_identifier=Concat(*PACKAGE_URL_FIELDS, "filename", output_field=CharField())
16661730
)
16671731

1732+
def annotate_plain_package_url(self):
    """
    Add a `plain_purl` annotation holding the "plain" Package URL (PURL).

    The value comes from `get_plain_package_url_expression()`: a minimal,
    normalized PURL made of the core fields `type`, `namespace`, `name`, and
    `version`, without the qualifiers and subpath components.
    """
    plain_purl_expression = get_plain_package_url_expression()
    return self.annotate(plain_purl=plain_purl_expression)
1742+
1743+
def annotate_package_url(self):
    """
    Add a `purl` annotation holding the fully-computed Package URL (PURL).

    The value comes from `get_package_url_expression()`: the core PURL fields
    (`type`, `namespace`, `name`, `version`) plus the qualifiers and subpath
    components.
    """
    purl_expression = get_package_url_expression()
    return self.annotate(purl=purl_expression)
1751+
16681752
def only_rendering_fields(self):
16691753
"""Minimum requirements to render a Package element in the UI."""
16701754
return self.only(
@@ -2454,6 +2538,7 @@ def get_purldb_entries(self, user, max_request_call=0, timeout=10):
24542538
if nothing was found.
24552539
"""
24562540
payloads = []
2541+
purldb_entries = []
24572542

24582543
package_url = self.package_url
24592544
if package_url:
@@ -2468,24 +2553,69 @@ def get_purldb_entries(self, user, max_request_call=0, timeout=10):
24682553
if max_request_call and index >= max_request_call:
24692554
return
24702555

2471-
if packages_data := purldb.find_packages(payload, timeout):
2472-
return packages_data
2556+
if purldb_entries := purldb.find_packages(payload, timeout):
2557+
break
2558+
2559+
if not purldb_entries:
2560+
return []
2561+
2562+
# Cleanup the PurlDB entries:
2563+
# - Packages with different PURL are excluded.
2564+
if package_url:
2565+
purldb_entries = [entry for entry in purldb_entries if entry.get("purl") == package_url]
2566+
2567+
return purldb_entries
24732568

24742569
def update_from_purldb(self, user):
24752570
"""
2476-
Find this Package in the PurlDB and update empty fields with PurlDB data
2477-
when available.
2571+
Update this Package instance with data from PurlDB.
2572+
2573+
- Retrieves matching entries from PurlDB using the given user.
2574+
- If exactly one match is found, its data is used directly.
2575+
- If multiple entries are found, only values that are non-empty and
2576+
common across all entries are merged and used to update the Package.
24782577
"""
24792578
purldb_entries = self.get_purldb_entries(user)
24802579
if not purldb_entries:
24812580
return
24822581

2483-
package_data = purldb_entries[0]
2582+
purldb_entries_count = len(purldb_entries)
2583+
if purldb_entries_count == 1:
2584+
package_data = purldb_entries[0]
2585+
else:
2586+
package_data = merge_common_non_empty_values(purldb_entries)
2587+
24842588
# The format from PURLDB is "2019-11-18T00:00:00Z"
24852589
if release_date := package_data.get("release_date"):
24862590
package_data["release_date"] = release_date.split("T")[0]
24872591
package_data["license_expression"] = package_data.get("declared_license_expression")
24882592

2593+
# Avoid raising an IntegrityError when the values in `package_data` for the
2594+
# identifier fields already exist on another Package instance.
2595+
#
2596+
# This situation can occur when a complete package (with both `purl` and
2597+
# `download_url`) already exists in the Dataspace, and `update_from_purldb` is
2598+
# called on a different package that has the same `purl` but no `download_url`.
2599+
#
2600+
# If we try to assign the same `download_url` to the second package, it would
2601+
# violate the unique constraints defined in the Package model (since the
2602+
# combination of fields must be unique).
2603+
unique_filters_lookups = {
2604+
field_name: package_data.get(field_name, "")
2605+
for field_name in self.get_identifier_fields()
2606+
}
2607+
unique_filters_qs = (
2608+
Package.objects.scope(self.dataspace)
2609+
.filter(**unique_filters_lookups)
2610+
.exclude(pk=self.pk)
2611+
)
2612+
if unique_filters_qs.exists():
2613+
# Remove the problematic "identifier_fields" values and the checksum values
2614+
hash_field_names = [field.name for field in HashFieldsMixin._meta.fields]
2615+
identifier_fields = self.get_identifier_fields()
2616+
for field_name in [*hash_field_names, *identifier_fields]:
2617+
package_data.pop(field_name, None)
2618+
24892619
updated_fields = self.update_from_data(
24902620
user,
24912621
package_data,
@@ -2508,6 +2638,32 @@ def update_from_scan(self, user):
25082638
updated_fields = scancodeio.update_from_scan(package=self, user=user)
25092639
return updated_fields
25102640

2641+
def get_related_packages_qs(self):
    """
    Return the QuerySet of the packages belonging to the same "Package Set"
    as this instance, or None when this instance has no "plain" Package URL.

    A "Package Set" is made of all the packages of the Dataspace sharing the
    same "plain" Package URL (PURL), that is, identical values for the `type`,
    `namespace`, `name`, and `version` components.
    The `qualifiers` and `subpath` components are not part of the comparison.
    """
    plain_purl = self.plain_package_url
    if plain_purl:
        ordering = [*PACKAGE_URL_FIELDS, "filename", "download_url"]
        dataspace_packages = self.__class__.objects.scope(self.dataspace)
        package_set = dataspace_packages.for_package_url(plain_purl, exact_match=True)
        return package_set.order_by(*ordering).distinct()

    return None
2666+
25112667

25122668
class PackageAssignedLicense(DataspacedModel):
25132669
package = models.ForeignKey(
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
<div title="{{ package.download_url }}"{% if not package.filename %} class="text-truncate"{% endif %}>
2+
{% if package.download_url %}
3+
<a href="{{ package.download_url }}">
4+
{% if display_icons %}
5+
<i class="fa-solid fa-download me-1"></i>
6+
{% endif %}
7+
{% if package.filename %}
8+
{{ package.filename }}
9+
{% else %}
10+
{{ package.download_url|truncatechars:40 }}
11+
{% endif %}
12+
</a>
13+
{% elif package.filename %}
14+
{% if display_icons %}
15+
<i class="fa-solid fa-file me-1"></i>
16+
{% endif %}
17+
{{ package.filename }}
18+
{% endif %}
19+
</div>

component_catalog/templates/component_catalog/tables/package_list_table.html

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
{% load i18n %}
22
{% load inject_preserved_filters from dje_tags %}
3-
{% load urlize_target_blank from dje_tags %}
43
{% load naturaltime_short from dje_tags %}
54
<table id="object-list-table" class="table table-bordered table-striped table-md table-fixed-layout text-break packages-table">
65
{% if form or add_to_component_form %}
@@ -58,12 +57,8 @@
5857
<td>
5958
{{ object.primary_language }}
6059
</td>
61-
<td title="{{ object.download_url }}"{% if not object.filename %} class="text-truncate"{% endif %}>
62-
{% if object.download_url %}
63-
<a href="{{ object.download_url }}">
64-
{% if object.filename %}{{ object.filename }}{% else %}{{ object.download_url }}{% endif %}
65-
</a>
66-
{% endif %}
60+
<td>
61+
{% include 'component_catalog/includes/package_filename_as_link.html' with package=object %}
6762
</td>
6863
<td>
6964
{% with components=object.component_set.all %}

0 commit comments

Comments
 (0)