Skip to content

Commit f6c6063

Browse files
committed
Merge branch 'main' into 276-package_set
2 parents 26926c0 + e06fb5c commit f6c6063

20 files changed

Lines changed: 756 additions & 400 deletions

File tree

CHANGELOG.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,18 @@ Release notes
137137
prevent the creation of duplicated "resolved" dependencies.
138138
https://github.com/aboutcode-org/dejacode/issues/297
139139

140+
- Display the filename/download_url in the Inventory tab.
141+
https://github.com/aboutcode-org/dejacode/issues/303
142+
143+
- Improve exception support in improve_packages_from_purldb task.
144+
In case of an exception, the error is properly logged on the Import instance.
145+
https://github.com/aboutcode-org/dejacode/issues/303
146+
147+
- Refine the ``update_from_purldb`` function to avoid any IntegrityError.
148+
Also, when multiple entries are returned from the PurlDB, only the common values are
149+
merged and kept for the data update.
150+
https://github.com/aboutcode-org/dejacode/issues/303
151+
140152
### Version 5.2.1
141153

142154
- Fix the models documentation navigation.

component_catalog/models.py

Lines changed: 57 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@
7676
from dje.models import ReferenceNotesMixin
7777
from dje.tasks import logger as tasks_logger
7878
from dje.utils import is_purl_str
79+
from dje.utils import merge_common_non_empty_values
7980
from dje.utils import set_fields_from_object
8081
from dje.validators import generic_uri_validator
8182
from dje.validators import validate_url_segment
@@ -2551,6 +2552,7 @@ def get_purldb_entries(self, user, max_request_call=0, timeout=10):
25512552
is nothing was found.
25522553
"""
25532554
payloads = []
2555+
purldb_entries = []
25542556

25552557
package_url = self.package_url
25562558
if package_url:
@@ -2565,30 +2567,80 @@ def get_purldb_entries(self, user, max_request_call=0, timeout=10):
25652567
if max_request_call and index >= max_request_call:
25662568
return
25672569

2568-
if packages_data := purldb.find_packages(payload, timeout):
2569-
return packages_data
2570+
if purldb_entries := purldb.find_packages(payload, timeout):
2571+
break
2572+
2573+
if not purldb_entries:
2574+
return []
2575+
2576+
# Cleanup the PurlDB entries:
2577+
# - Packages with different PURL are excluded.
2578+
if package_url:
2579+
purldb_entries = [entry for entry in purldb_entries if entry.get("purl") == package_url]
2580+
2581+
return purldb_entries
25702582

25712583
def update_from_purldb(self, user):
25722584
"""
2573-
Find this Package in the PurlDB and update empty fields with PurlDB data
2574-
when available.
2585+
Update this Package instance with data from PurlDB.
2586+
2587+
- Retrieves matching entries from PurlDB using the given user.
2588+
- If exactly one match is found, its data is used directly.
2589+
- If multiple entries are found, only values that are non-empty and
2590+
common across all entries are merged and used to update the Package.
25752591
"""
25762592
purldb_entries = self.get_purldb_entries(user)
25772593
if not purldb_entries:
25782594
return
25792595

2580-
package_data = purldb_entries[0]
2596+
purldb_entries_count = len(purldb_entries)
2597+
if purldb_entries_count == 1:
2598+
package_data = purldb_entries[0]
2599+
else:
2600+
package_data = merge_common_non_empty_values(purldb_entries)
2601+
25812602
# The format from PURLDB is "2019-11-18T00:00:00Z"
25822603
if release_date := package_data.get("release_date"):
25832604
package_data["release_date"] = release_date.split("T")[0]
25842605
package_data["license_expression"] = package_data.get("declared_license_expression")
25852606

2607+
# Avoid raising an IntegrityError when the values in `package_data` for the
2608+
# identifier fields already exist on another Package instance.
2609+
#
2610+
# This situation can occur when a complete package (with both `purl` and
2611+
# `download_url`) already exists in the Dataspace, and `update_from_purldb` is
2612+
# called on a different package that has the same `purl` but no `download_url`.
2613+
#
2614+
# If we try to assign the same `download_url` to the second package, it would
2615+
# violate the unique constraints defined in the Package model (since the
2616+
# combination of fields must be unique).
2617+
unique_filters_lookups = {
2618+
field_name: package_data.get(field_name, "")
2619+
for field_name in self.get_identifier_fields()
2620+
}
2621+
unique_filters_qs = (
2622+
Package.objects.scope(self.dataspace)
2623+
.filter(**unique_filters_lookups)
2624+
.exclude(pk=self.pk)
2625+
)
2626+
if unique_filters_qs.exists():
2627+
# Remove the problematic "identifier_fields" values and the checksum values
2628+
hash_field_names = [field.name for field in HashFieldsMixin._meta.fields]
2629+
identifier_fields = self.get_identifier_fields()
2630+
for field_name in [*hash_field_names, *identifier_fields]:
2631+
package_data.pop(field_name, None)
2632+
2633+
# try:
25862634
updated_fields = self.update_from_data(
25872635
user,
25882636
package_data,
25892637
override=False,
25902638
override_unknown=True,
25912639
)
2640+
# except IntegrityError as e:
2641+
# logger.error(f"[update_from_purldb] Skipping {self} due to IntegrityError: {e}")
2642+
# return []
2643+
25922644
return updated_fields
25932645

25942646
def update_from_scan(self, user):
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
<div title="{{ package.download_url }}"{% if not package.filename %} class="text-truncate"{% endif %}>
2+
{% if package.download_url %}
3+
<a href="{{ package.download_url }}">
4+
{% if display_icons %}
5+
<i class="fa-solid fa-download me-1"></i>
6+
{% endif %}
7+
{% if package.filename %}
8+
{{ package.filename }}
9+
{% else %}
10+
{{ package.download_url|truncatechars:40 }}
11+
{% endif %}
12+
</a>
13+
{% elif package.filename %}
14+
{% if display_icons %}
15+
<i class="fa-solid fa-file me-1"></i>
16+
{% endif %}
17+
{{ package.filename }}
18+
{% endif %}
19+
</div>

component_catalog/templates/component_catalog/tables/package_list_table.html

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,12 +57,8 @@
5757
<td>
5858
{{ object.primary_language }}
5959
</td>
60-
<td title="{{ object.download_url }}"{% if not object.filename %} class="text-truncate"{% endif %}>
61-
{% if object.download_url %}
62-
<a href="{{ object.download_url }}">
63-
{% if object.filename %}{{ object.filename }}{% else %}{{ object.download_url }}{% endif %}
64-
</a>
65-
{% endif %}
60+
<td>
61+
{% include 'component_catalog/includes/package_filename_as_link.html' with package=object %}
6662
</td>
6763
<td>
6864
{% with components=object.component_set.all %}

component_catalog/tests/test_models.py

Lines changed: 88 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2556,6 +2556,30 @@ def test_package_model_inferred_url_property(self):
25562556
expected = "https://github.com/package-url/packageurl-python/tree/v0.10.4"
25572557
self.assertEqual(expected, package1.inferred_url)
25582558

2559+
@mock.patch("dejacode_toolkit.purldb.PurlDB.find_packages")
2560+
def test_package_model_get_purldb_entries(self, mock_find_packages):
2561+
purl = "pkg:pypi/django@3.0"
2562+
package1 = make_package(self.dataspace, package_url=purl)
2563+
purldb_entry1 = {
2564+
"purl": purl,
2565+
"type": "pypi",
2566+
"name": "django",
2567+
"version": "3.0",
2568+
}
2569+
purldb_entry2 = {
2570+
"purl": "pkg:pypi/django",
2571+
"type": "pypi",
2572+
"name": "django",
2573+
}
2574+
2575+
mock_find_packages.return_value = None
2576+
purldb_entries = package1.get_purldb_entries(user=self.user)
2577+
2578+
mock_find_packages.return_value = [purldb_entry1, purldb_entry2]
2579+
purldb_entries = package1.get_purldb_entries(user=self.user)
2580+
# The purldb_entry2 is excluded as the PURL differs
2581+
self.assertEqual([purldb_entry1], purldb_entries)
2582+
25592583
@mock.patch("component_catalog.models.Package.get_purldb_entries")
25602584
def test_package_model_update_from_purldb(self, mock_get_purldb_entries):
25612585
purldb_entry = {
@@ -2577,9 +2601,9 @@ def test_package_model_update_from_purldb(self, mock_get_purldb_entries):
25772601
}
25782602

25792603
mock_get_purldb_entries.return_value = [purldb_entry]
2580-
package1 = Package.objects.create(
2604+
package1 = make_package(
2605+
self.dataspace,
25812606
filename="package",
2582-
dataspace=self.dataspace,
25832607
# "unknown" values are overrided
25842608
declared_license_expression="unknown",
25852609
)
@@ -2607,6 +2631,68 @@ def test_package_model_update_from_purldb(self, mock_get_purldb_entries):
26072631
for field_name in updated_fields:
26082632
self.assertEqual(purldb_entry[field_name], getattr(package1, field_name))
26092633

2634+
@mock.patch("component_catalog.models.Package.get_purldb_entries")
2635+
def test_package_model_update_from_purldb_multiple_entries(self, mock_get_purldb_entries):
2636+
purldb_entry1 = {
2637+
"uuid": "326aa7a8-4f28-406d-89f9-c1404916925b",
2638+
"purl": "pkg:pypi/django@3.0",
2639+
"type": "pypi",
2640+
"name": "django",
2641+
"version": "3.0",
2642+
"keywords": ["Keyword1", "Keyword2"],
2643+
"filename": "Django-3.0.tar.gz",
2644+
"download_url": "https://files.pythonhosted.org/packages/38/Django-3.0.tar.gz",
2645+
}
2646+
purldb_entry2 = {
2647+
"uuid": "e133e70b-8dd3-4cf1-9711-72b1f57523a0",
2648+
"purl": "pkg:pypi/django@3.0",
2649+
"type": "pypi",
2650+
"name": "django",
2651+
"version": "3.0",
2652+
"primary_language": "Python",
2653+
"keywords": ["Keyword1", "Keyword2"],
2654+
"download_url": "https://another.url/Django-3.0.tar.gz",
2655+
}
2656+
2657+
mock_get_purldb_entries.return_value = [purldb_entry1, purldb_entry2]
2658+
package1 = make_package(self.dataspace, package_url="pkg:pypi/django@3.0")
2659+
updated_fields = package1.update_from_purldb(self.user)
2660+
expected = ["filename", "keywords", "primary_language"]
2661+
self.assertEqual(expected, sorted(updated_fields))
2662+
self.assertEqual("Django-3.0.tar.gz", package1.filename)
2663+
self.assertEqual(["Keyword1", "Keyword2"], package1.keywords)
2664+
self.assertEqual("Python", package1.primary_language)
2665+
2666+
@mock.patch("component_catalog.models.Package.get_purldb_entries")
2667+
def test_package_model_update_from_purldb_duplicate_exception(self, mock_get_purldb_entries):
2668+
package_url = "pkg:pypi/django@3.0"
2669+
download_url = "https://files.pythonhosted.org/packages/38/Django-3.0.tar.gz"
2670+
purldb_entry = {
2671+
"purl": package_url,
2672+
"type": "pypi",
2673+
"name": "django",
2674+
"version": "3.0",
2675+
"download_url": download_url,
2676+
"description": "This value will be updated",
2677+
"md5": "This value is skipped",
2678+
"sha1": "This value is skipped",
2679+
}
2680+
mock_get_purldb_entries.return_value = [purldb_entry]
2681+
2682+
# 2 packages with the same "pkg:pypi/django@3.0" PURL:
2683+
# - 1 with a `download_url` value
2684+
# - 1 without a `download_url` value
2685+
make_package(self.dataspace, package_url=package_url, download_url=download_url)
2686+
package_no_download_url = make_package(self.dataspace, package_url=package_url)
2687+
2688+
# Updating the package with the `download_url` from the purldb_entry data
2689+
# would violates the unique constraint.
2690+
# This is handle properly by update_from_purldb.
2691+
updated_fields = package_no_download_url.update_from_purldb(self.user)
2692+
self.assertEqual(["description"], updated_fields)
2693+
package_no_download_url.refresh_from_db()
2694+
self.assertEqual(purldb_entry["description"], package_no_download_url.description)
2695+
26102696
def test_package_model_vulnerability_queryset_mixin(self):
26112697
package1 = make_package(self.dataspace, is_vulnerable=True)
26122698
package2 = make_package(self.dataspace)

component_catalog/tests/test_views.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1194,20 +1194,20 @@ def test_package_list_view_download_column(self):
11941194
response = self.client.get(reverse("component_catalog:package_list"))
11951195

11961196
expected = f"""
1197-
<td title="{self.package1.download_url}">
1197+
<div title="{self.package1.download_url}">
11981198
<a href="{self.package1.download_url}">
11991199
{self.package1.filename}
12001200
</a>
1201-
</td>
1201+
</div>
12021202
"""
12031203
self.assertContains(response, expected, html=True)
12041204

12051205
expected = f"""
1206-
<td title="{self.package2.download_url}" class="text-truncate">
1206+
<div title="{self.package2.download_url}" class="text-truncate">
12071207
<a href="{self.package2.download_url}">
12081208
{self.package2.download_url}
12091209
</a>
1210-
</td>
1210+
</div>
12111211
"""
12121212
self.assertContains(response, expected, html=True)
12131213

component_catalog/views.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2462,9 +2462,12 @@ def get_tab_fields(self):
24622462
}
24632463
tab_fields.append(("", alert_context, None, "includes/field_alert.html"))
24642464

2465-
if len(purldb_entries) > 1:
2465+
len_purldb_entries = len(purldb_entries)
2466+
if len_purldb_entries > 1:
24662467
alert_context = {
2467-
"message": "There are multiple entries in the PurlDB for this Package.",
2468+
"message": (
2469+
f"There are {len_purldb_entries} entries in the PurlDB for this Package."
2470+
),
24682471
"full_width": True,
24692472
"alert_class": "alert-warning",
24702473
}

dejacode_toolkit/purldb.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def get_package(self, uuid):
5656

5757
def get_package_by_purl(self, package_url):
5858
"""Get a Package details entry providing its `package_url`."""
59-
if results := self.find_packages({"purl": package_url}):
59+
if results := self.find_packages(payload={"purl": package_url}):
6060
return results[0]
6161

6262
def find_packages(self, payload, timeout=None):

0 commit comments

Comments
 (0)