Skip to content

Commit 59d9102

Browse files
authored
feat: add package_content PurlDB field on Package model #434
Signed-off-by: tdruez <tdruez@aboutcode.org>
1 parent bcdd275 commit 59d9102

File tree

12 files changed

+185
-5
lines changed

12 files changed

+185
-5
lines changed

component_catalog/admin.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -884,6 +884,7 @@ class PackageAdmin(
884884
"parties",
885885
"datasource_id",
886886
"file_references",
887+
"package_content",
887888
)
888889
},
889890
),

component_catalog/api.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -618,6 +618,7 @@ class PackageSerializer(
618618
required=False,
619619
scope_content_type=True,
620620
)
621+
package_content = serializers.ReadOnlyField(source="get_package_content_display")
621622
collect_data = serializers.BooleanField(
622623
write_only=True,
623624
required=False,
@@ -687,6 +688,7 @@ class Meta:
687688
"parties",
688689
"datasource_id",
689690
"file_references",
691+
"package_content",
690692
"external_references",
691693
"created_date",
692694
"last_modified_date",

component_catalog/forms.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,7 @@ class Meta:
339339
"version",
340340
"qualifiers",
341341
"subpath",
342+
"package_content",
342343
"collect_data",
343344
]
344345
widgets = {
@@ -407,7 +408,7 @@ def helper(self):
407408
HTML("<hr>"),
408409
Group("description", "keywords"),
409410
Group("primary_language", "cpe"),
410-
Group("size", "release_date"),
411+
Group("package_content", "size", "release_date"),
411412
Group("dependencies", "notes"),
412413
HTML("<hr>"),
413414
Group("homepage_url", "code_view_url"),
@@ -1183,6 +1184,7 @@ class Meta:
11831184
"version",
11841185
"qualifiers",
11851186
"subpath",
1187+
"package_content",
11861188
]
11871189

11881190

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Generated by Django 5.2.8 on 2025-11-24 12:00
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
    """Add the optional `package_content` integer field to the `package` model."""

    dependencies = [
        ("component_catalog", "0012_alter_component_children"),
    ]

    operations = [
        migrations.AddField(
            model_name="package",
            name="package_content",
            field=models.IntegerField(
                null=True,
                blank=True,
                choices=[
                    (1, "curation"),
                    (2, "patch"),
                    (3, "source_repo"),
                    (4, "source_archive"),
                    (5, "binary"),
                    (6, "test"),
                    (7, "doc"),
                ],
                help_text="Content of this Package as one of: curation, patch, source_repo, source_archive, binary, test, doc",
            ),
        ),
    ]

component_catalog/models.py

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
from dejacode_toolkit import spdx
5959
from dejacode_toolkit.purldb import PurlDB
6060
from dejacode_toolkit.purldb import pick_purldb_entry
61+
from dejacode_toolkit.purldb import pick_source_package
6162
from dejacode_toolkit.scancodeio import ScanCodeIO
6263
from dje import urn
6364
from dje.copier import post_copy
@@ -1652,6 +1653,42 @@ def __str__(self):
16521653
return self.label
16531654

16541655

1656+
class PackageContentFieldMixin(models.Model):
    """
    Abstract mixin providing the `package_content` field.

    The field is extracted from the `purldb.packagedb.models.Package` model.
    It needs to stay aligned with its upstream PurlDB implementation.
    """

    class PackageContentType(models.IntegerChoices):
        CURATION = 1, "curation"
        PATCH = 2, "patch"
        SOURCE_REPO = 3, "source_repo"
        SOURCE_ARCHIVE = 4, "source_archive"
        BINARY = 5, "binary"
        TEST = 6, "test"
        DOC = 7, "doc"

    # NOTE(review): the label list is interpolated before `_()` is applied, so
    # `_()` receives the fully formatted string rather than the "{}" template.
    package_content = models.IntegerField(
        choices=PackageContentType.choices,
        blank=True,
        null=True,
        help_text=_(
            "Content of this Package as one of: {}".format(", ".join(PackageContentType.labels))
        ),
    )

    class Meta:
        abstract = True

    @classmethod
    def get_package_content_value_from_label(cls, label):
        """
        Convert a package_content string label to its integer value.

        The lookup is done on the upper-cased `label` against the
        `PackageContentType` member names. Return None when no member matches
        or when `label` has no `upper()` method (None, integers, ...).
        """
        try:
            content_type = cls.PackageContentType[label.upper()]
        except (KeyError, AttributeError):
            # KeyError: unknown member name.
            # AttributeError: `label` is not string-like.
            return None
        return content_type.value
1690+
1691+
16551692
PACKAGE_URL_FIELDS = ["type", "namespace", "name", "version", "qualifiers", "subpath"]
16561693

16571694

@@ -1795,6 +1832,7 @@ class Package(
17951832
URLFieldsMixin,
17961833
HashFieldsMixin,
17971834
PackageURLMixin,
1835+
PackageContentFieldMixin,
17981836
DataspacedModel,
17991837
):
18001838
filename = models.CharField(
@@ -2504,7 +2542,7 @@ def create_from_url(cls, url, user):
25042542
package_for_match = cls(download_url=download_url)
25052543
package_for_match.set_package_url(package_url)
25062544
purldb_entries = package_for_match.get_purldb_entries(user)
2507-
# Look for one ith the same exact purl in that case
2545+
# Look for one with the same exact purl in that case
25082546
if purldb_data := pick_purldb_entry(purldb_entries, purl=url):
25092547
# The format from PurlDB is "2019-11-18T00:00:00Z" from DateTimeField
25102548
if release_date := purldb_data.get("release_date"):
@@ -2597,6 +2635,8 @@ def update_from_purldb(self, user):
25972635
25982636
- Retrieves matching entries from PurlDB using the given user.
25992637
- If exactly one match is found, its data is used directly.
2638+
- If multiple entries are found, leverage the package_content value when
2639+
available to select a "source" package.
26002640
- Otherwise, when multiple entries are found, only values that are non-empty and
26012641
common across all entries are merged and used to update the Package.
26022642
"""
@@ -2607,6 +2647,8 @@ def update_from_purldb(self, user):
26072647
purldb_entries_count = len(purldb_entries)
26082648
if purldb_entries_count == 1:
26092649
package_data = purldb_entries[0]
2650+
elif source_package := pick_source_package(purldb_entries):
2651+
package_data = source_package
26102652
else:
26112653
package_data = merge_common_non_empty_values(purldb_entries)
26122654

@@ -2615,6 +2657,10 @@ def update_from_purldb(self, user):
26152657
package_data["release_date"] = release_date.split("T")[0]
26162658
package_data["license_expression"] = package_data.get("declared_license_expression")
26172659

2660+
if package_content := package_data.get("package_content"):
2661+
package_content_value = Package.get_package_content_value_from_label(package_content)
2662+
package_data["package_content"] = package_content_value
2663+
26182664
# Avoid raising an IntegrityError when the values in `package_data` for the
26192665
# identifier fields already exist on another Package instance.
26202666
#
@@ -2647,6 +2693,12 @@ def update_from_purldb(self, user):
26472693
override=False,
26482694
override_unknown=True,
26492695
)
2696+
2697+
if updated_fields:
2698+
msg = f"Automatically updated {', '.join(updated_fields)} from PurlDB."
2699+
logger.debug(f"PurlDB: {msg}")
2700+
History.log_change(user, self, message=msg)
2701+
26502702
return updated_fields
26512703

26522704
def update_from_scan(self, user, update_products=False):

component_catalog/tests/test_models.py

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1366,6 +1366,7 @@ def test_component_catalog_models_get_exclude_candidates_fields(self):
13661366
"file_references",
13671367
"other_license_expression",
13681368
"parties",
1369+
"package_content",
13691370
],
13701371
),
13711372
)
@@ -2381,6 +2382,16 @@ def test_package_model_github_repo_url(self):
23812382
p.download_url = url
23822383
self.assertEqual(expected, p.github_repo_url)
23832384

2385+
def test_package_model_get_package_content_value_from_label(self):
    """Invalid labels convert to None; the "patch" label converts to 2 in any case."""
    to_value = Package.get_package_content_value_from_label

    for invalid_label in (None, 100, "wrong"):
        self.assertIsNone(to_value(invalid_label))

    for patch_label in ("patch", "Patch", "PATCH"):
        self.assertEqual(2, to_value(patch_label))
2394+
23842395
@mock.patch("requests.get")
23852396
def test_collect_package_data(self, mock_get):
23862397
expected_message = (
@@ -2635,6 +2646,7 @@ def test_package_model_update_from_purldb(self, mock_get_purldb_entries):
26352646
"sha256": "0a1efde1b685a6c30999ba00902f23613cf5db864c5a1532d2edf3eda7896a37",
26362647
"copyright": "(c) Copyright",
26372648
"declared_license_expression": "(bsd-simplified AND bsd-new)",
2649+
"package_content": "source_archive",
26382650
}
26392651

26402652
mock_get_purldb_entries.return_value = [purldb_entry]
@@ -2656,12 +2668,13 @@ def test_package_model_update_from_purldb(self, mock_get_purldb_entries):
26562668
"sha256",
26572669
"copyright",
26582670
"declared_license_expression",
2671+
"package_content",
26592672
"license_expression",
26602673
]
26612674
self.assertEqual(expected, updated_fields)
26622675

26632676
package1.refresh_from_db()
2664-
# Handle release_date separatly
2677+
# Handle release_date and package_content separately
26652678
updated_fields.remove("release_date")
26662679
self.assertEqual(purldb_entry["release_date"], str(package1.release_date))
26672680

@@ -2700,6 +2713,42 @@ def test_package_model_update_from_purldb_multiple_entries(self, mock_get_purldb
27002713
self.assertEqual(["Keyword1", "Keyword2"], package1.keywords)
27012714
self.assertEqual("Python", package1.primary_language)
27022715

2716+
@mock.patch("component_catalog.models.Package.get_purldb_entries")
def test_package_model_update_from_purldb_multiple_entries_package_content(
    self, mock_get_entries
):
    """Check that the "source_archive" entry is used when several entries are returned."""
    binary_entry = {
        "uuid": "e133e70b-8dd3-4cf1-9711-72b1f57523a0",
        "purl": "pkg:pypi/boto3@1.37.26?file_name=boto3-1.37.26-py3-none-any.whl",
        "type": "pypi",
        "name": "boto3",
        "version": "1.37.26",
        "filename": "boto3-1.37.26-py3-none-any.whl",
        "download_url": "https://files.pythonhosted.org/packages/boto3-1.37.26-py3-none-any.whl",
        "package_content": "binary",
    }
    source_entry = {
        "uuid": "326aa7a8-4f28-406d-89f9-c1404916925b",
        "purl": "pkg:pypi/boto3@1.37.26?file_name=boto3-1.37.26.tar.gz",
        "type": "pypi",
        "name": "boto3",
        "version": "1.37.26",
        "filename": "boto3-1.37.26.tar.gz",
        "download_url": "https://files.pythonhosted.org/packages/boto3-1.37.26.tar.gz",
        "package_content": "source_archive",
    }
    # The binary entry is listed first, so the source entry is not simply
    # the first item of the returned list.
    mock_get_entries.return_value = [binary_entry, source_entry]

    package1 = make_package(self.dataspace, package_url="pkg:pypi/boto3@1.37.26")
    updated_fields = package1.update_from_purldb(self.user)
    self.assertEqual(["download_url", "filename", "package_content"], sorted(updated_fields))

    package1.refresh_from_db()
    for field_name in ("download_url", "filename"):
        self.assertEqual(source_entry[field_name], getattr(package1, field_name))
    self.assertEqual("source_archive", package1.get_package_content_display())
2751+
27032752
@mock.patch("component_catalog.models.Package.get_purldb_entries")
27042753
def test_package_model_update_from_purldb_duplicate_exception(self, mock_get_purldb_entries):
27052754
package_url = "pkg:pypi/django@3.0"

component_catalog/tests/test_views.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1243,6 +1243,7 @@ def test_package_details_view_num_queries(self):
12431243
# Create a Package Set
12441244
package_url = "pkg:pypi/django@5.0"
12451245
self.package1.set_package_url(package_url)
1246+
self.package1.package_content = Package.PackageContentType.SOURCE_ARCHIVE
12461247
self.package1.save()
12471248
license_expression = "{} AND {}".format(self.license1.key, self.license2.key)
12481249
make_package(self.dataspace, package_url=package_url, license_expression=license_expression)
@@ -3389,6 +3390,7 @@ def test_component_catalog_package_add_view_initial_data(
33893390
"description": "Abbot Java GUI Test Library",
33903391
"declared_license_expression": "bsd-new OR eps-1.0 OR apache-2.0 OR mit",
33913392
"keywords": ["keyword1", "keyword2"],
3393+
"package_content": "binary",
33923394
}
33933395
mock_request_get.return_value = {
33943396
"count": 1,
@@ -3411,6 +3413,7 @@ def test_component_catalog_package_add_view_initial_data(
34113413
"description": "Abbot Java GUI Test Library",
34123414
"license_expression": "bsd-new OR eps-1.0 OR apache-2.0 OR mit",
34133415
"declared_license_expression": "bsd-new OR eps-1.0 OR apache-2.0 OR mit",
3416+
"package_content": Package.PackageContentType.BINARY,
34143417
}
34153418
self.assertEqual(expected, response.context["form"].initial)
34163419

component_catalog/views.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1140,6 +1140,7 @@ class PackageDetailsView(
11401140
"parties",
11411141
"datasource_id",
11421142
"file_references",
1143+
"package_content",
11431144
],
11441145
},
11451146
"components": {
@@ -1293,6 +1294,7 @@ def tab_others(self):
12931294
TabField("parties"),
12941295
TabField("datasource_id"),
12951296
TabField("file_references"),
1297+
TabField("package_content", source="get_package_content_display"),
12961298
]
12971299

12981300
fields = self.get_tab_fields(tab_fields)
@@ -1930,6 +1932,12 @@ def get_initial(self):
19301932
if purldb_entry := self.get_entry_from_purldb():
19311933
# Duplicate the declared_license_expression as the "concluded" license_expression
19321934
purldb_entry["license_expression"] = purldb_entry.get("declared_license_expression")
1935+
1936+
# Convert package_content string label to integer value
1937+
if content_label := purldb_entry.pop("package_content", None):
1938+
if content_value := Package.get_package_content_value_from_label(content_label):
1939+
purldb_entry["package_content"] = content_value
1940+
19331941
model_fields = [field.name for field in Package._meta.get_fields()]
19341942
initial_from_purldb_entry = {
19351943
field_name: value

dejacode_toolkit/purldb.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ def get_package_by_purl(self, package_url):
6161

6262
def find_packages(self, payload, timeout=None):
6363
"""Get Packages details using provided `payload` filters on the PurlDB package list."""
64+
payload.update({"sort": "package_content"})
65+
6466
response = self.request_get(self.package_api_url, params=payload, timeout=timeout)
6567
if response and response.get("count") > 0:
6668
return response.get("results")
@@ -88,3 +90,17 @@ def pick_purldb_entry(purldb_entries, purl=None):
8890
matches = [entry for entry in purldb_entries if entry.get("purl") == purl]
8991
if len(matches) == 1:
9092
return matches[0]
93+
94+
95+
def pick_source_package(purldb_entries):
    """
    Pick a source package from a list of PurlDB entries.

    - Return None when `purldb_entries` is empty or None.
    - Return the sole entry when the list contains exactly one item,
      whatever its `package_content` value.
    - Otherwise return the first entry whose `package_content` label equals
      "source_archive" (case-insensitive), or None when there is none.
    """
    if not purldb_entries:
        return None

    if len(purldb_entries) == 1:
        return purldb_entries[0]

    for entry in purldb_entries:
        package_content = entry.get("package_content")
        # Only string labels can be compared; skip any other value (such as an
        # integer choice) instead of raising AttributeError on `.lower()`.
        if isinstance(package_content, str) and package_content.lower() == "source_archive":
            return entry

    return None

dje/tests/testfiles/test_dataset_cc_only.json

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -292,12 +292,13 @@
292292
"vcs_url": "",
293293
"code_view_url": "",
294294
"bug_tracking_url": "",
295+
"md5": "",
296+
"sha1": "",
295297
"sha256": "",
296298
"sha512": "",
299+
"package_content": null,
297300
"filename": "systemu-2.5.2.gem",
298301
"download_url": "https://s3.amazonaws.com/production.s3.rubygems.org/gems/systemu-2.5.2.gem",
299-
"sha1": "",
300-
"md5": "",
301302
"size": null,
302303
"release_date": null,
303304
"primary_language": "",

0 commit comments

Comments
 (0)