Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
Release 0.14.0 (unreleased)
===========================

* Use the ``github`` purl type, repo name and release version for a GitHub release archive in the SBOM (#1063)

Release 0.13.0 (released 2026-03-30)
====================================

Expand Down
1 change: 1 addition & 0 deletions dfetch/reporting/sbom_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ def add_project(
else:
purl = vcs_url_to_purl(project.remote_url, version=version, subpath=subpath)
name = project.name if purl.type == "generic" else purl.name
version = purl.version or version
location = self.manifest.find_name_in_manifest(project.name)
component = Component(
name=name,
Expand Down
18 changes: 17 additions & 1 deletion dfetch/vcs/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import http.client
import os
import pathlib
import re
import shutil
import stat
import sys
Expand All @@ -47,6 +48,7 @@
copy_src_subset,
prune_files_by_pattern,
)
from dfetch.util.versions import coerce

logger = get_logger(__name__)

Expand Down Expand Up @@ -82,7 +84,21 @@ def archive_url_to_purl(
version: str | None = None,
subpath: str | None = None,
) -> PackageURL:
"""Build a generic PackageURL for an archive download URL."""
"""Build a github or generic PackageURL for an archive download URL."""
if match := re.search(
r"https://github\.com/(?P<org>[^/]+)/(?P<repo>[^/]+)/releases/download/(?P<version>[^/]+)/",
download_url,
):
prefix, current_version, _ = coerce(
match["version"],
)
return PackageURL(
type="github",
namespace=match["org"].lower(),
name=match["repo"].lower(),
version=str(current_version) if current_version else prefix,
)
Comment thread
spoorcc marked this conversation as resolved.

parsed = urllib.parse.urlparse(download_url)
basename = os.path.basename(parsed.path)
Comment on lines +88 to 103
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Anchor GitHub detection to parsed host/path to avoid false positives.

Using re.search on the full URL can incorrectly match GitHub patterns inside the query parameters or fragment of a non-GitHub URL, resulting in a wrong SBOM package identity.

Proposed fix
-    if match := re.search(
-        r"https://github\.com/(?P<org>[^/]+)/(?P<repo>[^/]+)/releases/download/(?P<version>[^/]+)/",
-        download_url,
-    ):
+    parsed = urllib.parse.urlparse(download_url)
+    if (
+        (parsed.hostname or "").lower() == "github.com"
+        and (
+            match := re.match(
+                r"^/(?P<org>[^/]+)/(?P<repo>[^/]+)/releases/download/(?P<version>[^/]+)/",
+                parsed.path,
+            )
+        )
+    ):
         prefix, current_version, _ = coerce(
             match["version"],
         )
         return PackageURL(
             type="github",
             namespace=match["org"].lower(),
             name=match["repo"].lower(),
             version=str(current_version) if current_version else prefix,
         )
 
-    parsed = urllib.parse.urlparse(download_url)
     basename = os.path.basename(parsed.path)
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@dfetch/vcs/archive.py` around lines 88 - 103, The GitHub detection currently
runs re.search against the whole download_url which can match GitHub-like
strings in query/fragments; first parse the URL (use parsed =
urllib.parse.urlparse(download_url)) and then only run the GitHub regex against
parsed.netloc and parsed.path (or check parsed.hostname == "github.com" and
match parsed.path against
r"/(?P<org>[^/]+)/(?P<repo>[^/]+)/releases/download/(?P<version>[^/]+)/"); keep
the same logic that calls coerce(match["version"]) and returns the PackageURL
with namespace, name, and version, but derive match from the parsed.path (or
combined host+path) to avoid false positives.

name = strip_archive_extension(basename) or "unknown"
Expand Down
36 changes: 36 additions & 0 deletions features/report-sbom-archive.feature
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,39 @@ Feature: Create a CycloneDX SBOM for archive dependencies
]
}
"""

Scenario: A GitHub release archive uses github purl and repo
Given the manifest 'dfetch.yaml'
"""
manifest:
version: '0.1'

remotes:
- name: github
url-base: https://github.com/

projects:
- name: TF-PSA-Crypto
vcs: archive
remote: github
dst: ext/TF-PSA-Crypto
repo-path: Mbed-TLS/TF-PSA-Crypto/releases/download/tf-psa-crypto-1.0.0/tf-psa-crypto-1.0.0.tar.bz2
integrity:
hash: sha256:31f0df2ca17897b5db2757cb0307dcde267292ba21ade831663d972a7a5b7d40
"""
And all projects are updated
When I run "dfetch report -t sbom"
Then the 'report.json' json file includes
"""
{
"components": [
{
"group": "mbed-tls",
"name": "tf-psa-crypto",
"purl": "pkg:github/mbed-tls/tf-psa-crypto@1.0.0",
"type": "library",
"version": "1.0.0"
}
]
}
"""
Loading