Skip to content

Commit 978b347

Browse files
committed
Fixes
1 parent 8704c93 commit 978b347

11 files changed

Lines changed: 215 additions & 122 deletions

File tree

.github/workflows/release.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ jobs:
5050
5151
- name: Update latest tag
5252
if: ${{ steps.release_info.outputs.tag == 'latest' }}
53-
uses: EndBug/latest-tag@fabb56bc8d15d5937c76719060da2226f5c3ffa8
53+
uses: EndBug/latest-tag@fabb56bc8d15d5937c76719060da2226f5c3ffa8
5454
with:
5555
ref: latest
5656
description: Last state in main

dfetch/manifest/manifest.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,9 @@ class ManifestDict(TypedDict, total=True): # pylint: disable=too-many-ancestors
9797

9898
version: int | str
9999
remotes: NotRequired[Sequence[RemoteDict | Remote]]
100-
projects: Sequence[ProjectEntryDict | ProjectEntry | dict[str, str | list[str]]]
100+
projects: Sequence[
101+
ProjectEntryDict | ProjectEntry | dict[str, str | list[str] | dict[str, str]]
102+
]
101103

102104

103105
class Manifest:
@@ -140,14 +142,16 @@ def __init__(
140142
def _init_projects(
141143
self,
142144
projects: Sequence[
143-
ProjectEntryDict | ProjectEntry | dict[str, str | list[str]]
145+
ProjectEntryDict
146+
| ProjectEntry
147+
| dict[str, str | list[str] | dict[str, str]]
144148
],
145149
) -> dict[str, ProjectEntry]:
146150
"""Iterate over projects from manifest and initialize ProjectEntries from it.
147151
148152
Args:
149153
projects (Sequence[
150-
Union[ProjectEntryDict, ProjectEntry, Dict[str, Union[str, list[str]]]]
154+
Union[ProjectEntryDict, ProjectEntry, Dict[str, Union[str, list[str], dict[str, str]]]]
151155
]): Iterable with projects
152156
153157
Raises:
@@ -304,9 +308,11 @@ def _as_dict(self) -> dict[str, ManifestDict]:
304308
if len(remotes) == 1:
305309
remotes[0].pop("default", None)
306310

307-
projects: list[dict[str, str | list[str]]] = []
311+
projects: list[dict[str, str | list[str] | dict[str, str]]] = []
308312
for project in self.projects:
309-
project_yaml: dict[str, str | list[str]] = project.as_yaml()
313+
project_yaml: dict[str, str | list[str] | dict[str, str]] = (
314+
project.as_yaml()
315+
)
310316
if len(remotes) == 1:
311317
project_yaml.pop("remote", None)
312318
projects.append(project_yaml)

dfetch/manifest/project.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,7 @@ def as_yaml(self) -> dict[str, str]:
370370
"repo-path": str,
371371
"vcs": str,
372372
"ignore": Sequence[str],
373-
"integrity": dict,
373+
"integrity": dict[str, str],
374374
"default_remote": str,
375375
},
376376
total=False,
@@ -398,7 +398,7 @@ def __init__(self, kwargs: ProjectEntryDict) -> None:
398398
self._tag: str = kwargs.get("tag", "")
399399
self._vcs: str = kwargs.get("vcs", "")
400400
self._ignore: Sequence[str] = kwargs.get("ignore", [])
401-
integrity_data: dict = kwargs.get("integrity", {})
401+
integrity_data: dict[str, str] = kwargs.get("integrity", {})
402402
self._integrity = Integrity(hash=integrity_data.get("hash", ""))
403403

404404
if not self._remote and not self._url:
@@ -407,7 +407,7 @@ def __init__(self, kwargs: ProjectEntryDict) -> None:
407407
@classmethod
408408
def from_yaml(
409409
cls,
410-
yamldata: dict[str, str | list[str]] | ProjectEntryDict,
410+
yamldata: dict[str, str | list[str] | dict[str, str]] | ProjectEntryDict,
411411
default_remote: str = "",
412412
) -> "ProjectEntry":
413413
"""Create a Project Entry from yaml data.

dfetch/project/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@
1515
from dfetch.project.svnsuperproject import SvnSuperProject
1616
from dfetch.util.util import resolve_absolute_path
1717

18-
SUPPORTED_SUBPROJECT_TYPES = [ArchiveSubProject, GitSubProject, SvnSubProject]
18+
SUPPORTED_SUBPROJECT_TYPES: list[
19+
type[ArchiveSubProject] | type[GitSubProject] | type[SvnSubProject]
20+
] = [ArchiveSubProject, GitSubProject, SvnSubProject]
1921
SUPPORTED_SUPERPROJECT_TYPES = [GitSuperProject, SvnSuperProject]
2022

2123
logger = get_logger(__name__)

dfetch/project/archivesubproject.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,17 +44,22 @@
4444
import os
4545
import pathlib
4646
import tempfile
47+
import urllib.request as _ur
4748

4849
from dfetch.log import get_logger
4950
from dfetch.manifest.project import ProjectEntry
5051
from dfetch.manifest.version import Version
5152
from dfetch.project.subproject import SubProject
53+
from dfetch.vcs.archive import (
54+
_safe_compare_hex, # private helper, intentionally imported for internal use
55+
)
56+
from dfetch.vcs.archive import (
57+
_suffix_for_url, # private helper, intentionally imported for internal use
58+
)
5259
from dfetch.vcs.archive import (
5360
SUPPORTED_HASH_ALGORITHMS,
5461
ArchiveLocalRepo,
5562
ArchiveRemote,
56-
_safe_compare_hex, # private helper, intentionally imported for internal use
57-
_suffix_for_url, # private helper, intentionally imported for internal use
5863
compute_hash,
5964
is_archive_url,
6065
)
@@ -94,8 +99,6 @@ def revision_is_enough() -> bool:
9499
@staticmethod
95100
def list_tool_info() -> None:
96101
"""Log information about the archive fetching tool (Python's urllib)."""
97-
import urllib.request as _ur # noqa: PLC0415
98-
99102
SubProject._log_tool("urllib", _ur.__doc__ or "built-in")
100103

101104
def get_default_branch(self) -> str:

dfetch/project/subproject.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,9 @@ def freeze_project(self, project: ProjectEntry) -> str | None:
409409
return None
410410
if on_disk_version:
411411
project.version = on_disk_version
412-
return on_disk_version.revision or on_disk_version.tag or str(on_disk_version)
412+
return (
413+
on_disk_version.revision or on_disk_version.tag or str(on_disk_version)
414+
)
413415
return None
414416

415417
@staticmethod

dfetch/reporting/sbom_reporter.py

Lines changed: 51 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -107,13 +107,14 @@
107107
from cyclonedx.model.license import LicenseAcknowledgement
108108
from cyclonedx.output import make_outputter
109109
from cyclonedx.schema import OutputFormat, SchemaVersion
110+
from packageurl import PackageURL
110111

111112
import dfetch.util.purl
112-
from dfetch.util.purl import DFETCH_TO_CDX_HASH_ALGORITHM
113113
from dfetch.manifest.manifest import Manifest
114114
from dfetch.manifest.project import ProjectEntry
115115
from dfetch.reporting.reporter import Reporter
116116
from dfetch.util.license import License
117+
from dfetch.util.purl import DFETCH_TO_CDX_HASH_ALGORITHM
117118

118119
# PyRight is pedantic with decorators see https://github.com/madpah/serializable/issues/8
119120
# It might be fixable with https://github.com/microsoft/pyright/discussions/4426, would prefer
@@ -190,11 +191,8 @@ def add_project(
190191
purl = dfetch.util.purl.remote_url_to_purl(
191192
project.remote_url, version=version, subpath=project.source or None
192193
)
193-
194194
name = project.name if purl.type == "generic" else purl.name
195-
196195
location = self.manifest.find_name_in_manifest(project.name)
197-
198196
component = Component(
199197
name=name,
200198
version=version,
@@ -250,7 +248,15 @@ def add_project(
250248
],
251249
),
252250
)
251+
self._apply_external_references(component, purl, version)
252+
self._apply_licenses(component, licenses)
253+
self._bom.components.add(component)
253254

255+
@staticmethod
256+
def _apply_external_references(
257+
component: Component, purl: PackageURL, version: str
258+
) -> None:
259+
"""Attach external references to *component* based on its PURL type."""
254260
if purl.type == "github":
255261
component.external_references.add(
256262
ExternalReference(
@@ -266,53 +272,62 @@ def add_project(
266272
)
267273
)
268274
elif purl.qualifiers.get("download_url"):
269-
# Archive dependency: add a DISTRIBUTION external reference and,
270-
# when the version encodes a cryptographic hash, record it on the component.
271-
download_url = purl.qualifiers["download_url"]
272-
component.group = purl.namespace or None # type: ignore[assignment]
275+
SbomReporter._apply_archive_refs(component, purl, version)
276+
else:
277+
SbomReporter._apply_vcs_refs(component, purl)
278+
279+
@staticmethod
280+
def _apply_archive_refs(
281+
component: Component, purl: PackageURL, version: str
282+
) -> None:
283+
"""Add DISTRIBUTION reference and optional hash for an archive dependency."""
284+
download_url = purl.qualifiers["download_url"]
285+
component.group = purl.namespace or None # type: ignore[assignment]
286+
component.external_references.add(
287+
ExternalReference(
288+
type=ExternalReferenceType.DISTRIBUTION,
289+
url=XsUri(download_url),
290+
)
291+
)
292+
if version and ":" in version:
293+
algo_prefix, hex_value = version.split(":", 1)
294+
cdx_algo_name = DFETCH_TO_CDX_HASH_ALGORITHM.get(algo_prefix)
295+
if cdx_algo_name:
296+
component.hashes.add(
297+
HashType(
298+
alg=HashAlgorithm(cdx_algo_name),
299+
content=hex_value,
300+
)
301+
)
302+
303+
@staticmethod
304+
def _apply_vcs_refs(component: Component, purl: PackageURL) -> None:
305+
"""Add VCS external reference and group for a generic VCS dependency."""
306+
component.group = purl.namespace
307+
vcs_url = purl.qualifiers.get("vcs_url", "")
308+
# ExternalReferenceType.VCS does not support ssh:// urls
309+
if vcs_url and "ssh://" not in vcs_url:
273310
component.external_references.add(
274311
ExternalReference(
275-
type=ExternalReferenceType.DISTRIBUTION,
276-
url=XsUri(download_url),
312+
type=ExternalReferenceType.VCS,
313+
url=XsUri(vcs_url),
277314
)
278315
)
279-
if version and ":" in version:
280-
algo_prefix, hex_value = version.split(":", 1)
281-
cdx_algo_name = DFETCH_TO_CDX_HASH_ALGORITHM.get(algo_prefix)
282-
if cdx_algo_name:
283-
component.hashes.add(
284-
HashType(
285-
alg=HashAlgorithm(cdx_algo_name),
286-
content=hex_value,
287-
)
288-
)
289-
else:
290-
component.group = purl.namespace
291-
292-
vcs_url = purl.qualifiers.get("vcs_url", "")
293-
# ExternalReferenceType.VCS does not support ssh:// urls
294-
if vcs_url and "ssh://" not in vcs_url:
295-
component.external_references.add(
296-
ExternalReference(
297-
type=ExternalReferenceType.VCS,
298-
url=XsUri(vcs_url),
299-
)
300-
)
301316

317+
@staticmethod
318+
def _apply_licenses(component: Component, licenses: list[License]) -> None:
319+
"""Attach *licenses* to *component* and its evidence block."""
302320
for lic in licenses:
303-
# License wants either an SPDX id or a name, prefer SPDX id when available
321+
# Prefer SPDX id when available
304322
cdx_license = (
305323
CycloneDxLicense(id=lic.spdx_id)
306324
if lic.spdx_id
307325
else CycloneDxLicense(name=lic.name)
308326
)
309-
310327
component.licenses.add(cdx_license)
311328
if component.evidence:
312329
component.evidence.licenses.add(cdx_license)
313330

314-
self._bom.components.add(component)
315-
316331
def dump_to_file(self, outfile: str) -> bool:
317332
"""Dump the SBoM to file."""
318333
output_format = OutputFormat(

dfetch/util/purl.py

Lines changed: 38 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -101,59 +101,67 @@ def _known_purl_types(
101101
return None
102102

103103

104-
def remote_url_to_purl(
105-
remote_url: str, version: str | None = None, subpath: str | None = None
104+
def _archive_purl(
105+
remote_url: str, version: str | None, subpath: str | None
106106
) -> PackageURL:
107-
"""Convert a remote URL to a valid PackageURL object.
107+
"""Build a generic PURL for an archive URL."""
108+
parsed = urlparse(remote_url)
109+
basename = os.path.basename(parsed.path)
110+
name = _strip_archive_extension(basename) or DEFAULT_NAME
111+
namespace = parsed.hostname or ""
112+
return PackageURL(
113+
type="generic",
114+
namespace=namespace or None,
115+
name=name,
116+
version=version,
117+
qualifiers={"download_url": remote_url},
118+
subpath=subpath,
119+
)
108120

109-
Supports GitHub, Bitbucket, SVN, SSH paths, and archive downloads.
110-
Optionally specify version and subpath.
111-
"""
112-
purl = _known_purl_types(remote_url, version, subpath)
113-
if purl:
114-
return purl
115121

116-
# Archive URLs (tar.gz, zip, …) get a generic PURL with a download_url qualifier.
117-
# The name is derived from the archive filename (extension stripped); the
118-
# namespace is the hostname (empty for file:// URLs).
119-
if _is_archive_url(remote_url):
120-
parsed = urlparse(remote_url)
121-
basename = os.path.basename(parsed.path)
122-
name = _strip_archive_extension(basename) or DEFAULT_NAME
123-
namespace = parsed.hostname or ""
124-
return PackageURL(
125-
type="generic",
126-
namespace=namespace or None,
127-
name=name,
128-
version=version,
129-
qualifiers={"download_url": remote_url},
130-
subpath=subpath,
131-
)
122+
def _vcs_namespace_and_name(remote_url: str) -> tuple[str, str, str]:
123+
"""Derive namespace, name, and normalised URL for a generic VCS remote URL.
132124
125+
Returns:
126+
A ``(namespace, name, remote_url)`` tuple where *remote_url* may have
127+
been normalised (e.g. SSH short-form converted to ``ssh://`` scheme).
128+
"""
133129
parsed = urlparse(remote_url)
134130
path = parsed.path.lstrip("/")
135-
136131
if "svn" in parsed.scheme or "svn." in parsed.netloc:
137132
namespace, name = _namespace_and_name_from_domain_and_path(parsed.netloc, path)
138133
if namespace.startswith("p/"):
139134
namespace = namespace[len("p/") :]
140135
namespace = namespace.replace("/svn/", "/")
141-
142136
else:
143137
match = SSH_REGEX.match(remote_url)
144138
if match:
145139
namespace, name = _namespace_and_name_from_domain_and_path(
146-
match.group("host"),
147-
match.group("path"),
140+
match.group("host"), match.group("path")
148141
)
149-
150142
if not parsed.scheme:
151143
remote_url = f"ssh://{parsed.path.replace(':', '/')}"
152144
else:
153145
namespace, name = _namespace_and_name_from_domain_and_path(
154146
remote_url, path.replace(".git", "")
155147
)
148+
return namespace, name, remote_url
149+
156150

151+
def remote_url_to_purl(
152+
remote_url: str, version: str | None = None, subpath: str | None = None
153+
) -> PackageURL:
154+
"""Convert a remote URL to a valid PackageURL object.
155+
156+
Supports GitHub, Bitbucket, SVN, SSH paths, and archive downloads.
157+
Optionally specify version and subpath.
158+
"""
159+
purl = _known_purl_types(remote_url, version, subpath)
160+
if purl:
161+
return purl
162+
if _is_archive_url(remote_url):
163+
return _archive_purl(remote_url, version, subpath)
164+
namespace, name, remote_url = _vcs_namespace_and_name(remote_url)
157165
return PackageURL(
158166
type="generic",
159167
namespace=namespace,

0 commit comments

Comments
 (0)