From 36ec3fce5c07e6ba61e6149ff3b6c3585fcee2b0 Mon Sep 17 00:00:00 2001 From: Ben Date: Sat, 4 Oct 2025 18:25:17 +0000 Subject: [PATCH 01/13] Support multiple licenses per project --- CHANGELOG.rst | 1 + dfetch/commands/report.py | 31 ++++++++++++++++------------- dfetch/reporting/reporter.py | 6 +++++- dfetch/reporting/sbom_reporter.py | 11 +++++++--- dfetch/reporting/stdout_reporter.py | 11 ++++++++-- dfetch/util/license.py | 27 +++++++++++++++++++++++++ 6 files changed, 67 insertions(+), 20 deletions(-) create mode 100644 dfetch/util/license.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 303a822f..ee7cb14a 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -8,6 +8,7 @@ Release 0.11.0 (unreleased) * Use CycloneDX schema version 1.6 (#542) * Add security policy (#784) * Add provenance / release attestation to pypi package (#784) +* Support multiple licenses per project (#788) Release 0.10.0 (released 2025-03-12) ==================================== diff --git a/dfetch/commands/report.py b/dfetch/commands/report.py index cf8e67ba..c6485971 100644 --- a/dfetch/commands/report.py +++ b/dfetch/commands/report.py @@ -6,8 +6,7 @@ import argparse import glob import os - -import infer_license +from typing import List, Tuple import dfetch.commands.command import dfetch.manifest.manifest @@ -17,9 +16,12 @@ from dfetch.project.metadata import Metadata from dfetch.project.vcs import VCS from dfetch.reporting import REPORTERS, ReportTypes +from dfetch.util.license import guess_license_in_file logger = get_logger(__name__) +LICENSE_PROBABILITY_THRESHOLD = 0.80 + class Report(dfetch.commands.command.Command): """Generate reports containing information about the projects components. 
@@ -66,37 +68,38 @@ def __call__(self, args: argparse.Namespace) -> None: with dfetch.util.util.in_directory(os.path.dirname(path)): for project in manifest.selected_projects(args.projects): - determined_license = self._determine_license(project) + determined_licenses = self._determine_licenses(project) version = self._determine_version(project) reporter.add_project( - project=project, license_name=determined_license, version=version + project=project, license_names=determined_licenses, version=version ) if reporter.dump_to_file(args.outfile): logger.info(f"Generated {reporter.name} report: {args.outfile}") @staticmethod - def _determine_license(project: ProjectEntry) -> str: + def _determine_licenses(project: ProjectEntry) -> List[Tuple[str, float]]: """Try to determine license of fetched project.""" if not os.path.exists(project.destination): logger.print_warning_line( project.name, "Never fetched, fetch it to get license info." ) - return "" + return [] + license_files = [] with dfetch.util.util.in_directory(project.destination): + for license_file in filter(VCS.is_license_file, glob.glob("*")): logger.debug(f"Found license file {license_file} for {project.name}") - guessed_license = infer_license.api.guess_file(license_file) + guessed_license, probability = guess_license_in_file(license_file) if guessed_license: - return str(guessed_license.name) - - logger.print_warning_line( - project.name, f"Could not determine license in {license_file}" - ) - - return "" + license_files.append((str(guessed_license.name), probability)) + else: + logger.print_warning_line( + project.name, f"Could not determine license in {license_file}" + ) + return license_files @staticmethod def _determine_version(project: ProjectEntry) -> str: diff --git a/dfetch/reporting/reporter.py b/dfetch/reporting/reporter.py index 23ef3bae..ff62ab51 100644 --- a/dfetch/reporting/reporter.py +++ b/dfetch/reporting/reporter.py @@ -1,6 +1,7 @@ """Abstract reporting interface.""" from abc import ABC, 
abstractmethod +from typing import List, Tuple from dfetch.manifest.project import ProjectEntry @@ -12,7 +13,10 @@ class Reporter(ABC): @abstractmethod def add_project( - self, project: ProjectEntry, license_name: str, version: str + self, + project: ProjectEntry, + license_names: List[Tuple[str, float]], + version: str, ) -> None: """Add a project to the report.""" diff --git a/dfetch/reporting/sbom_reporter.py b/dfetch/reporting/sbom_reporter.py index 946f2490..66372851 100644 --- a/dfetch/reporting/sbom_reporter.py +++ b/dfetch/reporting/sbom_reporter.py @@ -15,6 +15,8 @@ An fetched project generates an sbom """ +from typing import List, Tuple + from cyclonedx.builder.this import this_component as cdx_lib_component from cyclonedx.model import ExternalReference, ExternalReferenceType, XsUri from cyclonedx.model.bom import Bom @@ -48,7 +50,10 @@ def __init__(self) -> None: self._bom.metadata.tools.components.add(cdx_lib_component()) def add_project( - self, project: ProjectEntry, license_name: str, version: str + self, + project: ProjectEntry, + license_names: List[Tuple[str, float]], + version: str, ) -> None: """Add a project to the report.""" purl = dfetch.util.purl.remote_url_to_purl( @@ -89,8 +94,8 @@ def add_project( ) ) - if license_name: - component.licenses.add(LicenseExpression(license_name)) + for name, _ in license_names: + component.licenses.add(LicenseExpression(name)) self._bom.components.add(component) def dump_to_file(self, outfile: str) -> bool: diff --git a/dfetch/reporting/stdout_reporter.py b/dfetch/reporting/stdout_reporter.py index 5da202ec..9b2f5177 100644 --- a/dfetch/reporting/stdout_reporter.py +++ b/dfetch/reporting/stdout_reporter.py @@ -4,6 +4,8 @@ from the manifest or the metadata (``.dfetch_data.yaml``). 
""" +from typing import List, Tuple + from dfetch.log import get_logger from dfetch.manifest.project import ProjectEntry from dfetch.project.metadata import Metadata @@ -18,7 +20,10 @@ class StdoutReporter(Reporter): name = "stdout" def add_project( - self, project: ProjectEntry, license_name: str, version: str + self, + project: ProjectEntry, + license_names: List[Tuple[str, float]], + version: str, ) -> None: """Add a project to the report.""" del version @@ -32,7 +37,9 @@ def add_project( logger.print_info_field(" last fetch", str(metadata.last_fetch)) logger.print_info_field(" revision", metadata.revision) logger.print_info_field(" patch", metadata.patch) - logger.print_info_field(" license", license_name) + logger.print_info_field( + " licenses", ",".join(license for license, _ in license_names) + ) except FileNotFoundError: logger.print_info_field(" last fetch", "never") diff --git a/dfetch/util/license.py b/dfetch/util/license.py new file mode 100644 index 00000000..5f35856b --- /dev/null +++ b/dfetch/util/license.py @@ -0,0 +1,27 @@ +"""*Dfetch* uses *Infer-License* to guess licenses from files.""" + +import os +from typing import Optional, Tuple, Union + +import infer_license +from infer_license.types import License + +LICENSE_PROBABILITY_THRESHOLD = 0.80 + + +def guess_license_in_file( + filename: Union[str, "os.PathLike[str]"], +) -> Tuple[Optional[License], float]: + """Guess license from file.""" + try: + with open(filename, encoding="utf-8") as f: + license_text = f.read() + except UnicodeDecodeError: + with open(filename, encoding="latin-1") as f: + license_text = f.read() + + probable_license = infer_license.api.probabilities(license_text) + if probable_license and probable_license[0][1] > LICENSE_PROBABILITY_THRESHOLD: + return probable_license[0] + + return None, 0.0 From f0babae6e8f36d1c723e1166bc7718c841046d4b Mon Sep 17 00:00:00 2001 From: Ben Date: Sat, 4 Oct 2025 19:23:19 +0000 Subject: [PATCH 02/13] wip --- dfetch/commands/report.py | 17 
+++++++++------ dfetch/reporting/reporter.py | 5 +++-- dfetch/reporting/sbom_reporter.py | 9 ++++---- dfetch/reporting/stdout_reporter.py | 7 +++--- dfetch/util/license.py | 33 +++++++++++++++++++++++------ 5 files changed, 48 insertions(+), 23 deletions(-) diff --git a/dfetch/commands/report.py b/dfetch/commands/report.py index c6485971..91aecf83 100644 --- a/dfetch/commands/report.py +++ b/dfetch/commands/report.py @@ -6,7 +6,7 @@ import argparse import glob import os -from typing import List, Tuple +from typing import List import dfetch.commands.command import dfetch.manifest.manifest @@ -16,7 +16,7 @@ from dfetch.project.metadata import Metadata from dfetch.project.vcs import VCS from dfetch.reporting import REPORTERS, ReportTypes -from dfetch.util.license import guess_license_in_file +from dfetch.util.license import License, guess_license_in_file logger = get_logger(__name__) @@ -71,14 +71,14 @@ def __call__(self, args: argparse.Namespace) -> None: determined_licenses = self._determine_licenses(project) version = self._determine_version(project) reporter.add_project( - project=project, license_names=determined_licenses, version=version + project=project, licenses=determined_licenses, version=version ) if reporter.dump_to_file(args.outfile): logger.info(f"Generated {reporter.name} report: {args.outfile}") @staticmethod - def _determine_licenses(project: ProjectEntry) -> List[Tuple[str, float]]: + def _determine_licenses(project: ProjectEntry) -> List[License]: """Try to determine license of fetched project.""" if not os.path.exists(project.destination): logger.print_warning_line( @@ -91,10 +91,13 @@ def _determine_licenses(project: ProjectEntry) -> List[Tuple[str, float]]: for license_file in filter(VCS.is_license_file, glob.glob("*")): logger.debug(f"Found license file {license_file} for {project.name}") - guessed_license, probability = guess_license_in_file(license_file) + guessed_license = guess_license_in_file(license_file) - if guessed_license: - 
license_files.append((str(guessed_license.name), probability)) + if ( + guessed_license + and guessed_license.probability > LICENSE_PROBABILITY_THRESHOLD + ): + license_files.append(guessed_license) else: logger.print_warning_line( project.name, f"Could not determine license in {license_file}" diff --git a/dfetch/reporting/reporter.py b/dfetch/reporting/reporter.py index ff62ab51..d6effc4d 100644 --- a/dfetch/reporting/reporter.py +++ b/dfetch/reporting/reporter.py @@ -1,9 +1,10 @@ """Abstract reporting interface.""" from abc import ABC, abstractmethod -from typing import List, Tuple +from typing import List from dfetch.manifest.project import ProjectEntry +from dfetch.util.license import License class Reporter(ABC): @@ -15,7 +16,7 @@ class Reporter(ABC): def add_project( self, project: ProjectEntry, - license_names: List[Tuple[str, float]], + licenses: List[License], version: str, ) -> None: """Add a project to the report.""" diff --git a/dfetch/reporting/sbom_reporter.py b/dfetch/reporting/sbom_reporter.py index 66372851..96394e2c 100644 --- a/dfetch/reporting/sbom_reporter.py +++ b/dfetch/reporting/sbom_reporter.py @@ -15,7 +15,7 @@ An fetched project generates an sbom """ -from typing import List, Tuple +from typing import List from cyclonedx.builder.this import this_component as cdx_lib_component from cyclonedx.model import ExternalReference, ExternalReferenceType, XsUri @@ -29,6 +29,7 @@ import dfetch.util.purl from dfetch.manifest.project import ProjectEntry from dfetch.reporting.reporter import Reporter +from dfetch.util.license import License # PyRight is pedantic with decorators see https://github.com/madpah/serializable/issues/8 # It might be fixable with https://github.com/microsoft/pyright/discussions/4426, would prefer @@ -52,7 +53,7 @@ def __init__(self) -> None: def add_project( self, project: ProjectEntry, - license_names: List[Tuple[str, float]], + licenses: List[License], version: str, ) -> None: """Add a project to the report.""" @@ -94,8 +95,8 
@@ def add_project( ) ) - for name, _ in license_names: - component.licenses.add(LicenseExpression(name)) + for lic in licenses: + component.licenses.add(LicenseExpression(lic.name)) self._bom.components.add(component) def dump_to_file(self, outfile: str) -> bool: diff --git a/dfetch/reporting/stdout_reporter.py b/dfetch/reporting/stdout_reporter.py index 9b2f5177..868c2ffd 100644 --- a/dfetch/reporting/stdout_reporter.py +++ b/dfetch/reporting/stdout_reporter.py @@ -4,12 +4,13 @@ from the manifest or the metadata (``.dfetch_data.yaml``). """ -from typing import List, Tuple +from typing import List from dfetch.log import get_logger from dfetch.manifest.project import ProjectEntry from dfetch.project.metadata import Metadata from dfetch.reporting.reporter import Reporter +from dfetch.util.license import License logger = get_logger(__name__) @@ -22,7 +23,7 @@ class StdoutReporter(Reporter): def add_project( self, project: ProjectEntry, - license_names: List[Tuple[str, float]], + licenses: List[License], version: str, ) -> None: """Add a project to the report.""" @@ -38,7 +39,7 @@ def add_project( logger.print_info_field(" revision", metadata.revision) logger.print_info_field(" patch", metadata.patch) logger.print_info_field( - " licenses", ",".join(license for license, _ in license_names) + " licenses", ",".join(license.name for license in licenses) ) except FileNotFoundError: diff --git a/dfetch/util/license.py b/dfetch/util/license.py index 5f35856b..705adc29 100644 --- a/dfetch/util/license.py +++ b/dfetch/util/license.py @@ -1,17 +1,38 @@ """*Dfetch* uses *Infer-License* to guess licenses from files.""" import os -from typing import Optional, Tuple, Union +from dataclasses import dataclass +from typing import Optional, Union import infer_license -from infer_license.types import License +from infer_license.types import License as InferredLicense -LICENSE_PROBABILITY_THRESHOLD = 0.80 + +@dataclass +class License: + """Class to hold license information.""" + + name: 
str # SPDX Full name + shortname: str # SPDX Identifier + trove_classifier: Optional[str] + probability: float + + @staticmethod + def from_inferred( + inferred_license: InferredLicense, probability: float + ) -> "License": + """Create License from an InferredLicense.""" + return License( + name=inferred_license.name, + shortname=inferred_license.shortname, + trove_classifier=inferred_license.trove_classifier, + probability=probability, + ) def guess_license_in_file( filename: Union[str, "os.PathLike[str]"], -) -> Tuple[Optional[License], float]: +) -> Optional[License]: """Guess license from file.""" try: with open(filename, encoding="utf-8") as f: @@ -21,7 +42,5 @@ def guess_license_in_file( license_text = f.read() probable_license = infer_license.api.probabilities(license_text) - if probable_license and probable_license[0][1] > LICENSE_PROBABILITY_THRESHOLD: - return probable_license[0] - return None, 0.0 + return None if not probable_license else License.from_inferred(*probable_license[0]) From de197be9a7c368b8290b9316de030f04efe381eb Mon Sep 17 00:00:00 2001 From: Ben Date: Sat, 4 Oct 2025 19:23:19 +0000 Subject: [PATCH 03/13] Extend sbom --- dfetch/reporting/sbom_reporter.py | 108 ++++++++++++++++++++++++++++-- dfetch/util/license.py | 8 +-- 2 files changed, 107 insertions(+), 9 deletions(-) diff --git a/dfetch/reporting/sbom_reporter.py b/dfetch/reporting/sbom_reporter.py index 96394e2c..ae98c124 100644 --- a/dfetch/reporting/sbom_reporter.py +++ b/dfetch/reporting/sbom_reporter.py @@ -15,14 +15,23 @@ An fetched project generates an sbom """ +from decimal import Decimal from typing import List from cyclonedx.builder.this import this_component as cdx_lib_component from cyclonedx.model import ExternalReference, ExternalReferenceType, XsUri from cyclonedx.model.bom import Bom from cyclonedx.model.component import Component, ComponentType -from cyclonedx.model.license import LicenseExpression -from cyclonedx.model.tool import Tool +from 
cyclonedx.model.component_evidence import ( + AnalysisTechnique, + ComponentEvidence, + Identity, + IdentityField, + Method, + Occurrence, +) +from cyclonedx.model.contact import OrganizationalEntity +from cyclonedx.model.license import DisjunctiveLicense as CycloneDxLicense from cyclonedx.output import make_outputter from cyclonedx.schema import OutputFormat, SchemaVersion @@ -40,14 +49,57 @@ class SbomReporter(Reporter): """Reporter for generating SBoM's.""" - dfetch_tool = Tool(vendor="dfetch-org", name="dfetch", version=dfetch.__version__) + dfetch_tool = Component( + type=ComponentType.APPLICATION, + supplier=OrganizationalEntity(name="dfetch-org"), + name="dfetch", + version=dfetch.__version__, + bom_ref=f"dfetch-{dfetch.__version__}", + licenses=[CycloneDxLicense(name="MIT License", id="MIT")], + external_references=[ + ExternalReference( + type=ExternalReferenceType.VCS, + url=XsUri("https://github.com/dfetch-org/dfetch"), + ), + ExternalReference( + type=ExternalReferenceType.BUILD_SYSTEM, + url=XsUri("https://github.com/dfetch-org/dfetch/actions"), + ), + ExternalReference( + type=ExternalReferenceType.ISSUE_TRACKER, + url=XsUri("https://github.com/dfetch-org/dfetch/issues"), + ), + ExternalReference( + type=ExternalReferenceType.DISTRIBUTION, + url=XsUri("https://pypi.org/project/dfetch/"), + ), + ExternalReference( + type=ExternalReferenceType.DOCUMENTATION, + url=XsUri("https://dfetch.readthedocs.io/"), + ), + ExternalReference( + type=ExternalReferenceType.LICENSE, + url=XsUri("https://github.com/dfetch-org/dfetch/blob/main/LICENSE"), + ), + ExternalReference( + type=ExternalReferenceType.RELEASE_NOTES, + url=XsUri( + "https://github.com/dfetch-org/dfetch/blob/main/CHANGELOG.rst" + ), + ), + ExternalReference( + type=ExternalReferenceType.WEBSITE, + url=XsUri("https://dfetch-org.github.io/"), + ), + ], + ) name = "SBoM" def __init__(self) -> None: """Start the report.""" self._bom = Bom() - self._bom.metadata.tools.tools.add(self.dfetch_tool) + 
self._bom.metadata.tools.components.add(self.dfetch_tool) self._bom.metadata.tools.components.add(cdx_lib_component()) def add_project( @@ -66,6 +118,47 @@ def add_project( version=version, type=ComponentType.LIBRARY, purl=purl, + evidence=ComponentEvidence( + occurrences=[Occurrence(location="dfetch.yaml")], + identity=[ + Identity( + field=IdentityField.NAME, + tools=[self.dfetch_tool.bom_ref], + methods=[ + Method( + technique=AnalysisTechnique.MANIFEST_ANALYSIS, + confidence=Decimal.from_float(0.4), + value="Name as used for project in dfetch.yaml", + ) + ], + concluded_value=project.name, + ), + Identity( + field=IdentityField.VERSION, + tools=[self.dfetch_tool.bom_ref], + methods=[ + Method( + technique=AnalysisTechnique.MANIFEST_ANALYSIS, + confidence=Decimal.from_float(0.4), + value="Version as used for project in dfetch.yaml", + ) + ], + concluded_value=version, + ), + Identity( + field=IdentityField.PURL, + tools=[self.dfetch_tool.bom_ref], + methods=[ + Method( + technique=AnalysisTechnique.MANIFEST_ANALYSIS, + confidence=Decimal.from_float(0.4), + value="Determined from the VCS url as used for project in dfetch.yaml", + ) + ], + concluded_value=purl.to_string(), + ), + ], + ), ) if purl.type == "github": @@ -96,7 +189,12 @@ def add_project( ) for lic in licenses: - component.licenses.add(LicenseExpression(lic.name)) + cdx_license = CycloneDxLicense(name=lic.name, id=lic.spdx_id) + + component.licenses.add(cdx_license) + if component.evidence: + component.evidence.licenses.add(cdx_license.bom_ref) + self._bom.components.add(component) def dump_to_file(self, outfile: str) -> bool: diff --git a/dfetch/util/license.py b/dfetch/util/license.py index 705adc29..282c1b3b 100644 --- a/dfetch/util/license.py +++ b/dfetch/util/license.py @@ -1,7 +1,7 @@ """*Dfetch* uses *Infer-License* to guess licenses from files.""" -import os from dataclasses import dataclass +from os import PathLike from typing import Optional, Union import infer_license @@ -13,7 +13,7 @@ 
class License: """Class to hold license information.""" name: str # SPDX Full name - shortname: str # SPDX Identifier + spdx_id: str # SPDX Identifier trove_classifier: Optional[str] probability: float @@ -24,14 +24,14 @@ def from_inferred( """Create License from an InferredLicense.""" return License( name=inferred_license.name, - shortname=inferred_license.shortname, + spdx_id=inferred_license.shortname, trove_classifier=inferred_license.trove_classifier, probability=probability, ) def guess_license_in_file( - filename: Union[str, "os.PathLike[str]"], + filename: Union[str, PathLike[str]], ) -> Optional[License]: """Guess license from file.""" try: From 247bf6ffa32b94c238dbdb1589a4d7cd3a863996 Mon Sep 17 00:00:00 2001 From: Ben Date: Mon, 6 Oct 2025 19:53:04 +0000 Subject: [PATCH 04/13] Fixup list output --- features/list-projects.feature | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/features/list-projects.feature b/features/list-projects.feature index 15ee942c..1826298e 100644 --- a/features/list-projects.feature +++ b/features/list-projects.feature @@ -36,7 +36,7 @@ Feature: List dependencies last fetch : 02/07/2021, 20:25:56 revision : e1fda19a57b873eb8e6ae37780594cbb77b70f1a patch : - license : MIT License + licenses : MIT License project : ext/test-rev-and-branch remote : github-com-dfetch-org remote url : https://github.com/dfetch-org/test-repo @@ -45,7 +45,7 @@ Feature: List dependencies last fetch : 02/07/2021, 20:25:56 revision : patch : - license : MIT License + licenses : MIT License """ @remote-svn @@ -76,7 +76,7 @@ Feature: List dependencies last fetch : 29/12/2024, 20:09:21 revision : 4007 patch : - license : + licenses : """ Scenario: Git repo with applied patch @@ -93,5 +93,5 @@ Feature: List dependencies last fetch : 02/07/2021, 20:25:56 revision : patch : diff.patch - license : MIT License + licenses : MIT License """ From a538af43e16329f63f6b4dc4efe19a5b16614a1d Mon Sep 17 00:00:00 2001 From: Ben Date: Mon, 6 Oct 2025 
21:11:44 +0000 Subject: [PATCH 05/13] Add more info to sbom --- dfetch/commands/report.py | 4 ++-- dfetch/reporting/reporter.py | 31 +++++++++++++++++++++++++++- dfetch/reporting/sbom_reporter.py | 34 +++++++++++++++++++++++-------- 3 files changed, 58 insertions(+), 11 deletions(-) diff --git a/dfetch/commands/report.py b/dfetch/commands/report.py index 91aecf83..3bc6702b 100644 --- a/dfetch/commands/report.py +++ b/dfetch/commands/report.py @@ -64,9 +64,9 @@ def __call__(self, args: argparse.Namespace) -> None: """Generate the report.""" manifest, path = dfetch.manifest.manifest.get_manifest() - reporter = REPORTERS[args.type]() - with dfetch.util.util.in_directory(os.path.dirname(path)): + reporter = REPORTERS[args.type](path) + for project in manifest.selected_projects(args.projects): determined_licenses = self._determine_licenses(project) version = self._determine_version(project) diff --git a/dfetch/reporting/reporter.py b/dfetch/reporting/reporter.py index d6effc4d..2d03dd1f 100644 --- a/dfetch/reporting/reporter.py +++ b/dfetch/reporting/reporter.py @@ -1,7 +1,9 @@ """Abstract reporting interface.""" +import io +import re from abc import ABC, abstractmethod -from typing import List +from typing import List, Tuple from dfetch.manifest.project import ProjectEntry from dfetch.util.license import License @@ -12,6 +14,16 @@ class Reporter(ABC): name: str = "abstract" + def __init__(self, manifest_path: str) -> None: + """Create the reporter. + + Args: + manifest_path (str): The path to the manifest. 
+ """ + self._manifest_path = manifest_path + with open(self._manifest_path, "r", encoding="utf-8") as manifest: + self._manifest_buffer = io.StringIO(manifest.read()) + @abstractmethod def add_project( self, @@ -21,6 +33,23 @@ def add_project( ) -> None: """Add a project to the report.""" + def find_name_in_manifest(self, name: str) -> Tuple[int, int, int]: + """Find the location of a project name in the manifest.""" + self._manifest_buffer.seek(0) + for line_nr, line in enumerate(self._manifest_buffer, start=1): + match = re.search(rf"^\s+-\s*name:\s*(?P{name})\s", line) + + if match: + return ( + line_nr, + int(match.start("name")) + 1, + int(match.end("name")), + ) + raise RuntimeError( + "An entry from the manifest was provided," + " that doesn't exist in the manifest!" + ) + @abstractmethod def dump_to_file(self, outfile: str) -> bool: """Do nothing.""" diff --git a/dfetch/reporting/sbom_reporter.py b/dfetch/reporting/sbom_reporter.py index ae98c124..42019832 100644 --- a/dfetch/reporting/sbom_reporter.py +++ b/dfetch/reporting/sbom_reporter.py @@ -32,6 +32,7 @@ ) from cyclonedx.model.contact import OrganizationalEntity from cyclonedx.model.license import DisjunctiveLicense as CycloneDxLicense +from cyclonedx.model.license import LicenseAcknowledgement from cyclonedx.output import make_outputter from cyclonedx.schema import OutputFormat, SchemaVersion @@ -55,7 +56,9 @@ class SbomReporter(Reporter): name="dfetch", version=dfetch.__version__, bom_ref=f"dfetch-{dfetch.__version__}", - licenses=[CycloneDxLicense(name="MIT License", id="MIT")], + licenses=[ + CycloneDxLicense(id="MIT", acknowledgement=LicenseAcknowledgement.DECLARED) + ], external_references=[ ExternalReference( type=ExternalReferenceType.VCS, @@ -96,8 +99,9 @@ class SbomReporter(Reporter): name = "SBoM" - def __init__(self) -> None: + def __init__(self, manifest_path: str) -> None: """Start the report.""" + super().__init__(manifest_path) self._bom = Bom() 
self._bom.metadata.tools.components.add(self.dfetch_tool) self._bom.metadata.tools.components.add(cdx_lib_component()) @@ -113,13 +117,20 @@ def add_project( project.remote_url, version=version, subpath=project.source or None ) + name = project.name if purl.type == "generic" else purl.name + + line_nr, start, _ = self.find_name_in_manifest(project.name) + component = Component( - name=project.name, + name=name, version=version, + bom_ref=f"{project.name}-{version}", type=ComponentType.LIBRARY, purl=purl, evidence=ComponentEvidence( - occurrences=[Occurrence(location="dfetch.yaml")], + occurrences=[ + Occurrence(location=self._manifest_path, line=line_nr, offset=start) + ], identity=[ Identity( field=IdentityField.NAME, @@ -131,7 +142,7 @@ def add_project( value="Name as used for project in dfetch.yaml", ) ], - concluded_value=project.name, + concluded_value=name, ), Identity( field=IdentityField.VERSION, @@ -152,7 +163,8 @@ def add_project( Method( technique=AnalysisTechnique.MANIFEST_ANALYSIS, confidence=Decimal.from_float(0.4), - value="Determined from the VCS url as used for project in dfetch.yaml", + value=f"Determined from {project.remote_url} as used" + f" for the project {project.name} in dfetch.yaml", ) ], concluded_value=purl.to_string(), @@ -189,11 +201,17 @@ def add_project( ) for lic in licenses: - cdx_license = CycloneDxLicense(name=lic.name, id=lic.spdx_id) + + # License wants either an SPDX id or a name, prefer SPDX id when available + cdx_license = ( + CycloneDxLicense(id=lic.spdx_id) + if lic.spdx_id + else CycloneDxLicense(name=lic.name) + ) component.licenses.add(cdx_license) if component.evidence: - component.evidence.licenses.add(cdx_license.bom_ref) + component.evidence.licenses.add(cdx_license) self._bom.components.add(component) From 875b96f35e119228aa2aff013faa9dc785bf9d0d Mon Sep 17 00:00:00 2001 From: Ben Date: Tue, 7 Oct 2025 21:11:43 +0000 Subject: [PATCH 06/13] rework suggestions by korbit --- dfetch/commands/report.py | 1 + 
dfetch/util/license.py | 38 +++++++++++++++++++++++++++----------- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/dfetch/commands/report.py b/dfetch/commands/report.py index 3bc6702b..36db3bd4 100644 --- a/dfetch/commands/report.py +++ b/dfetch/commands/report.py @@ -20,6 +20,7 @@ logger = get_logger(__name__) +# Minimum confidence to accept a license guess LICENSE_PROBABILITY_THRESHOLD = 0.80 diff --git a/dfetch/util/license.py b/dfetch/util/license.py index 282c1b3b..0bdb0d4b 100644 --- a/dfetch/util/license.py +++ b/dfetch/util/license.py @@ -14,8 +14,8 @@ class License: name: str # SPDX Full name spdx_id: str # SPDX Identifier - trove_classifier: Optional[str] - probability: float + trove_classifier: Optional[str] # Python package classifier + probability: float # Confidence level of the license inference @staticmethod def from_inferred( @@ -33,14 +33,30 @@ def from_inferred( def guess_license_in_file( filename: Union[str, PathLike[str]], ) -> Optional[License]: - """Guess license from file.""" - try: - with open(filename, encoding="utf-8") as f: - license_text = f.read() - except UnicodeDecodeError: - with open(filename, encoding="latin-1") as f: - license_text = f.read() + """Attempt to identify the license of a given file. 
- probable_license = infer_license.api.probabilities(license_text) + Args: + filename (Union[str, os.PathLike[str]]): Path to the file to analyze - return None if not probable_license else License.from_inferred(*probable_license[0]) + Returns: + Optional[License]: The most probable license if found, None if no license could be detected + """ + try: + with open(filename, "rb") as f: + file_bytes = f.read() + try: + license_text = file_bytes.decode("utf-8") + except UnicodeDecodeError: + license_text = file_bytes.decode("latin-1") + except (FileNotFoundError, PermissionError, IsADirectoryError): + # Return None for file access issues + return None + except OSError: + # Handle other OS-level file errors + return None + + probable_licenses = infer_license.api.probabilities(license_text) + + return ( + None if not probable_licenses else License.from_inferred(*probable_licenses[0]) + ) From f1e106d4124be2ec1159765921113e24b6000df7 Mon Sep 17 00:00:00 2001 From: Ben Date: Tue, 7 Oct 2025 21:13:57 +0000 Subject: [PATCH 07/13] Move manifest path & text location into Manifest class --- dfetch/commands/check.py | 23 ++++--- dfetch/commands/common.py | 14 ++--- dfetch/commands/diff.py | 8 +-- dfetch/commands/freeze.py | 4 +- dfetch/commands/report.py | 6 +- dfetch/commands/update.py | 6 +- dfetch/manifest/manifest.py | 61 +++++++++++++++---- .../reporting/check/code_climate_reporter.py | 11 ++-- dfetch/reporting/check/jenkins_reporter.py | 11 ++-- dfetch/reporting/check/reporter.py | 30 ++------- dfetch/reporting/check/sarif_reporter.py | 13 ++-- dfetch/reporting/reporter.py | 30 ++------- dfetch/reporting/sbom_reporter.py | 9 +-- tests/manifest_mock.py | 4 +- tests/test_check.py | 5 +- tests/test_manifest.py | 9 ++- tests/test_report.py | 4 +- tests/test_svn.py | 4 +- tests/test_update.py | 23 ++++--- 19 files changed, 139 insertions(+), 136 deletions(-) diff --git a/dfetch/commands/check.py b/dfetch/commands/check.py index 655716a1..4f67fd4b 100644 --- 
a/dfetch/commands/check.py +++ b/dfetch/commands/check.py @@ -29,6 +29,7 @@ import dfetch.project from dfetch.commands.common import check_child_manifests from dfetch.log import get_logger +from dfetch.manifest.manifest import Manifest from dfetch.reporting.check.code_climate_reporter import CodeClimateReporter from dfetch.reporting.check.jenkins_reporter import JenkinsReporter from dfetch.reporting.check.reporter import CheckReporter @@ -83,10 +84,10 @@ def create_menu(subparsers: dfetch.commands.command.SubparserActionType) -> None def __call__(self, args: argparse.Namespace) -> None: """Perform the check.""" - manifest, path = dfetch.manifest.manifest.get_manifest() - reporters = self._get_reporters(args, path) + manifest = dfetch.manifest.manifest.get_manifest() + reporters = self._get_reporters(args, manifest) - with in_directory(os.path.dirname(path)): + with in_directory(os.path.dirname(manifest.path)): exceptions: List[str] = [] for project in manifest.selected_projects(args.projects): with catch_runtime_exceptions(exceptions) as exceptions: @@ -94,7 +95,7 @@ def __call__(self, args: argparse.Namespace) -> None: if not args.no_recommendations and os.path.isdir(project.destination): with in_directory(project.destination): - check_child_manifests(manifest, project, path) + check_child_manifests(manifest, project) for reporter in reporters: reporter.dump_to_file() @@ -103,21 +104,23 @@ def __call__(self, args: argparse.Namespace) -> None: raise RuntimeError("\n".join(exceptions)) @staticmethod - def _get_reporters(args: argparse.Namespace, path: str) -> List[CheckReporter]: + def _get_reporters( + args: argparse.Namespace, manifest: Manifest + ) -> List[CheckReporter]: """Get all reporters. 
Args: args (argparse.Namespace): Arguments given to the command line - path (str): Path to the manifest + manifest (Manifest): The manifest Returns: List[CheckReporter]: List of reporters that each provide a unique report """ - reporters: List[CheckReporter] = [CheckStdoutReporter(path)] + reporters: List[CheckReporter] = [CheckStdoutReporter(manifest)] if args.jenkins_json: - reporters += [JenkinsReporter(path, args.jenkins_json)] + reporters += [JenkinsReporter(manifest, args.jenkins_json)] if args.sarif: - reporters += [SarifReporter(path, args.sarif)] + reporters += [SarifReporter(manifest, args.sarif)] if args.code_climate: - reporters += [CodeClimateReporter(path, args.code_climate)] + reporters += [CodeClimateReporter(manifest, args.code_climate)] return reporters diff --git a/dfetch/commands/common.py b/dfetch/commands/common.py index c4b187e8..eafbd712 100644 --- a/dfetch/commands/common.py +++ b/dfetch/commands/common.py @@ -12,18 +12,14 @@ logger = get_logger(__name__) -def check_child_manifests(manifest: Manifest, project: ProjectEntry, path: str) -> None: +def check_child_manifests(manifest: Manifest, project: ProjectEntry) -> None: """Check for child manifests within a project. Args: manifest (dfetch.manifest.manifest.Manifest): The parent manifest with projects. project (ProjectEntry): The parent project. - path (str): The path of the parent manifest. 
""" - for ( - childmanifest, - childmanifest_path, - ) in get_childmanifests(skip=[path]): + for childmanifest in get_childmanifests(skip=[manifest.path]): recommendations: List[ProjectEntry] = [] for childproject in childmanifest.projects: if childproject.remote_url not in [ @@ -32,10 +28,10 @@ def check_child_manifests(manifest: Manifest, project: ProjectEntry, path: str) recommendations.append(childproject.as_recommendation()) if recommendations: - childmanifest_path = os.path.relpath( - childmanifest_path, start=os.path.dirname(path) + childmanifest_relpath = os.path.relpath( + childmanifest.path, start=os.path.dirname(manifest.path) ).replace("\\", "/") - _make_recommendation(project, recommendations, childmanifest_path) + _make_recommendation(project, recommendations, childmanifest_relpath) def _make_recommendation( diff --git a/dfetch/commands/diff.py b/dfetch/commands/diff.py index df3f073b..d0e1586b 100644 --- a/dfetch/commands/diff.py +++ b/dfetch/commands/diff.py @@ -100,10 +100,10 @@ def create_menu(subparsers: dfetch.commands.command.SubparserActionType) -> None def __call__(self, args: argparse.Namespace) -> None: """Perform the diff.""" - manifest, path = dfetch.manifest.manifest.get_manifest() + manifest = dfetch.manifest.manifest.get_manifest() revs = [r for r in args.revs.strip(":").split(":", maxsplit=1) if r] - with in_directory(os.path.dirname(path)): + with in_directory(os.path.dirname(manifest.path)): exceptions: List[str] = [] projects = manifest.selected_projects(args.projects) if not projects: @@ -113,10 +113,10 @@ def __call__(self, args: argparse.Namespace) -> None: for project in projects: patch_name = f"{project.name}.patch" with catch_runtime_exceptions(exceptions) as exceptions: - repo = _get_repo(path, project) + repo = _get_repo(manifest.path, project) patch = _diff_from_repo(repo, project, revs) - _dump_patch(path, revs, project, patch_name, patch) + _dump_patch(manifest.path, revs, project, patch_name, patch) if exceptions: raise 
RuntimeError("\n".join(exceptions)) diff --git a/dfetch/commands/freeze.py b/dfetch/commands/freeze.py index 4875dec3..9e305e86 100644 --- a/dfetch/commands/freeze.py +++ b/dfetch/commands/freeze.py @@ -70,12 +70,12 @@ def __call__(self, args: argparse.Namespace) -> None: """Perform the freeze.""" del args # unused - manifest, path = get_manifest() + manifest = get_manifest() exceptions: List[str] = [] projects: List[ProjectEntry] = [] - with in_directory(os.path.dirname(path)): + with in_directory(os.path.dirname(manifest.path)): for project in manifest.projects: with catch_runtime_exceptions(exceptions) as exceptions: on_disk_version = dfetch.project.make(project).on_disk_version() diff --git a/dfetch/commands/report.py b/dfetch/commands/report.py index 36db3bd4..43478248 100644 --- a/dfetch/commands/report.py +++ b/dfetch/commands/report.py @@ -63,10 +63,10 @@ def create_menu(subparsers: dfetch.commands.command.SubparserActionType) -> None def __call__(self, args: argparse.Namespace) -> None: """Generate the report.""" - manifest, path = dfetch.manifest.manifest.get_manifest() + manifest = dfetch.manifest.manifest.get_manifest() - with dfetch.util.util.in_directory(os.path.dirname(path)): - reporter = REPORTERS[args.type](path) + with dfetch.util.util.in_directory(os.path.dirname(manifest.path)): + reporter = REPORTERS[args.type](manifest) for project in manifest.selected_projects(args.projects): determined_licenses = self._determine_licenses(project) diff --git a/dfetch/commands/update.py b/dfetch/commands/update.py index 972f6268..c82f07ec 100644 --- a/dfetch/commands/update.py +++ b/dfetch/commands/update.py @@ -69,13 +69,13 @@ def create_menu(subparsers: dfetch.commands.command.SubparserActionType) -> None def __call__(self, args: argparse.Namespace) -> None: """Perform the update.""" - manifest, path = dfetch.manifest.manifest.get_manifest() + manifest = dfetch.manifest.manifest.get_manifest() exceptions: List[str] = [] destinations: List[str] = [ 
os.path.realpath(project.destination) for project in manifest.projects ] - with in_directory(os.path.dirname(path)): + with in_directory(os.path.dirname(manifest.path)): for project in manifest.selected_projects(args.projects): with catch_runtime_exceptions(exceptions) as exceptions: self._check_destination(project, destinations) @@ -85,7 +85,7 @@ def __call__(self, args: argparse.Namespace) -> None: project.destination ): with in_directory(project.destination): - check_child_manifests(manifest, project, path) + check_child_manifests(manifest, project) if exceptions: raise RuntimeError("\n".join(exceptions)) diff --git a/dfetch/manifest/manifest.py b/dfetch/manifest/manifest.py index 81ad054f..15a42e28 100644 --- a/dfetch/manifest/manifest.py +++ b/dfetch/manifest/manifest.py @@ -22,6 +22,7 @@ import io import os import pathlib +import re from typing import IO, Any, Dict, List, Optional, Sequence, Tuple, Union import yaml @@ -102,9 +103,16 @@ class Manifest: CURRENT_VERSION = "0.0" - def __init__(self, manifest: ManifestDict) -> None: + def __init__( + self, + manifest: ManifestDict, + text: Optional[str] = None, + path: Optional[Union[str, os.PathLike[str]]] = None, + ) -> None: """Create the manifest.""" self.__version: str = str(manifest.get("version", self.CURRENT_VERSION)) + self.__text: str = str(text) + self.__path: str = str(path) self._remotes, default_remotes = self._determine_remotes( manifest.get("remotes", []) @@ -180,7 +188,10 @@ def _determine_remotes( return (remotes, default_remotes) @staticmethod - def from_yaml(text: Union[io.TextIOWrapper, str, IO[str]]) -> "Manifest": + def from_yaml( + text: Union[io.TextIOWrapper, str, IO[str]], + path: Optional[Union[str, os.PathLike[str]]] = None, + ) -> "Manifest": """Create a manifest from a file like object.""" loaded_yaml = Manifest._load_yaml(text) @@ -192,7 +203,11 @@ def from_yaml(text: Union[io.TextIOWrapper, str, IO[str]]) -> "Manifest": if not manifest: raise RuntimeError("Missing manifest root 
element!") - return Manifest(manifest) + if isinstance(text, (io.TextIOWrapper, IO)): + text.seek(0) + text = text.read() + + return Manifest(manifest, text=text, path=path) @staticmethod def _load_yaml(text: Union[io.TextIOWrapper, str, IO[str]]) -> Any: @@ -217,7 +232,12 @@ def from_file(path: str) -> "Manifest": FileNotFoundError: Given path was not a file. """ with open(path, "r", encoding="utf-8") as opened_file: - return Manifest.from_yaml(opened_file) + return Manifest.from_yaml(opened_file, path) + + @property + def path(self) -> str: + """Path to the manifest file.""" + return self.__path @property def version(self) -> str: @@ -293,6 +313,25 @@ def dump(self, path: str) -> None: self._as_dict(), manifest_file, Dumper=ManifestDumper, sort_keys=False ) + def find_name_in_manifest(self, name: str) -> Tuple[int, int, int]: + """Find the location of a project name in the manifest.""" + if not self.__text: + raise FileNotFoundError("No manifest text available") + + for line_nr, line in enumerate(self.__text.splitlines(), start=1): + match = re.search(rf"^\s+-\s*name:\s*(?P<name>{name})\s*$", line) + + if match: + return ( + line_nr, + int(match.start("name")) + 1, + int(match.end("name")), + ) + raise RuntimeError( + "An entry from the manifest was provided," + " that doesn't exist in the manifest!"
+ ) + def find_manifest() -> str: """Find a manifest.""" @@ -308,25 +347,22 @@ def find_manifest() -> str: return os.path.realpath(paths[0]) -def get_manifest() -> Tuple[Manifest, str]: +def get_manifest() -> Manifest: """Get manifest and its path.""" logger.debug("Looking for manifest") manifest_path = find_manifest() validate(manifest_path) logger.debug(f"Using manifest {manifest_path}") - return ( - Manifest.from_file(manifest_path), - manifest_path, - ) + return Manifest.from_file(manifest_path) -def get_childmanifests(skip: Optional[List[str]] = None) -> List[Tuple[Manifest, str]]: +def get_childmanifests(skip: Optional[List[str]] = None) -> List[Manifest]: """Get manifest and its path.""" skip = skip or [] logger.debug("Looking for sub-manifests") - childmanifests: List[Tuple[Manifest, str]] = [] + childmanifests: List[Manifest] = [] for path in find_file(DEFAULT_MANIFEST_NAME, "."): path = os.path.realpath(path) if path not in skip: @@ -335,8 +371,7 @@ def get_childmanifests(skip: Optional[List[str]] = None) -> List[Tuple[Manifest, pathlib.Path(path).relative_to(os.path.dirname(os.getcwd())).as_posix() ): validate(path) - childmanifest = Manifest.from_file(path) - childmanifests += [(childmanifest, path)] + childmanifests += [Manifest.from_file(path)] return childmanifests diff --git a/dfetch/reporting/check/code_climate_reporter.py b/dfetch/reporting/check/code_climate_reporter.py index b575b645..31d1e140 100644 --- a/dfetch/reporting/check/code_climate_reporter.py +++ b/dfetch/reporting/check/code_climate_reporter.py @@ -61,6 +61,7 @@ from typing import Any, Dict, List from dfetch.log import get_logger +from dfetch.manifest.manifest import Manifest from dfetch.manifest.project import ProjectEntry from dfetch.reporting.check.reporter import CheckReporter, Issue, IssueSeverity @@ -82,14 +83,14 @@ class CodeClimateReporter(CheckReporter): name = "code-climate" - def __init__(self, manifest_path: str, report_path: str) -> None: + def __init__(self, manifest: 
Manifest, report_path: str) -> None: """Create the code climate reporter. Args: - manifest_path (str): Path to the manifest. + manifest (Manifest): The manifest. report_path (str): Output path of the report. """ - super().__init__(manifest_path) + super().__init__(manifest) self._report_path = report_path @@ -111,7 +112,7 @@ def add_issue(self, project: ProjectEntry, issue: Issue) -> None: project (ProjectEntry): Project with the issue issue (Issue): The issue to add to the report """ - line, col_start, col_end = self.find_name_in_manifest(project.name) + line, col_start, col_end = self._manifest.find_name_in_manifest(project.name) self._report += [ { @@ -123,7 +124,7 @@ def add_issue(self, project: ProjectEntry, issue: Issue) -> None: ).hexdigest(), "severity": self._determine_severity(issue.severity).value, "location": { - "path": os.path.relpath(self._manifest_path), + "path": os.path.relpath(self._manifest.path), "positions": { "begin": {"line": line, "column": col_start}, "end": {"line": line, "column": col_end}, diff --git a/dfetch/reporting/check/jenkins_reporter.py b/dfetch/reporting/check/jenkins_reporter.py index 0d5e646f..47b6648d 100644 --- a/dfetch/reporting/check/jenkins_reporter.py +++ b/dfetch/reporting/check/jenkins_reporter.py @@ -55,6 +55,7 @@ from typing import Any, Dict from dfetch.log import get_logger +from dfetch.manifest.manifest import Manifest from dfetch.manifest.project import ProjectEntry from dfetch.reporting.check.reporter import CheckReporter, Issue @@ -66,14 +67,14 @@ class JenkinsReporter(CheckReporter): name = "jenkins" - def __init__(self, manifest_path: str, report_path: str) -> None: + def __init__(self, manifest: Manifest, report_path: str) -> None: """Create the jenkins reporter. Args: - manifest_path (str): Path to the manifest. + manifest (Manifest): The manifest. report_path (str): Output path of the report. 
""" - super().__init__(manifest_path) + super().__init__(manifest) self._report_path = report_path @@ -89,10 +90,10 @@ def add_issue(self, project: ProjectEntry, issue: Issue) -> None: project (ProjectEntry): Project with the issue issue (Issue): The issue to add to the report """ - line, col_start, col_end = self.find_name_in_manifest(project.name) + line, col_start, col_end = self._manifest.find_name_in_manifest(project.name) self._report["issues"] += [ { - "fileName": os.path.relpath(self._manifest_path), + "fileName": os.path.relpath(self._manifest.path), "severity": str(issue.severity.value), "message": f"{project.name} : {issue.message}", "description": issue.description, diff --git a/dfetch/reporting/check/reporter.py b/dfetch/reporting/check/reporter.py index 228d6346..422a1daa 100644 --- a/dfetch/reporting/check/reporter.py +++ b/dfetch/reporting/check/reporter.py @@ -25,13 +25,12 @@ """ -import io -import re from abc import abstractmethod from dataclasses import dataclass from enum import Enum -from typing import Sequence, Tuple +from typing import Sequence +from dfetch.manifest.manifest import Manifest from dfetch.manifest.project import ProjectEntry from dfetch.manifest.version import Version from dfetch.project.abstract_check_reporter import AbstractCheckReporter @@ -130,15 +129,13 @@ class CheckReporter(AbstractCheckReporter): ), ] - def __init__(self, manifest_path: str) -> None: + def __init__(self, manifest: Manifest) -> None: """Create the reporter. Args: - manifest_path (str): The path to the manifest. + manifest (Manifest): The manifest. 
""" - self._manifest_path = manifest_path - with open(self._manifest_path, "r", encoding="utf-8") as manifest: - self._manifest_buffer = io.StringIO(manifest.read()) + self._manifest = manifest def unfetched_project( self, project: ProjectEntry, wanted_version: Version, latest: Version @@ -277,23 +274,6 @@ def add_issue(self, project: ProjectEntry, issue: Issue) -> None: issue (Issue): The issue to add """ - def find_name_in_manifest(self, name: str) -> Tuple[int, int, int]: - """Find the location of a project name in the manifest.""" - self._manifest_buffer.seek(0) - for line_nr, line in enumerate(self._manifest_buffer, start=1): - match = re.search(rf"^\s+-\s*name:\s*(?P<name>{name})\s", line) - - if match: - return ( - line_nr, - int(match.start("name")) + 1, - int(match.end("name")), - ) - raise RuntimeError( - "An entry from the manifest was provided," - " that doesn't exist in the manifest!" - ) - @abstractmethod def dump_to_file(self) -> None: """Do nothing.""" diff --git a/dfetch/reporting/check/sarif_reporter.py b/dfetch/reporting/check/sarif_reporter.py index 27f175f0..cd336802 100644 --- a/dfetch/reporting/check/sarif_reporter.py +++ b/dfetch/reporting/check/sarif_reporter.py @@ -98,6 +98,7 @@ ) from dfetch.log import get_logger +from dfetch.manifest.manifest import Manifest from dfetch.manifest.project import ProjectEntry from dfetch.reporting.check.reporter import CheckReporter, Issue, IssueSeverity @@ -123,14 +124,14 @@ class SarifReporter(CheckReporter): "master/Documents/CommitteeSpecifications/2.1.0/sarif-schema-2.1.0.json" ) - def __init__(self, manifest_path: str, report_path: str) -> None: + def __init__(self, manifest: Manifest, report_path: str) -> None: """Create the sarif reporter. Args: - manifest_path (str): Path to the manifest. + manifest (Manifest): The manifest. report_path (str): Output path of the report.
""" - super().__init__(manifest_path) + super().__init__(manifest) self._report_path = report_path @@ -154,7 +155,7 @@ def __init__(self, manifest_path: str, report_path: str) -> None: ) self._run.artifacts = [ Artifact( - location=ArtifactLocation(uri=os.path.relpath(self._manifest_path)), + location=ArtifactLocation(uri=os.path.relpath(self._manifest.path)), source_language="yaml", ) ] @@ -176,7 +177,7 @@ def add_issue(self, project: ProjectEntry, issue: Issue) -> None: project (ProjectEntry): Project with the issue issue (Issue): The issue to add """ - line, col_start, col_end = self.find_name_in_manifest(project.name) + line, col_start, col_end = self._manifest.find_name_in_manifest(project.name) result = Result( message=Message(text=f"{project.name} : {issue.message}"), @@ -186,7 +187,7 @@ def add_issue(self, project: ProjectEntry, issue: Issue) -> None: Location( physical_location=PhysicalLocation( artifact_location=ArtifactLocation( - uri=os.path.relpath(self._manifest_path), index=0 + uri=os.path.relpath(self._manifest.path), index=0 ), region=Region( start_line=line, diff --git a/dfetch/reporting/reporter.py b/dfetch/reporting/reporter.py index 2d03dd1f..6b863bae 100644 --- a/dfetch/reporting/reporter.py +++ b/dfetch/reporting/reporter.py @@ -1,10 +1,9 @@ """Abstract reporting interface.""" -import io -import re from abc import ABC, abstractmethod -from typing import List, Tuple +from typing import List +from dfetch.manifest.manifest import Manifest from dfetch.manifest.project import ProjectEntry from dfetch.util.license import License @@ -14,15 +13,13 @@ class Reporter(ABC): name: str = "abstract" - def __init__(self, manifest_path: str) -> None: + def __init__(self, manifest: Manifest) -> None: """Create the reporter. Args: - manifest_path (str): The path to the manifest. 
+ manifest (Manifest): The manifest to report on """ - self._manifest_path = manifest_path - with open(self._manifest_path, "r", encoding="utf-8") as manifest: - self._manifest_buffer = io.StringIO(manifest.read()) + self._manifest = manifest @abstractmethod def add_project( @@ -33,23 +30,6 @@ def add_project( ) -> None: """Add a project to the report.""" - def find_name_in_manifest(self, name: str) -> Tuple[int, int, int]: - """Find the location of a project name in the manifest.""" - self._manifest_buffer.seek(0) - for line_nr, line in enumerate(self._manifest_buffer, start=1): - match = re.search(rf"^\s+-\s*name:\s*(?P<name>{name})\s", line) - - if match: - return ( - line_nr, - int(match.start("name")) + 1, - int(match.end("name")), - ) - raise RuntimeError( - "An entry from the manifest was provided," - " that doesn't exist in the manifest!" - ) - @abstractmethod def dump_to_file(self, outfile: str) -> bool: """Do nothing.""" diff --git a/dfetch/reporting/sbom_reporter.py b/dfetch/reporting/sbom_reporter.py index 42019832..42b21db2 100644 --- a/dfetch/reporting/sbom_reporter.py +++ b/dfetch/reporting/sbom_reporter.py @@ -37,6 +37,7 @@ from cyclonedx.schema import OutputFormat, SchemaVersion import dfetch.util.purl +from dfetch.manifest.manifest import Manifest from dfetch.manifest.project import ProjectEntry from dfetch.reporting.reporter import Reporter from dfetch.util.license import License @@ -99,9 +100,9 @@ class SbomReporter(Reporter): name = "SBoM" - def __init__(self, manifest_path: str) -> None: + def __init__(self, manifest: Manifest) -> None: """Start the report.""" - super().__init__(manifest_path) + super().__init__(manifest) self._bom = Bom() self._bom.metadata.tools.components.add(self.dfetch_tool) self._bom.metadata.tools.components.add(cdx_lib_component()) @@ -119,7 +120,7 @@ def add_project( name = project.name if purl.type == "generic" else purl.name - line_nr, start, _ = self.find_name_in_manifest(project.name) + line_nr, start, _ =
self._manifest.find_name_in_manifest(project.name) component = Component( name=name, @@ -129,7 +130,7 @@ def add_project( purl=purl, evidence=ComponentEvidence( occurrences=[ - Occurrence(location=self._manifest_path, line=line_nr, offset=start) + Occurrence(location=self._manifest.path, line=line_nr, offset=start) ], identity=[ Identity( diff --git a/tests/manifest_mock.py b/tests/manifest_mock.py index 33ea237a..f523a270 100644 --- a/tests/manifest_mock.py +++ b/tests/manifest_mock.py @@ -6,7 +6,7 @@ from dfetch.manifest.project import ProjectEntry -def mock_manifest(projects): +def mock_manifest(projects, path: str = "/some/path") -> MagicMock: """Create a manifest mock.""" project_mocks = [] @@ -17,6 +17,6 @@ def mock_manifest(projects): mock_project.destination = "some_dest" project_mocks += [mock_project] - mocked_manifest = MagicMock(spec=Manifest, projects=project_mocks) + mocked_manifest = MagicMock(spec=Manifest, projects=project_mocks, path=path) mocked_manifest.selected_projects.return_value = project_mocks return mocked_manifest diff --git a/tests/test_check.py b/tests/test_check.py index f03acb5b..075e1f5b 100644 --- a/tests/test_check.py +++ b/tests/test_check.py @@ -36,10 +36,7 @@ def test_check(name, projects): with patch("os.path.exists"): with patch("dfetch.commands.check.in_directory"): with patch("dfetch.commands.check.CheckStdoutReporter"): - mocked_get_manifest.return_value = ( - mock_manifest(projects), - "/", - ) + mocked_get_manifest.return_value = mock_manifest(projects) mocked_get_childmanifests.return_value = [] check(DEFAULT_ARGS) diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 7a92fb4f..fb7d80ee 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -142,14 +142,17 @@ def test_get_childmanifests(name, manifest_paths) -> None: with patch("dfetch.manifest.manifest.find_file") as find_file_mock: with patch("dfetch.manifest.manifest.validate"): - with patch("dfetch.manifest.manifest.Manifest"): + with 
patch("dfetch.manifest.manifest.Manifest") as manifest_mock: find_file_mock.return_value = manifest_paths found_childmanifests = get_childmanifests([parent.name]) assert len(found_childmanifests) == len(manifest_paths) - for path, result in zip(manifest_paths, found_childmanifests): - assert os.path.realpath(path) == result[1] + + for path, call in zip( + manifest_paths, manifest_mock.from_file.call_args_list, strict=True + ): + assert os.path.realpath(path) == call[0][0] def test_suggestion_found() -> None: diff --git a/tests/test_report.py b/tests/test_report.py index fa1a24b3..d256b3cf 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -4,7 +4,7 @@ # flake8: noqa import argparse -from unittest.mock import patch +from unittest.mock import mock_open, patch import pytest @@ -30,7 +30,7 @@ def test_report(name, projects): with patch("dfetch.manifest.manifest.get_manifest") as mocked_get_manifest: with patch("dfetch.log.DLogger.print_info_line") as mocked_print_info_line: - mocked_get_manifest.return_value = (mock_manifest(projects), "/") + mocked_get_manifest.return_value = mock_manifest(projects) report(DEFAULT_ARGS) diff --git a/tests/test_svn.py b/tests/test_svn.py index 09d31e96..c29c25b2 100644 --- a/tests/test_svn.py +++ b/tests/test_svn.py @@ -141,7 +141,9 @@ def test_externals(name, externals, expectations): cwd_mock.return_value = CWD parsed_externals = SvnRepo.externals() - for actual, expected in zip(parsed_externals, expectations): + for actual, expected in zip( + parsed_externals, expectations, strict=True + ): assert actual == expected diff --git a/tests/test_update.py b/tests/test_update.py index 88be0460..258e8b7f 100644 --- a/tests/test_update.py +++ b/tests/test_update.py @@ -36,15 +36,12 @@ def test_update(name, projects): with patch("os.path.exists"): with patch("dfetch.commands.update.in_directory"): with patch("dfetch.commands.update.Update._check_destination"): - mocked_get_manifest.return_value = ( - mock_manifest(projects), - 
"/", - ) + mocked_get_manifest.return_value = mock_manifest(projects) mocked_get_childmanifests.return_value = [] update(DEFAULT_ARGS) - for project in projects: + for _ in projects: mocked_make.return_value.update.assert_called() @@ -52,10 +49,8 @@ def test_forced_update(): update = Update() with patch("dfetch.manifest.manifest.get_manifest") as mocked_get_manifest: - mocked_get_manifest.return_value = ( - mock_manifest([{"name": "some_project"}]), - "/", - ) + mocked_get_manifest.return_value = mock_manifest([{"name": "some_project"}]) + with patch( "dfetch.manifest.manifest.get_childmanifests" ) as mocked_get_childmanifests: @@ -87,7 +82,15 @@ def test_create_menu(): ["-N", "--no-recommendations"], ] - for action, expected_options in zip(subparsers.choices["update"]._actions, options): + for action, expected_options in zip( + [ + action + for action in subparsers.choices["update"]._actions + if action.option_strings + ], + options, + strict=True, + ): assert action.option_strings == expected_options From 89a0466d9980da44d5e43a665fb6e6783c3a2514 Mon Sep 17 00:00:00 2001 From: Ben Date: Fri, 10 Oct 2025 18:23:46 +0000 Subject: [PATCH 08/13] Enforce minimum python version --- pyproject.toml | 1 + tests/test_manifest.py | 3 ++- tests/test_svn.py | 2 +- tests/test_update.py | 2 +- tests/test_vcs.py | 6 +++--- 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9acda7e6..7e68ae79 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -165,3 +165,4 @@ exclude = ["doc/static/uml/generate_diagram.py"] standard = ["dfetch", "features"] reportMissingImports = false reportMissingModuleSource = false +pythonVersion = "3.9" diff --git a/tests/test_manifest.py b/tests/test_manifest.py index fb7d80ee..bd86ecb2 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -150,7 +150,8 @@ def test_get_childmanifests(name, manifest_paths) -> None: assert len(found_childmanifests) == len(manifest_paths) for path, call in zip( - 
manifest_paths, manifest_mock.from_file.call_args_list, strict=True + manifest_paths, + manifest_mock.from_file.call_args_list, # , strict=True ): assert os.path.realpath(path) == call[0][0] diff --git a/tests/test_svn.py b/tests/test_svn.py index c29c25b2..72ea70fa 100644 --- a/tests/test_svn.py +++ b/tests/test_svn.py @@ -142,7 +142,7 @@ def test_externals(name, externals, expectations): parsed_externals = SvnRepo.externals() for actual, expected in zip( - parsed_externals, expectations, strict=True + parsed_externals, expectations # , strict=True ): assert actual == expected diff --git a/tests/test_update.py b/tests/test_update.py index 258e8b7f..f8c8da8d 100644 --- a/tests/test_update.py +++ b/tests/test_update.py @@ -89,7 +89,7 @@ def test_create_menu(): if action.option_strings ], options, - strict=True, + # strict=True, ): assert action.option_strings == expected_options diff --git a/tests/test_vcs.py b/tests/test_vcs.py index bdab4405..90708473 100644 --- a/tests/test_vcs.py +++ b/tests/test_vcs.py @@ -3,7 +3,7 @@ # mypy: ignore-errors # flake8: noqa -from typing import Optional +from typing import Optional, Union from unittest.mock import patch import pytest @@ -99,10 +99,10 @@ def get_default_branch(self): ) def test_check_wanted_with_local( name: str, - given_on_disk: Version | None, + given_on_disk: Union[Version, None], given_wanted: Version, expect_wanted: Version, - expect_have: Version | None, + expect_have: Union[Version, None], ): with patch("dfetch.project.vcs.os.path.exists") as mocked_path_exists: with patch("dfetch.project.vcs.Metadata.from_file") as mocked_metadata: From 4494a190279c9f74a80d04726e12c5e39aa09558 Mon Sep 17 00:00:00 2001 From: Ben Date: Fri, 10 Oct 2025 18:24:09 +0000 Subject: [PATCH 09/13] Korbit review comments --- dfetch/manifest/manifest.py | 48 ++++++++++++------- .../reporting/check/code_climate_reporter.py | 9 ++-- dfetch/reporting/check/jenkins_reporter.py | 10 ++-- dfetch/reporting/check/sarif_reporter.py | 10 ++-- 
dfetch/reporting/reporter.py | 2 +- dfetch/reporting/sbom_reporter.py | 10 ++-- dfetch/reporting/stdout_reporter.py | 5 ++ dfetch/util/license.py | 29 ++++++++--- 8 files changed, 82 insertions(+), 41 deletions(-) diff --git a/dfetch/manifest/manifest.py b/dfetch/manifest/manifest.py index 15a42e28..4e85f641 100644 --- a/dfetch/manifest/manifest.py +++ b/dfetch/manifest/manifest.py @@ -23,6 +23,7 @@ import os import pathlib import re +from dataclasses import dataclass from typing import IO, Any, Dict, List, Optional, Sequence, Tuple, Union import yaml @@ -38,6 +39,15 @@ logger = get_logger(__name__) +@dataclass +class ManifestEntryLocation: + """Location of an entry in the manifest file.""" + + line_number: int + start: int + end: int + + class RequestedProjectNotFoundError(RuntimeError): """Exception if items are not found in list of possibilities.""" @@ -111,8 +121,8 @@ def __init__( ) -> None: """Create the manifest.""" self.__version: str = str(manifest.get("version", self.CURRENT_VERSION)) - self.__text: str = str(text) - self.__path: str = str(path) + self.__text: str = text if text else "" + self.__path: str = str(path) if path else "" self._remotes, default_remotes = self._determine_remotes( manifest.get("remotes", []) @@ -193,6 +203,9 @@ def from_yaml( path: Optional[Union[str, os.PathLike[str]]] = None, ) -> "Manifest": """Create a manifest from a file like object.""" + if isinstance(text, (io.TextIOWrapper, IO)): + text = text.read() + loaded_yaml = Manifest._load_yaml(text) if not loaded_yaml: @@ -203,10 +216,6 @@ def from_yaml( if not manifest: raise RuntimeError("Missing manifest root element!") - if isinstance(text, (io.TextIOWrapper, IO)): - text.seek(0) - text = text.read() - return Manifest(manifest, text=text, path=path) @staticmethod @@ -313,24 +322,29 @@ def dump(self, path: str) -> None: self._as_dict(), manifest_file, Dumper=ManifestDumper, sort_keys=False ) - def find_name_in_manifest(self, name: str) -> Tuple[int, int, int]: - """Find the 
location of a project name in the manifest.""" + def find_name_in_manifest(self, name: str) -> ManifestEntryLocation: + """Find the location of a project name in the manifest. + + Returns: + ManifestEntryLocation of the project name in the manifest. + + Raises: + FileNotFoundError: If manifest text is not available + RuntimeError: If the project name is not found in the manifest + """ if not self.__text: raise FileNotFoundError("No manifest text available") for line_nr, line in enumerate(self.__text.splitlines(), start=1): - match = re.search(rf"^\s+-\s*name:\s*(?P<name>{name})\s*$", line) + match = re.search(rf"^\s+-\s*name:\s*(?P<name>{re.escape(name)})\s*$", line) if match: - return ( - line_nr, - int(match.start("name")) + 1, - int(match.end("name")), + return ManifestEntryLocation( + line_number=line_nr, + start=int(match.start("name")) + 1, + end=int(match.end("name")), ) - raise RuntimeError( - "An entry from the manifest was provided," - " that doesn't exist in the manifest!" - ) + raise RuntimeError(f"{name} was not found in the manifest!") def find_manifest() -> str: diff --git a/dfetch/reporting/check/code_climate_reporter.py b/dfetch/reporting/check/code_climate_reporter.py index 31d1e140..cf0b0565 100644 --- a/dfetch/reporting/check/code_climate_reporter.py +++ b/dfetch/reporting/check/code_climate_reporter.py @@ -112,7 +112,7 @@ def add_issue(self, project: ProjectEntry, issue: Issue) -> None: project (ProjectEntry): Project with the issue issue (Issue): The issue to add to the report """ - line, col_start, col_end = self._manifest.find_name_in_manifest(project.name) + location = self._manifest.find_name_in_manifest(project.name) self._report += [ { @@ -126,8 +126,11 @@ def add_issue(self, project: ProjectEntry, issue: Issue) -> None: "location": { "path": os.path.relpath(self._manifest.path), "positions": { - "begin": {"line": line, "column": col_start}, - "end": {"line": line, "column": col_end}, + "begin": { + "line": location.line_number, + "column":
location.start, + }, + "end": {"line": location.line_number, "column": location.end}, }, }, } diff --git a/dfetch/reporting/check/jenkins_reporter.py b/dfetch/reporting/check/jenkins_reporter.py index 47b6648d..0815c8b5 100644 --- a/dfetch/reporting/check/jenkins_reporter.py +++ b/dfetch/reporting/check/jenkins_reporter.py @@ -90,17 +90,17 @@ def add_issue(self, project: ProjectEntry, issue: Issue) -> None: project (ProjectEntry): Project with the issue issue (Issue): The issue to add to the report """ - line, col_start, col_end = self._manifest.find_name_in_manifest(project.name) + location = self._manifest.find_name_in_manifest(project.name) self._report["issues"] += [ { "fileName": os.path.relpath(self._manifest.path), "severity": str(issue.severity.value), "message": f"{project.name} : {issue.message}", "description": issue.description, - "lineStart": line, - "lineEnd": line, - "columnStart": col_start, - "columnEnd": col_end, + "lineStart": location.line_number, + "lineEnd": location.line_number, + "columnStart": location.start, + "columnEnd": location.end, } ] diff --git a/dfetch/reporting/check/sarif_reporter.py b/dfetch/reporting/check/sarif_reporter.py index cd336802..e02bf973 100644 --- a/dfetch/reporting/check/sarif_reporter.py +++ b/dfetch/reporting/check/sarif_reporter.py @@ -177,7 +177,7 @@ def add_issue(self, project: ProjectEntry, issue: Issue) -> None: project (ProjectEntry): Project with the issue issue (Issue): The issue to add """ - line, col_start, col_end = self._manifest.find_name_in_manifest(project.name) + location = self._manifest.find_name_in_manifest(project.name) result = Result( message=Message(text=f"{project.name} : {issue.message}"), @@ -190,10 +190,10 @@ def add_issue(self, project: ProjectEntry, issue: Issue) -> None: uri=os.path.relpath(self._manifest.path), index=0 ), region=Region( - start_line=line, - start_column=col_start, - end_line=line, - end_column=col_end + 1, + start_line=location.line_number, + 
start_column=location.start, + end_line=location.line_number, + end_column=location.end + 1, ), ) ) diff --git a/dfetch/reporting/reporter.py b/dfetch/reporting/reporter.py index 6b863bae..d405826c 100644 --- a/dfetch/reporting/reporter.py +++ b/dfetch/reporting/reporter.py @@ -13,13 +13,13 @@ class Reporter(ABC): name: str = "abstract" + @abstractmethod def __init__(self, manifest: Manifest) -> None: """Create the reporter. Args: manifest (Manifest): The manifest to report on """ - self._manifest = manifest @abstractmethod def add_project( diff --git a/dfetch/reporting/sbom_reporter.py b/dfetch/reporting/sbom_reporter.py index 42b21db2..35170e58 100644 --- a/dfetch/reporting/sbom_reporter.py +++ b/dfetch/reporting/sbom_reporter.py @@ -102,7 +102,7 @@ class SbomReporter(Reporter): def __init__(self, manifest: Manifest) -> None: """Start the report.""" - super().__init__(manifest) + self._manifest = manifest self._bom = Bom() self._bom.metadata.tools.components.add(self.dfetch_tool) self._bom.metadata.tools.components.add(cdx_lib_component()) @@ -120,7 +120,7 @@ def add_project( name = project.name if purl.type == "generic" else purl.name - line_nr, start, _ = self._manifest.find_name_in_manifest(project.name) + location = self._manifest.find_name_in_manifest(project.name) component = Component( name=name, @@ -130,7 +130,11 @@ def add_project( purl=purl, evidence=ComponentEvidence( occurrences=[ - Occurrence(location=self._manifest.path, line=line_nr, offset=start) + Occurrence( + location=self._manifest.path, + line=location.line_number, + offset=location.start, + ) ], identity=[ Identity( diff --git a/dfetch/reporting/stdout_reporter.py b/dfetch/reporting/stdout_reporter.py index 868c2ffd..d5c2e214 100644 --- a/dfetch/reporting/stdout_reporter.py +++ b/dfetch/reporting/stdout_reporter.py @@ -7,6 +7,7 @@ from typing import List from dfetch.log import get_logger +from dfetch.manifest.manifest import Manifest from dfetch.manifest.project import ProjectEntry from 
dfetch.project.metadata import Metadata from dfetch.reporting.reporter import Reporter @@ -20,6 +21,10 @@ class StdoutReporter(Reporter): name = "stdout" + def __init__(self, manifest: Manifest) -> None: + """Initialize the reporter.""" + del manifest + def add_project( self, project: ProjectEntry, diff --git a/dfetch/util/license.py b/dfetch/util/license.py index 0bdb0d4b..f85a851d 100644 --- a/dfetch/util/license.py +++ b/dfetch/util/license.py @@ -7,21 +7,36 @@ import infer_license from infer_license.types import License as InferredLicense +# Limit the max size of alicense file to parse +MAX_LICENSE_FILE_SIZE = 1024 * 1024 + @dataclass class License: - """Class to hold license information.""" + """Represents a software license with its SPDX identifiers and detection confidence. + + This class encapsulates license information detected by the infer-license library, + providing standardized identifiers and confidence level of the detection. + """ - name: str # SPDX Full name - spdx_id: str # SPDX Identifier - trove_classifier: Optional[str] # Python package classifier - probability: float # Confidence level of the license inference + name: str #: SPDX Full name + spdx_id: str #: SPDX Identifier + trove_classifier: Optional[str] #: Python package classifier + probability: float #: Confidence level of the license inference @staticmethod def from_inferred( inferred_license: InferredLicense, probability: float ) -> "License": - """Create License from an InferredLicense.""" + """Convert an infer-license License object to our internal License representation. 
+ + Args: + inferred_license: The license object from infer-license library + probability: The confidence score (0-1) of the license detection + + Returns: + License: A new License instance with the inferred information + """ return License( name=inferred_license.name, spdx_id=inferred_license.shortname, @@ -43,7 +58,7 @@ def guess_license_in_file( """ try: with open(filename, "rb") as f: - file_bytes = f.read() + file_bytes = f.read(MAX_LICENSE_FILE_SIZE) try: license_text = file_bytes.decode("utf-8") except UnicodeDecodeError: From f82fb0d4475ecadac97f3158fe0949976f766fd6 Mon Sep 17 00:00:00 2001 From: Ben Date: Fri, 10 Oct 2025 18:49:04 +0000 Subject: [PATCH 10/13] Fix sbom feature test --- dfetch/manifest/manifest.py | 8 + dfetch/reporting/sbom_reporter.py | 2 +- features/report-sbom.feature | 240 ++++++++++++++++++++++-------- features/steps/generic_steps.py | 25 +--- 4 files changed, 192 insertions(+), 83 deletions(-) diff --git a/dfetch/manifest/manifest.py b/dfetch/manifest/manifest.py index 4e85f641..db905030 100644 --- a/dfetch/manifest/manifest.py +++ b/dfetch/manifest/manifest.py @@ -123,6 +123,9 @@ def __init__( self.__version: str = str(manifest.get("version", self.CURRENT_VERSION)) self.__text: str = text if text else "" self.__path: str = str(path) if path else "" + self.__relative_path: str = ( + os.path.relpath(self.__path, os.getcwd()) if self.__path else "" + ) self._remotes, default_remotes = self._determine_remotes( manifest.get("remotes", []) @@ -248,6 +251,11 @@ def path(self) -> str: """Path to the manifest file.""" return self.__path + @property + def relative_path(self) -> str: + """Path to the manifest file relative to the current working directory.""" + return self.__relative_path + @property def version(self) -> str: """Version of the manifest file.""" diff --git a/dfetch/reporting/sbom_reporter.py b/dfetch/reporting/sbom_reporter.py index 35170e58..e3b83fea 100644 --- a/dfetch/reporting/sbom_reporter.py +++ 
b/dfetch/reporting/sbom_reporter.py @@ -131,7 +131,7 @@ def add_project( evidence=ComponentEvidence( occurrences=[ Occurrence( - location=self._manifest.path, + location=self._manifest.relative_path, line=location.line_number, offset=location.start, ) diff --git a/features/report-sbom.feature b/features/report-sbom.feature index c139f362..19cd0f7b 100644 --- a/features/report-sbom.feature +++ b/features/report-sbom.feature @@ -1,3 +1,4 @@ +@wip Feature: Create an CycloneDX sbom *Dfetch* can generate a software Bill-of-Materials (SBOM). @@ -19,78 +20,75 @@ Feature: Create an CycloneDX sbom url: https://github.com/cpputest/cpputest tag: v3.4 src: 'include/CppUTest' - """ And all projects are updated When I run "dfetch report -t sbom" Then the 'report.json' file contains """ - { - "$schema": "http://cyclonedx.org/schema/bom-1.6.schema.json", - "bomFormat": "CycloneDX", - "specVersion": "1.6", - "serialNumber": "urn:uuid:3ce78767-c202-4beb-935e-67f539cf3a58", - "version": 1, - "dependencies": [ + { + "components": [ { - "ref": "BomRef.7805091949677974.3172811758515278" - } - ], - "metadata": { - "timestamp": "2025-10-03T20:56:03.645362+00:00", - "tools": [ - { - "vendor": "dfetch-org", - "name": "dfetch", - "version": "0.10.0" - }, - { - "vendor": "CycloneDX", - "name": "cyclonedx-python-lib", - "version": "11.1.0", - "externalReferences": [ - { - "url": "https://pypi.org/project/cyclonedx-python-lib/", - "type": "distribution" - }, - { - "url": "https://github.com/CycloneDX/cyclonedx-python-lib/#readme", - "type": "website" - }, + "bom-ref": "cpputest-v3.4", + "evidence": { + "identity": [ { - "url": "https://github.com/CycloneDX/cyclonedx-python-lib/actions", - "type": "build-system" + "concludedValue": "cpputest", + "field": "name", + "methods": [ + { + "confidence": 0.4, + "technique": "manifest-analysis", + "value": "Name as used for project in dfetch.yaml" + } + ], + "tools": [ + "dfetch-0.10.0" + ] }, { - "url": 
"https://github.com/CycloneDX/cyclonedx-python-lib/blob/main/LICENSE", - "type": "license" + "concludedValue": "pkg:github/cpputest/cpputest@v3.4#include/CppUTest", + "field": "purl", + "methods": [ + { + "confidence": 0.4, + "technique": "manifest-analysis", + "value": "Determined from https://github.com/cpputest/cpputest as used for the project cpputest in dfetch.yaml" + } + ], + "tools": [ + "dfetch-0.10.0" + ] }, { - "url": "https://github.com/CycloneDX/cyclonedx-python-lib/blob/main/CHANGELOG.md", - "type": "release-notes" - }, - { - "url": "https://cyclonedx-python-library.readthedocs.io/", - "type": "documentation" - }, + "concludedValue": "v3.4", + "field": "version", + "methods": [ + { + "confidence": 0.4, + "technique": "manifest-analysis", + "value": "Version as used for project in dfetch.yaml" + } + ], + "tools": [ + "dfetch-0.10.0" + ] + } + ], + "licenses": [ { - "url": "https://github.com/CycloneDX/cyclonedx-python-lib/issues", - "type": "issue-tracker" - }, + "license": { + "id": "BSD-3-Clause" + } + } + ], + "occurrences": [ { - "url": "https://github.com/CycloneDX/cyclonedx-python-lib", - "type": "vcs" + "line": 5, + "location": "dfetch.yaml", + "offset": 13 } ] - } - ] - }, - "components": [ - { - "type": "library", - "bom-ref": "BomRef.7805091949677974.3172811758515278", - "name": "cpputest", - "version": "v3.4", + }, "externalReferences": [ { "type": "vcs", @@ -99,11 +97,133 @@ Feature: Create an CycloneDX sbom ], "licenses": [ { - "expression": "BSD 3-Clause \"New\" or \"Revised\" License" + "license": { + "id": "BSD-3-Clause" + } } ], - "purl": "pkg:github/cpputest/cpputest@v3.4#include/CppUTest" + "name": "cpputest", + "purl": "pkg:github/cpputest/cpputest@v3.4#include/CppUTest", + "type": "library", + "version": "v3.4" + } + ], + "dependencies": [ + { + "ref": "cpputest-v3.4" } - ] + ], + "metadata": { + "timestamp": "2025-10-10T18:28:32.074803+00:00", + "tools": { + "components": [ + { + "bom-ref": "dfetch-0.10.0", + "externalReferences": 
[ + { + "type": "build-system", + "url": "https://github.com/dfetch-org/dfetch/actions" + }, + { + "type": "distribution", + "url": "https://pypi.org/project/dfetch/" + }, + { + "type": "documentation", + "url": "https://dfetch.readthedocs.io/" + }, + { + "type": "issue-tracker", + "url": "https://github.com/dfetch-org/dfetch/issues" + }, + { + "type": "license", + "url": "https://github.com/dfetch-org/dfetch/blob/main/LICENSE" + }, + { + "type": "release-notes", + "url": "https://github.com/dfetch-org/dfetch/blob/main/CHANGELOG.rst" + }, + { + "type": "vcs", + "url": "https://github.com/dfetch-org/dfetch" + }, + { + "type": "website", + "url": "https://dfetch-org.github.io/" + } + ], + "licenses": [ + { + "license": { + "acknowledgement": "declared", + "id": "MIT" + } + } + ], + "name": "dfetch", + "supplier": { + "name": "dfetch-org" + }, + "type": "application", + "version": "0.10.0" + }, + { + "description": "Python library for CycloneDX", + "externalReferences": [ + { + "type": "build-system", + "url": "https://github.com/CycloneDX/cyclonedx-python-lib/actions" + }, + { + "type": "distribution", + "url": "https://pypi.org/project/cyclonedx-python-lib/" + }, + { + "type": "documentation", + "url": "https://cyclonedx-python-library.readthedocs.io/" + }, + { + "type": "issue-tracker", + "url": "https://github.com/CycloneDX/cyclonedx-python-lib/issues" + }, + { + "type": "license", + "url": "https://github.com/CycloneDX/cyclonedx-python-lib/blob/main/LICENSE" + }, + { + "type": "release-notes", + "url": "https://github.com/CycloneDX/cyclonedx-python-lib/blob/main/CHANGELOG.md" + }, + { + "type": "vcs", + "url": "https://github.com/CycloneDX/cyclonedx-python-lib" + }, + { + "type": "website", + "url": "https://github.com/CycloneDX/cyclonedx-python-lib/#readme" + } + ], + "group": "CycloneDX", + "licenses": [ + { + "license": { + "acknowledgement": "declared", + "id": "Apache-2.0" + } + } + ], + "name": "cyclonedx-python-lib", + "type": "library", + "version": 
"11.1.0" + } + ] + } + }, + "serialNumber": "urn:uuid:7621038e-3047-4862-99e7-d637ee9458a9", + "version": 1, + "$schema": "http://cyclonedx.org/schema/bom-1.6.schema.json", + "bomFormat": "CycloneDX", + "specVersion": "1.6" } """ diff --git a/features/steps/generic_steps.py b/features/steps/generic_steps.py index 93da62c0..9ed66e4b 100644 --- a/features/steps/generic_steps.py +++ b/features/steps/generic_steps.py @@ -9,7 +9,7 @@ import pathlib import re from itertools import zip_longest -from typing import Iterable, List, Optional, Pattern, Tuple +from typing import Iterable, List, Optional, Pattern, Tuple, Union from behave import given, then, when # pylint: disable=no-name-in-module from behave.runner import Context @@ -57,38 +57,19 @@ def check_file_exists(path): assert os.path.isfile(path), f"Expected {path} to exist, but it didn't!" -def check_json(path, content): - """Check a file.""" +def check_json(path: Union[str, os.PathLike], content: str) -> None: + """Check a JSON file.""" with open(path, "r", encoding="UTF-8") as file_to_check: actual_json = json.load(file_to_check) expected_json = json.loads(content) - if "bomFormat" in expected_json: - sort_sbom(expected_json) - if "bomFormat" in actual_json: - sort_sbom(actual_json) - check_content( json.dumps(expected_json, indent=4, sort_keys=True).splitlines(), json.dumps(actual_json, indent=4, sort_keys=True).splitlines(), ) -def sort_sbom(sbom): - """Sort some fields in an sbom.""" - - for tool in sbom["metadata"]["tools"]: - if "externalReferences" in tool: - tool["externalReferences"] = sorted( - tool["externalReferences"], key=lambda x: x["type"] - ) - - sbom["metadata"]["tools"] = sorted( - sbom["metadata"]["tools"], key=lambda x: x["name"] - ) - - def check_content( expected_content: Iterable[str], actual_content: Iterable[str] ) -> None: From 579e96f7c781466a8e9dbf73dc6d4c7ea29733d7 Mon Sep 17 00:00:00 2001 From: Ben Date: Fri, 10 Oct 2025 18:54:04 +0000 Subject: [PATCH 11/13] Improve tests --- 
features/report-sbom.feature | 3 +-- tests/test_manifest.py | 3 +-- tests/test_report.py | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/features/report-sbom.feature b/features/report-sbom.feature index 19cd0f7b..1afac93d 100644 --- a/features/report-sbom.feature +++ b/features/report-sbom.feature @@ -1,9 +1,8 @@ -@wip Feature: Create an CycloneDX sbom *Dfetch* can generate a software Bill-of-Materials (SBOM). - An SBOM lists the components and their supply chain relationships. Downstream + This SBOM lists the components and their supply chain relationships. Downstream users of the software can assess the licenses used and potential risk of dependencies. The generated SBOM can be used as input for other tools to monitor dependencies. diff --git a/tests/test_manifest.py b/tests/test_manifest.py index bd86ecb2..7f2a8093 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -8,7 +8,6 @@ import pytest -import dfetch.manifest.manifest from dfetch import DEFAULT_MANIFEST_NAME from dfetch.manifest.manifest import ( Manifest, @@ -73,7 +72,7 @@ def test_no_projects() -> None: """Test that manifest without projects cannot be read.""" with pytest.raises(KeyError): - manifest = given_manifest_from_text(MANIFEST_NO_PROJECTS) + given_manifest_from_text(MANIFEST_NO_PROJECTS) def test_no_remotes() -> None: diff --git a/tests/test_report.py b/tests/test_report.py index d256b3cf..635839e4 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -4,7 +4,7 @@ # flake8: noqa import argparse -from unittest.mock import mock_open, patch +from unittest.mock import patch import pytest From b1d5dcb7a45634e2984ad76f2de66cc17754f95e Mon Sep 17 00:00:00 2001 From: Ben Date: Fri, 10 Oct 2025 19:09:54 +0000 Subject: [PATCH 12/13] Korbit review changes --- dfetch/commands/report.py | 2 +- dfetch/reporting/reporter.py | 7 ++++++- dfetch/reporting/sbom_reporter.py | 6 +++--- dfetch/reporting/stdout_reporter.py | 5 ----- dfetch/util/license.py | 13 
++++++------- 5 files changed, 16 insertions(+), 17 deletions(-) diff --git a/dfetch/commands/report.py b/dfetch/commands/report.py index 43478248..c5b03aae 100644 --- a/dfetch/commands/report.py +++ b/dfetch/commands/report.py @@ -20,7 +20,7 @@ logger = get_logger(__name__) -# Minimum confidence to accept a license guess +# Only accept license guesses with this confidence or higher to avoid false positives LICENSE_PROBABILITY_THRESHOLD = 0.80 diff --git a/dfetch/reporting/reporter.py b/dfetch/reporting/reporter.py index d405826c..705be102 100644 --- a/dfetch/reporting/reporter.py +++ b/dfetch/reporting/reporter.py @@ -13,13 +13,18 @@ class Reporter(ABC): name: str = "abstract" - @abstractmethod def __init__(self, manifest: Manifest) -> None: """Create the reporter. Args: manifest (Manifest): The manifest to report on """ + self._manifest = manifest + + @property + def manifest(self) -> Manifest: + """Get the manifest.""" + return self._manifest @abstractmethod def add_project( diff --git a/dfetch/reporting/sbom_reporter.py b/dfetch/reporting/sbom_reporter.py index e3b83fea..fcb02f82 100644 --- a/dfetch/reporting/sbom_reporter.py +++ b/dfetch/reporting/sbom_reporter.py @@ -102,7 +102,7 @@ class SbomReporter(Reporter): def __init__(self, manifest: Manifest) -> None: """Start the report.""" - self._manifest = manifest + super().__init__(manifest) self._bom = Bom() self._bom.metadata.tools.components.add(self.dfetch_tool) self._bom.metadata.tools.components.add(cdx_lib_component()) @@ -120,7 +120,7 @@ def add_project( name = project.name if purl.type == "generic" else purl.name - location = self._manifest.find_name_in_manifest(project.name) + location = self.manifest.find_name_in_manifest(project.name) component = Component( name=name, @@ -131,7 +131,7 @@ def add_project( evidence=ComponentEvidence( occurrences=[ Occurrence( - location=self._manifest.relative_path, + location=self.manifest.relative_path, line=location.line_number, offset=location.start, ) diff --git 
a/dfetch/reporting/stdout_reporter.py b/dfetch/reporting/stdout_reporter.py index d5c2e214..868c2ffd 100644 --- a/dfetch/reporting/stdout_reporter.py +++ b/dfetch/reporting/stdout_reporter.py @@ -7,7 +7,6 @@ from typing import List from dfetch.log import get_logger -from dfetch.manifest.manifest import Manifest from dfetch.manifest.project import ProjectEntry from dfetch.project.metadata import Metadata from dfetch.reporting.reporter import Reporter @@ -21,10 +20,6 @@ class StdoutReporter(Reporter): name = "stdout" - def __init__(self, manifest: Manifest) -> None: - """Initialize the reporter.""" - del manifest - def add_project( self, project: ProjectEntry, diff --git a/dfetch/util/license.py b/dfetch/util/license.py index f85a851d..47a2bb61 100644 --- a/dfetch/util/license.py +++ b/dfetch/util/license.py @@ -7,8 +7,8 @@ import infer_license from infer_license.types import License as InferredLicense -# Limit the max size of alicense file to parse -MAX_LICENSE_FILE_SIZE = 1024 * 1024 +# Read at most this many bytes of a license file to prevent memory issues with large files +MAX_LICENSE_FILE_SIZE = 1024 * 1024 # 1 MB @dataclass @@ -50,6 +50,9 @@ def guess_license_in_file( ) -> Optional[License]: """Attempt to identify the license of a given file. + Tries UTF-8 encoding first, falling back to Latin-1 for legacy license files. + If the file cannot be read or no license is detected, returns None. 
+ Args: filename (Union[str, os.PathLike[str]]): Path to the file to analyze @@ -63,11 +66,7 @@ def guess_license_in_file( license_text = file_bytes.decode("utf-8") except UnicodeDecodeError: license_text = file_bytes.decode("latin-1") - except (FileNotFoundError, PermissionError, IsADirectoryError): - # Return None for file access issues - return None - except OSError: - # Handle other OS-level file errors + except (FileNotFoundError, PermissionError, IsADirectoryError, OSError): return None probable_licenses = infer_license.api.probabilities(license_text) From 61b2cb4f7cd4c9fe3e834465dc9fc2153da85031 Mon Sep 17 00:00:00 2001 From: Ben Date: Fri, 10 Oct 2025 19:13:59 +0000 Subject: [PATCH 13/13] Update changelog --- CHANGELOG.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ee7cb14a..b12b51a1 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -9,6 +9,7 @@ Release 0.11.0 (unreleased) * Add security policy (#784) * Add provenance / release attestation to pypi package (#784) * Support multiple licenses per project (#788) +* Add evidence to sbom report (#788) Release 0.10.0 (released 2025-03-12) ====================================