diff --git a/tests/test_json_steps.py b/tests/test_json_steps.py new file mode 100644 index 00000000..937025da --- /dev/null +++ b/tests/test_json_steps.py @@ -0,0 +1,327 @@ +"""Unit tests for features/steps/json_steps.py – _json_subset_matches and _normalise_json.""" + +# mypy: ignore-errors +# flake8: noqa + +import json +import os + +import pytest + +from features.steps.json_steps import _json_subset_matches, _normalise_json, check_json_subset + + +# --------------------------------------------------------------------------- +# _normalise_json +# --------------------------------------------------------------------------- + + +def test_normalise_json_replaces_iso_timestamp(): + result = _normalise_json("2024-01-15T12:34:56.789012+00:00") + assert result == "[timestamp]" + + +def test_normalise_json_replaces_urn_uuid(): + result = _normalise_json("urn:uuid:550e8400-e29b-41d4-a716-446655440000") + assert result == "[urn-uuid]" + + +def test_normalise_json_preserves_regular_string(): + result = _normalise_json("MIT") + assert result == "MIT" + + +def test_normalise_json_preserves_empty_string(): + result = _normalise_json("") + assert result == "" + + +def test_normalise_json_preserves_numeric(): + result = _normalise_json(42) + assert result == 42 + + +def test_normalise_json_preserves_boolean(): + assert _normalise_json(True) is True + assert _normalise_json(False) is False + + +def test_normalise_json_preserves_none(): + assert _normalise_json(None) is None + + +def test_normalise_json_normalises_nested_dict(): + obj = { + "name": "test", + "timestamp": "2024-01-15T12:34:56.789012+00:00", + "serial": "urn:uuid:550e8400-e29b-41d4-a716-446655440000", + } + result = _normalise_json(obj) + assert result["name"] == "test" + assert result["timestamp"] == "[timestamp]" + assert result["serial"] == "[urn-uuid]" + + +def test_normalise_json_normalises_list_elements(): + items = [ + "2024-01-15T12:34:56.789012+00:00", + "plain string", + 
"urn:uuid:550e8400-e29b-41d4-a716-446655440000", + ] + result = _normalise_json(items) + assert result[0] == "[timestamp]" + assert result[1] == "plain string" + assert result[2] == "[urn-uuid]" + + +def test_normalise_json_nested_list_in_dict(): + obj = { + "items": [ + {"ts": "2024-01-15T12:34:56.789012+00:00"}, + {"name": "keep"}, + ] + } + result = _normalise_json(obj) + assert result["items"][0]["ts"] == "[timestamp]" + assert result["items"][1]["name"] == "keep" + + +def test_normalise_json_string_not_matching_patterns_unchanged(): + # Close but not matching timestamp format + result = _normalise_json("2024-01-15 12:34:56") + assert result == "2024-01-15 12:34:56" + + +# --------------------------------------------------------------------------- +# _json_subset_matches – scalar matching +# --------------------------------------------------------------------------- + + +def test_json_subset_matches_equal_scalars(): + assert _json_subset_matches("MIT", "MIT") is True + + +def test_json_subset_matches_unequal_scalars(): + assert _json_subset_matches("MIT", "Apache-2.0") is False + + +def test_json_subset_matches_numeric(): + assert _json_subset_matches(42, 42) is True + assert _json_subset_matches(42, 43) is False + + +# --------------------------------------------------------------------------- +# _json_subset_matches – dict matching +# --------------------------------------------------------------------------- + + +def test_json_subset_matches_exact_dict(): + assert _json_subset_matches({"a": 1}, {"a": 1}) is True + + +def test_json_subset_matches_dict_subset(): + """Expected is a strict subset of actual – should match.""" + assert _json_subset_matches({"a": 1}, {"a": 1, "b": 2}) is True + + +def test_json_subset_matches_dict_missing_key(): + """Expected has a key not present in actual – should not match.""" + assert _json_subset_matches({"a": 1, "c": 3}, {"a": 1, "b": 2}) is False + + +def test_json_subset_matches_dict_wrong_value(): + assert 
_json_subset_matches({"a": 1}, {"a": 2}) is False + + +def test_json_subset_matches_dict_vs_non_dict(): + assert _json_subset_matches({"a": 1}, [1, 2]) is False + assert _json_subset_matches({"a": 1}, "string") is False + + +def test_json_subset_matches_nested_dict(): + expected = {"outer": {"inner": "value"}} + actual = {"outer": {"inner": "value", "extra": "ignored"}} + assert _json_subset_matches(expected, actual) is True + + +# --------------------------------------------------------------------------- +# _json_subset_matches – list matching (backtracking) +# --------------------------------------------------------------------------- + + +def test_json_subset_matches_list_empty_expected(): + """Empty expected list matches any actual list.""" + assert _json_subset_matches([], [1, 2, 3]) is True + assert _json_subset_matches([], []) is True + + +def test_json_subset_matches_list_exact(): + assert _json_subset_matches([1, 2], [1, 2]) is True + + +def test_json_subset_matches_list_subset_present(): + assert _json_subset_matches([1], [1, 2, 3]) is True + assert _json_subset_matches([2], [1, 2, 3]) is True + assert _json_subset_matches([3], [1, 2, 3]) is True + + +def test_json_subset_matches_list_item_not_present(): + assert _json_subset_matches([4], [1, 2, 3]) is False + + +def test_json_subset_matches_list_vs_non_list(): + assert _json_subset_matches([1], "not a list") is False + assert _json_subset_matches([1], {"a": 1}) is False + + +def test_json_subset_matches_list_backtracking_avoids_greedy_pitfall(): + """Two expected items cannot both be matched by one actual item. + + Scenario exercised below: + expected = [{"a": 1, "b": 2}, {"a": 1}] + actual = [{"a": 1, "b": 2}] + + Each expected item is individually a subset of the only actual item, but + an actual item may satisfy at most one expected item. Whichever pairing + the matcher tries (greedy or backtracking), one expected item is left + without a partner, so the overall result must be False.
+ """ + expected = [{"a": 1, "b": 2}, {"a": 1}] + actual = [{"a": 1, "b": 2}] + # The second expected is a subset of the only actual item; combined they + # can't both be satisfied since there is only one actual item. + # This should fail because we need two distinct actual items. + assert _json_subset_matches(expected, actual) is False + + +def test_json_subset_matches_list_backtracking_succeeds_when_possible(): + """Both expected items can be matched when actual has enough entries.""" + expected = [{"a": 1, "b": 2}, {"a": 1}] + actual = [{"a": 1, "b": 2}, {"a": 1, "c": 3}] + assert _json_subset_matches(expected, actual) is True + + +def test_json_subset_matches_list_order_independent(): + """Items in the expected list don't have to appear in the same order as actual.""" + expected = [{"name": "B"}, {"name": "A"}] + actual = [{"name": "A", "val": 1}, {"name": "B", "val": 2}] + assert _json_subset_matches(expected, actual) is True + + +def test_json_subset_matches_list_of_dicts_subset(): + """Each expected dict is checked as a subset of a corresponding actual dict.""" + expected = [{"id": "MIT"}, {"id": "Apache-2.0"}] + actual = [ + {"id": "MIT", "name": "MIT License"}, + {"id": "Apache-2.0", "name": "Apache License 2.0"}, + {"id": "GPL-2.0"}, + ] + assert _json_subset_matches(expected, actual) is True + + +def test_json_subset_matches_list_each_actual_used_once(): + """The same actual item cannot be used to satisfy two expected items.""" + expected = [{"id": "MIT"}, {"id": "MIT"}] + actual = [{"id": "MIT"}] + # Only one MIT in actual; can't satisfy two expected MITs + assert _json_subset_matches(expected, actual) is False + + +def test_json_subset_matches_deeply_nested(): + expected = { + "components": [ + { + "name": "my-lib", + "licenses": [{"license": {"id": "MIT"}}], + } + ] + } + actual = { + "components": [ + { + "name": "my-lib", + "version": "1.0", + "licenses": [ + {"license": {"id": "MIT", "name": "MIT License"}}, + ], + } + ] + } + assert 
_json_subset_matches(expected, actual) is True + + +def test_json_subset_matches_deeply_nested_mismatch(): + expected = { + "components": [ + { + "name": "my-lib", + "licenses": [{"license": {"id": "GPL-2.0"}}], + } + ] + } + actual = { + "components": [ + { + "name": "my-lib", + "licenses": [{"license": {"id": "MIT"}}], + } + ] + } + assert _json_subset_matches(expected, actual) is False + + +# --------------------------------------------------------------------------- +# check_json_subset – integration with file I/O and substitutions +# --------------------------------------------------------------------------- + + +def test_check_json_subset_passes_when_subset_matches(tmp_path): + actual = {"name": "SomeProject", "version": "1.0", "extra": "ignored"} + json_file = tmp_path / "report.json" + json_file.write_text(json.dumps(actual), encoding="utf-8") + + context = object() # no archive substitutions needed + # Should not raise + check_json_subset(str(json_file), '{"name": "SomeProject"}', context) + + +def test_check_json_subset_raises_when_not_subset(tmp_path): + actual = {"name": "SomeProject"} + json_file = tmp_path / "report.json" + json_file.write_text(json.dumps(actual), encoding="utf-8") + + context = object() + with pytest.raises(AssertionError, match="JSON subset mismatch"): + check_json_subset(str(json_file), '{"name": "OtherProject"}', context) + + +def test_check_json_subset_normalises_timestamps(tmp_path): + """Timestamps in actual are normalised so the expected placeholder matches.""" + actual = { + "metadata": {"timestamp": "2024-06-01T10:00:00.000000+00:00"}, + "name": "test", + } + json_file = tmp_path / "report.json" + json_file.write_text(json.dumps(actual), encoding="utf-8") + + context = object() + # Use a different timestamp value; after normalisation both become [timestamp] + check_json_subset( + str(json_file), + '{"metadata": {"timestamp": "2024-01-01T00:00:00.000000+00:00"}}', + context, + ) + + +def 
test_check_json_subset_substitutes_archive_url(tmp_path): + """The <archive-url> placeholder is replaced from context.archive_url.""" + actual = {"url": "http://example.com/archive.tar.gz"} + json_file = tmp_path / "report.json" + json_file.write_text(json.dumps(actual), encoding="utf-8") + + class FakeContext: + archive_url = "http://example.com/archive.tar.gz" + + check_json_subset( + str(json_file), '{"url": "<archive-url>"}', FakeContext() + ) \ No newline at end of file diff --git a/tests/test_license.py b/tests/test_license.py new file mode 100644 index 00000000..9667065f --- /dev/null +++ b/tests/test_license.py @@ -0,0 +1,177 @@ +"""Unit tests for dfetch.util.license – LicenseScanResult and License changes.""" + +# mypy: ignore-errors +# flake8: noqa + +from unittest.mock import MagicMock, patch + +import pytest + +from dfetch.util.license import License, LicenseScanResult, guess_license_in_file + + +# --------------------------------------------------------------------------- +# LicenseScanResult – dataclass defaults and states +# --------------------------------------------------------------------------- + + +def test_license_scan_result_defaults(): + """Default LicenseScanResult has safe/empty values.""" + result = LicenseScanResult() + assert result.identified == [] + assert result.unclassified_files == [] + assert result.was_scanned is False + assert result.threshold == 0.0 + + +def test_license_scan_result_not_scanned(): + """was_scanned=False represents a project that was never fetched.""" + result = LicenseScanResult(was_scanned=False) + assert not result.was_scanned + assert result.identified == [] + assert result.unclassified_files == [] + + +def test_license_scan_result_identified(): + """was_scanned=True with identified licenses is the happy path.""" + lic = License(name="MIT License", spdx_id="MIT", trove_classifier=None, probability=0.95) + result = LicenseScanResult( + identified=[lic], + was_scanned=True, + threshold=0.80, + ) + assert result.was_scanned is True + assert 
len(result.identified) == 1 + assert result.identified[0].spdx_id == "MIT" + assert result.unclassified_files == [] + assert result.threshold == 0.80 + + +def test_license_scan_result_unclassified(): + """was_scanned=True with unclassified_files and no identified is the unclassifiable case.""" + result = LicenseScanResult( + unclassified_files=["LICENSE"], + was_scanned=True, + threshold=0.80, + ) + assert result.was_scanned is True + assert result.identified == [] + assert result.unclassified_files == ["LICENSE"] + + +def test_license_scan_result_no_license_file(): + """was_scanned=True with empty identified and empty unclassified_files means no file found.""" + result = LicenseScanResult(was_scanned=True, threshold=0.80) + assert result.was_scanned is True + assert result.identified == [] + assert result.unclassified_files == [] + + +def test_license_scan_result_multiple_identified(): + """Multiple identified licenses are all stored.""" + lic1 = License(name="MIT License", spdx_id="MIT", trove_classifier=None, probability=0.95) + lic2 = License(name="Apache 2.0", spdx_id="Apache-2.0", trove_classifier=None, probability=0.90) + result = LicenseScanResult(identified=[lic1, lic2], was_scanned=True) + assert len(result.identified) == 2 + + +def test_license_scan_result_multiple_unclassified(): + """Multiple unclassified files are all stored.""" + result = LicenseScanResult( + unclassified_files=["LICENSE", "COPYING"], + was_scanned=True, + ) + assert len(result.unclassified_files) == 2 + + +# --------------------------------------------------------------------------- +# License.from_inferred – text field propagation +# --------------------------------------------------------------------------- + + +def _make_inferred_license(name="MIT License", shortname="MIT", trove="License :: OSI Approved :: MIT License"): + """Build a minimal infer-license InferredLicense-like mock.""" + mock = MagicMock() + mock.name = name + mock.shortname = shortname + mock.trove_classifier = 
trove + return mock + + +def test_license_from_inferred_without_text(): + """License.from_inferred without text leaves text as None.""" + inferred = _make_inferred_license() + lic = License.from_inferred(inferred, probability=0.92) + assert lic.text is None + assert lic.name == "MIT License" + assert lic.spdx_id == "MIT" + assert lic.probability == 0.92 + assert lic.trove_classifier == "License :: OSI Approved :: MIT License" + + +def test_license_from_inferred_with_text(): + """License.from_inferred stores provided license text.""" + inferred = _make_inferred_license() + raw_text = "MIT License\n\nCopyright (c) 2024 Test\n" + lic = License.from_inferred(inferred, probability=0.95, text=raw_text) + assert lic.text == raw_text + + +def test_license_from_inferred_with_none_text_explicitly(): + """Explicitly passing text=None behaves the same as omitting it.""" + inferred = _make_inferred_license() + lic = License.from_inferred(inferred, probability=0.85, text=None) + assert lic.text is None + + +def test_license_from_inferred_probability_stored(): + """Probability is stored verbatim on the License.""" + inferred = _make_inferred_license() + lic = License.from_inferred(inferred, probability=0.80) + assert lic.probability == 0.80 + + +def test_license_text_field_default_on_direct_construction(): + """License constructed directly has text=None by default.""" + lic = License(name="MIT License", spdx_id="MIT", trove_classifier=None, probability=0.9) + assert lic.text is None + + +def test_license_text_field_can_be_set_directly(): + """License.text can be set during direct construction.""" + lic = License( + name="MIT License", spdx_id="MIT", trove_classifier=None, probability=0.9, text="some text" + ) + assert lic.text == "some text" + + +# --------------------------------------------------------------------------- +# guess_license_in_file – text is forwarded to returned License +# --------------------------------------------------------------------------- + + +def 
test_guess_license_in_file_returns_text(tmp_path): + """guess_license_in_file passes the file text to License.from_inferred.""" + license_file = tmp_path / "LICENSE" + license_text = "MIT License\nPermission is hereby granted..." + license_file.write_text(license_text, encoding="utf-8") + + mock_inferred = _make_inferred_license() + with patch("dfetch.util.license.infer_license.api.probabilities") as mock_prob: + mock_prob.return_value = [(mock_inferred, 0.95)] + result = guess_license_in_file(str(license_file)) + + assert result is not None + assert result.text == license_text + + +def test_guess_license_in_file_returns_none_when_no_probabilities(tmp_path): + """guess_license_in_file returns None when infer_license returns empty list.""" + license_file = tmp_path / "LICENSE" + license_file.write_text("some random text", encoding="utf-8") + + with patch("dfetch.util.license.infer_license.api.probabilities") as mock_prob: + mock_prob.return_value = [] + result = guess_license_in_file(str(license_file)) + + assert result is None \ No newline at end of file diff --git a/tests/test_report.py b/tests/test_report.py index 55d0cbe1..887af5bb 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -4,12 +4,15 @@ # flake8: noqa import argparse +import os from pathlib import Path -from unittest.mock import Mock, patch +from unittest.mock import Mock, MagicMock, patch, call import pytest -from dfetch.commands.report import Report, ReportTypes +from dfetch.commands.report import Report, ReportTypes, LICENSE_PROBABILITY_THRESHOLD +from dfetch.manifest.project import ProjectEntry +from dfetch.util.license import License, LicenseScanResult from tests.manifest_mock import mock_manifest DEFAULT_ARGS = argparse.Namespace() @@ -45,3 +48,149 @@ def test_report(name, projects): mocked_print_info_line.assert_any_call(project["name"], "") else: mocked_print_info_line.assert_not_called() + + +# --------------------------------------------------------------------------- +# 
Report._determine_licenses – LicenseScanResult return values +# --------------------------------------------------------------------------- + + +def _make_project_entry(name: str = "my-project", destination: str = "/some/path") -> Mock: + p = Mock(spec=ProjectEntry) + p.name = name + p.destination = destination + return p + + +def test_determine_licenses_returns_not_scanned_when_destination_missing(): + """Returns LicenseScanResult(was_scanned=False) when project destination doesn't exist.""" + project = _make_project_entry(destination="/nonexistent/path") + with patch("os.path.exists", return_value=False): + result = Report._determine_licenses(project) + assert isinstance(result, LicenseScanResult) + assert result.was_scanned is False + assert result.identified == [] + assert result.unclassified_files == [] + + +def test_determine_licenses_returns_scanned_true_when_destination_exists(tmp_path): + """Returns was_scanned=True when destination exists.""" + dest = tmp_path / "proj" + dest.mkdir() + project = _make_project_entry(destination=str(dest)) + + with patch("dfetch.util.license.infer_license.api.probabilities", return_value=[]): + result = Report._determine_licenses(project) + + assert result.was_scanned is True + + +def test_determine_licenses_sets_threshold(tmp_path): + """Returned LicenseScanResult.threshold matches LICENSE_PROBABILITY_THRESHOLD.""" + dest = tmp_path / "proj" + dest.mkdir() + project = _make_project_entry(destination=str(dest)) + + with patch("dfetch.util.license.infer_license.api.probabilities", return_value=[]): + result = Report._determine_licenses(project) + + assert result.threshold == LICENSE_PROBABILITY_THRESHOLD + + +def test_determine_licenses_identifies_license_above_threshold(tmp_path): + """A license file whose probability >= threshold ends up in identified.""" + dest = tmp_path / "proj" + dest.mkdir() + (dest / "LICENSE").write_text("MIT License\nCopyright 2024", encoding="utf-8") + project = 
_make_project_entry(destination=str(dest)) + + mock_inferred = MagicMock() + mock_inferred.name = "MIT License" + mock_inferred.shortname = "MIT" + mock_inferred.trove_classifier = None + + with patch( + "dfetch.util.license.infer_license.api.probabilities", + return_value=[(mock_inferred, LICENSE_PROBABILITY_THRESHOLD)], + ): + result = Report._determine_licenses(project) + + assert len(result.identified) == 1 + assert result.identified[0].spdx_id == "MIT" + assert result.unclassified_files == [] + + +def test_determine_licenses_rejects_license_below_threshold(tmp_path): + """A license file whose probability < threshold ends up in unclassified_files.""" + dest = tmp_path / "proj" + dest.mkdir() + (dest / "LICENSE").write_text("Some vague text", encoding="utf-8") + project = _make_project_entry(destination=str(dest)) + + mock_inferred = MagicMock() + mock_inferred.name = "MIT License" + mock_inferred.shortname = "MIT" + mock_inferred.trove_classifier = None + + with patch( + "dfetch.util.license.infer_license.api.probabilities", + return_value=[(mock_inferred, LICENSE_PROBABILITY_THRESHOLD - 0.01)], + ): + result = Report._determine_licenses(project) + + assert result.identified == [] + assert "LICENSE" in result.unclassified_files + + +def test_determine_licenses_no_license_file(tmp_path): + """When no license file exists, both identified and unclassified_files are empty.""" + dest = tmp_path / "proj" + dest.mkdir() + (dest / "README.md").write_text("# My Project", encoding="utf-8") + project = _make_project_entry(destination=str(dest)) + + result = Report._determine_licenses(project) + + assert result.was_scanned is True + assert result.identified == [] + assert result.unclassified_files == [] + + +def test_determine_licenses_unclassifiable_file_no_probabilities(tmp_path): + """When infer_license returns nothing, the file goes to unclassified_files.""" + dest = tmp_path / "proj" + dest.mkdir() + (dest / "LICENSE").write_text("No recognizable license text here", 
encoding="utf-8") + project = _make_project_entry(destination=str(dest)) + + with patch( + "dfetch.util.license.infer_license.api.probabilities", + return_value=[], + ): + result = Report._determine_licenses(project) + + assert result.identified == [] + assert "LICENSE" in result.unclassified_files + + +def test_determine_licenses_threshold_boundary_exact_match(tmp_path): + """A probability exactly equal to the threshold is accepted (>= not >).""" + dest = tmp_path / "proj" + dest.mkdir() + (dest / "LICENSE").write_text("MIT License text", encoding="utf-8") + project = _make_project_entry(destination=str(dest)) + + mock_inferred = MagicMock() + mock_inferred.name = "MIT License" + mock_inferred.shortname = "MIT" + mock_inferred.trove_classifier = None + + # Exactly at the configured threshold (not a hard-coded copy of it): identified, not rejected + with patch( + "dfetch.util.license.infer_license.api.probabilities", + return_value=[(mock_inferred, LICENSE_PROBABILITY_THRESHOLD)], + ): + result = Report._determine_licenses(project) + + assert len(result.identified) == 1 + assert result.unclassified_files == [] \ No newline at end of file diff --git a/tests/test_sbom_reporter.py b/tests/test_sbom_reporter.py new file mode 100644 index 00000000..fbbc446c --- /dev/null +++ b/tests/test_sbom_reporter.py @@ -0,0 +1,455 @@ +"""Unit tests for dfetch.reporting.sbom_reporter – new license handling.""" + +# mypy: ignore-errors +# flake8: noqa + +import base64 +from unittest.mock import patch + +import pytest + +from cyclonedx.model import AttachedText, Encoding, Property +from cyclonedx.model.component import Component, ComponentType +from cyclonedx.model.license import ( + DisjunctiveLicense as CycloneDxLicense, + LicenseAcknowledgement, +) + +from dfetch.reporting.sbom_reporter import SbomReporter, _make_license_text_attachment +from dfetch.util.license import License as DfetchLicense, LicenseScanResult + + +# --------------------------------------------------------------------------- +# Helpers +# 
--------------------------------------------------------------------------- + + +def _make_license( + name: str = "MIT License", + spdx_id: str = "MIT", + probability: float = 0.95, + text: str | None = None, +) -> DfetchLicense: + return DfetchLicense( + name=name, + spdx_id=spdx_id, + trove_classifier=None, + probability=probability, + text=text, + ) + + +def _make_component() -> Component: + return Component(name="test-component", type=ComponentType.LIBRARY) + + +def _get_property_names(component: Component) -> list[str]: + return [p.name for p in component.properties] + + +def _get_property_value(component: Component, name: str) -> str | None: + for p in component.properties: + if p.name == name: + return p.value + return None + + +def _get_license_ids(component: Component) -> list[str]: + return [lic.id for lic in component.licenses] + + +# --------------------------------------------------------------------------- +# _make_license_text_attachment +# --------------------------------------------------------------------------- + + +def test_make_license_text_attachment_base64_encodes_text(): + """The attachment content must be the base64 encoding of the input text.""" + text = "MIT License\nCopyright 2024" + attachment = _make_license_text_attachment(text) + expected = base64.b64encode(text.encode("utf-8")).decode("ascii") + assert attachment.content == expected + + +def test_make_license_text_attachment_sets_content_type(): + attachment = _make_license_text_attachment("hello") + assert attachment.content_type == "text/plain" + + +def test_make_license_text_attachment_sets_encoding(): + attachment = _make_license_text_attachment("hello") + assert attachment.encoding == Encoding.BASE_64 + + +def test_make_license_text_attachment_empty_string(): + """Empty string encodes to a valid (empty) base64 string.""" + attachment = _make_license_text_attachment("") + assert attachment.content == base64.b64encode(b"").decode("ascii") + + +def 
test_make_license_text_attachment_utf8_content(): + """Non-ASCII text is correctly base64-encoded via UTF-8.""" + text = "Lizenz: Urheberrecht © 2024" + attachment = _make_license_text_attachment(text) + expected = base64.b64encode(text.encode("utf-8")).decode("ascii") + assert attachment.content == expected + + +# --------------------------------------------------------------------------- +# SbomReporter._build_cdx_license +# --------------------------------------------------------------------------- + + +def test_build_cdx_license_with_spdx_id(): + """When spdx_id is set it is used as the license id.""" + lic = _make_license(spdx_id="MIT", text=None) + cdx = SbomReporter._build_cdx_license(lic) + assert cdx.id == "MIT" + + +def test_build_cdx_license_with_text_embeds_base64(): + """Text is embedded as a base64 AttachedText when provided.""" + raw = "MIT License\nCopyright 2024" + lic = _make_license(spdx_id="MIT", text=raw) + cdx = SbomReporter._build_cdx_license(lic) + expected_b64 = base64.b64encode(raw.encode("utf-8")).decode("ascii") + assert cdx.text is not None + assert cdx.text.content == expected_b64 + assert cdx.text.encoding == Encoding.BASE_64 + + +def test_build_cdx_license_without_text_has_no_attachment(): + """When text is None no text attachment is created.""" + lic = _make_license(spdx_id="MIT", text=None) + cdx = SbomReporter._build_cdx_license(lic) + assert cdx.text is None + + +def test_build_cdx_license_falls_back_to_name_when_no_spdx_id(): + """When spdx_id is empty/None the license name is used.""" + lic = DfetchLicense( + name="Some Custom License", + spdx_id="", + trove_classifier=None, + probability=0.9, + ) + cdx = SbomReporter._build_cdx_license(lic) + assert cdx.id is None + assert cdx.name == "Some Custom License" + + +# --------------------------------------------------------------------------- +# SbomReporter._attach_identified_licenses +# --------------------------------------------------------------------------- + + +def 
test_attach_identified_licenses_adds_license_to_component(): + component = _make_component() + lic = _make_license(spdx_id="MIT", probability=0.95) + SbomReporter._attach_identified_licenses(component, [lic]) + assert "MIT" in _get_license_ids(component) + + +def test_attach_identified_licenses_adds_confidence_property(): + component = _make_component() + lic = _make_license(spdx_id="MIT", probability=0.95) + SbomReporter._attach_identified_licenses(component, [lic]) + prop_value = _get_property_value(component, "dfetch:license:MIT:confidence") + assert prop_value == "0.95" + + +def test_attach_identified_licenses_confidence_formatted_two_decimal_places(): + component = _make_component() + lic = _make_license(spdx_id="Apache-2.0", probability=0.9) + SbomReporter._attach_identified_licenses(component, [lic]) + prop_value = _get_property_value(component, "dfetch:license:Apache-2.0:confidence") + assert prop_value == "0.90" + + +def test_attach_identified_licenses_multiple_licenses(): + component = _make_component() + lic1 = _make_license(spdx_id="MIT", probability=0.95) + lic2 = _make_license(name="Apache 2.0", spdx_id="Apache-2.0", probability=0.88) + SbomReporter._attach_identified_licenses(component, [lic1, lic2]) + ids = _get_license_ids(component) + assert "MIT" in ids + assert "Apache-2.0" in ids + + +def test_attach_identified_licenses_uses_name_as_label_when_no_spdx_id(): + """When spdx_id is empty, the name is used as the confidence property label.""" + component = _make_component() + lic = DfetchLicense( + name="Custom License", spdx_id="", trove_classifier=None, probability=0.85 + ) + SbomReporter._attach_identified_licenses(component, [lic]) + prop_value = _get_property_value(component, "dfetch:license:Custom License:confidence") + assert prop_value == "0.85" + + +def test_attach_identified_licenses_embeds_text_in_license(): + """If the DfetchLicense has text, it should be embedded in the CycloneDX license.""" + component = _make_component() + raw_text = 
"MIT License\nCopyright 2024" + lic = _make_license(spdx_id="MIT", probability=0.95, text=raw_text) + SbomReporter._attach_identified_licenses(component, [lic]) + cdx_lic = next(iter(component.licenses)) + assert cdx_lic.text is not None + expected_b64 = base64.b64encode(raw_text.encode("utf-8")).decode("ascii") + assert cdx_lic.text.content == expected_b64 + + +# --------------------------------------------------------------------------- +# SbomReporter._apply_licenses – not scanned +# --------------------------------------------------------------------------- + + +def test_apply_licenses_not_scanned_adds_nothing(): + """When was_scanned=False no properties or licenses are added.""" + component = _make_component() + scan = LicenseScanResult(was_scanned=False) + SbomReporter._apply_licenses(component, scan) + assert len(list(component.licenses)) == 0 + assert len(list(component.properties)) == 0 + + +# --------------------------------------------------------------------------- +# SbomReporter._apply_licenses – scanned with identified licenses +# --------------------------------------------------------------------------- + + +def test_apply_licenses_identified_adds_tool_property(): + component = _make_component() + lic = _make_license(spdx_id="MIT", probability=0.95) + scan = LicenseScanResult(identified=[lic], was_scanned=True, threshold=0.80) + with patch("dfetch.reporting.sbom_reporter.INFER_LICENSE_VERSION", "1.2.3"): + SbomReporter._apply_licenses(component, scan) + assert _get_property_value(component, "dfetch:license:tool") == "infer-license 1.2.3" + + +def test_apply_licenses_identified_adds_threshold_property(): + component = _make_component() + lic = _make_license(spdx_id="MIT", probability=0.95) + scan = LicenseScanResult(identified=[lic], was_scanned=True, threshold=0.80) + SbomReporter._apply_licenses(component, scan) + assert _get_property_value(component, "dfetch:license:threshold") == "0.80" + + +def 
test_apply_licenses_identified_adds_license_to_component(): + component = _make_component() + lic = _make_license(spdx_id="MIT", probability=0.95) + scan = LicenseScanResult(identified=[lic], was_scanned=True, threshold=0.80) + SbomReporter._apply_licenses(component, scan) + assert "MIT" in _get_license_ids(component) + + +def test_apply_licenses_identified_no_noassertion(): + """When a license is identified NOASSERTION should not be present.""" + component = _make_component() + lic = _make_license(spdx_id="MIT", probability=0.95) + scan = LicenseScanResult(identified=[lic], was_scanned=True, threshold=0.80) + SbomReporter._apply_licenses(component, scan) + assert "NOASSERTION" not in _get_license_ids(component) + + +def test_apply_licenses_identified_no_finding_property(): + """No dfetch:license:finding property when a license is identified.""" + component = _make_component() + lic = _make_license(spdx_id="MIT", probability=0.95) + scan = LicenseScanResult(identified=[lic], was_scanned=True, threshold=0.80) + SbomReporter._apply_licenses(component, scan) + assert _get_property_value(component, "dfetch:license:finding") is None + + +def test_apply_licenses_identified_adds_confidence_property(): + """Confidence property is set for identified licenses.""" + component = _make_component() + lic = _make_license(spdx_id="MIT", probability=0.95) + scan = LicenseScanResult(identified=[lic], was_scanned=True, threshold=0.80) + SbomReporter._apply_licenses(component, scan) + assert _get_property_value(component, "dfetch:license:MIT:confidence") == "0.95" + + +# --------------------------------------------------------------------------- +# SbomReporter._apply_licenses – scanned with unclassified files +# --------------------------------------------------------------------------- + + +def test_apply_licenses_unclassified_sets_noassertion(): + component = _make_component() + scan = LicenseScanResult( + unclassified_files=["LICENSE"], was_scanned=True, threshold=0.80 + ) + 
SbomReporter._apply_licenses(component, scan) + assert "NOASSERTION" in _get_license_ids(component) + + +def test_apply_licenses_unclassified_sets_noassertion_reason(): + component = _make_component() + scan = LicenseScanResult( + unclassified_files=["LICENSE"], was_scanned=True, threshold=0.80 + ) + SbomReporter._apply_licenses(component, scan) + reason = _get_property_value(component, "dfetch:license:noassertion:reason") + assert reason == "UNCLASSIFIABLE_LICENSE_TEXT" + + +def test_apply_licenses_unclassified_sets_finding_property(): + component = _make_component() + scan = LicenseScanResult( + unclassified_files=["LICENSE"], was_scanned=True, threshold=0.80 + ) + SbomReporter._apply_licenses(component, scan) + finding = _get_property_value(component, "dfetch:license:finding") + assert "LICENSE" in finding + assert "could not be classified" in finding + + +def test_apply_licenses_unclassified_multiple_files_sorted(): + """Multiple unclassified files appear sorted in the finding property.""" + component = _make_component() + scan = LicenseScanResult( + unclassified_files=["COPYING", "LICENSE"], was_scanned=True, threshold=0.80 + ) + SbomReporter._apply_licenses(component, scan) + finding = _get_property_value(component, "dfetch:license:finding") + # sorted: COPYING before LICENSE + assert "COPYING, LICENSE" in finding + + +def test_apply_licenses_unclassified_adds_tool_and_threshold(): + component = _make_component() + scan = LicenseScanResult( + unclassified_files=["LICENSE"], was_scanned=True, threshold=0.80 + ) + with patch("dfetch.reporting.sbom_reporter.INFER_LICENSE_VERSION", "2.0"): + SbomReporter._apply_licenses(component, scan) + assert _get_property_value(component, "dfetch:license:tool") == "infer-license 2.0" + assert _get_property_value(component, "dfetch:license:threshold") == "0.80" + + +def test_apply_licenses_unclassified_noassertion_has_acknowledgement(): + """NOASSERTION license entry has acknowledgement=CONCLUDED.""" + component = 
_make_component() + scan = LicenseScanResult( + unclassified_files=["LICENSE"], was_scanned=True, threshold=0.80 + ) + SbomReporter._apply_licenses(component, scan) + noassertion_lic = next( + (lic for lic in component.licenses if lic.id == "NOASSERTION"), None + ) + assert noassertion_lic is not None + assert noassertion_lic.acknowledgement == LicenseAcknowledgement.CONCLUDED + + +def test_apply_licenses_unclassified_noassertion_has_text(): + """NOASSERTION license entry has a human-readable text explanation.""" + component = _make_component() + scan = LicenseScanResult( + unclassified_files=["LICENSE"], was_scanned=True, threshold=0.80 + ) + SbomReporter._apply_licenses(component, scan) + noassertion_lic = next( + (lic for lic in component.licenses if lic.id == "NOASSERTION"), None + ) + assert noassertion_lic is not None + assert noassertion_lic.text is not None + assert "LICENSE" in noassertion_lic.text.content + + +# --------------------------------------------------------------------------- +# SbomReporter._apply_licenses – scanned with no license file +# --------------------------------------------------------------------------- + + +def test_apply_licenses_no_license_file_sets_noassertion(): + component = _make_component() + scan = LicenseScanResult(was_scanned=True, threshold=0.80) + SbomReporter._apply_licenses(component, scan) + assert "NOASSERTION" in _get_license_ids(component) + + +def test_apply_licenses_no_license_file_sets_reason(): + component = _make_component() + scan = LicenseScanResult(was_scanned=True, threshold=0.80) + SbomReporter._apply_licenses(component, scan) + reason = _get_property_value(component, "dfetch:license:noassertion:reason") + assert reason == "NO_LICENSE_FILE" + + +def test_apply_licenses_no_license_file_sets_finding_property(): + component = _make_component() + scan = LicenseScanResult(was_scanned=True, threshold=0.80) + SbomReporter._apply_licenses(component, scan) + finding = _get_property_value(component, 
"dfetch:license:finding") + assert finding == "No license file found in source tree" + + +def test_apply_licenses_no_license_file_noassertion_has_acknowledgement(): + component = _make_component() + scan = LicenseScanResult(was_scanned=True, threshold=0.80) + SbomReporter._apply_licenses(component, scan) + noassertion_lic = next( + (lic for lic in component.licenses if lic.id == "NOASSERTION"), None + ) + assert noassertion_lic is not None + assert noassertion_lic.acknowledgement == LicenseAcknowledgement.CONCLUDED + + +def test_apply_licenses_no_license_file_noassertion_text_explains_reason(): + component = _make_component() + scan = LicenseScanResult(was_scanned=True, threshold=0.80) + SbomReporter._apply_licenses(component, scan) + noassertion_lic = next( + (lic for lic in component.licenses if lic.id == "NOASSERTION"), None + ) + assert noassertion_lic is not None + assert noassertion_lic.text is not None + assert "No license file found" in noassertion_lic.text.content + + +def test_apply_licenses_no_license_file_adds_tool_and_threshold(): + component = _make_component() + scan = LicenseScanResult(was_scanned=True, threshold=0.80) + with patch("dfetch.reporting.sbom_reporter.INFER_LICENSE_VERSION", "0.5.0"): + SbomReporter._apply_licenses(component, scan) + assert _get_property_value(component, "dfetch:license:tool") == "infer-license 0.5.0" + assert _get_property_value(component, "dfetch:license:threshold") == "0.80" + + +# --------------------------------------------------------------------------- +# SbomReporter._apply_licenses – threshold formatting +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "threshold, expected", + [ + (0.80, "0.80"), + (0.5, "0.50"), + (1.0, "1.00"), + (0.0, "0.00"), + ], +) +def test_apply_licenses_threshold_formatted_two_decimal_places(threshold, expected): + component = _make_component() + lic = _make_license(spdx_id="MIT", probability=0.95) + scan = 
LicenseScanResult(identified=[lic], was_scanned=True, threshold=threshold) + SbomReporter._apply_licenses(component, scan) + assert _get_property_value(component, "dfetch:license:threshold") == expected + + +# --------------------------------------------------------------------------- +# SbomReporter._apply_licenses – boundary: threshold = probability (>=) +# --------------------------------------------------------------------------- + + +def test_apply_licenses_exactly_at_threshold_is_identified(): + """The >= comparison means a license at exactly the threshold is accepted.""" + component = _make_component() + lic = _make_license(spdx_id="MIT", probability=0.80) + # Simulate that Report._determine_licenses already accepted it (probability >= threshold) + scan = LicenseScanResult(identified=[lic], was_scanned=True, threshold=0.80) + SbomReporter._apply_licenses(component, scan) + assert "MIT" in _get_license_ids(component) + assert "NOASSERTION" not in _get_license_ids(component) \ No newline at end of file diff --git a/tests/test_stdout_reporter.py b/tests/test_stdout_reporter.py new file mode 100644 index 00000000..342e2bd6 --- /dev/null +++ b/tests/test_stdout_reporter.py @@ -0,0 +1,169 @@ +"""Unit tests for dfetch.reporting.stdout_reporter – LicenseScanResult integration.""" + +# mypy: ignore-errors +# flake8: noqa + +from unittest.mock import MagicMock, Mock, patch + +import pytest + +from dfetch.manifest.project import ProjectEntry +from dfetch.reporting.stdout_reporter import StdoutReporter +from dfetch.util.license import License, LicenseScanResult + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_project(name: str = "my-project") -> Mock: + p = Mock(spec=ProjectEntry) + p.name = name + p.remote = "" + p.destination = name + return p + + +def _make_license(name: str, spdx_id: str = "MIT", probability: float = 0.95) -> 
License: + return License(name=name, spdx_id=spdx_id, trove_classifier=None, probability=probability) + + +def _make_manifest() -> MagicMock: + m = MagicMock() + return m + + +# --------------------------------------------------------------------------- +# StdoutReporter.add_project – licenses display +# --------------------------------------------------------------------------- + + +def test_add_project_with_empty_identified_list_prints_empty_licenses(): + """When no licenses are identified the licenses field is printed as empty.""" + reporter = StdoutReporter(_make_manifest()) + project = _make_project() + scan = LicenseScanResult(was_scanned=True) + + with patch("dfetch.reporting.stdout_reporter.logger") as mock_logger, patch( + "dfetch.project.metadata.Metadata.from_file" + ) as mock_meta: + mock_metadata = MagicMock() + mock_metadata.dependencies = [] + mock_metadata.patch = [] + mock_meta.return_value = mock_metadata + + reporter.add_project(project=project, license_scan=scan, version="1.0") + + # Collect the print_info_field calls for the 'licenses' field; its value must be an empty string + calls = [ + call for call in mock_logger.print_info_field.call_args_list + if call.args[0] == " licenses" + ] + assert calls, "print_info_field was not called with ' licenses'" + licenses_value = calls[0].args[1] + assert licenses_value == "" + + +def test_add_project_with_identified_licenses_prints_names(): + """Identified license names are joined and printed.""" + reporter = StdoutReporter(_make_manifest()) + project = _make_project() + lic = _make_license(name="MIT License") + scan = LicenseScanResult(identified=[lic], was_scanned=True) + + with patch("dfetch.reporting.stdout_reporter.logger") as mock_logger, patch( + "dfetch.project.metadata.Metadata.from_file" + ) as mock_meta: + mock_metadata = MagicMock() + mock_metadata.dependencies = [] + mock_metadata.patch = [] + mock_meta.return_value = mock_metadata + + reporter.add_project(project=project, license_scan=scan, version="1.0") + + calls = [ +
call for call in mock_logger.print_info_field.call_args_list + if call.args[0] == " licenses" + ] + assert calls + licenses_value = calls[0].args[1] + assert "MIT License" in licenses_value + + +def test_add_project_with_multiple_identified_licenses_joins_names(): + """Every identified license name appears in the printed licenses field (separator not asserted).""" + reporter = StdoutReporter(_make_manifest()) + project = _make_project() + lic1 = _make_license(name="MIT License") + lic2 = _make_license(name="Apache Software License", spdx_id="Apache-2.0") + scan = LicenseScanResult(identified=[lic1, lic2], was_scanned=True) + + with patch("dfetch.reporting.stdout_reporter.logger") as mock_logger, patch( + "dfetch.project.metadata.Metadata.from_file" + ) as mock_meta: + mock_metadata = MagicMock() + mock_metadata.dependencies = [] + mock_metadata.patch = [] + mock_meta.return_value = mock_metadata + + reporter.add_project(project=project, license_scan=scan, version="1.0") + + calls = [ + call for call in mock_logger.print_info_field.call_args_list + if call.args[0] == " licenses" + ] + assert calls + licenses_value = calls[0].args[1] + assert "MIT License" in licenses_value + assert "Apache Software License" in licenses_value + + +def test_add_project_not_scanned_uses_identified_empty(): + """When was_scanned=False, identified is empty so licenses field is blank.""" + reporter = StdoutReporter(_make_manifest()) + project = _make_project() + scan = LicenseScanResult(was_scanned=False) + + with patch("dfetch.reporting.stdout_reporter.logger") as mock_logger, patch( + "dfetch.project.metadata.Metadata.from_file" + ) as mock_meta: + mock_metadata = MagicMock() + mock_metadata.dependencies = [] + mock_metadata.patch = [] + mock_meta.return_value = mock_metadata + + reporter.add_project(project=project, license_scan=scan, version="1.0") + + calls = [ + call for call in mock_logger.print_info_field.call_args_list + if call.args[0] == " licenses" + ] + assert calls + licenses_value = calls[0].args[1] + assert licenses_value ==
"" + + +def test_add_project_unclassified_only_prints_empty_licenses(): + """Unclassified files do not appear in the licenses field for stdout.""" + reporter = StdoutReporter(_make_manifest()) + project = _make_project() + scan = LicenseScanResult(unclassified_files=["LICENSE"], was_scanned=True) + + with patch("dfetch.reporting.stdout_reporter.logger") as mock_logger, patch( + "dfetch.project.metadata.Metadata.from_file" + ) as mock_meta: + mock_metadata = MagicMock() + mock_metadata.dependencies = [] + mock_metadata.patch = [] + mock_meta.return_value = mock_metadata + + reporter.add_project(project=project, license_scan=scan, version="1.0") + + calls = [ + call for call in mock_logger.print_info_field.call_args_list + if call.args[0] == " licenses" + ] + assert calls + licenses_value = calls[0].args[1] + assert licenses_value == "" \ No newline at end of file