diff --git a/scanpipe/models.py b/scanpipe/models.py index 61c626e4ba..82c73e4f07 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -3772,6 +3772,18 @@ class DiscoveredDependencyQuerySet( VulnerabilityQuerySetMixin, ProjectRelatedQuerySet, ): + def project_dependencies(self): + return self.filter(for_package__isnull=True) + + def package_dependencies(self): + return self.filter(for_package__isnull=False) + + def resolved(self): + return self.filter(resolved_to_package__isnull=False) + + def unresolved(self): + return self.filter(resolved_to_package__isnull=True) + def prefetch_for_serializer(self): """ Optimized prefetching for a QuerySet to be consumed by the @@ -3816,6 +3828,26 @@ class DiscoveredDependency( system and application packages discovered in the code under analysis. Dependencies are usually collected from parsed package data such as a package manifest or lockfile. + + This class manages dependencies with the following considerations: + + 1. A dependency can be associated with a Package via the ``for_package`` field. + In this case, it is termed a "Package's dependency". + If there is no such association, the dependency is considered a + "Project's dependency". + + 2. A dependency can also be linked to a Package through the ``resolved_to_package`` + field. When this link exists, the dependency is considered "resolved". + + 3. Dependencies can be either direct or transitive: + - A **direct dependency** is explicitly declared in a package manifest or + lockfile. + - A **transitive dependency** is not declared directly, but is required by one + of the project's direct dependencies. + + Understanding the distinction between direct and transitive dependencies is + important for analyzing dependency trees, resolving version conflicts, and + assessing potential security risks. """ # Overrides the `project` field to set the proper `related_name`. 
@@ -3966,6 +3998,24 @@ def datafile_path(self): if self.datafile_resource: return self.datafile_resource.path + @property + def is_project_dependency(self): + """ + Return True if the dependency is directly associated with the project + (not tied to a specific package). + """ + return not bool(self.for_package_id) + + @property + def is_package_dependency(self): + """Return True if the dependency is explicitly associated with a package.""" + return bool(self.for_package_id) + + @property + def is_resolved_to_package(self): + """Return True if the dependency is resolved to a package.""" + return bool(self.resolved_to_package_id) + @classmethod def create_from_data( cls, @@ -3981,6 +4031,14 @@ def create_from_data( Create and returns a DiscoveredDependency for a `project` from the `dependency_data`. + The `for_package` and `resolved_to_package` FKs can be provided as args, + or in the `dependency_data` using the `for_package_uid` and + `resolve_to_package_uid` keys. + + Note that a dependency: + - without a `for_package` FK is a "Project's dependency" + - without a `resolved_to_package` FK is "unresolved". + If `strip_datafile_path_root` is True, then `create_from_data()` will strip the root path segment from the `datafile_path` of `dependency_data` before looking up the corresponding CodebaseResource @@ -3989,51 +4047,36 @@ def create_from_data( not stripped for `datafile_path`. 
""" dependency_data = dependency_data.copy() - required_fields = ["purl", "dependency_uid"] - missing_values = [ - field_name - for field_name in required_fields - if not dependency_data.get(field_name) - ] + project_packages_qs = project.discoveredpackages - if missing_values: - message = ( - f"No values for the following required fields: " - f"{', '.join(missing_values)}" - ) + if not dependency_data.get("dependency_uid"): + dependency_data["dependency_uid"] = str(uuid.uuid4()) - project.add_warning(description=message, model=cls, details=dependency_data) - return - - if not for_package: - for_package_uid = dependency_data.get("for_package_uid") - if for_package_uid: - for_package = project.discoveredpackages.get( - package_uid=for_package_uid - ) + for_package_uid = dependency_data.get("for_package_uid") + if not for_package and for_package_uid: + for_package = project_packages_qs.get_or_none(package_uid=for_package_uid) - if not resolved_to_package: - resolved_to_uid = dependency_data.get("resolved_to_uid") - if resolved_to_uid: - resolved_to_package = project.discoveredpackages.get( - package_uid=resolved_to_uid - ) + resolve_to_package_uid = dependency_data.get("resolve_to_package_uid") + if not resolved_to_package and resolve_to_package_uid: + resolved_to_package = project_packages_qs.get_or_none( + package_uid=resolve_to_package_uid + ) - if not datafile_resource: - datafile_path = dependency_data.get("datafile_path") - if datafile_path: - if strip_datafile_path_root: - segments = datafile_path.split("/") - datafile_path = "/".join(segments[1:]) - datafile_resource = project.codebaseresources.get(path=datafile_path) + datafile_path = dependency_data.get("datafile_path") + if not datafile_resource and datafile_path: + if strip_datafile_path_root: + segments = datafile_path.split("/") + datafile_path = "/".join(segments[1:]) + datafile_resource = project.codebaseresources.get(path=datafile_path) if datasource_id: dependency_data["datasource_id"] = 
datasource_id - # Set purl fields from `purl` + # Set package_url fields from the ``purl`` string. purl = dependency_data.get("purl") - purl_mapping = PackageURL.from_string(purl).to_dict() - dependency_data.update(**purl_mapping) + if purl: + purl_data_dict = PackageURL.from_string(purl).to_dict() + dependency_data.update(**purl_data_dict) cleaned_data = { field_name: value @@ -4072,7 +4115,7 @@ def spdx_id(self): # "SPDXID is a unique string containing letters, numbers, ., and/or -" return f"SPDXRef-scancodeio-{self._meta.model_name}-{self.uuid}" - def as_spdx(self): + def as_spdx_package(self): """Return this Dependency as an SPDX Package entry.""" from scanpipe.pipes import spdx diff --git a/scanpipe/pipes/__init__.py b/scanpipe/pipes/__init__.py index a0aad7f0cb..18a5d72c77 100644 --- a/scanpipe/pipes/__init__.py +++ b/scanpipe/pipes/__init__.py @@ -325,7 +325,6 @@ def get_dependencies(project, dependency_data): Given a `dependency_data` mapping, get a list of DiscoveredDependency objects for that `project` with similar dependency data. 
""" - dependency = None dependency_uid = dependency_data.get("dependency_uid") extracted_requirement = dependency_data.get("extracted_requirement") or "" diff --git a/scanpipe/pipes/output.py b/scanpipe/pipes/output.py index ba159d7844..d4c5f2632f 100644 --- a/scanpipe/pipes/output.py +++ b/scanpipe/pipes/output.py @@ -692,7 +692,7 @@ def to_spdx(project, include_files=False): license_expressions.append(license_expression) for dependency in discovereddependency_qs: - packages_as_spdx.append(dependency.as_spdx()) + packages_as_spdx.append(dependency.as_spdx_package()) if dependency.for_package: relationships.append( spdx.Relationship( diff --git a/scanpipe/tests/test_models.py b/scanpipe/tests/test_models.py index 78f7d189c9..c589cccf53 100644 --- a/scanpipe/tests/test_models.py +++ b/scanpipe/tests/test_models.py @@ -2041,8 +2041,20 @@ def test_scanpipe_discovered_package_queryset_dependency_methods(self): z = make_package(project, "pkg:type/z") # Project -> A -> B -> C # Project -> Z - make_dependency(project, for_package=a, resolved_to_package=b) - make_dependency(project, for_package=b, resolved_to_package=c) + a_to_b = make_dependency( + project, for_package=a, resolved_to_package=b, dependency_uid="a_to_b" + ) + b_to_c = make_dependency( + project, for_package=b, resolved_to_package=c, dependency_uid="b_to_c" + ) + unresolved_dependency = make_dependency(project, dependency_uid="unresolved") + + self.assertFalse(a_to_b.is_project_dependency) + self.assertTrue(a_to_b.is_package_dependency) + self.assertTrue(a_to_b.is_resolved_to_package) + self.assertTrue(unresolved_dependency.is_project_dependency) + self.assertFalse(unresolved_dependency.is_package_dependency) + self.assertFalse(unresolved_dependency.is_resolved_to_package) project_packages_qs = project.discoveredpackages.order_by("name") root_packages = project_packages_qs.root_packages() @@ -2050,6 +2062,14 @@ def test_scanpipe_discovered_package_queryset_dependency_methods(self): non_root_packages = 
project_packages_qs.non_root_packages() self.assertEqual([b, c], list(non_root_packages)) + dependency_qs = project.discovereddependencies + self.assertEqual( + [unresolved_dependency], list(dependency_qs.project_dependencies()) + ) + self.assertEqual([a_to_b, b_to_c], list(dependency_qs.package_dependencies())) + self.assertEqual([a_to_b, b_to_c], list(dependency_qs.resolved())) + self.assertEqual([unresolved_dependency], list(dependency_qs.unresolved())) + @skipIf(sys.platform != "linux", "Ordering differs on macOS.") def test_scanpipe_codebase_resource_model_walk_method(self): fixtures = self.data / "asgiref" / "asgiref-3.3.0_walk_test_fixtures.json" @@ -2955,10 +2975,11 @@ def test_scanpipe_discovered_package_model_create_from_data_missing_type(self): def test_scanpipe_discovered_dependency_model_create_from_data(self): project1 = make_project("Analysis") - DiscoveredPackage.create_from_data(project1, package_data1) + package1 = DiscoveredPackage.create_from_data(project1, package_data1) CodebaseResource.objects.create( project=project1, path="daglib-0.3.2.tar.gz-extract/daglib-0.3.2/PKG-INFO" ) + # Unresolved dependency dependency = DiscoveredDependency.create_from_data( project1, dependency_data1, strip_datafile_path_root=False ) @@ -2982,23 +3003,17 @@ def test_scanpipe_discovered_dependency_model_create_from_data(self): dependency.datafile_path, ) self.assertEqual("pypi_sdist_pkginfo", dependency.datasource_id) + self.assertFalse(dependency.is_project_dependency) + self.assertTrue(dependency.is_package_dependency) + self.assertFalse(dependency.is_resolved_to_package) - # Test field validation when using create_from_data - dependency_count = DiscoveredDependency.objects.count() - incomplete_data = dict(dependency_data1) - incomplete_data["dependency_uid"] = "" - self.assertIsNone( - DiscoveredDependency.create_from_data(project1, incomplete_data) + # Resolved project dependency, resolved_to_package provided as arg + dependency2 = 
DiscoveredDependency.create_from_data( + project1, dependency_data={}, resolved_to_package=package1 ) - self.assertEqual(dependency_count, DiscoveredDependency.objects.count()) - message = project1.projectmessages.latest("created_date") - self.assertEqual("DiscoveredDependency", message.model) - self.assertEqual(ProjectMessage.Severity.WARNING, message.severity) - expected_message = "No values for the following required fields: dependency_uid" - self.assertEqual(expected_message, message.description) - self.assertEqual(dependency_data1["purl"], message.details["purl"]) - self.assertEqual("", message.details["dependency_uid"]) - self.assertEqual("", message.traceback) + self.assertTrue(dependency2.is_project_dependency) + self.assertFalse(dependency2.is_package_dependency) + self.assertTrue(dependency2.is_resolved_to_package) def test_scanpipe_discovered_package_model_unique_package_uid_in_project(self): project1 = make_project("Analysis")