Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
Changelog
=========

v35.4.0 (unreleased)
--------------------

- Resolve and load dependencies from SPDX SBOMs.
https://github.com/aboutcode-org/scancode.io/issues/1145

v35.3.0 (2025-08-20)
--------------------

Expand Down
19 changes: 14 additions & 5 deletions scanpipe/pipelines/load_sbom.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/aboutcode-org/scancode.io for support and download.

from scanpipe.models import DiscoveredDependency
from scanpipe.pipelines.scan_codebase import ScanCodebase
from scanpipe.pipes import resolve

Expand All @@ -44,7 +45,7 @@ def steps(cls):
cls.flag_empty_files,
cls.flag_ignored_resources,
cls.get_sbom_inputs,
cls.get_packages_from_sboms,
cls.get_data_from_sboms,
cls.create_packages_from_sboms,
cls.create_dependencies_from_sboms,
)
Expand All @@ -53,13 +54,13 @@ def get_sbom_inputs(self):
"""Locate all the SBOMs among the codebase resources."""
self.manifest_resources = resolve.get_manifest_resources(self.project)

def get_packages_from_sboms(self):
"""Get packages data from SBOMs."""
self.packages = resolve.get_packages(
def get_data_from_sboms(self):
"""Get data from SBOMs."""
self.packages, self.dependencies = resolve.get_data_from_manifests(
project=self.project,
package_registry=resolve.sbom_registry,
manifest_resources=self.manifest_resources,
model="get_packages_from_sboms",
model="get_data_from_sboms",
)

def create_packages_from_sboms(self):
Expand All @@ -71,4 +72,12 @@ def create_packages_from_sboms(self):

def create_dependencies_from_sboms(self):
    """
    Create the dependency relationships declared in the SBOMs.

    Two sources are handled, one per SBOM format:
    - CycloneDX: the dependency data stored in the packages ``extra_data``.
    - SPDX: the dependency data collected in ``self.dependencies``.
    """
    project = self.project

    # CycloneDX: dependencies are read back from the packages ``extra_data``.
    resolve.create_dependencies_from_packages_extra_data(project=project)

    # SPDX: one DiscoveredDependency is created per loaded dependency entry.
    for spdx_dependency_data in self.dependencies:
        DiscoveredDependency.create_from_data(
            project=project,
            dependency_data=spdx_dependency_data,
        )
4 changes: 2 additions & 2 deletions scanpipe/pipelines/resolve_dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def get_packages_from_manifest(self):
Resolve package data from lockfiles/requirement files with package
requirements/dependencies.
"""
self.resolved_packages = resolve.get_packages(
self.packages, self.dependencies = resolve.get_data_from_manifests(
project=self.project,
package_registry=resolve.resolver_registry,
manifest_resources=self.manifest_resources,
Expand All @@ -99,6 +99,6 @@ def create_resolved_packages(self):
"""
resolve.create_packages_and_dependencies(
project=self.project,
packages=self.resolved_packages,
packages=self.packages,
resolved=True,
)
13 changes: 7 additions & 6 deletions scanpipe/pipes/cyclonedx.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,9 @@ def get_external_references(component):

references = defaultdict(list)
for reference in external_references:
references[reference.type.value].append(reference.url.uri)
reference_url = reference.url
if reference_url and reference_url.uri:
references[reference.type.value].append(reference_url.uri)

return dict(references)

Expand Down Expand Up @@ -158,12 +160,9 @@ def cyclonedx_component_to_package_data(
vulnerabilities = vulnerabilities or {}
extra_data = {}

# Store the original bom_ref and dependencies for future processing.
bom_ref = str(cdx_component.bom_ref)
if bom_ref:
extra_data["bom_ref"] = bom_ref
if depends_on := dependencies.get(bom_ref):
extra_data["depends_on"] = depends_on
if depends_on := dependencies.get(bom_ref):
extra_data["depends_on"] = depends_on

package_url_dict = {}
if cdx_component.purl:
Expand All @@ -189,6 +188,8 @@ def cyclonedx_component_to_package_data(
)

package_data = {
# Store the original "bom_ref" as package_uid for dependencies resolution.
"package_uid": bom_ref,
"name": cdx_component.name,
"extracted_license_statement": declared_license,
"copyright": cdx_component.copyright,
Expand Down
39 changes: 30 additions & 9 deletions scanpipe/pipes/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -671,6 +671,28 @@ def _get_spdx_extracted_licenses(license_expressions):
return extracted_licenses


def get_dependency_as_spdx_relationship(dependency, document_spdx_id, packages_as_spdx):
    """
    Build the spdx.Relationship matching the provided ``dependency`` instance.

    The relationship is always expressed as "child DEPENDENCY_OF parent":
    - parent: the ``for_package`` of the dependency when set, otherwise the
      document itself (``document_spdx_id``), for project-level dependencies.
    - child: the package the dependency is resolved to when available,
      otherwise the dependency converted to a SPDX package.

    Note that ``packages_as_spdx`` is modified in place: the SPDX package built
    for an unresolved dependency is appended to that list.
    """
    for_package = dependency.for_package
    parent_id = for_package.spdx_id if for_package else document_spdx_id

    if dependency.is_resolved_to_package:
        child_id = dependency.resolved_to_package.spdx_id
    else:
        # No package to point to: expose the dependency itself as a package so
        # the relationship has a valid element to reference.
        unresolved_as_package = dependency.as_spdx_package()
        packages_as_spdx.append(unresolved_as_package)
        child_id = unresolved_as_package.spdx_id

    return spdx.Relationship(
        spdx_id=child_id,
        related_spdx_id=parent_id,
        relationship="DEPENDENCY_OF",
    )


def to_spdx(project, include_files=False):
"""
Generate output for the provided ``project`` in SPDX document format.
Expand All @@ -682,6 +704,7 @@ def to_spdx(project, include_files=False):
discoveredpackage_qs = get_queryset(project, "discoveredpackage")
discovereddependency_qs = get_queryset(project, "discovereddependency")

document_spdx_id = f"SPDXRef-DOCUMENT-{project.uuid}"
packages_as_spdx = []
license_expressions = []
relationships = []
Expand All @@ -692,15 +715,12 @@ def to_spdx(project, include_files=False):
license_expressions.append(license_expression)

for dependency in discovereddependency_qs:
packages_as_spdx.append(dependency.as_spdx_package())
if dependency.for_package:
relationships.append(
spdx.Relationship(
spdx_id=dependency.spdx_id,
related_spdx_id=dependency.for_package.spdx_id,
relationship="DEPENDENCY_OF",
)
)
spdx_relationship = get_dependency_as_spdx_relationship(
dependency,
document_spdx_id,
packages_as_spdx,
)
relationships.append(spdx_relationship)

files_as_spdx = []
if include_files:
Expand All @@ -710,6 +730,7 @@ def to_spdx(project, include_files=False):
]

document = spdx.Document(
spdx_id=document_spdx_id,
name=f"scancodeio_{project.name}",
namespace=f"https://scancode.io/spdxdocs/{project.uuid}",
creation_info=spdx.CreationInfo(tool=f"ScanCode.io-{scancodeio_version}"),
Expand Down
91 changes: 81 additions & 10 deletions scanpipe/pipes/resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,33 @@ def resolve_manifest_resources(resource, package_registry):
return packages


def get_packages(project, package_registry, manifest_resources, model=None):
def get_dependencies_from_manifest(resource):
    """
    Return the list of dependency data found in the provided ``resource``.

    Only SPDX documents are supported here, since SPDX stores the dependency
    data as its own entries in the SBOM.
    On the CycloneDX side, the dependency data is declared inline in the
    component entries and is stored on the package ``extra_data`` instead.

    An empty list is returned for any resource that is not detected as SPDX.
    """
    location = resource.location

    if get_default_package_type(location) == "spdx":
        return resolve_spdx_dependencies(input_location=location)

    return []


def get_data_from_manifests(project, package_registry, manifest_resources, model=None):
"""
Get package and dependency data from package manifests/lockfiles/SBOMs or
for resolved packages from package requirements.
"""
resolved_packages = []
resolved_dependencies = []
sboms_headers = {}

if not manifest_resources.exists():
Expand All @@ -76,7 +97,8 @@ def get_packages(project, package_registry, manifest_resources, model=None):
return []

for resource in manifest_resources:
if packages := resolve_manifest_resources(resource, package_registry):
packages = resolve_manifest_resources(resource, package_registry)
if packages:
resolved_packages.extend(packages)
if headers := get_manifest_headers(resource):
sboms_headers[resource.name] = headers
Expand All @@ -87,10 +109,14 @@ def get_packages(project, package_registry, manifest_resources, model=None):
object_instance=resource,
)

dependencies = get_dependencies_from_manifest(resource)
if dependencies:
resolved_dependencies.extend(dependencies)

if sboms_headers:
project.update_extra_data({"sboms_headers": sboms_headers})

return resolved_packages
return resolved_packages, resolved_dependencies


def create_packages_and_dependencies(project, packages, resolved=False):
Expand Down Expand Up @@ -139,7 +165,7 @@ def create_dependencies_from_packages_extra_data(project):

for bom_ref in for_package.extra_data.get("depends_on", []):
try:
resolved_to_package = project_packages.get(extra_data__bom_ref=bom_ref)
resolved_to_package = project_packages.get(package_uid=bom_ref)
except (ObjectDoesNotExist, MultipleObjectsReturned):
project.add_error(
description=f"Could not find resolved_to package entry: {bom_ref}.",
Expand Down Expand Up @@ -284,8 +310,12 @@ def convert_spdx_expression(license_expression_spdx):
return get_license_detections_and_expression(license_expression_spdx)[1]


def spdx_package_to_discovered_package_data(spdx_package):
def spdx_package_to_package_data(spdx_package):
"""Convert the provided spdx_package into package_data."""
package_url_dict = {}
# Store the original "SPDXID" as package_uid for dependencies resolution.
package_uid = spdx_package.spdx_id

for ref in spdx_package.external_refs:
if ref.type == "purl":
purl = ref.locator
Expand All @@ -302,6 +332,7 @@ def spdx_package_to_discovered_package_data(spdx_package):
declared_expression = convert_spdx_expression(declared_license_expression_spdx)

package_data = {
"package_uid": package_uid,
"name": spdx_package.name,
"download_url": spdx_package.download_location,
"declared_license_expression": declared_expression,
Expand All @@ -324,8 +355,28 @@ def spdx_package_to_discovered_package_data(spdx_package):
}


def resolve_spdx_packages(input_location):
"""Resolve the packages from the `input_location` SPDX document file."""
def spdx_relationship_to_dependency_data(spdx_relationship):
    """
    Return a dependency_data mapping built from the provided spdx_relationship.

    The direction of the relationship decides which element is the parent
    (``for_package_uid``) and which one is the dependency
    (``resolve_to_package_uid``).
    """
    element_id = spdx_relationship.spdx_id
    related_id = spdx_relationship.related_spdx_id

    if spdx_relationship.is_dependency_relationship:
        # spdx_id is a dependency of related_spdx_id
        for_package_uid, resolve_to_package_uid = related_id, element_id
    else:
        # spdx_id depends on related_spdx_id
        for_package_uid, resolve_to_package_uid = element_id, related_id

    return {
        "for_package_uid": for_package_uid,
        "resolve_to_package_uid": resolve_to_package_uid,
        "is_runtime": True,
        "is_resolved": True,
        "is_direct": True,
    }


def get_spdx_document_from_file(input_location):
"""Return the loaded SPDX document from the `input_location` file."""
input_path = Path(input_location)
spdx_document = json.loads(input_path.read_text())

Expand All @@ -334,12 +385,32 @@ def resolve_spdx_packages(input_location):
except Exception as e:
raise Exception(f'SPDX document "{input_path.name}" is not valid: {e}')

return spdx_document


def resolve_spdx_packages(input_location):
"""Resolve the packages from the `input_location` SPDX document file."""
spdx_document = get_spdx_document_from_file(input_location)
return [
spdx_package_to_discovered_package_data(spdx.Package.from_data(spdx_package))
spdx_package_to_package_data(spdx.Package.from_data(spdx_package))
for spdx_package in spdx_document.get("packages", [])
]


def resolve_spdx_dependencies(input_location):
    """
    Return the dependency data for each entry of the "relationships" section
    of the SPDX document file at `input_location`.
    """
    spdx_document = get_spdx_document_from_file(input_location)

    dependencies = []
    for relationship_data in spdx_document.get("relationships", []):
        spdx_relationship = spdx.Relationship.from_data(relationship_data)
        dependencies.append(spdx_relationship_to_dependency_data(spdx_relationship))

    return dependencies


def get_default_package_type(input_location):
"""
Return the package type associated with the provided `input_location`.
Expand Down
12 changes: 12 additions & 0 deletions scanpipe/pipes/spdx.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,18 @@ def from_data(cls, data):
comment=data.get("comment"),
)

@property
def is_dependency_relationship(self):
    """
    Return True when the relationship type implies that the ``spdx_id``
    element is a dependency of the ``related_spdx_id`` element.
    The relationship type is compared case-insensitively.
    """
    relationship_type = self.relationship.upper()

    # These types read the other way around: ``spdx_id`` is the parent and
    # ``related_spdx_id`` is the dependency.
    if relationship_type in ("ANCESTOR_OF", "CONTAINS", "DEPENDS_ON"):
        return False

    # Every other type implies that the spdx_id element is a dependency of
    # related_spdx_id, such as:
    # "DEPENDENCY_OF", "DESCENDANT_OF", "PACKAGE_OF", "CONTAINED_BY", ...
    return True


@dataclass
class Document:
Expand Down
2 changes: 1 addition & 1 deletion scanpipe/tests/data/asgiref/asgiref-3.3.0.spdx.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"spdxVersion": "SPDX-2.3",
"dataLicense": "CC0-1.0",
"SPDXID": "SPDXRef-DOCUMENT",
"SPDXID": "SPDXRef-DOCUMENT-804c3391-e6f9-415f-bb7a-cb6653853a46",
"name": "scancodeio_asgiref",
"documentNamespace": "https://scancode.io/spdxdocs/804c3391-e6f9-415f-bb7a-cb6653853a46",
"creationInfo": {
Expand Down
11 changes: 11 additions & 0 deletions scanpipe/tests/data/cyclonedx/nested.cdx.json
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,17 @@
"content": "806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"
}
]
},
{
"type": "build-meta",
"url": "",
"comment": "Missing URL",
"hashes": [
{
"alg": "SHA-1",
"content": "568f3f90c3d6aced58de033a3547ccd2e4e088e8"
}
]
}
],
"licenses": [
Expand Down
Loading