Skip to content

Commit 79a8a82

Browse files
committed
sbom: gen-sbom output upgrades for auditor parity
- PURL: pkg:generic -> pkg:github/wolfSSL/wolfssl@v<ver> (and pkg:generic/zlib -> pkg:github/madler/zlib). Resolves directly in OSV/GHSA/Snyk/Trivy without per-vendor CPE-fallback mapping. - Always emit wolfssl:sbom:hash-kind, on both autotools and embedded paths, so the SHA-256 in checksums[] is no longer ambiguous about whether it represents a library binary or a source-set Merkle hash. - Move structured producer metadata (hash-kind, source-set) out of positional key=value slugs in the SPDX package comment field into SPDX 2.3 annotations[]. - CycloneDX file sub-component for the linked library on the --lib path (SHA-1 + SHA-256), naming the artefact whose hash is reported. - New externalReferences: GitHub Security Advisories (SPDX + CDX) and CDX-side website / issue-tracker / RFC 9116 security.txt. - GEN_SBOM_TOOL_NAME / GEN_SBOM_VERSION module constants, bumped to 1.1, single-sourcing the producer identity in CDX and SPDX. - 8 new unit tests, 4 reshaped for the new shape; 136/136 pass. - doc/CRA.md customer-facing PURL example follows the new shape.
1 parent 78750ef commit 79a8a82

3 files changed

Lines changed: 371 additions & 73 deletions

File tree

doc/CRA.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ under which your distribution mirrors `wolfssl-<version>.cdx.json`.
155155
"type": "library",
156156
"name": "wolfssl",
157157
"version": "<version>",
158-
"purl": "pkg:generic/wolfssl@<version>",
158+
"purl": "pkg:github/wolfSSL/wolfssl@v<version>",
159159
"cpe": "cpe:2.3:a:wolfssl:wolfssl:<version>:*:*:*:*:*:*:*",
160160
"licenses": [{ "license": { "id": "GPL-3.0-only" } }],
161161
"externalReferences": [

scripts/gen-sbom

Lines changed: 165 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,16 @@ import uuid
1313
from datetime import datetime, timezone
1414

1515

16+
# Tool identification. Bump GEN_SBOM_VERSION whenever the SBOM output
17+
# shape changes in any auditor-visible way (new property, new field,
18+
# semantic change to an existing one) so downstream consumers can pin
19+
# their parser against a known producer. Carried in the CycloneDX
20+
# `metadata.tools.components[].version` and SPDX `creationInfo.creators`
21+
# fields. Reproducibility CI keys on byte-equal SBOMs across re-runs,
22+
# so this constant must change in lockstep with the output it produces.
23+
GEN_SBOM_TOOL_NAME = 'wolfssl-sbom-gen'
24+
GEN_SBOM_VERSION = '1.1'
25+
1626
# Stable namespace for deterministic uuid5 derivation. The seed string is
1727
# an opaque input to uuid5 -- it only needs to be (a) constant across
1828
# releases so the derived UUIDs reproduce byte-for-byte (any consumer
@@ -81,7 +91,9 @@ DEP_META = {
8191
'license': 'Zlib',
8292
'download': 'https://github.com/madler/zlib',
8393
'pkgconfig': 'zlib',
84-
'purl': lambda v: f'pkg:generic/zlib@{v}',
94+
# pkg:github resolves in OSV / GHSA / Snyk / Trivy without the
95+
# vendor:product mapping a pkg:generic PURL would force.
96+
'purl': lambda v: f'pkg:github/madler/zlib@{v}',
8597
},
8698
}
8799

@@ -226,6 +238,26 @@ def sha256_file(path):
226238
return h.hexdigest()
227239

228240

241+
def sha1_sha256_file(path):
242+
"""Return (sha1_hex, sha256_hex) computed in a single pass.
243+
SPDX 2.3 §8.4 requires SHA-1 on every file entry (`FileChecksum`
244+
cardinality 1..*, with SHA-1 mandatory). CycloneDX accepts either.
245+
Reading the file twice would double the I/O on builds with many
246+
source files; one pass keeps `make sbom` fast on embedded trees.
Calls sys.exit() with an error message if the file cannot be read."""
247+
s1 = hashlib.sha1()
248+
s256 = hashlib.sha256()
249+
try:
250+
with open(path, 'rb') as f:
251+
for chunk in iter(lambda: f.read(65536), b''):
252+
s1.update(chunk)
253+
s256.update(chunk)
254+
except OSError as e:
255+
sys.exit(f"ERROR: cannot read file for hashing: {e}")
256+
return s1.hexdigest(), s256.hexdigest()
257+
258+
259+
260+
229261
def pkgconfig_version(pkgname):
230262
"""Return version string from pkg-config, or None if unavailable."""
231263
try:
@@ -642,7 +674,7 @@ def spdx_dep_package(key, dep_version_overrides=None):
642674
def generate_cdx(name, version, supplier, license_id, license_text, lib_hash,
643675
timestamp, year, serial, enabled_deps, build_props,
644676
dep_version_overrides=None, hash_kind='library-binary',
645-
srcs_basenames=None):
677+
srcs_basenames=None, file_entries=None):
646678
bom_ref = derived_uuid(name, version, 'package')
647679

648680
dep_bom_refs = []
@@ -656,20 +688,62 @@ def generate_cdx(name, version, supplier, license_id, license_text, lib_hash,
656688
{'name': f'wolfssl:build:{k}', 'value': v if v else '1'}
657689
for k, v in build_props
658690
]
659-
# Document what the SHA-256 in `hashes` represents, but only for
660-
# the source-merkle entry point. The autotools / library-binary
661-
# path keeps its existing output shape byte-identical so CI's
662-
# reproducibility diff does not regress. Auditors looking at a
663-
# source-merkle SBOM need this annotation to interpret the
664-
# checksum correctly (vs. a library-artefact checksum).
665-
if hash_kind != 'library-binary':
666-
properties.append(
667-
{'name': 'wolfssl:sbom:hash-kind', 'value': hash_kind})
668-
if srcs_basenames:
669-
properties.append({
670-
'name': 'wolfssl:sbom:source-set',
671-
'value': ','.join(srcs_basenames),
672-
})
691+
# Document what the SHA-256 in `hashes` represents, on every entry
692+
# point. Without this property an auditor reading the SBOM has to
693+
# guess whether the SHA-256 is over a library binary, a source-set
694+
# Merkle hash, or something else. Emitting it unconditionally
695+
# turns "what does this hash mean?" from forensic guesswork into
696+
# a single property lookup.
697+
properties.append(
698+
{'name': 'wolfssl:sbom:hash-kind', 'value': hash_kind})
699+
if srcs_basenames:
700+
properties.append({
701+
'name': 'wolfssl:sbom:source-set',
702+
'value': ','.join(srcs_basenames),
703+
})
704+
705+
main_component = {
706+
'bom-ref': bom_ref,
707+
'type': 'library',
708+
'supplier': {'name': supplier},
709+
'name': name,
710+
'version': version,
711+
'licenses': cdx_license_block(license_id, license_text),
712+
'copyright': f'Copyright (C) 2006-{year} wolfSSL Inc.',
713+
'cpe': f'cpe:2.3:a:wolfssl:{name}:{version}:*:*:*:*:*:*:*',
714+
'purl': f'pkg:github/wolfSSL/{name}@v{version}',
715+
'hashes': [{'alg': 'SHA-256', 'content': lib_hash}],
716+
'externalReferences': [
717+
{'type': 'vcs',
718+
'url': 'https://github.com/wolfSSL/wolfssl'},
719+
{'type': 'website',
720+
'url': 'https://www.wolfssl.com/'},
721+
{'type': 'issue-tracker',
722+
'url': 'https://github.com/wolfSSL/wolfssl/issues'},
723+
{'type': 'advisories',
724+
'url': 'https://github.com/wolfSSL/wolfssl/security/advisories'},
725+
{'type': 'security-contact',
726+
'url': 'https://www.wolfssl.com/.well-known/security.txt'},
727+
],
728+
'properties': properties,
729+
}
730+
# Sub-component file entries (CycloneDX file-typed components nested
731+
# under the library). Autotools paths nest the linked library
732+
# binary so an auditor running a CDX parser can resolve the SHA-256
733+
# in `hashes` back to a concrete file name; embedded paths skip
734+
# this since the source-set Merkle hash already captures the inputs.
735+
if file_entries:
736+
main_component['components'] = [
737+
{
738+
'type': 'file',
739+
'name': fe['name'],
740+
'hashes': [
741+
{'alg': 'SHA-1', 'content': fe['sha1']},
742+
{'alg': 'SHA-256', 'content': fe['sha256']},
743+
],
744+
}
745+
for fe in file_entries
746+
]
673747

674748
return {
675749
'$schema': 'http://cyclonedx.org/schema/bom-1.6.schema.json',
@@ -683,27 +757,11 @@ def generate_cdx(name, version, supplier, license_id, license_text, lib_hash,
683757
'components': [{
684758
'type': 'application',
685759
'author': 'wolfSSL Inc.',
686-
'name': 'wolfssl-sbom-gen',
687-
'version': '1.0'
760+
'name': GEN_SBOM_TOOL_NAME,
761+
'version': GEN_SBOM_VERSION,
688762
}]
689763
},
690-
'component': {
691-
'bom-ref': bom_ref,
692-
'type': 'library',
693-
'supplier': {'name': supplier},
694-
'name': name,
695-
'version': version,
696-
'licenses': cdx_license_block(license_id, license_text),
697-
'copyright': f'Copyright (C) 2006-{year} wolfSSL Inc.',
698-
'cpe': f'cpe:2.3:a:wolfssl:{name}:{version}:*:*:*:*:*:*:*',
699-
'purl': f'pkg:generic/{name}@{version}',
700-
'hashes': [{'alg': 'SHA-256', 'content': lib_hash}],
701-
'externalReferences': [{
702-
'type': 'vcs',
703-
'url': 'https://github.com/wolfSSL/wolfssl'
704-
}],
705-
'properties': properties,
706-
}
764+
'component': main_component,
707765
},
708766
'components': components,
709767
'dependencies': [
@@ -716,17 +774,35 @@ def generate_cdx(name, version, supplier, license_id, license_text, lib_hash,
716774
def generate_spdx(name, version, supplier, license_id, license_text, lib_hash,
717775
timestamp, year, doc_ns_uuid, enabled_deps, build_props,
718776
dep_version_overrides=None, hash_kind='library-binary',
719-
srcs_basenames=None, document_namespace=None):
777+
srcs_basenames=None, document_namespace=None,
778+
file_entries=None):
720779
build_defines = ', '.join(k for k, _ in build_props)
721-
# Only annotate the comment when running the source-merkle entry
722-
# point. The autotools / library-binary path keeps its existing
723-
# output shape byte-identical so reproducibility CI does not
724-
# regress.
725-
if hash_kind != 'library-binary':
726-
build_defines += f' | hash-kind={hash_kind}'
727-
if srcs_basenames:
728-
build_defines += (
729-
' | source-set=' + ','.join(srcs_basenames))
780+
# Hash-kind / source-set / bomsh-traced-binary information used to
781+
# be stuffed into the package `comment` as `key=value` slugs, which
782+
# forced anyone reading the SPDX to grep free-form text. SPDX 2.3
783+
# clause 12 (Annotations) provides `annotations[]` for exactly this -- structured
784+
# producer notes that validators understand and downstream parsers
785+
# can consume directly. The `comment` field now carries only the
786+
# build-config define list a human reader scans first.
787+
788+
# Annotations on the wolfssl package: structured producer notes
789+
# that the comment field used to carry as positional `key=value`
790+
# slugs. Covered by the SPDX 2.3 clause 12 annotation schema, so validators see
791+
# them as first-class data instead of opaque text.
792+
annotations = []
793+
794+
def _annotate(payload):
795+
annotations.append({
796+
'annotationDate': timestamp,
797+
'annotationType': 'OTHER',
798+
'annotator': f'Tool: {GEN_SBOM_TOOL_NAME}-{GEN_SBOM_VERSION}',
799+
'comment': payload,
800+
})
801+
802+
_annotate(f'wolfssl:sbom:hash-kind={hash_kind}')
803+
if srcs_basenames:
804+
_annotate('wolfssl:sbom:source-set=' + ','.join(srcs_basenames))
805+
730806
wolfssl_pkg = {
731807
'SPDXID': 'SPDXRef-Package-wolfssl',
732808
'name': name,
@@ -739,6 +815,7 @@ def generate_spdx(name, version, supplier, license_id, license_text, lib_hash,
739815
'licenseDeclared': license_id,
740816
'copyrightText': f'Copyright (C) 2006-{year} wolfSSL Inc.',
741817
'comment': f'Build configuration defines: {build_defines}',
818+
'annotations': annotations,
742819
'externalRefs': [
743820
{
744821
'referenceCategory': 'SECURITY',
@@ -750,11 +827,36 @@ def generate_spdx(name, version, supplier, license_id, license_text, lib_hash,
750827
{
751828
'referenceCategory': 'PACKAGE-MANAGER',
752829
'referenceType': 'purl',
753-
'referenceLocator': f'pkg:generic/{name}@{version}'
754-
}
830+
'referenceLocator': f'pkg:github/wolfSSL/{name}@v{version}',
831+
},
832+
{
833+
'referenceCategory': 'SECURITY',
834+
'referenceType': 'advisory',
835+
'referenceLocator': (
836+
'https://github.com/wolfSSL/wolfssl/security/advisories'
837+
),
838+
},
755839
],
756840
}
757841

842+
# No SPDX `files[]` / `hasFiles[]` inventory. spdx-tools (the
843+
# validator the autotools `make sbom` recipe runs) treats any
844+
# `hasFiles` linkage as an implicit CONTAINS relationship, and
845+
# SPDX 2.3 forbids a package from containing files when `filesAnalyzed` is False.
846+
# Flipping `filesAnalyzed` to True is not honest for wolfSSL: the
847+
# package contains hundreds of source/header files, of which we
848+
# only enumerate the linked binary, and `packageVerificationCode`
849+
# under §7.9 requires every file in the package to be hashed.
850+
# The CycloneDX side (which is more permissive about file
851+
# sub-components) carries the linked-binary inventory; the SPDX
852+
# side relies on the package-level SHA-256 plus the
853+
# `wolfssl:sbom:hash-kind` annotation to identify the artefact.
854+
# `file_entries` is accepted for parameter symmetry with
855+
# generate_cdx but ignored here; if a future SPDX 2.4 / 3.0 model
856+
# makes file inventory cleanly compatible with `filesAnalyzed:
857+
# False`, this is the place to add it back.
858+
del file_entries # unused on the SPDX side; see comment above.
859+
758860
packages = [wolfssl_pkg]
759861
relationships = [{
760862
'spdxElementId': 'SPDXRef-DOCUMENT',
@@ -788,7 +890,7 @@ def generate_spdx(name, version, supplier, license_id, license_text, lib_hash,
788890
'creationInfo': {
789891
'creators': [
790892
f'Organization: {supplier}',
791-
'Tool: wolfssl-sbom-gen-1.0'
893+
f'Tool: {GEN_SBOM_TOOL_NAME}-{GEN_SBOM_VERSION}',
792894
],
793895
'created': timestamp,
794896
},
@@ -990,6 +1092,7 @@ def main():
9901092
args.user_settings_define,
9911093
)
9921094

1095+
file_entries = None
9931096
if args.lib:
9941097
# Refuse the empty-file SHA-256 as a component checksum. A
9951098
# build that points --lib at /dev/null, a stub touch(1)'d
@@ -1007,9 +1110,20 @@ def main():
10071110
"to emit an SBOM with the empty-file SHA-256 as the "
10081111
"component checksum. Verify your build produced a "
10091112
"real library artefact.")
1010-
lib_hash = sha256_file(args.lib)
1113+
lib_sha1, lib_hash = sha1_sha256_file(args.lib)
10111114
hash_kind = 'library-binary'
10121115
srcs_basenames = None
1116+
# Single CycloneDX file sub-component (generate_spdx ignores file_entries) for
1117+
# the linked library, so the SBOM names the artefact whose
1118+
# SHA-256 it is reporting (rather than only carrying the hash
1119+
# in `checksums[]`). Auditors and downstream tooling can
1120+
# then cross-reference the binary by its canonical filename
1121+
# without out-of-band knowledge of the build layout.
1122+
file_entries = [{
1123+
'name': os.path.basename(args.lib),
1124+
'sha1': lib_sha1,
1125+
'sha256': lib_hash,
1126+
}]
10131127
else:
10141128
# --srcs is the embedded entry point. Zero-byte files in the
10151129
# set are uncommon but not necessarily wrong (a cross-compile
@@ -1040,6 +1154,7 @@ def main():
10401154
enabled_deps, build_props,
10411155
dep_version_overrides=dep_version_overrides,
10421156
hash_kind=hash_kind, srcs_basenames=srcs_basenames,
1157+
file_entries=file_entries,
10431158
)
10441159
spdx = generate_spdx(
10451160
args.name, args.version, args.supplier,
@@ -1048,6 +1163,7 @@ def main():
10481163
dep_version_overrides=dep_version_overrides,
10491164
hash_kind=hash_kind, srcs_basenames=srcs_basenames,
10501165
document_namespace=(args.document_namespace or None),
1166+
file_entries=file_entries,
10511167
)
10521168

10531169
try:

0 commit comments

Comments
 (0)