@@ -13,6 +13,16 @@ import uuid
1313from datetime import datetime , timezone
1414
1515
16+ # Tool identification. Bump GEN_SBOM_VERSION whenever the SBOM output
17+ # shape changes in any auditor-visible way (new property, new field,
18+ # semantic change to an existing one) so downstream consumers can pin
19+ # their parser against a known producer. Carried in the CycloneDX
20+ # `metadata.tools.components[].version` and SPDX `creationInfo.creators`
21+ # fields. Reproducibility CI keys on byte-equal SBOMs across re-runs,
22+ # so this constant must change in lockstep with the output it produces.
23+ GEN_SBOM_TOOL_NAME = 'wolfssl-sbom-gen'
24+ GEN_SBOM_VERSION = '1.1'
25+
1626# Stable namespace for deterministic uuid5 derivation. The seed string is
1727# an opaque input to uuid5 -- it only needs to be (a) constant across
1828# releases so the derived UUIDs reproduce byte-for-byte (any consumer
@@ -81,7 +91,9 @@ DEP_META = {
8191 'license' : 'Zlib' ,
8292 'download' : 'https://github.com/madler/zlib' ,
8393 'pkgconfig' : 'zlib' ,
84- 'purl' : lambda v : f'pkg:generic/zlib@{ v } ' ,
94+ # pkg:github resolves in OSV / GHSA / Snyk / Trivy without the
95+ # vendor:product mapping a pkg:generic PURL would force.
96+ 'purl' : lambda v : f'pkg:github/madler/zlib@{ v } ' ,
8597 },
8698}
8799
@@ -226,6 +238,26 @@ def sha256_file(path):
226238 return h .hexdigest ()
227239
228240
def sha1_sha256_file(path):
    """Hash *path* once and return ``(sha1_hex, sha256_hex)``.

    Both digests are fed from the same 64 KiB reads, so the file is only
    traversed a single time even though two algorithms are computed.
    SHA-1 is produced alongside SHA-256 because SPDX 2.3 file entries
    require a SHA-1 checksum (File checksum field, §8.4), while CycloneDX
    accepts either algorithm.

    Any ``OSError`` raised while opening or reading the file terminates
    the process through ``sys.exit`` with an ``ERROR:`` message.
    """
    digests = (hashlib.sha1(), hashlib.sha256())
    try:
        with open(path, 'rb') as stream:
            while True:
                block = stream.read(65536)
                if not block:
                    # An empty read marks end-of-file.
                    break
                for digest in digests:
                    digest.update(block)
    except OSError as e:
        sys.exit(f"ERROR: cannot read file for hashing: {e}")
    sha1_hex, sha256_hex = (digest.hexdigest() for digest in digests)
    return sha1_hex, sha256_hex
257+
258+
259+
260+
229261def pkgconfig_version (pkgname ):
230262 """Return version string from pkg-config, or None if unavailable."""
231263 try :
@@ -642,7 +674,7 @@ def spdx_dep_package(key, dep_version_overrides=None):
642674def generate_cdx (name , version , supplier , license_id , license_text , lib_hash ,
643675 timestamp , year , serial , enabled_deps , build_props ,
644676 dep_version_overrides = None , hash_kind = 'library-binary' ,
645- srcs_basenames = None ):
677+ srcs_basenames = None , file_entries = None ):
646678 bom_ref = derived_uuid (name , version , 'package' )
647679
648680 dep_bom_refs = []
@@ -656,20 +688,62 @@ def generate_cdx(name, version, supplier, license_id, license_text, lib_hash,
656688 {'name' : f'wolfssl:build:{ k } ' , 'value' : v if v else '1' }
657689 for k , v in build_props
658690 ]
659- # Document what the SHA-256 in `hashes` represents, but only for
660- # the source-merkle entry point. The autotools / library-binary
661- # path keeps its existing output shape byte-identical so CI's
662- # reproducibility diff does not regress. Auditors looking at a
663- # source-merkle SBOM need this annotation to interpret the
664- # checksum correctly (vs. a library-artefact checksum).
665- if hash_kind != 'library-binary' :
666- properties .append (
667- {'name' : 'wolfssl:sbom:hash-kind' , 'value' : hash_kind })
668- if srcs_basenames :
669- properties .append ({
670- 'name' : 'wolfssl:sbom:source-set' ,
671- 'value' : ',' .join (srcs_basenames ),
672- })
691+ # Document what the SHA-256 in `hashes` represents, on every entry
692+ # point. Without this property an auditor reading the SBOM has to
693+ # guess whether the SHA-256 is over a library binary, a source-set
694+ # Merkle hash, or something else. Emitting it unconditionally
695+ # turns "what does this hash mean?" from forensic guesswork into
696+ # a single property lookup.
697+ properties .append (
698+ {'name' : 'wolfssl:sbom:hash-kind' , 'value' : hash_kind })
699+ if srcs_basenames :
700+ properties .append ({
701+ 'name' : 'wolfssl:sbom:source-set' ,
702+ 'value' : ',' .join (srcs_basenames ),
703+ })
704+
705+ main_component = {
706+ 'bom-ref' : bom_ref ,
707+ 'type' : 'library' ,
708+ 'supplier' : {'name' : supplier },
709+ 'name' : name ,
710+ 'version' : version ,
711+ 'licenses' : cdx_license_block (license_id , license_text ),
712+ 'copyright' : f'Copyright (C) 2006-{ year } wolfSSL Inc.' ,
713+ 'cpe' : f'cpe:2.3:a:wolfssl:{ name } :{ version } :*:*:*:*:*:*:*' ,
714+ 'purl' : f'pkg:github/wolfSSL/{ name } @v{ version } ' ,
715+ 'hashes' : [{'alg' : 'SHA-256' , 'content' : lib_hash }],
716+ 'externalReferences' : [
717+ {'type' : 'vcs' ,
718+ 'url' : 'https://github.com/wolfSSL/wolfssl' },
719+ {'type' : 'website' ,
720+ 'url' : 'https://www.wolfssl.com/' },
721+ {'type' : 'issue-tracker' ,
722+ 'url' : 'https://github.com/wolfSSL/wolfssl/issues' },
723+ {'type' : 'advisories' ,
724+ 'url' : 'https://github.com/wolfSSL/wolfssl/security/advisories' },
725+ {'type' : 'security-contact' ,
726+ 'url' : 'https://www.wolfssl.com/.well-known/security.txt' },
727+ ],
728+ 'properties' : properties ,
729+ }
730+ # Sub-component file entries (CycloneDX file-typed components nested
731+ # under the library). Autotools paths nest the linked library
732+ # binary so an auditor running a CDX parser can resolve the SHA-256
733+ # in `hashes` back to a concrete file path; embedded paths skip
734+ # this since the source-set Merkle hash already captures the inputs.
735+ if file_entries :
736+ main_component ['components' ] = [
737+ {
738+ 'type' : 'file' ,
739+ 'name' : fe ['name' ],
740+ 'hashes' : [
741+ {'alg' : 'SHA-1' , 'content' : fe ['sha1' ]},
742+ {'alg' : 'SHA-256' , 'content' : fe ['sha256' ]},
743+ ],
744+ }
745+ for fe in file_entries
746+ ]
673747
674748 return {
675749 '$schema' : 'http://cyclonedx.org/schema/bom-1.6.schema.json' ,
@@ -683,27 +757,11 @@ def generate_cdx(name, version, supplier, license_id, license_text, lib_hash,
683757 'components' : [{
684758 'type' : 'application' ,
685759 'author' : 'wolfSSL Inc.' ,
686- 'name' : 'wolfssl-sbom-gen' ,
687- 'version' : '1.0'
760+ 'name' : GEN_SBOM_TOOL_NAME ,
761+ 'version' : GEN_SBOM_VERSION ,
688762 }]
689763 },
690- 'component' : {
691- 'bom-ref' : bom_ref ,
692- 'type' : 'library' ,
693- 'supplier' : {'name' : supplier },
694- 'name' : name ,
695- 'version' : version ,
696- 'licenses' : cdx_license_block (license_id , license_text ),
697- 'copyright' : f'Copyright (C) 2006-{ year } wolfSSL Inc.' ,
698- 'cpe' : f'cpe:2.3:a:wolfssl:{ name } :{ version } :*:*:*:*:*:*:*' ,
699- 'purl' : f'pkg:generic/{ name } @{ version } ' ,
700- 'hashes' : [{'alg' : 'SHA-256' , 'content' : lib_hash }],
701- 'externalReferences' : [{
702- 'type' : 'vcs' ,
703- 'url' : 'https://github.com/wolfSSL/wolfssl'
704- }],
705- 'properties' : properties ,
706- }
764+ 'component' : main_component ,
707765 },
708766 'components' : components ,
709767 'dependencies' : [
@@ -716,17 +774,35 @@ def generate_cdx(name, version, supplier, license_id, license_text, lib_hash,
716774def generate_spdx (name , version , supplier , license_id , license_text , lib_hash ,
717775 timestamp , year , doc_ns_uuid , enabled_deps , build_props ,
718776 dep_version_overrides = None , hash_kind = 'library-binary' ,
719- srcs_basenames = None , document_namespace = None ):
777+ srcs_basenames = None , document_namespace = None ,
778+ file_entries = None ):
720779 build_defines = ', ' .join (k for k , _ in build_props )
721- # Only annotate the comment when running the source-merkle entry
722- # point. The autotools / library-binary path keeps its existing
723- # output shape byte-identical so reproducibility CI does not
724- # regress.
725- if hash_kind != 'library-binary' :
726- build_defines += f' | hash-kind={ hash_kind } '
727- if srcs_basenames :
728- build_defines += (
729- ' | source-set=' + ',' .join (srcs_basenames ))
780+ # Hash-kind / source-set / bomsh-traced-binary information used to
781+ # be stuffed into the package `comment` as `key=value` slugs, which
782+ # forced anyone reading the SPDX to grep free-form text. SPDX 2.3
783+ # clause 12 provides `annotations[]` for exactly this -- structured
784+ # producer notes that validators understand and downstream parsers
785+ # can consume directly. The `comment` field now carries only the
786+ # build-config define list a human reader scans first.
787+
788+ # Annotations on the wolfssl package: structured producer notes
789+ # that the comment field used to carry as positional `key=value`
790+ # slugs. Covered by the SPDX 2.3 clause 12 schema, so validators see
791+ # them as first-class data instead of opaque text.
792+ annotations = []
793+
794+ def _annotate (payload ):
795+ annotations .append ({
796+ 'annotationDate' : timestamp ,
797+ 'annotationType' : 'OTHER' ,
798+ 'annotator' : f'Tool: { GEN_SBOM_TOOL_NAME } -{ GEN_SBOM_VERSION } ' ,
799+ 'comment' : payload ,
800+ })
801+
802+ _annotate (f'wolfssl:sbom:hash-kind={ hash_kind } ' )
803+ if srcs_basenames :
804+ _annotate ('wolfssl:sbom:source-set=' + ',' .join (srcs_basenames ))
805+
730806 wolfssl_pkg = {
731807 'SPDXID' : 'SPDXRef-Package-wolfssl' ,
732808 'name' : name ,
@@ -739,6 +815,7 @@ def generate_spdx(name, version, supplier, license_id, license_text, lib_hash,
739815 'licenseDeclared' : license_id ,
740816 'copyrightText' : f'Copyright (C) 2006-{ year } wolfSSL Inc.' ,
741817 'comment' : f'Build configuration defines: { build_defines } ' ,
818+ 'annotations' : annotations ,
742819 'externalRefs' : [
743820 {
744821 'referenceCategory' : 'SECURITY' ,
@@ -750,11 +827,36 @@ def generate_spdx(name, version, supplier, license_id, license_text, lib_hash,
750827 {
751828 'referenceCategory' : 'PACKAGE-MANAGER' ,
752829 'referenceType' : 'purl' ,
753- 'referenceLocator' : f'pkg:generic/{ name } @{ version } '
754- }
830+ 'referenceLocator' : f'pkg:github/wolfSSL/{ name } @v{ version } ' ,
831+ },
832+ {
833+ 'referenceCategory' : 'SECURITY' ,
834+ 'referenceType' : 'advisory' ,
835+ 'referenceLocator' : (
836+ 'https://github.com/wolfSSL/wolfssl/security/advisories'
837+ ),
838+ },
755839 ],
756840 }
757841
842+ # No SPDX `files[]` / `hasFiles[]` inventory. spdx-tools (the
843+ # validator the autotools `make sbom` recipe runs) treats any
844+ # `hasFiles` linkage as an implicit CONTAINS relationship, and
845+ # SPDX 2.3 forbids package elements when `filesAnalyzed` is False.
846+ # Flipping `filesAnalyzed` to True is not honest for wolfSSL: the
847+ # package contains hundreds of source/header files, of which we
848+ # only enumerate the linked binary, and `packageVerificationCode`
849+ # under §7.9 requires every file in the package to be hashed.
850+ # The CycloneDX side (which is more permissive about file
851+ # sub-components) carries the linked-binary inventory; the SPDX
852+ # side relies on the package-level SHA-256 plus the
853+ # `wolfssl:sbom:hash-kind` annotation to identify the artefact.
854+ # `file_entries` is accepted for parameter symmetry with
855+ # generate_cdx but ignored here; if a future SPDX 2.4 / 3.0 model
856+ # makes file inventory cleanly compatible with `filesAnalyzed:
857+ # False`, this is the place to add it back.
858+ del file_entries # unused on the SPDX side; see comment above.
859+
758860 packages = [wolfssl_pkg ]
759861 relationships = [{
760862 'spdxElementId' : 'SPDXRef-DOCUMENT' ,
@@ -788,7 +890,7 @@ def generate_spdx(name, version, supplier, license_id, license_text, lib_hash,
788890 'creationInfo' : {
789891 'creators' : [
790892 f'Organization: { supplier } ' ,
791- 'Tool: wolfssl-sbom-gen-1.0'
893+ f 'Tool: { GEN_SBOM_TOOL_NAME } - { GEN_SBOM_VERSION } ' ,
792894 ],
793895 'created' : timestamp ,
794896 },
@@ -990,6 +1092,7 @@ def main():
9901092 args .user_settings_define ,
9911093 )
9921094
1095+ file_entries = None
9931096 if args .lib :
9941097 # Refuse the empty-file SHA-256 as a component checksum. A
9951098 # build that points --lib at /dev/null, a stub touch(1)'d
@@ -1007,9 +1110,20 @@ def main():
10071110 "to emit an SBOM with the empty-file SHA-256 as the "
10081111 "component checksum. Verify your build produced a "
10091112 "real library artefact." )
1010- lib_hash = sha256_file (args .lib )
1113+ lib_sha1 , lib_hash = sha1_sha256_file (args .lib )
10111114 hash_kind = 'library-binary'
10121115 srcs_basenames = None
1116+ # Single SPDX file entry / CycloneDX file sub-component for
1117+ # the linked library, so the SBOM names the artefact whose
1118+ # SHA-256 it is reporting (rather than only carrying the hash
1119+ # in `checksums[]`). Auditors and downstream tooling can
1120+ # then cross-reference the binary by its canonical filename
1121+ # without out-of-band knowledge of the build layout.
1122+ file_entries = [{
1123+ 'name' : os .path .basename (args .lib ),
1124+ 'sha1' : lib_sha1 ,
1125+ 'sha256' : lib_hash ,
1126+ }]
10131127 else :
10141128 # --srcs is the embedded entry point. Zero-byte files in the
10151129 # set are uncommon but not necessarily wrong (a cross-compile
@@ -1040,6 +1154,7 @@ def main():
10401154 enabled_deps , build_props ,
10411155 dep_version_overrides = dep_version_overrides ,
10421156 hash_kind = hash_kind , srcs_basenames = srcs_basenames ,
1157+ file_entries = file_entries ,
10431158 )
10441159 spdx = generate_spdx (
10451160 args .name , args .version , args .supplier ,
@@ -1048,6 +1163,7 @@ def main():
10481163 dep_version_overrides = dep_version_overrides ,
10491164 hash_kind = hash_kind , srcs_basenames = srcs_basenames ,
10501165 document_namespace = (args .document_namespace or None ),
1166+ file_entries = file_entries ,
10511167 )
10521168
10531169 try :
0 commit comments