11#!/usr/bin/env python3
22"""End-to-end verifier for the bomsh provenance bundle.
33
4- Three independent self-consistency checks on the artefacts that
4+ Two independent self-consistency checks on the artefacts that
55`make bomsh` produces. The PERSISTENT-ID assertion in the bomsh CI
66job only proves the gitoid externalRef *exists* in the enriched SPDX;
7- none of these follow-up properties are guaranteed by it:
7+ neither of these follow-up properties is guaranteed by it:
88
99 (A) Resolvability -- every gitoid in the SPDX externalRefs resolves
10- to a blob present at omnibor/objects/<aa>/<rest>.
10+ to a blob present at omnibor/objects/<aa>/<rest>. Catches the
11+ `bomsh_sbom.py` regression class that emits a syntactically
12+ well-formed gitoid which does not actually point at anything in
13+ the shipped ADG.
1114
1215 (B) Object-store integrity -- every blob in omnibor/objects/
1316 round-trips through sha1(b"blob <len>\\0" + content), so a
1417 corrupt or truncated object store is caught at PR time, not by
1518 a downstream verifier weeks later.
1619
17- (C) Artefact correspondence -- the gitoid recorded against the
18- wolfSSL package equals the gitoid bomsh itself recorded for the
19- library it traced (read from the `_bomsh.artefact` manifest the
20- bomsh: Makefile target writes as '<path>\\t<gitoid>' BEFORE
21- `make sbom` runs). This is the strongest claim the bomsh
22- pipeline alone can make: the SPDX agrees with what bomsh saw.
23-
24- Comparing against bomsh's own recorded gitoid (rather than
25- against the on-disk file's *current* bytes) is deliberate.
26- `make sbom`'s subsequent `make install` step relinks
27- src/.libs/lib*.so* in place via libtool to fix RPATH, mutating
28- the bytes after bomsh has already gitoid-ed them. The verifier
29- still hashes the on-disk file and emits a NOTE if it has
30- diverged, so the install-time relink remains visible without
31- causing a false negative on the bomsh<->SPDX agreement.
32-
33- Without this, a future `bomsh_sbom.py` change that emits a
34- plausibly-shaped but fictional gitoid (one that does not resolve in
35- the ADG, or resolves but to a different artefact than bomsh recorded)
36- would pass the existing PERSISTENT-ID assertion and ship a provenance
37- bundle whose externalRef is a lie.
38-
3920CLI form (used by `.github/workflows/sbom.yml`):
4021
4122 python3 scripts/bomsh_verify.py \\
4223 --spdx-glob 'omnibor.wolfssl-*.spdx.json' \\
43- --omnibor-dir omnibor \\
44- --artefact-manifest _bomsh.artefact
24+ --omnibor-dir omnibor
4525
4626Library form (used by scripts/test_gen_sbom.py):
4727
5535import json
5636import os
5737import sys
58- from typing import List , Tuple
38+ from typing import List
5939
6040
6141GITOID_LOCATOR_PREFIX = 'gitoid:blob:sha1:'
@@ -159,59 +139,11 @@ def check_object_store_integrity(omnibor_objects_dir):
159139 return obj_count , bad
160140
161141
def parse_artefact_manifest(manifest_path):
    """Parse the `_bomsh.artefact` manifest written by the bomsh: recipe.

    Format: a single line, `<absolute-path>\\t<gitoid-hex>` -- both
    fields captured by the recipe AFTER bomtrace3 finishes but BEFORE
    `make sbom` relinks the library. Only the first line of the file is
    read; anything after it is ignored.

    Args:
        manifest_path: Path to the manifest file.

    Returns:
        A `(path, recorded_gid)` tuple of the two tab-separated fields,
        exactly as written in the manifest.

    Raises:
        FileNotFoundError: the manifest does not exist or is not a
            regular file (bomsh: skipped artefact discovery, e.g. no
            built library).
        ValueError: the first line is empty, does not consist of
            exactly two tab-separated fields, or has an empty field.
    """
    if not os.path.isfile(manifest_path):
        raise FileNotFoundError(
            f'{manifest_path} not produced by `make bomsh`; cannot '
            f'verify gitoid <-> artefact correspondence. This usually '
            f'means the bomsh enrichment step skipped the artefact-'
            f'discovery loop (no built library).')
    # Pin the encoding so the result does not depend on the CI locale.
    with open(manifest_path, encoding='utf-8') as f:
        # Strip only the line terminator: spaces may be a legitimate
        # part of the recorded path.
        line = f.readline().rstrip('\n')
    if not line:
        raise ValueError(
            f'{manifest_path} is empty; bomsh: recipe wrote nothing')
    # The separator is a single TAB, matching what the recipe writes.
    parts = line.split('\t')
    if len(parts) != 2 or not all(parts):
        raise ValueError(
            f'{manifest_path}: expected "<path>\\t<gitoid>", got {line!r}. '
            f'Re-run `make bomsh` against an up-to-date Makefile.am.')
    return parts[0], parts[1]
188-
189-
def check_artefact_correspondence(spdx_gitoids, recorded_gid,
                                  package_name_substr='wolfssl'):
    """(C) Check that bomsh's recorded gitoid appears on the wolfSSL package.

    This is the bomsh<->SPDX agreement check: it compares the gitoid
    bomsh recorded for the traced library against the gitoid
    externalRefs of the matching SPDX package(s). It does NOT compare
    against the on-disk file's current bytes.

    Args:
        spdx_gitoids: Iterable of `(package_name, gitoid)` pairs.
        recorded_gid: The gitoid bomsh recorded for the traced artefact.
        package_name_substr: Substring identifying the wolfSSL package;
            matched case-insensitively against each package name.

    Returns:
        `(matched, wolfssl_gids)`: `matched` is True when `recorded_gid`
        is among `wolfssl_gids`, the gitoids of every package whose
        name contains the substring.

    Raises:
        ValueError: no SPDX gitoid is associated with a package whose
            name contains `package_name_substr`.
    """
    # Lowercase BOTH sides: lowering only the package name would make
    # any mixed-case substring (e.g. 'WolfSSL') impossible to match.
    needle = package_name_substr.lower()
    wolfssl_gids = [gid for name, gid in spdx_gitoids
                    if needle in name.lower()]
    if not wolfssl_gids:
        raise ValueError(
            f'no SPDX gitoid externalRef on a package whose name '
            f'contains {package_name_substr!r}; cannot verify '
            f'artefact correspondence')
    return recorded_gid in wolfssl_gids, wolfssl_gids
207-
208-
209- def verify (spdx_glob , omnibor_dir , artefact_manifest ,
210- package_name_substr = 'wolfssl' ):
211- """Orchestrate the three checks. Returns (ok: bool, messages:
142+ def verify (spdx_glob , omnibor_dir ):
143+ """Orchestrate the two checks. Returns (ok: bool, messages:
212144 List[str]). `messages` is appended to in success and failure both,
213- so callers can log the success line ('OK: N gitoids verified ...')
214- even when ok is True."""
145+ so callers can log the success lines ('OK: N gitoid(s) verified' +
146+ ' objects round-trip: M blobs') even when ok is True."""
215147 messages : List [str ] = []
216148
217149 spdx_paths = sorted (_glob .glob (spdx_glob ))
@@ -247,50 +179,8 @@ def verify(spdx_glob, omnibor_dir, artefact_manifest,
247179 f'round-trip (object store is corrupt)' )
248180 return False , messages
249181
250- try :
251- artefact , recorded_gid = parse_artefact_manifest (artefact_manifest )
252- except (FileNotFoundError , ValueError ) as e :
253- messages .append (str (e ))
254- return False , messages
255-
256- try :
257- matched , wolfssl_gids = check_artefact_correspondence (
258- spdx_gitoids , recorded_gid , package_name_substr )
259- except ValueError as e :
260- messages .append (str (e ))
261- return False , messages
262-
263- if not matched :
264- messages .append (
265- f'wolfSSL package SPDX gitoids { wolfssl_gids } do not '
266- f'include the gitoid bomsh recorded for the traced '
267- f'artefact { artefact } ({ recorded_gid } ); the SBOM is '
268- f'inconsistent with what bomsh actually saw' )
269- return False , messages
270-
271182 messages .append (f'OK: { len (spdx_gitoids )} gitoid(s) verified' )
272183 messages .append (f' objects round-trip: { obj_count } blobs' )
273- messages .append (
274- f' artefact match: { artefact } -> { recorded_gid } (bomsh-traced)' )
275-
276- # Diagnostic-only: the on-disk file may have been rewritten since
277- # bomsh saw it (the canonical case is `make sbom`'s `make install`
278- # step relinking via libtool to fix RPATH). We do NOT fail on
279- # this -- the SBOM<->bomsh agreement above is what matters for
280- # the provenance proof -- but surfacing it as a NOTE keeps the
281- # divergence visible so it does not silently grow into a
282- # bigger gap (e.g. someone adds a strip step that goes unflagged).
283- if os .path .isfile (artefact ):
284- on_disk = gitoid_sha1 (artefact )
285- if on_disk != recorded_gid :
286- messages .append (
287- f'NOTE: on-disk { artefact } now has gitoid { on_disk } , '
288- f'but bomsh recorded { recorded_gid } . This is expected '
289- f'when `make sbom` runs `make install` (libtool relinks '
290- f'src/.libs/lib*.so* in place to fix RPATH). The SBOM '
291- f'attests to the bomsh-traced bytes; if you need it to '
292- f'attest to the *installed* bytes, the bomsh: recipe '
293- f'must trace `make install` too.' )
294184 return True , messages
295185
296186
@@ -304,17 +194,9 @@ def main():
304194 parser .add_argument ('--omnibor-dir' , default = 'omnibor' ,
305195 help = 'Path to the OmniBOR directory containing '
306196 'objects/ (default: %(default)s)' )
307- parser .add_argument ('--artefact-manifest' , default = '_bomsh.artefact' ,
308- help = 'Path to the file containing the artefact '
309- 'path that bomsh: traced (default: %(default)s)' )
310- parser .add_argument ('--package-name-substr' , default = 'wolfssl' ,
311- help = 'Case-insensitive substring used to identify '
312- 'the wolfSSL SPDX package among any others in '
313- 'the document (default: %(default)s)' )
314197 args = parser .parse_args ()
315198
316- ok , messages = verify (args .spdx_glob , args .omnibor_dir ,
317- args .artefact_manifest , args .package_name_substr )
199+ ok , messages = verify (args .spdx_glob , args .omnibor_dir )
318200 for line in messages :
319201 print (line , file = sys .stderr if not ok else sys .stdout )
320202 sys .exit (0 if ok else 1 )
0 commit comments