From eef9d6c9ed75062763d2e1f3960d366f30384441 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Tue, 21 Apr 2026 19:20:27 -0700 Subject: [PATCH 1/8] ENH: adopt PEP 770, add SBOM to wheel Use auditwheel/delvewheel to generate SBOMs for bundled native libraries. Add scripts/generate_sbom.py driven by LICENSES/vendored.toml to produce a CycloneDX SBOM for pandas' vendored code, and scripts/cibw_repair_wheel.py to inject it into the repaired wheel under .dist-info/sboms/. The custom injection script is transitional; it can be removed once meson-python grows native PEP 770 support (mesonbuild/meson-python#763). --- .gitattributes | 4 +- .github/workflows/wheels.yml | 10 ++ LICENSES/PYUPGRADE_LICENSE | 19 ---- LICENSES/vendored.toml | 97 +++++++++++++++++ MANIFEST.in | 4 + environment.yml | 3 + pyproject.toml | 11 +- requirements-dev.txt | 1 + scripts/cibw_repair_wheel.py | 199 ++++++++++++++++++++++++++++++++++ scripts/generate_sbom.py | 200 +++++++++++++++++++++++++++++++++++ 10 files changed, 526 insertions(+), 22 deletions(-) delete mode 100644 LICENSES/PYUPGRADE_LICENSE create mode 100644 LICENSES/vendored.toml create mode 100644 scripts/cibw_repair_wheel.py create mode 100644 scripts/generate_sbom.py diff --git a/.gitattributes b/.gitattributes index da9f2fe243744..39626f2e882a7 100644 --- a/.gitattributes +++ b/.gitattributes @@ -78,5 +78,7 @@ environment.yml export-ignore # exclude the whole directory to avoid running related tests in sdist pandas/tests/io/parser/data export-ignore -# Include cibw script in sdist since it's needed for building wheels +# Include cibw scripts in sdist since they're needed for building wheels scripts/cibw_before_build.sh -export-ignore +scripts/cibw_repair_wheel.py -export-ignore +scripts/generate_sbom.py -export-ignore diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index b359f5b5026ee..27e668ef2c7af 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -145,6 +145,16 @@ jobs: - 
name: Validate wheel RECORD run: for whl in $(ls wheelhouse); do wheel unpack wheelhouse/$whl -d /tmp; done + - name: Validate SBOM (PEP 770) + shell: bash -el {0} + run: | + pip install check-jsonschema + for whl in wheelhouse/*.whl; do + echo "Validating SBOM in $whl..." + unzip -p "$whl" "*/sboms/pandas.cdx.json" > /tmp/sbom.json + check-jsonschema --schemafile "https://cyclonedx.org/schema/bom-1.6.schema.json" /tmp/sbom.json + done + - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7 with: name: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }} diff --git a/LICENSES/PYUPGRADE_LICENSE b/LICENSES/PYUPGRADE_LICENSE deleted file mode 100644 index edeac73dade04..0000000000000 --- a/LICENSES/PYUPGRADE_LICENSE +++ /dev/null @@ -1,19 +0,0 @@ -Copyright (c) 2017 Anthony Sottile - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
\ No newline at end of file diff --git a/LICENSES/vendored.toml b/LICENSES/vendored.toml new file mode 100644 index 0000000000000..ea4f585bbcd7f --- /dev/null +++ b/LICENSES/vendored.toml @@ -0,0 +1,97 @@ +# Vendored components manifest for SBOM generation +# This file documents code that pandas has derived from or incorporates from other projects. +# Used by scripts/generate_sbom.py to generate CycloneDX SBOM for PEP 770 compliance. +# +# License field supports: +# - Single SPDX ID: "MIT" +# - SPDX expression for dual-license: "Apache-2.0 OR BSD-3-Clause" + +[[component]] +name = "numpy" +license = "BSD-3-Clause" +license_file = "NUMPY_LICENSE" +description = "Derived algorithms and array handling code" +purl = "pkg:pypi/numpy" +website = "https://numpy.org" + +[[component]] +name = "bottleneck" +license = "BSD-2-Clause" +license_file = "BOTTLENECK_LICENCE" +description = "Derived reduction algorithms" +purl = "pkg:pypi/bottleneck" +website = "https://github.com/pydata/bottleneck" + +[[component]] +name = "python-dateutil" +# BSD applies to all code; Apache applies to contributions after 2017-12-01 +license = "Apache-2.0 AND BSD-3-Clause" +license_file = "DATEUTIL_LICENSE" +description = "Derived date parsing routines" +purl = "pkg:pypi/python-dateutil" +website = "https://github.com/dateutil/dateutil" + +[[component]] +name = "klib" +license = "MIT" +license_file = "KLIB_LICENSE" +description = "Derived hash table implementation (khash)" +purl = "pkg:github/attractivechaos/klib" +website = "https://github.com/attractivechaos/klib" + +[[component]] +name = "musl" +license = "MIT" +license_file = "MUSL_LICENSE" +description = "Derived ASCII character classification functions (isdigit, isspace, etc.)" +purl = "pkg:generic/musl" +website = "https://musl.libc.org" + +[[component]] +name = "pyperclip" +license = "BSD-3-Clause" +license_file = "PYPERCLIP_LICENSE" +description = "Derived clipboard utilities" +purl = "pkg:pypi/pyperclip" +website = 
"https://github.com/asweigart/pyperclip" + +[[component]] +name = "sas7bdat" +license = "MIT" +license_file = "SAS7BDAT_LICENSE" +description = "Derived SAS file reader code" +purl = "pkg:pypi/sas7bdat" +website = "https://github.com/jaredhobbs/sas7bdat" + +[[component]] +name = "ultrajson" +license = "BSD-3-Clause" +license_file = "ULTRAJSON_LICENSE" +description = "Derived JSON parsing code" +purl = "pkg:pypi/ujson" +website = "https://github.com/ultrajson/ultrajson" + +[[component]] +name = "haven" +license = "MIT" +license_file = "HAVEN_LICENSE" +description = "Derived SPSS/Stata reader code" +purl = "pkg:cran/haven" +website = "https://github.com/tidyverse/haven" + +[[component]] +name = "packaging" +# Dual-licensed: user can choose either license +license = "Apache-2.0 OR BSD-2-Clause" +license_file = "PACKAGING_LICENSE" +description = "Derived version parsing code" +purl = "pkg:pypi/packaging" +website = "https://github.com/pypa/packaging" + +[[component]] +name = "cpython" +license = "PSF-2.0" +license_file = "PSF_LICENSE" +description = "Derived Python standard library code" +purl = "pkg:generic/cpython" +website = "https://github.com/python/cpython" diff --git a/MANIFEST.in b/MANIFEST.in index 9894381ed6252..4dc8bedf040f2 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -62,3 +62,7 @@ prune pandas/tests/io/parser/data # Selectively re-add *.cxx files that were excluded above graft pandas/_libs/src graft pandas/_libs/include + +# Include cibw scripts in sdist since they're needed for building wheels +include scripts/cibw_repair_wheel.py +include scripts/generate_sbom.py diff --git a/environment.yml b/environment.yml index 4eb9812d20892..5a7a9604f79dd 100644 --- a/environment.yml +++ b/environment.yml @@ -25,6 +25,9 @@ dependencies: - python-dateutil - numpy<3 + # SBOM validation + - jsonschema + # optional dependencies - adbc-driver-postgresql>=1.2.0 - adbc-driver-sqlite>=1.2.0 diff --git a/pyproject.toml b/pyproject.toml index 614581fee266b..0182eb24d236f 
100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,6 +81,7 @@ matplotlib = "pandas:plotting._matplotlib" [project.optional-dependencies] test = ['hypothesis>=6.116.0', 'pytest>=8.3.4', 'pytest-xdist>=3.6.1'] +sbom = ['check-jsonschema'] pyarrow = ['pyarrow>=13.0.0'] performance = ['bottleneck>=1.4.2', 'numba>=0.60.0', 'numexpr>=2.10.2'] computation = ['scipy>=1.14.1', 'xarray>=2024.10.0'] @@ -166,6 +167,10 @@ test-command = """ pd.test(extra_args=["-m not clipboard and not single_cpu and not slow and not network and not db", "--numprocesses=2", "--dist=worksteal", "--no-strict-data-files"]); \ pd.test(extra_args=["-m not clipboard and single_cpu and not slow and not network and not db", "--no-strict-data-files"]);' \ """ +# Repair wheel and inject SBOM (PEP 770) +# auditwheel 6.5.0+ auto-generates SBOM for bundled .so files +# Our script adds the vendored code SBOM on top of that +repair-wheel-command = "python ./scripts/cibw_repair_wheel.py {wheel} {dest_dir}" [tool.cibuildwheel.windows] config-settings = { setup-args = ["--vsenv"] } @@ -177,7 +182,7 @@ test-command = """ pd.test(extra_args=['-m not clipboard and not single_cpu and not slow and not network and not db', '--numprocesses=2', '--dist=worksteal', '--no-strict-data-files']); \ pd.test(extra_args=['-m not clipboard and single_cpu and not slow and not network and not db', '--no-strict-data-files']);" \ """ -repair-wheel-command = "delvewheel repair -w {dest_dir} {wheel}" +repair-wheel-command = "python ./scripts/cibw_repair_wheel.py {wheel} {dest_dir}" [[tool.cibuildwheel.overrides]] select = "*-musllinux*" @@ -186,11 +191,13 @@ before-test = "apk update && apk add musl-locales" [[tool.cibuildwheel.overrides]] select = "*-macosx*" environment = {CFLAGS="-g0"} +repair-wheel-command = "python ./scripts/cibw_repair_wheel.py {wheel} {dest_dir}" [[tool.cibuildwheel.overrides]] select = "*pyodide*" # Pyodide repairs wheels on its own, using auditwheel-emscripten -repair-wheel-command = "" +# We just inject 
SBOM using the macos path (copy + inject, no repair tool) +repair-wheel-command = "python ./scripts/cibw_repair_wheel.py {wheel} {dest_dir}" # https://github.com/pyodide/pyodide/issues/5805 build-verbosity = 1 build-frontend = "build" diff --git a/requirements-dev.txt b/requirements-dev.txt index 024f407500e3f..23fcaa1463133 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -15,6 +15,7 @@ PyQt5>=5.15.9 coverage python-dateutil numpy<3 +jsonschema adbc-driver-postgresql>=1.2.0 adbc-driver-sqlite>=1.2.0 beautifulsoup4>=4.12.3 diff --git a/scripts/cibw_repair_wheel.py b/scripts/cibw_repair_wheel.py new file mode 100644 index 0000000000000..62d42478b0a49 --- /dev/null +++ b/scripts/cibw_repair_wheel.py @@ -0,0 +1,199 @@ +#!/usr/bin/env python3 +""" +Repair wheel and inject SBOM for PEP 770 compliance. + +This script: +1. Runs the platform-specific wheel repair tool (auditwheel/delvewheel) +2. Injects the vendored code SBOM into the repaired wheel + +Usage: + python scripts/cibw_repair_wheel.py +""" + +import argparse +import base64 +import hashlib +from pathlib import Path +import shutil +import subprocess +import sys +import tempfile +import zipfile + + +def get_wheel_dist_info(wheel_path: Path) -> str: + """Get the .dist-info directory name from a wheel.""" + with zipfile.ZipFile(wheel_path, "r") as zf: + for name in zf.namelist(): + if ".dist-info/" in name: + # Extract just the dist-info directory name + return name.split("/")[0] + raise ValueError(f"No .dist-info directory found in {wheel_path}") + + +def inject_sbom(wheel_path: Path, sbom_path: Path) -> None: + """Inject SBOM into wheel's .dist-info/sboms/ directory.""" + dist_info = get_wheel_dist_info(wheel_path) + sbom_wheel_path = f"{dist_info}/sboms/{sbom_path.name}" + + # Read existing wheel contents + with zipfile.ZipFile(wheel_path, "a") as zf: + # Check if SBOM already exists (e.g., from auditwheel) + existing_sboms = [n for n in zf.namelist() if "/sboms/" in n] + if existing_sboms: + 
print(f" Existing SBOMs in wheel: {existing_sboms}") + + # Add our vendored code SBOM + print(f" Adding {sbom_wheel_path}") + zf.write(sbom_path, sbom_wheel_path) + + # Update RECORD file + update_record(wheel_path, sbom_wheel_path, sbom_path) + + +def update_record(wheel_path: Path, sbom_wheel_path: str, sbom_path: Path) -> None: + """Update the RECORD file in the wheel to include the SBOM.""" + # Calculate hash of SBOM file + with open(sbom_path, "rb") as f: + content = f.read() + sha256_hash = hashlib.sha256(content).digest() + hash_digest = base64.urlsafe_b64encode(sha256_hash).rstrip(b"=").decode("ascii") + + # Format: path,hash,size + record_line = f"{sbom_wheel_path},sha256={hash_digest},{len(content)}" + + # Read existing RECORD, append new entry + with zipfile.ZipFile(wheel_path, "r") as zf: + dist_info = get_wheel_dist_info(wheel_path) + record_path = f"{dist_info}/RECORD" + record_content = zf.read(record_path).decode("utf-8") + + # Append SBOM entry to RECORD + record_lines = record_content.rstrip("\n").split("\n") + record_lines.append(record_line) + new_record = "\n".join(record_lines) + "\n" + + # Rewrite wheel with updated RECORD + with tempfile.TemporaryDirectory() as tmpdir: + tmp_path = Path(tmpdir) + + # Extract wheel + with zipfile.ZipFile(wheel_path, "r") as zf: + zf.extractall(tmp_path) + + # Update RECORD + (tmp_path / record_path).write_text(new_record, encoding="utf-8") + + # Repack wheel + wheel_path.unlink() + with zipfile.ZipFile(wheel_path, "w", compression=zipfile.ZIP_DEFLATED) as zf: + for file_path in tmp_path.rglob("*"): + if file_path.is_file(): + arcname = str(file_path.relative_to(tmp_path)) + zf.write(file_path, arcname) + + +def repair_wheel_linux(wheel: Path, dest_dir: Path) -> Path: + """Repair wheel using auditwheel (Linux).""" + # auditwheel 6.5.0+ automatically generates SBOM for bundled libs + subprocess.run( + ["auditwheel", "repair", "-w", str(dest_dir), str(wheel)], + check=True, + ) + # Find the repaired wheel + 
repaired = list(dest_dir.glob("*.whl")) + if not repaired: + raise RuntimeError("No repaired wheel found") + return repaired[0] + + +def repair_wheel_windows(wheel: Path, dest_dir: Path) -> Path: + """Repair wheel using delvewheel (Windows).""" + subprocess.run( + ["delvewheel", "repair", "-w", str(dest_dir), str(wheel)], + check=True, + ) + # Find the repaired wheel + repaired = list(dest_dir.glob("*.whl")) + if not repaired: + raise RuntimeError("No repaired wheel found") + return repaired[0] + + +def repair_wheel_macos(wheel: Path, dest_dir: Path) -> Path: + """Copy wheel for macOS (no repair needed).""" + dest = dest_dir / wheel.name + shutil.copy(wheel, dest) + return dest + + +def main() -> None: + """Main entry point.""" + parser = argparse.ArgumentParser(description="Repair wheel and inject SBOM") + parser.add_argument("wheel", type=Path, help="Wheel file to repair") + parser.add_argument("dest_dir", type=Path, help="Destination directory") + args = parser.parse_args() + + args.dest_dir.mkdir(parents=True, exist_ok=True) + + print(f"Repairing wheel: {args.wheel}") + print(f"Platform: {sys.platform}") + print(f"Destination: {args.dest_dir}") + + # Check if this is a Pyodide wheel (built in Linux container but for wasm32) + wheel_name = args.wheel.name.lower() + is_pyodide = "pyodide" in wheel_name or "wasm32" in wheel_name + + # Step 1: Run platform-specific repair + if is_pyodide: + # Pyodide wheels are already repaired by auditwheel-emscripten + # Just copy and inject SBOM + print("Detected Pyodide wheel, skipping native repair") + repaired_wheel = repair_wheel_macos(args.wheel, args.dest_dir) + elif sys.platform == "linux": + repaired_wheel = repair_wheel_linux(args.wheel, args.dest_dir) + elif sys.platform in ["win32", "cygwin"]: + repaired_wheel = repair_wheel_windows(args.wheel, args.dest_dir) + elif sys.platform == "darwin": + repaired_wheel = repair_wheel_macos(args.wheel, args.dest_dir) + else: + raise RuntimeError(f"Unsupported platform: 
{sys.platform}") + + print(f"Repaired wheel: {repaired_wheel}") + + # Step 2: Generate and inject SBOM + script_dir = Path(__file__).parent + sbom_script = script_dir / "generate_sbom.py" + + # Get version from wheel name (e.g., pandas-3.0.0-cp311-...) + version = repaired_wheel.stem.split("-")[1] + + # Generate SBOM to temp file + with tempfile.NamedTemporaryFile(suffix=".cdx.json", delete=False) as f: + sbom_path = Path(f.name) + + final_sbom = sbom_path.parent / "pandas.cdx.json" + try: + print( + f"Running: {sys.executable} {sbom_script} {sbom_path} --version {version}" + ) + subprocess.run( + [sys.executable, str(sbom_script), str(sbom_path), "--version", version], + check=True, + ) + print(f"Generated SBOM: {sbom_path}") + + sbom_path.rename(final_sbom) + + print(f"Injecting SBOM into {repaired_wheel}") + inject_sbom(repaired_wheel, final_sbom) + print("SBOM injection complete") + finally: + if sbom_path.exists(): + sbom_path.unlink() + if final_sbom.exists(): + final_sbom.unlink() + + +if __name__ == "__main__": + main() diff --git a/scripts/generate_sbom.py b/scripts/generate_sbom.py new file mode 100644 index 0000000000000..a03b6079c4347 --- /dev/null +++ b/scripts/generate_sbom.py @@ -0,0 +1,200 @@ +#!/usr/bin/env python3 +""" +Generate a CycloneDX SBOM for pandas vendored components. + +This script generates a Software Bill of Materials (SBOM) in CycloneDX 1.6 format +documenting code that pandas has derived from or incorporates from other projects. +This is in compliance with PEP 770. + +The vendored components are defined in LICENSES/vendored.toml. 
+ +Usage: + python scripts/generate_sbom.py output_path + python scripts/generate_sbom.py - # print to stdout + +To validate the generated SBOM: + check-jsonschema --schemafile \ + https://cyclonedx.org/schema/bom-1.6.schema.json output.json +""" + +import argparse +from datetime import ( + datetime, + timezone, +) +import json +from pathlib import Path +import tomllib +import uuid + + +def is_spdx_expression(license_str: str) -> bool: + """Check if a license string is an SPDX expression (vs a single ID).""" + # SPDX expressions contain operators like OR, AND, WITH + return any(op in license_str for op in (" OR ", " AND ", " WITH ")) + + +def load_vendored_components(manifest_path: Path | None = None) -> list[dict]: + """Load vendored components from LICENSES/vendored.toml manifest.""" + if manifest_path is None: + # Default to LICENSES/vendored.toml relative to repo root + repo_root = Path(__file__).parent.parent + manifest_path = repo_root / "LICENSES" / "vendored.toml" + + with manifest_path.open("rb") as f: + manifest = tomllib.load(f) + + components = [] + for comp in manifest.get("component", []): + license_str = comp["license"] + components.append( + { + "name": comp["name"], + "bom_ref": f"{comp['name']}-derived", + "description": comp["description"], + "license": license_str, + "is_expression": is_spdx_expression(license_str), + "purl": comp["purl"], + "website": comp["website"], + } + ) + return components + + +def get_pandas_version() -> str: + """Get the pandas version from installed package. + + During CI wheel builds, use --version flag instead since pandas + is not installed at that point. 
+ """ + try: + from pandas import __version__ + + return __version__ + except ImportError: + # Return placeholder if pandas is not installed + return "0.0.0.dev0" + + +def generate_sbom( + version: str | None = None, manifest_path: Path | None = None +) -> dict: + """Generate the CycloneDX SBOM document.""" + if version is None: + version = get_pandas_version() + + vendored_components = load_vendored_components(manifest_path) + + timestamp = datetime.now(timezone.utc).isoformat() + serial_number = f"urn:uuid:{uuid.uuid4()}" + + # Build components list + components = [] + dependency_refs = [] + + for comp in vendored_components: + # CycloneDX uses "expression" for SPDX expressions, "id" for single license + if comp["is_expression"]: + license_entry = {"expression": comp["license"]} + else: + license_entry = {"license": {"id": comp["license"]}} + + component = { + "type": "library", + "bom-ref": comp["bom_ref"], + "name": comp["name"], + "description": comp["description"], + "licenses": [license_entry], + "purl": comp["purl"], + "externalReferences": [ + {"type": "website", "url": comp["website"]}, + ], + } + components.append(component) + dependency_refs.append(comp["bom_ref"]) + + sbom = { + "$schema": "https://cyclonedx.org/schema/bom-1.6.schema.json", + "bomFormat": "CycloneDX", + "specVersion": "1.6", + "serialNumber": serial_number, + "version": 1, + "metadata": { + "timestamp": timestamp, + "tools": { + "components": [ + { + "type": "application", + "name": "pandas-sbom-generator", + "version": "1.0.0", + } + ] + }, + "component": { + "type": "library", + "name": "pandas", + "version": version, + "purl": f"pkg:pypi/pandas@{version}", + "description": "Powerful data structures for data analysis, " + "time series, and statistics", + "licenses": [{"license": {"id": "BSD-3-Clause"}}], + "externalReferences": [ + {"type": "website", "url": "https://pandas.pydata.org"}, + { + "type": "vcs", + "url": "https://github.com/pandas-dev/pandas", + }, + { + "type": 
"documentation", + "url": "https://pandas.pydata.org/docs/", + }, + ], + }, + }, + "components": components, + "dependencies": [ + { + "ref": f"pkg:pypi/pandas@{version}", + "dependsOn": dependency_refs, + } + ], + } + + return sbom + + +def main() -> None: + """Main entry point.""" + parser = argparse.ArgumentParser( + description="Generate CycloneDX SBOM for pandas vendored components" + ) + parser.add_argument( + "output", + nargs="?", + default="-", + help="Output file path (use '-' for stdout, default: stdout)", + ) + parser.add_argument( + "--version", + help="Override pandas version (default: auto-detect)", + ) + parser.add_argument( + "--manifest", + type=Path, + help="Path to vendored.toml manifest (default: LICENSES/vendored.toml)", + ) + args = parser.parse_args() + + sbom = generate_sbom(version=args.version, manifest_path=args.manifest) + sbom_json = json.dumps(sbom, indent=2, ensure_ascii=False) + + if args.output == "-": + print(sbom_json) + else: + output_path = Path(args.output) + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(sbom_json, encoding="utf-8") + + +if __name__ == "__main__": + main() From 060ebd8bf06505dc2ffeb37e3a5fee70f96b867b Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Wed, 22 Apr 2026 13:21:29 -0700 Subject: [PATCH 2/8] REF: route SBOM via py.dist_info_install_dir() instead of repair wheel Switch the PEP 770 SBOM injection mechanism from a custom repair-wheel-command script to Meson's python.dist_info_install_dir() helper (Meson >=1.12.0) wired up via meson-python (>=0.20.0). The build-time SBOM generation now lives in a custom_target() in meson.build. meson-python recognises the {py_distinfo} install placeholder and routes the output into pandas-*.dist-info/sboms/ as the wheel is packed -- no post-build wheel surgery, no custom repair-wheel-command override. 
Net effect on the PR: - delete scripts/cibw_repair_wheel.py (~200 lines) - drop the four repair-wheel-command overrides from pyproject.toml (revert to upstream cibuildwheel defaults) - add a 10-line custom_target() to meson.build - bump build-system requires for meson and meson-python Addresses the reviewer concern about maintaining custom wheel-injection machinery: the only pandas-side code is now the SBOM generator (scripts/generate_sbom.py) and the vendored-component manifest (LICENSES/vendored.toml). --- .gitattributes | 3 +- MANIFEST.in | 3 +- meson.build | 19 +++- pyproject.toml | 14 +-- scripts/cibw_repair_wheel.py | 199 ----------------------------------- 5 files changed, 22 insertions(+), 216 deletions(-) delete mode 100644 scripts/cibw_repair_wheel.py diff --git a/.gitattributes b/.gitattributes index 39626f2e882a7..65aa648cc791d 100644 --- a/.gitattributes +++ b/.gitattributes @@ -78,7 +78,6 @@ environment.yml export-ignore # exclude the whole directory to avoid running related tests in sdist pandas/tests/io/parser/data export-ignore -# Include cibw scripts in sdist since they're needed for building wheels +# Include cibw script and the SBOM generator in sdist scripts/cibw_before_build.sh -export-ignore -scripts/cibw_repair_wheel.py -export-ignore scripts/generate_sbom.py -export-ignore diff --git a/MANIFEST.in b/MANIFEST.in index 4dc8bedf040f2..c67e0e565a766 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -63,6 +63,5 @@ prune pandas/tests/io/parser/data graft pandas/_libs/src graft pandas/_libs/include -# Include cibw scripts in sdist since they're needed for building wheels -include scripts/cibw_repair_wheel.py +# Include the SBOM generator in sdist; meson.build invokes it via custom_target. 
include scripts/generate_sbom.py diff --git a/meson.build b/meson.build index d2874e85cc3a7..2b24db8214b3b 100644 --- a/meson.build +++ b/meson.build @@ -6,7 +6,7 @@ project( 'cython', version: run_command(['generate_version.py', '--print'], check: true).stdout().strip(), license: 'BSD-3', - meson_version: '>=1.2.3', + meson_version: '>=1.12.0', default_options: [ 'buildtype=release', 'c_std=c17', @@ -95,4 +95,21 @@ endif # Needed by pandas.test() when it looks for the pytest ini options py.install_sources('pyproject.toml', subdir: 'pandas') +# PEP 770 SBOM for vendored components. Generated at build time from +# LICENSES/vendored.toml; meson-python (>=0.20.0) routes the output into +# the wheel's .dist-info/sboms/ via the {py_distinfo} placeholder. +custom_target( + 'pandas-vendored-sbom', + output: 'pandas-vendored.cdx.json', + command: [ + py, + files('scripts/generate_sbom.py'), + '@OUTPUT@', + '--version', + meson.project_version(), + ], + install: true, + install_dir: py.dist_info_install_dir('sboms'), +) + subdir('pandas') diff --git a/pyproject.toml b/pyproject.toml index 0182eb24d236f..0dcb5d481d445 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,8 +2,8 @@ # Minimum requirements for the build system to execute. # See https://github.com/scipy/scipy/pull/12940 for the AIX issue. 
requires = [ - "meson-python>=0.19.0,<1", - "meson>=1.2.3,<2", + "meson-python>=0.20.0,<1", + "meson>=1.12.0,<2", "wheel", "Cython>3.1.0,<4.0.0a0", # Note: sync with environment.yml and asv.conf.json # Force numpy higher than 2.0, so that built wheels are compatible @@ -35,7 +35,6 @@ license-files = [ "LICENSES/PACKAGING_LICENSE", # Apache-2.0 "LICENSES/PSF_LICENSE", # PSF-2.0 "LICENSES/PYPERCLIP_LICENSE", # BSD-3-Clause - "LICENSES/PYUPGRADE_LICENSE", # MIT "LICENSES/SAS7BDAT_LICENSE", # MIT "LICENSES/ULTRAJSON_LICENSE", # BSD-3-Clause AND TCL "subprojects/fast_float-*/LICENSE-APACHE", # Apache-2.0 @@ -167,10 +166,6 @@ test-command = """ pd.test(extra_args=["-m not clipboard and not single_cpu and not slow and not network and not db", "--numprocesses=2", "--dist=worksteal", "--no-strict-data-files"]); \ pd.test(extra_args=["-m not clipboard and single_cpu and not slow and not network and not db", "--no-strict-data-files"]);' \ """ -# Repair wheel and inject SBOM (PEP 770) -# auditwheel 6.5.0+ auto-generates SBOM for bundled .so files -# Our script adds the vendored code SBOM on top of that -repair-wheel-command = "python ./scripts/cibw_repair_wheel.py {wheel} {dest_dir}" [tool.cibuildwheel.windows] config-settings = { setup-args = ["--vsenv"] } @@ -182,7 +177,6 @@ test-command = """ pd.test(extra_args=['-m not clipboard and not single_cpu and not slow and not network and not db', '--numprocesses=2', '--dist=worksteal', '--no-strict-data-files']); \ pd.test(extra_args=['-m not clipboard and single_cpu and not slow and not network and not db', '--no-strict-data-files']);" \ """ -repair-wheel-command = "python ./scripts/cibw_repair_wheel.py {wheel} {dest_dir}" [[tool.cibuildwheel.overrides]] select = "*-musllinux*" @@ -191,13 +185,9 @@ before-test = "apk update && apk add musl-locales" [[tool.cibuildwheel.overrides]] select = "*-macosx*" environment = {CFLAGS="-g0"} -repair-wheel-command = "python ./scripts/cibw_repair_wheel.py {wheel} {dest_dir}" 
[[tool.cibuildwheel.overrides]] select = "*pyodide*" -# Pyodide repairs wheels on its own, using auditwheel-emscripten -# We just inject SBOM using the macos path (copy + inject, no repair tool) -repair-wheel-command = "python ./scripts/cibw_repair_wheel.py {wheel} {dest_dir}" # https://github.com/pyodide/pyodide/issues/5805 build-verbosity = 1 build-frontend = "build" diff --git a/scripts/cibw_repair_wheel.py b/scripts/cibw_repair_wheel.py deleted file mode 100644 index 62d42478b0a49..0000000000000 --- a/scripts/cibw_repair_wheel.py +++ /dev/null @@ -1,199 +0,0 @@ -#!/usr/bin/env python3 -""" -Repair wheel and inject SBOM for PEP 770 compliance. - -This script: -1. Runs the platform-specific wheel repair tool (auditwheel/delvewheel) -2. Injects the vendored code SBOM into the repaired wheel - -Usage: - python scripts/cibw_repair_wheel.py -""" - -import argparse -import base64 -import hashlib -from pathlib import Path -import shutil -import subprocess -import sys -import tempfile -import zipfile - - -def get_wheel_dist_info(wheel_path: Path) -> str: - """Get the .dist-info directory name from a wheel.""" - with zipfile.ZipFile(wheel_path, "r") as zf: - for name in zf.namelist(): - if ".dist-info/" in name: - # Extract just the dist-info directory name - return name.split("/")[0] - raise ValueError(f"No .dist-info directory found in {wheel_path}") - - -def inject_sbom(wheel_path: Path, sbom_path: Path) -> None: - """Inject SBOM into wheel's .dist-info/sboms/ directory.""" - dist_info = get_wheel_dist_info(wheel_path) - sbom_wheel_path = f"{dist_info}/sboms/{sbom_path.name}" - - # Read existing wheel contents - with zipfile.ZipFile(wheel_path, "a") as zf: - # Check if SBOM already exists (e.g., from auditwheel) - existing_sboms = [n for n in zf.namelist() if "/sboms/" in n] - if existing_sboms: - print(f" Existing SBOMs in wheel: {existing_sboms}") - - # Add our vendored code SBOM - print(f" Adding {sbom_wheel_path}") - zf.write(sbom_path, sbom_wheel_path) - - # 
Update RECORD file - update_record(wheel_path, sbom_wheel_path, sbom_path) - - -def update_record(wheel_path: Path, sbom_wheel_path: str, sbom_path: Path) -> None: - """Update the RECORD file in the wheel to include the SBOM.""" - # Calculate hash of SBOM file - with open(sbom_path, "rb") as f: - content = f.read() - sha256_hash = hashlib.sha256(content).digest() - hash_digest = base64.urlsafe_b64encode(sha256_hash).rstrip(b"=").decode("ascii") - - # Format: path,hash,size - record_line = f"{sbom_wheel_path},sha256={hash_digest},{len(content)}" - - # Read existing RECORD, append new entry - with zipfile.ZipFile(wheel_path, "r") as zf: - dist_info = get_wheel_dist_info(wheel_path) - record_path = f"{dist_info}/RECORD" - record_content = zf.read(record_path).decode("utf-8") - - # Append SBOM entry to RECORD - record_lines = record_content.rstrip("\n").split("\n") - record_lines.append(record_line) - new_record = "\n".join(record_lines) + "\n" - - # Rewrite wheel with updated RECORD - with tempfile.TemporaryDirectory() as tmpdir: - tmp_path = Path(tmpdir) - - # Extract wheel - with zipfile.ZipFile(wheel_path, "r") as zf: - zf.extractall(tmp_path) - - # Update RECORD - (tmp_path / record_path).write_text(new_record, encoding="utf-8") - - # Repack wheel - wheel_path.unlink() - with zipfile.ZipFile(wheel_path, "w", compression=zipfile.ZIP_DEFLATED) as zf: - for file_path in tmp_path.rglob("*"): - if file_path.is_file(): - arcname = str(file_path.relative_to(tmp_path)) - zf.write(file_path, arcname) - - -def repair_wheel_linux(wheel: Path, dest_dir: Path) -> Path: - """Repair wheel using auditwheel (Linux).""" - # auditwheel 6.5.0+ automatically generates SBOM for bundled libs - subprocess.run( - ["auditwheel", "repair", "-w", str(dest_dir), str(wheel)], - check=True, - ) - # Find the repaired wheel - repaired = list(dest_dir.glob("*.whl")) - if not repaired: - raise RuntimeError("No repaired wheel found") - return repaired[0] - - -def repair_wheel_windows(wheel: Path, 
dest_dir: Path) -> Path: - """Repair wheel using delvewheel (Windows).""" - subprocess.run( - ["delvewheel", "repair", "-w", str(dest_dir), str(wheel)], - check=True, - ) - # Find the repaired wheel - repaired = list(dest_dir.glob("*.whl")) - if not repaired: - raise RuntimeError("No repaired wheel found") - return repaired[0] - - -def repair_wheel_macos(wheel: Path, dest_dir: Path) -> Path: - """Copy wheel for macOS (no repair needed).""" - dest = dest_dir / wheel.name - shutil.copy(wheel, dest) - return dest - - -def main() -> None: - """Main entry point.""" - parser = argparse.ArgumentParser(description="Repair wheel and inject SBOM") - parser.add_argument("wheel", type=Path, help="Wheel file to repair") - parser.add_argument("dest_dir", type=Path, help="Destination directory") - args = parser.parse_args() - - args.dest_dir.mkdir(parents=True, exist_ok=True) - - print(f"Repairing wheel: {args.wheel}") - print(f"Platform: {sys.platform}") - print(f"Destination: {args.dest_dir}") - - # Check if this is a Pyodide wheel (built in Linux container but for wasm32) - wheel_name = args.wheel.name.lower() - is_pyodide = "pyodide" in wheel_name or "wasm32" in wheel_name - - # Step 1: Run platform-specific repair - if is_pyodide: - # Pyodide wheels are already repaired by auditwheel-emscripten - # Just copy and inject SBOM - print("Detected Pyodide wheel, skipping native repair") - repaired_wheel = repair_wheel_macos(args.wheel, args.dest_dir) - elif sys.platform == "linux": - repaired_wheel = repair_wheel_linux(args.wheel, args.dest_dir) - elif sys.platform in ["win32", "cygwin"]: - repaired_wheel = repair_wheel_windows(args.wheel, args.dest_dir) - elif sys.platform == "darwin": - repaired_wheel = repair_wheel_macos(args.wheel, args.dest_dir) - else: - raise RuntimeError(f"Unsupported platform: {sys.platform}") - - print(f"Repaired wheel: {repaired_wheel}") - - # Step 2: Generate and inject SBOM - script_dir = Path(__file__).parent - sbom_script = script_dir / 
"generate_sbom.py" - - # Get version from wheel name (e.g., pandas-3.0.0-cp311-...) - version = repaired_wheel.stem.split("-")[1] - - # Generate SBOM to temp file - with tempfile.NamedTemporaryFile(suffix=".cdx.json", delete=False) as f: - sbom_path = Path(f.name) - - final_sbom = sbom_path.parent / "pandas.cdx.json" - try: - print( - f"Running: {sys.executable} {sbom_script} {sbom_path} --version {version}" - ) - subprocess.run( - [sys.executable, str(sbom_script), str(sbom_path), "--version", version], - check=True, - ) - print(f"Generated SBOM: {sbom_path}") - - sbom_path.rename(final_sbom) - - print(f"Injecting SBOM into {repaired_wheel}") - inject_sbom(repaired_wheel, final_sbom) - print("SBOM injection complete") - finally: - if sbom_path.exists(): - sbom_path.unlink() - if final_sbom.exists(): - final_sbom.unlink() - - -if __name__ == "__main__": - main() From 764d4376faf0d07d9081fe564884905c7c9ff5b3 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Wed, 22 Apr 2026 13:55:21 -0700 Subject: [PATCH 3/8] FIX: SBOM filename + restore pyodide repair-wheel-command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two blockers the earlier rework introduced: 1. The custom_target output was named 'pandas-vendored.cdx.json', but the Validate SBOM CI step in .github/workflows/wheels.yml looks for '*/sboms/pandas.cdx.json'. Rename the generator output to match — one canonical location for the vendored-code SBOM. 2. Dropping the custom repair-wheel-command override from the [*pyodide*] cibuildwheel override accidentally took the `repair-wheel-command = ""` line upstream had for Pyodide with it. Without that empty override, cibuildwheel falls back to its Linux default (auditwheel), which cannot repair emscripten wheels. Restore the empty override with a comment explaining why. 
--- meson.build | 2 +- pyproject.toml | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/meson.build b/meson.build index 2b24db8214b3b..6bd6e2f62a82d 100644 --- a/meson.build +++ b/meson.build @@ -100,7 +100,7 @@ py.install_sources('pyproject.toml', subdir: 'pandas') # the wheel's .dist-info/sboms/ via the {py_distinfo} placeholder. custom_target( 'pandas-vendored-sbom', - output: 'pandas-vendored.cdx.json', + output: 'pandas.cdx.json', command: [ py, files('scripts/generate_sbom.py'), diff --git a/pyproject.toml b/pyproject.toml index 0dcb5d481d445..a2bc8e7f93f81 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -188,6 +188,8 @@ environment = {CFLAGS="-g0"} [[tool.cibuildwheel.overrides]] select = "*pyodide*" +# Pyodide repairs wheels on its own, using auditwheel-emscripten. +repair-wheel-command = "" # https://github.com/pyodide/pyodide/issues/5805 build-verbosity = 1 build-frontend = "build" From d5f6a56e8579c56283b0eadd5c6b1fec8c7667a9 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Wed, 22 Apr 2026 23:55:48 -0700 Subject: [PATCH 4/8] BUG: address SBOM review findings Five review-round-two findings, fixed together because they all touch the SBOM pipeline: - Restore repair-wheel-command = "delvewheel repair -w {dest_dir} {wheel}" under [tool.cibuildwheel.windows]. The rework dropped the custom script override without restoring the upstream default, so DLL-dependent Windows wheels were going un-bundled. - Correct the ultrajson license expression in LICENSES/vendored.toml from "BSD-3-Clause" to "BSD-3-Clause AND TCL". The vendored ujson code carries TCL-licensed material from its double-to-ascii routine, as already noted in pyproject.toml's license-files list and LICENSES/ULTRAJSON_LICENSE. 
- Make the generated SBOM reproducible: honour SOURCE_DATE_EPOCH for the metadata timestamp (matching meson-python's wheel timestamp convention), and derive the serialNumber deterministically from a SHA-256 of the manifest bytes + pandas version rather than uuid4(). Same vendored.toml + same version now yields byte-identical SBOM output across builds. - Add a bom-ref to metadata.component and reuse that exact value in dependencies[0].ref. CycloneDX dependency-graph consumers expect dependencies[].ref to resolve to a bom-ref in the BOM. - Restore LICENSES/PYUPGRADE_LICENSE and its pyproject license-files entry, plus a pyupgrade component in LICENSES/vendored.toml. The earlier deletion was premature -- pyupgrade-derived code still lives in scripts/validate_unwanted_patterns.py, so the license file needs to ship alongside it. Audit and removal of that code is a separate exercise. --- LICENSES/PYUPGRADE_LICENSE | 19 +++++++++++++++ LICENSES/vendored.toml | 13 +++++++++- pyproject.toml | 2 ++ scripts/generate_sbom.py | 49 ++++++++++++++++++++++++++++++++++---- 4 files changed, 77 insertions(+), 6 deletions(-) create mode 100644 LICENSES/PYUPGRADE_LICENSE diff --git a/LICENSES/PYUPGRADE_LICENSE b/LICENSES/PYUPGRADE_LICENSE new file mode 100644 index 0000000000000..edeac73dade04 --- /dev/null +++ b/LICENSES/PYUPGRADE_LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2017 Anthony Sottile + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. \ No newline at end of file diff --git a/LICENSES/vendored.toml b/LICENSES/vendored.toml index ea4f585bbcd7f..dd1a0544a2120 100644 --- a/LICENSES/vendored.toml +++ b/LICENSES/vendored.toml @@ -63,9 +63,20 @@ description = "Derived SAS file reader code" purl = "pkg:pypi/sas7bdat" website = "https://github.com/jaredhobbs/sas7bdat" +[[component]] +name = "pyupgrade" +license = "MIT" +license_file = "PYUPGRADE_LICENSE" +description = "Ported unwanted-pattern check in scripts/validate_unwanted_patterns.py" +purl = "pkg:pypi/pyupgrade" +website = "https://github.com/asottile/pyupgrade" + [[component]] name = "ultrajson" -license = "BSD-3-Clause" +# Per LICENSES/ULTRAJSON_LICENSE: BSD-3-Clause for ultrajson itself, +# plus TCL-licensed portions derived from the double-to-ascii routine +# (see header in pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c). 
+license = "BSD-3-Clause AND TCL" license_file = "ULTRAJSON_LICENSE" description = "Derived JSON parsing code" purl = "pkg:pypi/ujson" diff --git a/pyproject.toml b/pyproject.toml index a2bc8e7f93f81..48da5e16611ba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,7 @@ license-files = [ "LICENSES/PACKAGING_LICENSE", # Apache-2.0 "LICENSES/PSF_LICENSE", # PSF-2.0 "LICENSES/PYPERCLIP_LICENSE", # BSD-3-Clause + "LICENSES/PYUPGRADE_LICENSE", # MIT "LICENSES/SAS7BDAT_LICENSE", # MIT "LICENSES/ULTRAJSON_LICENSE", # BSD-3-Clause AND TCL "subprojects/fast_float-*/LICENSE-APACHE", # Apache-2.0 @@ -177,6 +178,7 @@ test-command = """ pd.test(extra_args=['-m not clipboard and not single_cpu and not slow and not network and not db', '--numprocesses=2', '--dist=worksteal', '--no-strict-data-files']); \ pd.test(extra_args=['-m not clipboard and single_cpu and not slow and not network and not db', '--no-strict-data-files']);" \ """ +repair-wheel-command = "delvewheel repair -w {dest_dir} {wheel}" [[tool.cibuildwheel.overrides]] select = "*-musllinux*" diff --git a/scripts/generate_sbom.py b/scripts/generate_sbom.py index a03b6079c4347..2ea911691648e 100644 --- a/scripts/generate_sbom.py +++ b/scripts/generate_sbom.py @@ -22,10 +22,11 @@ datetime, timezone, ) +import hashlib import json +import os from pathlib import Path import tomllib -import uuid def is_spdx_expression(license_str: str) -> bool: @@ -76,17 +77,48 @@ def get_pandas_version() -> str: return "0.0.0.dev0" +def _reproducible_timestamp() -> str: + """Return an ISO-8601 timestamp honoring SOURCE_DATE_EPOCH if set. + + Matches the reproducible-builds convention already honored by + meson-python for wheel file mtimes. Falls back to wall-clock UTC + only when no SOURCE_DATE_EPOCH is provided. 
+ """ + sde = os.environ.get("SOURCE_DATE_EPOCH") + if sde: + return datetime.fromtimestamp(int(sde), timezone.utc).isoformat() + return datetime.now(timezone.utc).isoformat() + + +def _deterministic_serial(version: str, manifest_path: Path) -> str: + """Build a urn:uuid serialNumber deterministic in pandas version + manifest. + + CycloneDX requires serialNumber to be unique per BOM, but for + reproducible builds we derive it from inputs so repeated invocations + produce byte-identical output. Hashing manifest bytes + pandas + version yields a stable UUID that still changes when either input + changes. + """ + manifest_bytes = manifest_path.read_bytes() + digest = hashlib.sha256(manifest_bytes + version.encode("utf-8")).hexdigest() + # Lay out the 32-hex digest as a canonical UUID string (8-4-4-4-12). + u = f"{digest[0:8]}-{digest[8:12]}-{digest[12:16]}-{digest[16:20]}-{digest[20:32]}" + return f"urn:uuid:{u}" + + def generate_sbom( version: str | None = None, manifest_path: Path | None = None ) -> dict: """Generate the CycloneDX SBOM document.""" if version is None: version = get_pandas_version() + if manifest_path is None: + manifest_path = Path(__file__).parent.parent / "LICENSES" / "vendored.toml" vendored_components = load_vendored_components(manifest_path) - timestamp = datetime.now(timezone.utc).isoformat() - serial_number = f"urn:uuid:{uuid.uuid4()}" + timestamp = _reproducible_timestamp() + serial_number = _deterministic_serial(version, manifest_path) # Build components list components = [] @@ -113,6 +145,12 @@ def generate_sbom( components.append(component) dependency_refs.append(comp["bom_ref"]) + # Single bom-ref shared by metadata.component and dependencies[0] + # so CycloneDX consumers can resolve the root of the dependency + # graph. See + # https://cyclonedx.org/use-cases/software-dependencies/. 
+ root_bom_ref = f"pkg:pypi/pandas@{version}" + sbom = { "$schema": "https://cyclonedx.org/schema/bom-1.6.schema.json", "bomFormat": "CycloneDX", @@ -132,9 +170,10 @@ def generate_sbom( }, "component": { "type": "library", + "bom-ref": root_bom_ref, "name": "pandas", "version": version, - "purl": f"pkg:pypi/pandas@{version}", + "purl": root_bom_ref, "description": "Powerful data structures for data analysis, " "time series, and statistics", "licenses": [{"license": {"id": "BSD-3-Clause"}}], @@ -154,7 +193,7 @@ def generate_sbom( "components": components, "dependencies": [ { - "ref": f"pkg:pypi/pandas@{version}", + "ref": root_bom_ref, "dependsOn": dependency_refs, } ], From d8357fdea47fe87918ac2e4b2865690424d8e55e Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Thu, 23 Apr 2026 20:34:15 -0700 Subject: [PATCH 5/8] CLN: drop meson>=1.12.0 pin; use py.get_install_dir() for SBOM routing An earlier iteration of this PR required a new meson helper (python.dist_info_install_dir()) that would have landed in meson 1.12.0. Per reviewer feedback on the meson side, that helper is being dropped in favour of a meson-python-only mechanism: meson-python now detects files staged under {py_purelib}/<name>-<version>.dist-info/... in the install plan and reroutes them into the wheel's own .dist-info/. Pandas-side changes: - meson.build: compute the distinfo directory locally from meson.project_name() / meson.project_version() and install the SBOM custom_target output to py.get_install_dir() / distinfo / 'sboms'. - pyproject.toml: revert the meson>=1.12.0 pin; works with any meson version pandas already supports. No new upstream meson dependency. Still requires meson-python >= 0.20.0 for the dist-info prefix detection.
--- meson.build | 10 ++++++---- pyproject.toml | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/meson.build b/meson.build index 6bd6e2f62a82d..f645ebd842d0a 100644 --- a/meson.build +++ b/meson.build @@ -6,7 +6,7 @@ project( 'cython', version: run_command(['generate_version.py', '--print'], check: true).stdout().strip(), license: 'BSD-3', - meson_version: '>=1.12.0', + meson_version: '>=1.2.3', default_options: [ 'buildtype=release', 'c_std=c17', @@ -96,8 +96,10 @@ endif py.install_sources('pyproject.toml', subdir: 'pandas') # PEP 770 SBOM for vendored components. Generated at build time from -# LICENSES/vendored.toml; meson-python (>=0.20.0) routes the output into -# the wheel's .dist-info/sboms/ via the {py_distinfo} placeholder. +# LICENSES/vendored.toml; meson-python (>=0.20.0) recognises the +# <name>-<version>.dist-info/ prefix under {py_purelib} and routes the +# output into the wheel's own .dist-info/sboms/ at pack time. +distinfo = meson.project_name() + '-' + meson.project_version() + '.dist-info' custom_target( 'pandas-vendored-sbom', output: 'pandas.cdx.json', @@ -109,7 +111,7 @@ custom_target( meson.project_version(), ], install: true, - install_dir: py.dist_info_install_dir('sboms'), + install_dir: py.get_install_dir() / distinfo / 'sboms', ) subdir('pandas') diff --git a/pyproject.toml b/pyproject.toml index 48da5e16611ba..a5b54f4f2cde0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ # See https://github.com/scipy/scipy/pull/12940 for the AIX issue.
requires = [ "meson-python>=0.20.0,<1", - "meson>=1.12.0,<2", + "meson>=1.2.3,<2", "wheel", "Cython>3.1.0,<4.0.0a0", # Note: sync with environment.yml and asv.conf.json # Force numpy higher than 2.0, so that built wheels are compatible From eaed75069f8814eb2b86f22b959daed7172eb725 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Thu, 23 Apr 2026 22:07:48 -0700 Subject: [PATCH 6/8] DOC: add PEP 770 SBOM entry to v3.1.0 whatsnew Record the new SBOM shipping at <name>-<version>.dist-info/sboms/pandas.cdx.json for pandas 3.1.0. Generated at build time from LICENSES/vendored.toml. --- doc/source/whatsnew/v3.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.1.0.rst b/doc/source/whatsnew/v3.1.0.rst index f17809a216351..cab6eba7d5eb3 100644 --- a/doc/source/whatsnew/v3.1.0.rst +++ b/doc/source/whatsnew/v3.1.0.rst @@ -37,6 +37,7 @@ Other enhancements - Improved the precision of float parsing in :func:`read_csv` (:issue:`64395`) - Improved the string ``repr`` of :class:`pd.core.arrays.SparseArray` (:issue:`64547`) - MSVC is no longer required to build on Windows, and build errors when using the MinGW compiler have been fixed (:issue:`63160`) +- Wheels now include a `PEP 770 <https://peps.python.org/pep-0770/>`_ Software Bill of Materials (SBOM) in CycloneDX format at ``<name>-<version>.dist-info/sboms/pandas.cdx.json``, describing vendored source-level components. Generated at build time from ``LICENSES/vendored.toml`` (:issue:`63479`) .. --------------------------------------------------------------------------- .. _whatsnew_310.notable_bug_fixes: From 0d4d4ee7bcb2fc733746de0cfb273ab1709cf574 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Thu, 23 Apr 2026 22:29:21 -0700 Subject: [PATCH 7/8] DOC: trim whatsnew SBOM entry to match neighbor style The original entry was 45 words; pandas whatsnew bullets are typically one short sentence (~20 words). Compact to fit.
--- doc/source/whatsnew/v3.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.1.0.rst b/doc/source/whatsnew/v3.1.0.rst index cab6eba7d5eb3..5c538025113e8 100644 --- a/doc/source/whatsnew/v3.1.0.rst +++ b/doc/source/whatsnew/v3.1.0.rst @@ -37,7 +37,7 @@ Other enhancements - Improved the precision of float parsing in :func:`read_csv` (:issue:`64395`) - Improved the string ``repr`` of :class:`pd.core.arrays.SparseArray` (:issue:`64547`) - MSVC is no longer required to build on Windows, and build errors when using the MinGW compiler have been fixed (:issue:`63160`) -- Wheels now include a `PEP 770 <https://peps.python.org/pep-0770/>`_ Software Bill of Materials (SBOM) in CycloneDX format at ``<name>-<version>.dist-info/sboms/pandas.cdx.json``, describing vendored source-level components. Generated at build time from ``LICENSES/vendored.toml`` (:issue:`63479`) +- Wheels now include a `PEP 770 <https://peps.python.org/pep-0770/>`_ CycloneDX Software Bill of Materials (SBOM) at ``.dist-info/sboms/pandas.cdx.json`` describing vendored components (:issue:`63479`) .. --------------------------------------------------------------------------- .. _whatsnew_310.notable_bug_fixes: From 5584c433b589a4fee8acea61980761735c5381a2 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Fri, 24 Apr 2026 00:05:48 -0700 Subject: [PATCH 8/8] BLD: temporarily pin meson-python to fork branch for CI This is a scaffold commit so CI can resolve the build dependency against mesonbuild/meson-python#843 while that PR is under review. The pandas SBOM routing relies on path-prefix detection added in that meson-python branch; once it lands in a released meson-python (0.20.0 or later), this commit should be dropped and the "meson-python>=0.20.0,<1" pin restored. NOT FOR MERGE.
--- pyproject.toml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a5b54f4f2cde0..80ae49a81125f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,10 @@ # Minimum requirements for the build system to execute. # See https://github.com/scipy/scipy/pull/12940 for the AIX issue. requires = [ - "meson-python>=0.20.0,<1", + # TEMPORARY: pin to the meson-python fork branch that implements + # PEP 770 SBOM routing (mesonbuild/meson-python#843). Revert to + # "meson-python>=0.20.0,<1" once that PR lands in a released version. + "meson-python @ git+https://github.com/fangchenli/meson-python.git@feat/distinfo-placeholder", "meson>=1.2.3,<2", "wheel", "Cython>3.1.0,<4.0.0a0", # Note: sync with environment.yml and asv.conf.json