#!/usr/bin/env bash
#
# libabigail: deterministic strip-and-repack of upstream `libabigail-2.9.tar.xz`
# with the PR30329 testsuite fixture set (which trips anti-malware scanning on
# the AZL RPM-signing pipeline) removed. The two entries in
# `tests/test-abidiff-exit.cc` that exercise the removed fixture are dropped
# by a companion overlay patch (see `libabigail.comp.toml`); this script does
# file removal only, no in-tarball source patching.
# Rationale lives in the comp.toml `replace-reason` field.
#
# Usage:  bash base/comps/libabigail/modify_source.sh
# Output: base/build/work/scratch/libabigail/libabigail-2.9.tar.xz (+ .sha512)
# The upstream tarball is cached under a `.upstream` suffix; re-runs reuse it.

# Abort on any failed command, unset variable, or failed pipeline stage.
set -euo pipefail

# Pin umask so the extraction step produces the same mode bits regardless of
# the caller's umask. With `--no-same-permissions`, tar ANDs each entry's mode
# against `~umask`, so e.g. umask 077 would silently strip group/other read
# bits and change the bytes of the repacked tarball. The repack step does not
# re-assert per-file modes (only owner/group/mtime), so this pin is what
# guarantees a byte-identical output across machines.
umask 022

# Same reasoning for the environment: GNU tar prepends the contents of
# TAR_OPTIONS to its command line, and xz parses XZ_DEFAULTS and XZ_OPT before
# its command-line arguments. Any of them can inject options (excludes,
# integrity-check type, memory limits, ...) that alter the repacked bytes, so
# drop them before tar or xz is invoked.
unset TAR_OPTIONS XZ_DEFAULTS XZ_OPT

# --- Constants --------------------------------------------------------------

# Upstream identity: component name, version, and the file/directory names
# and download URL derived from them.
readonly COMPONENT="libabigail"
readonly UPSTREAM_VERSION="2.9"
readonly UPSTREAM_FILENAME="${COMPONENT}-${UPSTREAM_VERSION}.tar.xz"
readonly UPSTREAM_TOPDIR="${COMPONENT}-${UPSTREAM_VERSION}"
readonly UPSTREAM_URL="https://mirrors.kernel.org/sourceware/libabigail/${UPSTREAM_FILENAME}"

# Expected SHA-512 of the pristine upstream tarball; the download (or cache)
# is rejected unless it matches.
readonly UPSTREAM_SHA512="5bdf5ec49a5931a61bf28317b41eee583d6277d00ac621b2d2a97bbc0d816c3662bcfe13a5ac7aeee11c947afb69a5a0a9a8015fcebad09965b45af9b1e23606"

# Directories (relative to ${UPSTREAM_TOPDIR}) to strip in their entirety. The
# PR30329 fixture set is a libabigail abidiff regression test built around a
# pair of stripped sqlite3 shared libraries + their separated debuginfo +
# dwz-multifile components. The two `libsqlite3.so.0.8.6.debug` separated-
# debuginfo files inside it are flagged as encrypted/unscannable payloads by
# the AV scanner ("packer_high_entropy:eod") in the AZL RPM-signing pipeline.
# We strip the whole PR30329/ directory (not just the two .debug files) so
# nothing in the tarball still references the missing pieces; the two
# corresponding `InOutSpec` entries in tests/test-abidiff-exit.cc are dropped
# by the companion overlay patch
# `tests-drop-PR30329-fixture-entries.patch` (see libabigail.comp.toml) so
# `make check` still passes.
readonly REMOVE_DIRS=(
    "tests/data/test-abidiff-exit/PR30329"
)

# Deterministic-repack mtime: 2020-01-01T00:00:00Z (1577836800).
# Any fixed epoch works; do not change without also bumping the
# `hash` in libabigail.comp.toml.
readonly DETERMINISTIC_MTIME="@1577836800"

# --- Work directory ---------------------------------------------------------
#
# All paths are derived from this script's own location, so the script can be
# invoked from any current directory. The repository root is three levels
# above the script's directory.
#
# `CDPATH=` is cleared for each `cd` inside a command substitution: with an
# inherited CDPATH, `cd <relative-dir>` may resolve somewhere else and also
# prints the directory it entered, which would corrupt the captured path.
# `--` keeps a path that starts with `-` from being parsed as an option.

SCRIPT_DIR="$(CDPATH= cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(CDPATH= cd -- "${SCRIPT_DIR}/../../.." && pwd)"
WORKDIR="${REPO_ROOT}/base/build/work/scratch/${COMPONENT}"
readonly SCRIPT_DIR REPO_ROOT WORKDIR

# Every later step reads and writes inside the scratch directory.
mkdir -p -- "${WORKDIR}"
cd -- "${WORKDIR}"

echo "[1/5] Working in ${WORKDIR}"

# --- Download upstream ------------------------------------------------------
#
# The upstream tarball is cached under a `.upstream` suffix so that
# the repacked output written at the canonical `${UPSTREAM_FILENAME}`
# path cannot clobber the cache on re-runs. Treat the cache
# as authoritative only after SHA-512 verification.

UPSTREAM_CACHE="${WORKDIR}/${UPSTREAM_FILENAME}.upstream"

if [[ -f "${UPSTREAM_CACHE}" ]]; then
    echo "[2/5] Using cached upstream tarball ${UPSTREAM_CACHE}"
else
    echo "[2/5] Downloading ${UPSTREAM_FILENAME} from ${UPSTREAM_URL}"
    # `--proto` / `--proto-redir` restrict the initial request *and* any
    # redirect target to HTTPS, so a downgrade to plain HTTP is refused.
    # The transfer lands in a `.part` file and is renamed only on success,
    # so an interrupted download is never mistaken for a complete cache.
    DOWNLOAD_STATUS=0
    curl -fsSL --retry 3 \
        --proto '=https' --proto-redir '=https' \
        -o "${UPSTREAM_CACHE}.part" "${UPSTREAM_URL}" \
        || DOWNLOAD_STATUS=$?
    if (( DOWNLOAD_STATUS != 0 )); then
        # curl restarts from scratch on the next run, so a partial file is
        # pure dead weight; remove it and propagate curl's exit status.
        rm -f -- "${UPSTREAM_CACHE}.part"
        exit "${DOWNLOAD_STATUS}"
    fi
    mv -- "${UPSTREAM_CACHE}.part" "${UPSTREAM_CACHE}"
fi

# --- Verify upstream SHA-512 ------------------------------------------------
#
# Runs on every invocation, for a fresh download and for a cached file alike.

echo "[3/5] Verifying upstream SHA-512"
# Hash stdin rather than a named file: the output is then always
# `<hex>  -`, independent of the cache path, and the digest is everything
# before the first space.
COMPUTED_UPSTREAM_SHA512="$(sha512sum < "${UPSTREAM_CACHE}")"
COMPUTED_UPSTREAM_SHA512="${COMPUTED_UPSTREAM_SHA512%% *}"
if [[ "${COMPUTED_UPSTREAM_SHA512}" != "${UPSTREAM_SHA512}" ]]; then
    echo "ERROR: upstream SHA-512 mismatch (cache may be corrupt; delete ${UPSTREAM_CACHE} and re-run)" >&2
    echo " expected: ${UPSTREAM_SHA512}" >&2
    echo " computed: ${COMPUTED_UPSTREAM_SHA512}" >&2
    exit 1
fi

# --- Extract + strip --------------------------------------------------------

echo "[4/5] Extracting and stripping ${#REMOVE_DIRS[@]} fixture dir(s) from ${UPSTREAM_TOPDIR}"
# Start from a clean tree so leftovers from a previous run cannot leak into
# the repack. `${var:?}` aborts if a path component is ever empty, so this
# `rm -rf` cannot collapse onto a parent directory.
rm -rf -- "${WORKDIR:?}/${UPSTREAM_TOPDIR:?}"
# `--no-same-owner` / `--no-same-permissions` prevent tar from applying the
# archive's uid/gid/mode bits to the extracted tree. They are already the
# default for non-root users, but explicit hardening makes the script safe
# to run under sudo (where the defaults flip) and defends against any
# setuid/setgid bits or unexpected ownership in the upstream tarball.
# Deterministic owner/group is re-asserted when the tree is repacked.
tar -C "${WORKDIR}" --no-same-owner --no-same-permissions -xf "${UPSTREAM_CACHE}"
for REMOVE_DIR in "${REMOVE_DIRS[@]}"; do
    # A missing directory means the upstream layout is not what this script
    # expects; fail loudly instead of repacking an unmodified tree.
    if [[ ! -d "${WORKDIR}/${UPSTREAM_TOPDIR}/${REMOVE_DIR}" ]]; then
        echo "ERROR: expected '${UPSTREAM_TOPDIR}/${REMOVE_DIR}' not present in upstream tarball" >&2
        exit 1
    fi
    echo " stripping ${UPSTREAM_TOPDIR}/${REMOVE_DIR}"
    # An empty REMOVE_DIRS entry would otherwise resolve to the whole
    # extracted tree; `:?` turns that into a hard error.
    rm -rf -- "${WORKDIR:?}/${UPSTREAM_TOPDIR:?}/${REMOVE_DIR:?}"
done

# --- Repack deterministically -----------------------------------------------

echo "[5/5] Repacking deterministically as ${UPSTREAM_FILENAME}"
# Deterministic flags:
#   --sort=name            stable entry order
#   --owner=0 --group=0    no host uid/gid leakage
#   --numeric-owner        force numeric uid/gid
#   --mtime=@<epoch>       fixed mtime
#   --format=gnu           handles long paths deterministically
# LC_ALL=C pins sort collation so --sort=name is locale-independent.
# xz -9e -T1 picks max compression with single-threaded output (multi-threaded
# xz produces non-deterministic byte streams). The upstream tarball is .xz so
# we re-emit .xz to keep the filename and Source0 unchanged.
# --no-adjust makes xz fail instead of silently scaling its settings down to
# fit a memory limit, which would change the compressed bytes.
#
# Heads-up: this step is slow. libabigail-2.9 unpacks to ~990 MiB (the source
# tree is dominated by abidiff regression-test fixtures), so the single-
# threaded `xz -9e` pass below is on the order of minutes, not seconds.
# Reference timing on a 12th-gen Intel desktop (i9-12900K, 12 vCPUs): ~6-7
# minutes wall time for the full tar+xz pipeline (xz dominates; tar itself
# is a few seconds). The download (~500 MiB) and extract/strip steps before
# this finish in well under a minute on the same hardware. Slower CPUs can
# easily push this past 10 minutes -- so if it looks hung, give it time.
MODIFIED_TARBALL="${WORKDIR}/${UPSTREAM_FILENAME}"
# Clear every artifact of a previous run first, so an interrupted repack can
# never leave an old `.sha512` next to a tarball it does not describe.
rm -f -- "${MODIFIED_TARBALL}" "${MODIFIED_TARBALL}.part" "${MODIFIED_TARBALL}.sha512"
# Stream into a `.part` file and rename only after the whole pipeline has
# succeeded (`pipefail` covers both tar and xz); the canonical path therefore
# only ever holds a complete archive.
LC_ALL=C tar \
    -C "${WORKDIR}" \
    --sort=name \
    --owner=0 --group=0 --numeric-owner \
    --mtime="${DETERMINISTIC_MTIME}" \
    --format=gnu \
    -cf - "${UPSTREAM_TOPDIR}" \
    | xz -9e -T1 --no-adjust -c > "${MODIFIED_TARBALL}.part"
mv -- "${MODIFIED_TARBALL}.part" "${MODIFIED_TARBALL}"

# Hash via stdin so the digest is everything before the first space, then
# record it in the `<hash>  <name>` layout that `sha512sum` itself prints, so
# `sha512sum -c` can check it from inside the work directory.
MODIFIED_SHA512="$(sha512sum < "${MODIFIED_TARBALL}")"
MODIFIED_SHA512="${MODIFIED_SHA512%% *}"
printf '%s  %s\n' "${MODIFIED_SHA512}" "${UPSTREAM_FILENAME}" > "${MODIFIED_TARBALL}.sha512"

# --- Summary ----------------------------------------------------------------
#
# Report where the repacked tarball landed and its SHA-512, then print the
# `az storage blob upload` command for publishing it to the lookaside. The
# blob name embeds the SHA-512, so the command must be regenerated whenever
# the tarball changes. Inside the unquoted here-doc, `\\` emits a single
# literal backslash (the line-continuation marker of the printed command).
cat <<EOF

================================================================
DONE
 modified tarball: ${WORKDIR}/${UPSTREAM_FILENAME}
 SHA512: ${MODIFIED_SHA512}
================================================================

 To upload the modified tarball to the lookaside:
 az storage blob upload \\
 --auth-mode login \\
 --account-name azltempstaginglookaside \\
 --container-name repo \\
 --name "pkgs_modified/${COMPONENT}/${UPSTREAM_FILENAME}/sha512/${MODIFIED_SHA512}/${UPSTREAM_FILENAME}" \\
 --file "${WORKDIR}/${UPSTREAM_FILENAME}"
EOF