From b32702a5399f32712fbcd2d5f73e58ecbf48a70b Mon Sep 17 00:00:00 2001 From: Lawrence Elitzer Date: Sun, 12 Apr 2026 09:16:43 -0500 Subject: [PATCH 1/6] feat: add GHA workflow to build opencv-python-headless wheels without ffmpeg Builds opencv-python-headless from source with WITH_FFMPEG=OFF on both amd64 and arm64 using Chainguard wolfi-base. Wheels are uploaded as a GitHub release so Dockerfiles can pull them at build time, eliminating the 14 bundled ffmpeg CVEs from the stock PyPI wheels. Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/build-opencv-wheels.yml | 113 ++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 .github/workflows/build-opencv-wheels.yml diff --git a/.github/workflows/build-opencv-wheels.yml b/.github/workflows/build-opencv-wheels.yml new file mode 100644 index 0000000000..8645f9228a --- /dev/null +++ b/.github/workflows/build-opencv-wheels.yml @@ -0,0 +1,113 @@ +name: Build OpenCV Wheels (No FFmpeg) + +# Produces opencv-python-headless wheels compiled from source with +# WITH_FFMPEG=OFF to eliminate bundled ffmpeg CVEs from PyPI wheels. +# Wheels are uploaded as a GitHub release so both unstructured and +# unstructured-api Dockerfiles can download them at build time. + +on: + workflow_dispatch: + inputs: + opencv_version: + description: "opencv-python-headless version to build (must have sdist on PyPI)" + required: true + default: "4.12.0.88" + release_tag: + description: "GitHub release tag for the wheels (e.g. 
opencv-4.12.0.88)" + required: true + default: "opencv-4.12.0.88" + +jobs: + build-wheel: + strategy: + fail-fast: false + matrix: + include: + - arch: amd64 + runs-on: ubuntu-latest-8-cores + docker-platform: linux/amd64 + - arch: arm64 + runs-on: ubuntu-latest-arm-8-cores + docker-platform: linux/arm64 + runs-on: ${{ matrix.runs-on }} + env: + OPENCV_VERSION: ${{ inputs.opencv_version }} + DOCKER_PLATFORM: ${{ matrix.docker-platform }} + steps: + - name: Build opencv-python-headless from source + run: | + mkdir -p wheels + docker run --rm \ + --platform="$DOCKER_PLATFORM" \ + -e "OPENCV_VERSION=$OPENCV_VERSION" \ + -v "$PWD/wheels:/out" \ + cgr.dev/chainguard/wolfi-base:latest sh -c ' + set -euo pipefail + apk update + apk add python-3.12 python-3.12-dev python-3.12-base-dev \ + opencv-dev cmake gcc glibc-dev libstdc++-dev make pkgconf \ + py3.12-pip py3.12-numpy + + CMAKE_ARGS="-DWITH_FFMPEG=OFF" \ + ENABLE_HEADLESS=1 \ + python3.12 -m pip wheel \ + --no-binary opencv-python-headless \ + --no-deps \ + "opencv-python-headless==${OPENCV_VERSION}" \ + -w /out + + echo "=== Validate no bundled ffmpeg ===" + python3.12 -m pip install /out/opencv_python_headless-*.whl + python3.12 -c " + import cv2, pathlib + d = pathlib.Path(cv2.__file__).parent + libs = d / \".libs\" + assert not libs.exists(), f\"Unexpected .libs dir: {list(libs.iterdir())}\" + print(f\"OK: cv2 {cv2.__version__}, no bundled ffmpeg\") + " + ' + ls -lh wheels/ + + - name: Upload wheel artifact + uses: actions/upload-artifact@v4 + with: + name: opencv-wheel-${{ matrix.arch }} + path: wheels/opencv_python_headless-*.whl + retention-days: 90 + + create-release: + needs: build-wheel + runs-on: ubuntu-latest + permissions: + contents: write + env: + OPENCV_VERSION: ${{ inputs.opencv_version }} + RELEASE_TAG: ${{ inputs.release_tag }} + steps: + - name: Download all wheel artifacts + uses: actions/download-artifact@v4 + with: + path: wheels + merge-multiple: true + + - name: List wheels + run: ls -lh 
wheels/ + + - name: Create GitHub Release + env: + GH_TOKEN: ${{ github.token }} + run: | + gh release create "$RELEASE_TAG" \ + --repo "$GITHUB_REPOSITORY" \ + --title "OpenCV Wheels ${OPENCV_VERSION} (no ffmpeg)" \ + --notes "$(cat < Date: Mon, 13 Apr 2026 09:19:57 -0500 Subject: [PATCH 2/6] ci: skip PyPI release workflow for opencv-* release tags The build-opencv-wheels workflow creates GitHub releases tagged `opencv-*` to publish prebuilt wheels for Docker consumption. Those releases would otherwise trigger release.yml and fail the package-version validation, producing a spurious PyPI publish failure for every OpenCV wheel build. Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/release.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 6b72a202eb..4273fc0cc6 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -18,6 +18,10 @@ env: jobs: release: + # Skip opencv wheel releases (tagged 'opencv-*') produced by the + # build-opencv-wheels workflow - those aren't package releases and would + # otherwise trigger a spurious PyPI publish failure. + if: ${{ !startsWith(github.event.release.tag_name, 'opencv-') }} runs-on: ubuntu-latest steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 From 2d16d9469412f0d423aaa6dca740a011218ac12f Mon Sep 17 00:00:00 2001 From: Lawrence Elitzer Date: Mon, 13 Apr 2026 14:37:19 -0500 Subject: [PATCH 3/6] feat(ci): build opencv-contrib-python-headless instead of opencv-python-headless The PyPI opencv-python and opencv-contrib-python packages, which are pulled in transitively via unstructured-paddleocr (and unstructured-inference on py<3.12), ship the same bundled ffmpeg CVEs that the workflow was originally built to eliminate. Building only the plain opencv-python-headless variant left ~2/3 of the CVE surface untouched.
Switch the build to opencv-contrib-python-headless, which is a strict superset of the other three Python-level APIs (core + contrib modules, no GUI / X11). A single wheel can then be used to replace all four opencv-* package names in downstream Dockerfiles, eliminating every bundled ffmpeg CVE. Validated locally on arm64 against wolfi-base: - wheel is 22MB (vs 60-76MB PyPI wheels), linked against glibc 2.43 - cv2.getBuildInformation() reports GUI: NONE, no FFMPEG section, GStreamer: NO, only cv2.abi3.so on disk (no .libs/) - all contrib modules present (ximgproc, aruco, xfeatures2d, text, bgsegm, dnn_superres) - full PaddleOCR smoke test passes end-to-end (detection + recognition + angle classification on a real document image), with the wheel substituted for opencv-python, opencv-python-headless, and opencv-contrib-python Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/build-opencv-wheels.yml | 37 +++++++++++++++-------- 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/.github/workflows/build-opencv-wheels.yml b/.github/workflows/build-opencv-wheels.yml index 8645f9228a..efbc0aa75e 100644 --- a/.github/workflows/build-opencv-wheels.yml +++ b/.github/workflows/build-opencv-wheels.yml @@ -1,7 +1,11 @@ name: Build OpenCV Wheels (No FFmpeg) -# Produces opencv-python-headless wheels compiled from source with +# Produces opencv-contrib-python-headless wheels compiled from source with # WITH_FFMPEG=OFF to eliminate bundled ffmpeg CVEs from PyPI wheels. +# The contrib-headless variant is a strict superset of opencv-python, +# opencv-python-headless, and opencv-contrib-python: same cv2 module with +# core + contrib modules and no GUI / no X11. One wheel can therefore +# satisfy all four opencv-* package names in downstream Dockerfiles. # Wheels are uploaded as a GitHub release so both unstructured and # unstructured-api Dockerfiles can download them at build time. 
@@ -9,7 +13,7 @@ on: workflow_dispatch: inputs: opencv_version: - description: "opencv-python-headless version to build (must have sdist on PyPI)" + description: "opencv-contrib-python-headless version to build (must have sdist on PyPI)" required: true default: "4.12.0.88" release_tag: @@ -34,7 +38,7 @@ jobs: OPENCV_VERSION: ${{ inputs.opencv_version }} DOCKER_PLATFORM: ${{ matrix.docker-platform }} steps: - - name: Build opencv-python-headless from source + - name: Build opencv-contrib-python-headless from source run: | mkdir -p wheels docker run --rm \ @@ -49,21 +53,25 @@ jobs: py3.12-pip py3.12-numpy CMAKE_ARGS="-DWITH_FFMPEG=OFF" \ + ENABLE_CONTRIB=1 \ ENABLE_HEADLESS=1 \ python3.12 -m pip wheel \ - --no-binary opencv-python-headless \ + --no-binary opencv-contrib-python-headless \ --no-deps \ - "opencv-python-headless==${OPENCV_VERSION}" \ + "opencv-contrib-python-headless==${OPENCV_VERSION}" \ -w /out - echo "=== Validate no bundled ffmpeg ===" - python3.12 -m pip install /out/opencv_python_headless-*.whl + echo "=== Validate no bundled ffmpeg and contrib modules present ===" + python3.12 -m pip install /out/opencv_contrib_python_headless-*.whl python3.12 -c " import cv2, pathlib d = pathlib.Path(cv2.__file__).parent libs = d / \".libs\" assert not libs.exists(), f\"Unexpected .libs dir: {list(libs.iterdir())}\" - print(f\"OK: cv2 {cv2.__version__}, no bundled ffmpeg\") + # Sanity check that contrib modules made it into the build + assert hasattr(cv2, \"ximgproc\"), \"contrib module cv2.ximgproc missing\" + assert hasattr(cv2, \"aruco\"), \"contrib module cv2.aruco missing\" + print(f\"OK: cv2 {cv2.__version__}, contrib modules present, no bundled ffmpeg\") " ' ls -lh wheels/ @@ -72,7 +80,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: opencv-wheel-${{ matrix.arch }} - path: wheels/opencv_python_headless-*.whl + path: wheels/opencv_contrib_python_headless-*.whl retention-days: 90 create-release: @@ -101,13 +109,18 @@ jobs: --repo "$GITHUB_REPOSITORY" 
\ --title "OpenCV Wheels ${OPENCV_VERSION} (no ffmpeg)" \ --notes "$(cat < Date: Mon, 13 Apr 2026 15:13:51 -0500 Subject: [PATCH 4/6] ci: pass --latest=false when creating opencv wheel release Without this flag gh defaults to marking the release as "Latest", which would cause these auxiliary wheel releases to displace the actual unstructured package release on the repo's Releases page and confuse downstream tools that key off the "latest" release endpoint. Addresses cursor bugbot feedback on PR #4335. Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/build-opencv-wheels.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/build-opencv-wheels.yml b/.github/workflows/build-opencv-wheels.yml index efbc0aa75e..6cba85e7a3 100644 --- a/.github/workflows/build-opencv-wheels.yml +++ b/.github/workflows/build-opencv-wheels.yml @@ -105,9 +105,12 @@ jobs: env: GH_TOKEN: ${{ github.token }} run: | + # --latest=false keeps this auxiliary wheel release from displacing + # the actual package release on the repo's Releases page. gh release create "$RELEASE_TAG" \ --repo "$GITHUB_REPOSITORY" \ --title "OpenCV Wheels ${OPENCV_VERSION} (no ffmpeg)" \ + --latest=false \ --notes "$(cat < Date: Mon, 13 Apr 2026 15:57:21 -0500 Subject: [PATCH 5/6] ci: install docker on arm64 runners ubuntu-latest-arm-X-cores doesn't ship docker preinstalled. Install via the official get.docker.com script and chmod the socket so the runner user can use docker without a session re-login. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/build-opencv-wheels.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/.github/workflows/build-opencv-wheels.yml b/.github/workflows/build-opencv-wheels.yml index 6cba85e7a3..d2895d9d98 100644 --- a/.github/workflows/build-opencv-wheels.yml +++ b/.github/workflows/build-opencv-wheels.yml @@ -38,6 +38,19 @@ jobs: OPENCV_VERSION: ${{ inputs.opencv_version }} DOCKER_PLATFORM: ${{ matrix.docker-platform }} steps: + # Docker is preinstalled on amd64 ubuntu-latest runners but not on + # the arm64 ubuntu-latest-arm-X-cores image. Install it on demand + # and make the socket accessible to the runner user without needing + # to re-login for the docker group to take effect. + - name: Install Docker (arm64 runner) + if: matrix.arch == 'arm64' + run: | + set -euo pipefail + curl -fsSL https://get.docker.com | sudo sh + sudo systemctl start docker + sudo chmod 666 /var/run/docker.sock + docker version --format '{{.Server.Version}}' + - name: Build opencv-contrib-python-headless from source run: | mkdir -p wheels From f03df11f0e35e226737d9146f3ac2145999fcdbe Mon Sep 17 00:00:00 2001 From: Lawrence Elitzer Date: Mon, 13 Apr 2026 16:14:03 -0500 Subject: [PATCH 6/6] ci: retry apk install on transient chainguard mirror errors The chainguard apk mirror occasionally returns mid-install errors when fetching packages (e.g. py3.12-numpy). Wrap apk update/add in a 3-attempt retry loop, mirroring the pattern already used in unstructured/Dockerfile.
Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/build-opencv-wheels.yml | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-opencv-wheels.yml b/.github/workflows/build-opencv-wheels.yml index d2895d9d98..13201fcb69 100644 --- a/.github/workflows/build-opencv-wheels.yml +++ b/.github/workflows/build-opencv-wheels.yml @@ -60,10 +60,19 @@ jobs: -v "$PWD/wheels:/out" \ cgr.dev/chainguard/wolfi-base:latest sh -c ' set -euo pipefail - apk update - apk add python-3.12 python-3.12-dev python-3.12-base-dev \ - opencv-dev cmake gcc glibc-dev libstdc++-dev make pkgconf \ - py3.12-pip py3.12-numpy + # Retry apk install: the chainguard mirror occasionally returns + # transient errors mid-install (matches the pattern in unstructured/Dockerfile). + apk_ok=false + for attempt in 1 2 3; do + apk update && \ + apk add python-3.12 python-3.12-dev python-3.12-base-dev \ + opencv-dev cmake gcc glibc-dev libstdc++-dev make pkgconf \ + py3.12-pip py3.12-numpy && \ + apk_ok=true && break + echo "apk install failed (attempt $attempt/3), retrying in 5s..." + sleep 5 + done + $apk_ok || { echo "apk install failed after 3 attempts"; exit 1; } CMAKE_ARGS="-DWITH_FFMPEG=OFF" \ ENABLE_CONTRIB=1 \