Skip to content

Add in some testing matrix updates (simple version) #2

Add in some testing matrix updates (simple version)

Add in some testing matrix updates (simple version) #2

name: "CI: Test wheels"
on:
workflow_call:
inputs:
build-type:
type: string
required: true
host-platform:
type: string
required: true
build-ctk-ver:
type: string
required: true
local-ctk:
type: string
required: true
jobs:
compute-matrix:
runs-on: ubuntu-latest
env:
BUILD_TYPE: ${{ inputs.build_type }}
ARCH: ${{ (inputs.host-platform == 'linux-64' && 'amd64') ||
(inputs.host-platform == 'linux-aarch64' && 'arm64) }}
outputs:
MATRIX: ${{ steps.compute-matrix.outputs.MATRIX }}
steps:
- name: Validate Test Type
run: |
if [[ "$BUILD_TYPE" != "pull-request" ]] && [[ "$BUILD_TYPE" != "nightly" ]] && [[ "$BUILD_TYPE" != "branch" ]]; then
echo "Invalid build type! Must be one of 'nightly', 'pull-request', or 'branch'."
exit 1
fi
- name: Compute Python Test Matrix
id: compute-matrix
run: |
set -eo pipefail
# Please keep the matrices sorted in ascending order by the following:
#
# [PY_VER, CUDA_VER, LINUX_VER, GPU, DRIVER]
#
gpu="l4"
if [[ "${ARCH}" == "arm64" ]]; then
gpu="a100"
fi
export MATRICES="
pull-request:
- { ARCH=${ARCH}, PY_VER: '3.9', CUDA_VER: '11.8.0', LINUX_VER: 'rockylinux8', GPU: ${gpu}, DRIVER: 'earliest' }
- { ARCH=${ARCH}, PY_VER: '3.9', CUDA_VER: '12.0.1', LINUX_VER: 'ubuntu24.04', GPU: ${gpu}, DRIVER: 'latest' }
- { ARCH=${ARCH}, PY_VER: '3.13', CUDA_VER: '12.8.0', LINUX_VER: 'ubuntu22.04', GPU: ${gpu}, DRIVER: 'latest' }
nightly:
- { ARCH=${ARCH}, PY_VER: '3.9', CUDA_VER: '11.8.0', LINUX_VER: 'rockylinux8', GPU: ${gpu}, DRIVER: 'earliest' }
- { ARCH=${ARCH}, PY_VER: '3.13', CUDA_VER: '12.8.0', LINUX_VER: 'ubuntu22.04', GPU: ${gpu}, DRIVER: 'latest' }
"
# Use the nightly matrix for branch tests
MATRIX_TYPE="${BUILD_TYPE}"
if [[ "${MATRIX_TYPE}" == "branch" ]]; then
MATRIX_TYPE="nightly"
fi
export MATRIX_TYPE
TEST_MATRIX=$(yq -n 'env(MATRICES) | .[strenv(MATRIX_TYPE)]')
export TEST_MATRIX
MATRIX="$(
yq -n -o json 'env(TEST_MATRIX)' | \
jq -c '${{ inputs.matrix_filter }} | if (. | length) > 0 then {include: .} else "Error: Empty matrix\n" | halt_error(1) end'
)"
echo "MATRIX=${MATRIX}" | tee --append "${GITHUB_OUTPUT}"
test:
needs: compute-matrix
strategy:
fail-fast: false
matrix: ${{ fromJSON(needs.compute-matrix.outputs.MATRIX) }}
runs-on: "linux-${{ matrix.ARCH }}-gpu-${{ matrix.GPU }}-${{ matrix.DRIVER }}-1"
# The build stage could fail but we want the CI to keep moving.
if: ${{ github.repository_owner == 'nvidia' && !cancelled() }}
# Our self-hosted runners require a container
# TODO: use a different (nvidia?) container
container:
options: -u root --security-opt seccomp=unconfined --shm-size 16g
image: ubuntu:22.04
env:
NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
defaults:
run:
shell: bash --noprofile --norc -xeuo pipefail {0}
steps:
- name: Ensure GPU is working
run: nvidia-smi
- name: Checkout ${{ github.event.repository.name }}
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set environment variables
run: |
PYTHON_VERSION_FORMATTED=$(echo '${{ matrix.PY_VER }}' | tr -d '.')
if [[ "${{ inputs.host-platform }}" == linux* ]]; then
REPO_DIR=$(pwd)
elif [[ "${{ inputs.host-platform }}" == win* ]]; then
PWD=$(pwd)
REPO_DIR=$(cygpath -w $PWD)
fi
BUILD_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${{ inputs.build-ctk-ver }})"
TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${{ matrix.CUDA_VER }})"
if [[ $BUILD_CUDA_MAJOR != $TEST_CUDA_MAJOR ]]; then
SKIP_CUDA_BINDINGS_TEST=1
SKIP_CUDA_CORE_CYTHON_TEST=0
else
SKIP_CUDA_BINDINGS_TEST=0
BUILD_CUDA_MINOR="$(cut -d '.' -f 2 <<< ${{ inputs.build-ctk-ver }})"
TEST_CUDA_MINOR="$(cut -d '.' -f 2 <<< ${{ matrix.CUDA_VER }})"
if [[ $BUILD_CUDA_MINOR != $TEST_CUDA_MINOR ]]; then
SKIP_CUDA_CORE_CYTHON_TEST=1
else
SKIP_CUDA_CORE_CYTHON_TEST=0
fi
fi
# make outputs from the previous job as env vars
CUDA_CORE_ARTIFACT_BASENAME="cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ inputs.host-platform }}"
echo "PYTHON_VERSION_FORMATTED=${PYTHON_VERSION_FORMATTED}" >> $GITHUB_ENV
echo "CUDA_CORE_ARTIFACT_BASENAME=${CUDA_CORE_ARTIFACT_BASENAME}" >> $GITHUB_ENV
echo "CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV
echo "CUDA_CORE_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_core/dist")" >> $GITHUB_ENV
CUDA_BINDINGS_ARTIFACT_BASENAME="cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ inputs.build-ctk-ver }}-${{ inputs.host-platform }}"
echo "CUDA_BINDINGS_ARTIFACT_BASENAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}" >> $GITHUB_ENV
echo "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV
echo "CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_bindings/dist")" >> $GITHUB_ENV
echo "SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}" >> $GITHUB_ENV
echo "SKIP_CUDA_CORE_CYTHON_TEST=${SKIP_CUDA_CORE_CYTHON_TEST}" >> $GITHUB_ENV
- name: Install dependencies
uses: ./.github/actions/install_unix_deps
continue-on-error: false
with:
# gcc for Cython tests, jq/wget for artifact fetching
dependencies: "build-essential jq wget"
dependent_exes: "gcc jq wget"
- name: Download cuda-python build artifacts
if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0'}}
uses: actions/download-artifact@v4
with:
name: cuda-python-wheel
path: .
- name: Download cuda.bindings build artifacts
if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0'}}
uses: actions/download-artifact@v4
with:
name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
- name: Download cuda-python & cuda.bindings build artifacts from the prior branch
if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '1'}}
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
# See https://github.com/cli/cli/blob/trunk/docs/install_linux.md#debian-ubuntu-linux-raspberry-pi-os-apt.
# gh is needed for artifact fetching.
mkdir -p -m 755 /etc/apt/keyrings \
&& out=$(mktemp) && wget -nv -O$out https://cli.github.com/packages/githubcli-archive-keyring.gpg \
&& cat $out | tee /etc/apt/keyrings/githubcli-archive-keyring.gpg > /dev/null \
&& chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg \
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null \
&& apt update \
&& apt install gh -y
OLD_BRANCH=$(cat .github/BACKPORT_BRANCH)
OLD_BASENAME="cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda*-${{ inputs.host-platform }}*"
LATEST_PRIOR_RUN_ID=$(gh run list -b ${OLD_BRANCH} -L 1 -w "CI: Build and test" -s completed -R NVIDIA/cuda-python --json databaseId | jq '.[]| .databaseId')
if [[ "$LATEST_PRIOR_RUN_ID" == "" ]]; then
echo "LATEST_PRIOR_RUN_ID not found!"
exit 1
fi
gh run download $LATEST_PRIOR_RUN_ID -p ${OLD_BASENAME} -R NVIDIA/cuda-python
ls -al $OLD_BASENAME
mkdir -p "${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}"
mv $OLD_BASENAME/*.whl "${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}"/
rmdir $OLD_BASENAME
gh run download $LATEST_PRIOR_RUN_ID -p cuda-python-wheel -R NVIDIA/cuda-python
ls -al cuda-python-wheel
mv cuda-python-wheel/*.whl .
rmdir cuda-python-wheel
- name: Display structure of downloaded cuda-python artifacts
run: |
pwd
ls -lahR .
- name: Display structure of downloaded cuda.bindings artifacts
run: |
pwd
ls -lahR $CUDA_BINDINGS_ARTIFACTS_DIR
- name: Download cuda.core build artifacts
uses: actions/download-artifact@v4
with:
name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}
- name: Display structure of downloaded cuda.core build artifacts
run: |
pwd
ls -lahR $CUDA_CORE_ARTIFACTS_DIR
- name: Set up Python ${{ matrix.PY_VER }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.PY_VER }}
env:
# we use self-hosted runners on which setup-python behaves weirdly...
AGENT_TOOLSDIRECTORY: "/opt/hostedtoolcache"
- name: Set up mini CTK
if: ${{ inputs.local-ctk == '1' }}
uses: ./.github/actions/fetch_ctk
continue-on-error: false
with:
host-platform: ${{ inputs.host-platform }}
cuda-version: ${{ matrix.CUDA_VER }}
- name: Run cuda.bindings tests
if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }}
run: |
pushd "${CUDA_BINDINGS_ARTIFACTS_DIR}"
if [[ "${{ inputs.local-ctk }}" == 1 ]]; then
ls $CUDA_PATH
pip install *.whl
else
pip install $(ls *.whl)[all]
fi
popd
pushd ./cuda_bindings
pip install -r requirements.txt
pytest -rxXs -v tests/
# It is a bit convoluted to run the Cython tests against CTK wheels,
# so let's just skip them.
if [[ "${{ inputs.local-ctk }}" == 1 ]]; then
if [[ "${{ inputs.host-platform }}" == linux* ]]; then
bash tests/cython/build_tests.sh
elif [[ "${{ inputs.host-platform }}" == win* ]]; then
# TODO: enable this once win-64 runners are up
exit 1
fi
pytest -rxXs -v tests/cython
fi
popd
- name: Run cuda.core tests
run: |
# If build/test majors match: cuda.bindings is installed in the previous step.
# If mismatch: cuda.bindings is installed from the backport branch.
if [[ "${SKIP_CUDA_BINDINGS_TEST}" == 1 ]]; then
pushd "${CUDA_BINDINGS_ARTIFACTS_DIR}"
if [[ "${{ inputs.local-ctk }}" == 1 ]]; then
pip install *.whl
else
pip install $(ls *.whl)[all]
fi
popd
fi
TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${{ matrix.CUDA_VER }})"
pushd "${CUDA_CORE_ARTIFACTS_DIR}"
pip install $(ls *.whl)["cu${TEST_CUDA_MAJOR}"]
popd
pushd ./cuda_core
pip install -r "tests/requirements-cu${TEST_CUDA_MAJOR}.txt"
pytest -rxXs -v tests/
# It is a bit convoluted to run the Cython tests against CTK wheels,
# so let's just skip them. Also, currently our CI always installs the
# latest bindings (from either major version). This is not compatible
# with the test requirements.
if [[ "${{ inputs.local-ctk }}" == 1 && "${SKIP_CUDA_CORE_CYTHON_TEST}" == 0 ]]; then
pip install cython setuptools # setuptools needed starting PY312
if [[ "${{ inputs.host-platform }}" == linux* ]]; then
bash tests/cython/build_tests.sh
elif [[ "${{ inputs.host-platform }}" == win* ]]; then
# TODO: enable this once win-64 runners are up
exit 1
fi
pytest -rxXs -v tests/cython
fi
popd
- name: Ensure cuda-python installable
run: |
if [[ "${{ inputs.local-ctk }}" == 1 ]]; then
pip install cuda_python*.whl
else
pip install $(ls cuda_python*.whl)[all]
fi