Add in some testing matrix updates (simple version) #2
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reusable workflow (triggered via workflow_call) that tests the built wheels.
name: "CI: Test wheels"

on:
  workflow_call:
    inputs:
      # Selects the test matrix: 'pull-request', 'nightly' or 'branch'
      # (validated by the compute-matrix job).
      build-type:
        type: string
        required: true
      # Platform identifier, e.g. 'linux-64' or 'linux-aarch64'.
      host-platform:
        type: string
        required: true
      # CUDA Toolkit version that is part of the cuda.bindings artifact name;
      # its major/minor are compared against the matrix CUDA_VER.
      build-ctk-ver:
        type: string
        required: true
      # '1' => fetch a local (mini) CTK and install plain wheels;
      # anything else => install the wheels with their extras instead.
      local-ctk:
        type: string
        required: true
      # jq filter applied to the selected matrix before it is handed to the
      # `test` job. The matrix script already references this input, so it has
      # to be declared; the default "." keeps every entry.
      matrix_filter:
        type: string
        default: "."
jobs:
  # Produces the JSON matrix that the `test` job expands via fromJSON().
  compute-matrix:
    runs-on: ubuntu-latest
    env:
      # The input is declared as `build-type` (hyphen). Reading
      # `inputs.build_type` yields an empty string and makes the
      # validation step reject every run.
      BUILD_TYPE: ${{ inputs.build-type }}
      # Map the host platform to the architecture token used in runner labels.
      ARCH: ${{ (inputs.host-platform == 'linux-64' && 'amd64') ||
                (inputs.host-platform == 'linux-aarch64' && 'arm64') }}
    outputs:
      MATRIX: ${{ steps.compute-matrix.outputs.MATRIX }}
    steps:
- name: Validate Test Type
  run: |
    # Only the three build types known to the matrix computation are accepted.
    case "$BUILD_TYPE" in
      pull-request | nightly | branch)
        ;;
      *)
        echo "Invalid build type! Must be one of 'nightly', 'pull-request', or 'branch'."
        exit 1
        ;;
    esac
- name: Compute Python Test Matrix
  id: compute-matrix
  run: |
    set -eo pipefail

    # Please keep the matrices sorted in ascending order by the following:
    #
    #     [PY_VER, CUDA_VER, LINUX_VER, GPU, DRIVER]
    #
    gpu="l4"
    if [[ "${ARCH}" == "arm64" ]]; then
      gpu="a100"
    fi

    # Every entry is a YAML flow mapping, so each field must be written as
    # "KEY: value". ("ARCH=..." would be parsed as a lone key with no value,
    # leaving matrix.ARCH undefined for the runner label.) The shell expands
    # ${ARCH} and ${gpu} inside the double-quoted string; the single quotes
    # are literal and keep the values YAML strings.
    export MATRICES="
      pull-request:
        - { ARCH: '${ARCH}', PY_VER: '3.9', CUDA_VER: '11.8.0', LINUX_VER: 'rockylinux8', GPU: '${gpu}', DRIVER: 'earliest' }
        - { ARCH: '${ARCH}', PY_VER: '3.9', CUDA_VER: '12.0.1', LINUX_VER: 'ubuntu24.04', GPU: '${gpu}', DRIVER: 'latest' }
        - { ARCH: '${ARCH}', PY_VER: '3.13', CUDA_VER: '12.8.0', LINUX_VER: 'ubuntu22.04', GPU: '${gpu}', DRIVER: 'latest' }
      nightly:
        - { ARCH: '${ARCH}', PY_VER: '3.9', CUDA_VER: '11.8.0', LINUX_VER: 'rockylinux8', GPU: '${gpu}', DRIVER: 'earliest' }
        - { ARCH: '${ARCH}', PY_VER: '3.13', CUDA_VER: '12.8.0', LINUX_VER: 'ubuntu22.04', GPU: '${gpu}', DRIVER: 'latest' }
    "

    # Use the nightly matrix for branch tests
    MATRIX_TYPE="${BUILD_TYPE}"
    if [[ "${MATRIX_TYPE}" == "branch" ]]; then
      MATRIX_TYPE="nightly"
    fi
    export MATRIX_TYPE

    # Pick the list for the requested build type out of MATRICES.
    TEST_MATRIX=$(yq -n 'env(MATRICES) | .[strenv(MATRIX_TYPE)]')
    export TEST_MATRIX

    # Convert to JSON, apply the caller-supplied jq filter and fail loudly if
    # nothing is left. The `|| '.'` fallback keeps the jq program valid when
    # no filter is supplied (an empty filter would produce "| if ...").
    MATRIX="$(
      yq -n -o json 'env(TEST_MATRIX)' | \
        jq -c '${{ inputs.matrix_filter || '.' }} | if (. | length) > 0 then {include: .} else "Error: Empty matrix\n" | halt_error(1) end'
    )"

    # Log the matrix and publish it as the step output in one go.
    echo "MATRIX=${MATRIX}" | tee --append "${GITHUB_OUTPUT}"
# Runs once per entry of the matrix produced by compute-matrix.
test:
  needs: compute-matrix
  # The build stage could fail but we want the CI to keep moving.
  if: ${{ github.repository_owner == 'nvidia' && !cancelled() }}
  runs-on: "linux-${{ matrix.ARCH }}-gpu-${{ matrix.GPU }}-${{ matrix.DRIVER }}-1"
  strategy:
    fail-fast: false
    matrix: ${{ fromJSON(needs.compute-matrix.outputs.MATRIX) }}
  # Our self-hosted runners require a container
  # TODO: use a different (nvidia?) container
  container:
    image: ubuntu:22.04
    options: -u root --security-opt seccomp=unconfined --shm-size 16g
    # NOTE(review): nested under `container` because the `env` context is
    # not available in a job-level `env` map; confirm against the
    # un-mangled file.
    env:
      NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
  defaults:
    run:
      # Every `run` step of this job is traced (-x) and strict (-euo pipefail).
      shell: bash --noprofile --norc -xeuo pipefail {0}
  steps:
    - name: Ensure GPU is working
      run: nvidia-smi

    - name: Checkout ${{ github.event.repository.name }}
      uses: actions/checkout@v4
      with:
        # 0 => fetch the complete history.
        fetch-depth: 0
- name: Set environment variables
  run: |
    # Python version without dots, e.g. 3.13 -> 313 (used in artifact names).
    PYTHON_VERSION_FORMATTED=$(echo '${{ matrix.PY_VER }}' | tr -d '.')

    # Resolve the checkout directory in the platform's native path style.
    if [[ "${{ inputs.host-platform }}" == linux* ]]; then
      REPO_DIR=$(pwd)
    elif [[ "${{ inputs.host-platform }}" == win* ]]; then
      # Do not assign to PWD: it is maintained by the shell itself.
      REPO_DIR=$(cygpath -w "$(pwd)")
    else
      echo "Unsupported host platform: ${{ inputs.host-platform }}" >&2
      exit 1
    fi

    # Compare the CTK version of the build against the one under test.
    #  - different major: skip the cuda.bindings tests
    #  - same major, different minor: skip the cuda.core Cython tests
    BUILD_CUDA_MAJOR="$(cut -d '.' -f 1 <<< "${{ inputs.build-ctk-ver }}")"
    TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< "${{ matrix.CUDA_VER }}")"
    # Both sides are quoted so the right-hand side is compared literally
    # instead of being treated as a glob pattern.
    if [[ "${BUILD_CUDA_MAJOR}" != "${TEST_CUDA_MAJOR}" ]]; then
      SKIP_CUDA_BINDINGS_TEST=1
      SKIP_CUDA_CORE_CYTHON_TEST=0
    else
      SKIP_CUDA_BINDINGS_TEST=0
      BUILD_CUDA_MINOR="$(cut -d '.' -f 2 <<< "${{ inputs.build-ctk-ver }}")"
      TEST_CUDA_MINOR="$(cut -d '.' -f 2 <<< "${{ matrix.CUDA_VER }}")"
      if [[ "${BUILD_CUDA_MINOR}" != "${TEST_CUDA_MINOR}" ]]; then
        SKIP_CUDA_CORE_CYTHON_TEST=1
      else
        SKIP_CUDA_CORE_CYTHON_TEST=0
      fi
    fi

    # make outputs from the previous job as env vars
    CUDA_CORE_ARTIFACT_BASENAME="cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ inputs.host-platform }}"
    CUDA_BINDINGS_ARTIFACT_BASENAME="cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ inputs.build-ctk-ver }}-${{ inputs.host-platform }}"
    # Resolve the paths up front so a realpath failure aborts the step
    # instead of being hidden inside an `echo "$(...)"`.
    CUDA_CORE_ARTIFACTS_DIR=$(realpath "${REPO_DIR}/cuda_core/dist")
    CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "${REPO_DIR}/cuda_bindings/dist")

    # Export everything to the following steps through GITHUB_ENV.
    {
      echo "PYTHON_VERSION_FORMATTED=${PYTHON_VERSION_FORMATTED}"
      echo "CUDA_CORE_ARTIFACT_BASENAME=${CUDA_CORE_ARTIFACT_BASENAME}"
      echo "CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_BASENAME}-${{ github.sha }}"
      echo "CUDA_CORE_ARTIFACTS_DIR=${CUDA_CORE_ARTIFACTS_DIR}"
      echo "CUDA_BINDINGS_ARTIFACT_BASENAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}"
      echo "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}-${{ github.sha }}"
      echo "CUDA_BINDINGS_ARTIFACTS_DIR=${CUDA_BINDINGS_ARTIFACTS_DIR}"
      echo "SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}"
      echo "SKIP_CUDA_CORE_CYTHON_TEST=${SKIP_CUDA_CORE_CYTHON_TEST}"
    } >> "${GITHUB_ENV}"
- name: Install dependencies
  uses: ./.github/actions/install_unix_deps
  continue-on-error: false
  with:
    # gcc for Cython tests, jq/wget for artifact fetching
    dependencies: "build-essential jq wget"
    dependent_exes: "gcc jq wget"

# The next two downloads only run when the cuda.bindings tests are enabled
# (SKIP_CUDA_BINDINGS_TEST is '0').
- name: Download cuda-python build artifacts
  if: env.SKIP_CUDA_BINDINGS_TEST == '0'
  uses: actions/download-artifact@v4
  with:
    name: cuda-python-wheel
    path: .

- name: Download cuda.bindings build artifacts
  if: env.SKIP_CUDA_BINDINGS_TEST == '0'
  uses: actions/download-artifact@v4
  with:
    name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
    path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
- name: Download cuda-python & cuda.bindings build artifacts from the prior branch
  if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '1'}}
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    # See https://github.com/cli/cli/blob/trunk/docs/install_linux.md#debian-ubuntu-linux-raspberry-pi-os-apt.
    # gh is needed for artifact fetching.
    #
    # Each command is its own statement: with errexit, a failure in the
    # middle of an `a && b && c` chain does NOT abort the script, so a
    # broken download would only surface later as "gh: command not found".
    keyring=/etc/apt/keyrings/githubcli-archive-keyring.gpg
    mkdir -p -m 755 /etc/apt/keyrings
    wget -nv -O "${keyring}" https://cli.github.com/packages/githubcli-archive-keyring.gpg
    chmod go+r "${keyring}"
    echo "deb [arch=$(dpkg --print-architecture) signed-by=${keyring}] https://cli.github.com/packages stable main" \
      > /etc/apt/sources.list.d/github-cli.list
    apt update
    apt install gh -y

    # Branch whose latest completed run provides the older wheels.
    OLD_BRANCH=$(< .github/BACKPORT_BRANCH)
    # Artifact name pattern; the '*' wildcards are interpreted by
    # `gh run download -p` first and by the shell afterwards (see below).
    OLD_BASENAME="cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda*-${{ inputs.host-platform }}*"
    LATEST_PRIOR_RUN_ID=$(gh run list -b "${OLD_BRANCH}" -L 1 -w "CI: Build and test" -s completed -R NVIDIA/cuda-python --json databaseId | jq '.[]| .databaseId')
    if [[ -z "${LATEST_PRIOR_RUN_ID}" ]]; then
      echo "LATEST_PRIOR_RUN_ID not found!"
      exit 1
    fi

    # Quoted so gh receives the pattern itself rather than whatever the
    # shell might expand it to.
    gh run download "${LATEST_PRIOR_RUN_ID}" -p "${OLD_BASENAME}" -R NVIDIA/cuda-python
    # Intentionally unquoted from here on: the shell must expand the
    # pattern to the directory that gh just created.
    ls -al $OLD_BASENAME
    mkdir -p "${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}"
    mv $OLD_BASENAME/*.whl "${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}"/
    rmdir $OLD_BASENAME

    gh run download "${LATEST_PRIOR_RUN_ID}" -p cuda-python-wheel -R NVIDIA/cuda-python
    ls -al cuda-python-wheel
    mv cuda-python-wheel/*.whl .
    rmdir cuda-python-wheel
# Diagnostic listings: they only print what the download steps produced.
- name: Display structure of downloaded cuda-python artifacts
  run: |
    pwd
    ls -lahR .

- name: Display structure of downloaded cuda.bindings artifacts
  run: |
    pwd
    # Quoted so a path containing spaces or glob characters is listed as-is.
    ls -lahR "${CUDA_BINDINGS_ARTIFACTS_DIR}"

- name: Download cuda.core build artifacts
  uses: actions/download-artifact@v4
  with:
    name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
    path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}

- name: Display structure of downloaded cuda.core build artifacts
  run: |
    pwd
    ls -lahR "${CUDA_CORE_ARTIFACTS_DIR}"
- name: Set up Python ${{ matrix.PY_VER }}
  uses: actions/setup-python@v5
  env:
    # we use self-hosted runners on which setup-python behaves weirdly...
    AGENT_TOOLSDIRECTORY: "/opt/hostedtoolcache"
  with:
    python-version: ${{ matrix.PY_VER }}

# Only needed when the tests run against a locally fetched CTK.
- name: Set up mini CTK
  if: inputs.local-ctk == '1'
  uses: ./.github/actions/fetch_ctk
  continue-on-error: false
  with:
    host-platform: ${{ inputs.host-platform }}
    cuda-version: ${{ matrix.CUDA_VER }}
- name: Run cuda.bindings tests
  if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }}
  run: |
    pushd "${CUDA_BINDINGS_ARTIFACTS_DIR}"
    # Collect the wheels with a glob instead of parsing `ls` output.
    wheels=(*.whl)
    if [[ "${{ inputs.local-ctk }}" == 1 ]]; then
      ls "${CUDA_PATH}"
      pip install "${wheels[@]}"
    else
      # Append the "[all]" extra to every wheel. Quoting keeps the
      # brackets from being treated as a glob character class.
      pip install "${wheels[@]/%/[all]}"
    fi
    popd

    pushd ./cuda_bindings
    pip install -r requirements.txt
    pytest -rxXs -v tests/

    # It is a bit convoluted to run the Cython tests against CTK wheels,
    # so let's just skip them.
    if [[ "${{ inputs.local-ctk }}" == 1 ]]; then
      if [[ "${{ inputs.host-platform }}" == linux* ]]; then
        bash tests/cython/build_tests.sh
      elif [[ "${{ inputs.host-platform }}" == win* ]]; then
        # TODO: enable this once win-64 runners are up
        exit 1
      fi
      pytest -rxXs -v tests/cython
    fi
    popd
- name: Run cuda.core tests
  run: |
    # If build/test majors match: cuda.bindings is installed in the previous step.
    # If mismatch: cuda.bindings is installed from the backport branch.
    if [[ "${SKIP_CUDA_BINDINGS_TEST}" == 1 ]]; then
      pushd "${CUDA_BINDINGS_ARTIFACTS_DIR}"
      # Collect the wheels with a glob instead of parsing `ls` output.
      bindings_wheels=(*.whl)
      if [[ "${{ inputs.local-ctk }}" == 1 ]]; then
        pip install "${bindings_wheels[@]}"
      else
        # Append the "[all]" extra to every wheel; quoting keeps the
        # brackets from being treated as a glob character class.
        pip install "${bindings_wheels[@]/%/[all]}"
      fi
      popd
    fi

    TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< "${{ matrix.CUDA_VER }}")"
    # Extra matching the CUDA major under test, e.g. "[cu12]".
    cuda_extra="[cu${TEST_CUDA_MAJOR}]"

    pushd "${CUDA_CORE_ARTIFACTS_DIR}"
    core_wheels=(*.whl)
    pip install "${core_wheels[@]/%/${cuda_extra}}"
    popd

    pushd ./cuda_core
    pip install -r "tests/requirements-cu${TEST_CUDA_MAJOR}.txt"
    pytest -rxXs -v tests/

    # It is a bit convoluted to run the Cython tests against CTK wheels,
    # so let's just skip them. Also, currently our CI always installs the
    # latest bindings (from either major version). This is not compatible
    # with the test requirements.
    if [[ "${{ inputs.local-ctk }}" == 1 && "${SKIP_CUDA_CORE_CYTHON_TEST}" == 0 ]]; then
      pip install cython setuptools  # setuptools needed starting PY312
      if [[ "${{ inputs.host-platform }}" == linux* ]]; then
        bash tests/cython/build_tests.sh
      elif [[ "${{ inputs.host-platform }}" == win* ]]; then
        # TODO: enable this once win-64 runners are up
        exit 1
      fi
      pytest -rxXs -v tests/cython
    fi
    popd
- name: Ensure cuda-python installable
  run: |
    # Collect the metapackage wheel(s) with a glob instead of parsing `ls`.
    wheels=(cuda_python*.whl)
    if [[ "${{ inputs.local-ctk }}" == 1 ]]; then
      pip install "${wheels[@]}"
    else
      # Append the "[all]" extra to every wheel; quoting keeps the brackets
      # from being treated as a glob character class.
      pip install "${wheels[@]/%/[all]}"
    fi