# Source: rapidsai/shared-workflows, .github/workflows/conda-python-build.yaml
# at commit 5387999734b4457677baba4e9683d33c6f33feb1
# Reusable workflow: it is triggered only through `workflow_call`, and the
# inputs below form its public interface for calling workflows.
on:
  workflow_call:
    inputs:
      build_type:
        description: "One of: [branch, nightly, pull-request]"
        required: true
        type: string
      branch:
        description: |
          Git branch the workflow run targets.
          This is required even when 'sha' is provided because it is also used for organizing artifacts.
        type: string
      date:
        description: "Date (YYYY-MM-DD) this run is for. Used to organize artifacts produced by nightly builds"
        type: string
      sha:
        description: "Full git commit SHA to check out"
        type: string
      repo:
        description: "Git repo to check out, in '{org}/{repo}' form, e.g. 'rapidsai/cudf'"
        type: string
      node_type:
        description: |
          Suffix, without leading '-', indicating the type of machine to run jobs on (e.g., 'cpu4' or 'gpu-l4-latest-1').
          Runner labels are of the form '{operating_system}-{arch}-{node_type}'.
          See https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md for a list
          of valid values.
        type: string
        default: "cpu8"
      script:
        type: string
        required: true
        description: "Shell code to be executed in a step. Ideally this should just invoke a script managed in the repo the workflow runs from, like 'ci/build_python.sh'."
      upload-artifacts:
        type: boolean
        default: true
        required: false
        description: "One of [true, false], true if artifacts should be uploaded to GitHub's artifact store"
      matrix_filter:
        description: |
          jq expression which modifies the matrix.
          For example, 'map(select(.ARCH == "amd64"))' to achieve "only run amd64 jobs".
        type: string
        default: "."
      sccache-dist-request-timeout:
        type: string
        # Quoted so the default is a string, matching the declared 'type: string'.
        # 7140 seconds is 119 minutes.
        default: "7140"
        description: |
          The maximum time (in seconds) the sccache client should wait for a distributed compilation to complete.
      sccache-dist-token-secret-name:
        type: string
        required: false
        description: |
          The name of the secret that contains the token used to authenticate with the RAPIDS Build Engineering sccache-dist build cluster.
      alternative-gh-token-secret-name:
        type: string
        required: false
        description: |
          If provided, should contain the name of a secret in the repo which holds a GitHub API token.
          When this is non-empty, that secret's value is used in place of the default repo-level token
          anywhere that environment variable GH_TOKEN is set. This is especially useful for downloading
          artifacts from other private repos, which repo tokens do not have access to.
      pure-conda:
        required: false
        # A string rather than a boolean because it accepts a third value, 'cuda_major'.
        type: string
        default: "false"
        description: |
          One of [true, false, cuda_major]. 'true' if the conda package is not
          dependent on operating system, Python minor version, CPU architecture,
          or CUDA version. 'cuda_major' if the package is not dependent on
          operating system, Python minor version, or CPU architecture, but is
          dependent on CUDA major version.

# Every `run:` step in this workflow uses bash unless the step sets its own shell.
defaults:
  run: { shell: bash }

# Token scopes for the jobs in this workflow, grouped by access level.
permissions:
  # Write access.
  # NOTE(review): presumably needed so the AWS credentials step can request an
  # OIDC token for role assumption - confirm.
  id-token: write

  # Read-only access.
  actions: read
  contents: read
  packages: read
  pull-requests: read

  # Explicitly disabled.
  checks: none
  deployments: none
  discussions: none
  issues: none
  pages: none
  repository-projects: none
  security-events: none
  statuses: none

jobs:
  # Builds the matrix consumed by the `build` job and publishes it as the
  # `MATRIX` job output: compact JSON of the form {"include": [...]}.
  compute-matrix:
    runs-on: ubuntu-latest
    outputs:
      MATRIX: ${{ steps.compute-matrix.outputs.MATRIX }}
    steps:
      - name: Compute Build Matrix
        id: compute-matrix
        env:
          # Caller inputs reach the script through environment variables.
          MATRIX_FILTER: ${{ inputs.matrix_filter }}
          PURE_CONDA: ${{ inputs.pure-conda }}
        run: |
          set -eo pipefail

          # please keep the matrices sorted in ascending order by the following:
          #
          #     [ARCH, PY_VER, CUDA_VER, LINUX_VER]
          #
          export MATRIX="
          # amd64
          - { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '12.9.1', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '12.9.1', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '12.9.1', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '13.1.0', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '13.1.0', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.1.0', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.1.0', LINUX_VER: 'rockylinux8' }
          # arm64
          - { ARCH: 'arm64', PY_VER: '3.10', CUDA_VER: '12.9.1', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '12.9.1', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.9.1', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'arm64', PY_VER: '3.13', CUDA_VER: '12.9.1', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'arm64', PY_VER: '3.10', CUDA_VER: '13.1.0', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '13.1.0', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.1.0', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'arm64', PY_VER: '3.13', CUDA_VER: '13.1.0', LINUX_VER: 'rockylinux8' }
          "

          # The pure-conda overrides below apply only when the caller left matrix_filter at its default (".").
          # Versions are split on "." and compared as arrays of numbers in both branches, so that
          # e.g. '3.9' does not rank above '3.10' as it would in a plain string comparison.
          if [ "${PURE_CONDA}" = "true" ] && [ "${MATRIX_FILTER}" = "." ]; then
            # A single amd64 entry: latest CUDA_VER, then latest PY_VER.
            MATRIX_FILTER="map(select(.ARCH == \"amd64\")) | sort_by((.CUDA_VER|split(\".\")|map(tonumber)), (.PY_VER|split(\".\")|map(tonumber))) | [last]"
          elif [ "${PURE_CONDA}" = "cuda_major" ] && [ "${MATRIX_FILTER}" = "." ]; then
            # One amd64 entry per CUDA major version: highest PY_VER, ties broken by highest CUDA_VER.
            MATRIX_FILTER="map(select(.ARCH == \"amd64\")) | group_by(.CUDA_VER|split(\".\")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(\".\")|map(tonumber)), (.CUDA_VER|split(\".\")|map(tonumber))]))"
          fi

          # Convert the YAML list to JSON, apply the filter, and wrap the result as {include: [...]}.
          # An empty result makes jq exit with status 1, which fails the step.
          MATRIX="$(
            yq -n -o json 'env(MATRIX)' | \
            jq -c "${MATRIX_FILTER} | if (. | length) > 0 then {include: .} else \"Error: Empty matrix\n\" | halt_error(1) end"
          )"

          # Print the matrix to the log and append it to the step outputs.
          echo "MATRIX=${MATRIX}" | tee --append "${GITHUB_OUTPUT}"
  # One job per matrix entry. Each job runs inside the rapidsai/ci-conda image
  # selected by that entry's CUDA version, Linux distribution and Python version.
  build:
    needs: compute-matrix
    name: "${{ matrix.CUDA_VER }}, ${{ matrix.PY_VER }}, ${{ matrix.ARCH }}, ${{ matrix.LINUX_VER }}"
    # Runner label: 'linux-{arch}-{node_type}'.
    runs-on: "linux-${{ matrix.ARCH }}-${{ inputs.node_type }}"
    strategy:
      # Let the other matrix entries keep running when one of them fails.
      fail-fast: false
      matrix: ${{ fromJSON(needs.compute-matrix.outputs.MATRIX) }}
    container:
      image: "rapidsai/ci-conda:26.04-cuda${{ matrix.CUDA_VER }}-${{ matrix.LINUX_VER }}-py${{ matrix.PY_VER }}"
      env:
        RAPIDS_BUILD_TYPE: ${{ inputs.build_type }}
    env:
      RAPIDS_ARTIFACTS_DIR: ${{ github.workspace }}/artifacts
    steps:
      # Assume the AWS role named by the AWS_ROLE_ARN variable in the region
      # named by AWS_REGION. The requested session lasts 43200 seconds (12 hours).
      # The action is pinned to a full commit SHA; the trailing comment records the tag.
      - uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1
        with:
          role-to-assume: ${{ vars.AWS_ROLE_ARN }}
          aws-region: ${{ vars.AWS_REGION }}
          role-duration-seconds: 43200 # 12h
      # Check out the 'repo' input at the 'sha' input.
      # fetch-depth: 0 fetches the full history rather than a shallow clone.
      # persist-credentials: true leaves the token in the local git config for later steps.
      # NOTE(review): referenced by a mutable tag (v6) rather than a commit SHA - consider pinning.
      - uses: actions/checkout@v6
        with:
          repository: ${{ inputs.repo }}
          ref: ${{ inputs.sha }}
          fetch-depth: 0
          persist-credentials: true
      # Publish repository metadata (repository, commit SHA, ref name, nightly date)
      # to all later steps of this job by appending to GITHUB_ENV.
      - name: Standardize repository information
        env:
          # Fall back to the calling workflow's repository / ref when the inputs are empty.
          RAPIDS_REPOSITORY: ${{ inputs.repo || github.repository }}
          RAPIDS_REF_NAME: ${{ inputs.branch || github.ref_name }}
          RAPIDS_NIGHTLY_DATE: ${{ inputs.date }}
        run: |
          # Resolve the checked-out commit in a plain assignment so that a 'git' failure
          # aborts the step, instead of being masked by 'echo' and exporting an empty RAPIDS_SHA.
          RAPIDS_SHA="$(git rev-parse HEAD)"

          {
            echo "RAPIDS_REPOSITORY=${RAPIDS_REPOSITORY}"
            echo "RAPIDS_SHA=${RAPIDS_SHA}"
            echo "RAPIDS_REF_NAME=${RAPIDS_REF_NAME}"
            echo "RAPIDS_NIGHTLY_DATE=${RAPIDS_NIGHTLY_DATE}"
          } >> "${GITHUB_ENV}"
      # Optional telemetry: runs only when the TELEMETRY_ENABLED variable is 'true'.
      # continue-on-error keeps a failure here from failing the job.
      # The matrix coordinates are passed to the action as comma-separated key=value attributes.
      - name: Telemetry setup
        uses: rapidsai/shared-actions/telemetry-dispatch-setup@main
        continue-on-error: true
        if: ${{ vars.TELEMETRY_ENABLED == 'true' }}
        env:
          # DOES NOT NEED alternative-gh-token-secret-name - github.token is enough and more limited
          GH_TOKEN: ${{ github.token }}
        with:
          extra_attributes: "rapids.PACKAGER=conda,rapids.CUDA_VER=${{ matrix.CUDA_VER }},rapids.PY_VER=${{ matrix.PY_VER }},rapids.ARCH=${{ matrix.ARCH }},rapids.LINUX_VER=${{ matrix.LINUX_VER }}"
      # Runs unconditionally, with no inputs.
      # NOTE(review): referenced by branch (@main); what the action configures is not visible from this file.
      - name: Setup proxy cache
        uses: nv-gha-runners/setup-proxy-cache@main
      # Install latest rapidsai/sccache client and configure sccache-dist.
      # Skipped unless the caller supplied the name of the secret holding the sccache-dist token.
      - name: Setup sccache-dist
        if: ${{ inputs.sccache-dist-token-secret-name != '' }}
        uses: rapidsai/shared-actions/setup-sccache-dist@main
        with:
          # The secret is looked up by the name the caller passed in.
          auth: "${{ secrets[inputs.sccache-dist-token-secret-name] }}" # zizmor: ignore[overprovisioned-secrets]
          log-file: "${{ env.RAPIDS_ARTIFACTS_DIR }}/sccache.log"
          request-timeout: ${{ inputs.sccache-dist-request-timeout }}
        env:
          # Forward the AWS settings from the job environment to the action.
          # NOTE(review): presumably exported by the AWS credentials step - confirm.
          AWS_REGION: ${{ env.AWS_REGION }}
          AWS_ACCESS_KEY_ID: ${{ env.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ env.AWS_SECRET_ACCESS_KEY }}
      # Print the current GitHub API rate-limit status for the token used by later steps.
      #
      # Per the docs at https://docs.github.com/en/rest/rate-limit/rate-limit?apiVersion=2022-11-28#get-rate-limit-status-for-the-authenticated-user,
      # querying '/rate_limit' should not itself count against any rate limits.
      #
      # The gh CLI is pre-installed on GitHub-hosted runners but may be missing on
      # self-hosted ones, in which case the check is skipped rather than failed.
      - name: Check GitHub API rate limits
        env:
          # NEEDS alternative-gh-token-secret-name - API limits need to be for whatever token is used for upload/download. Repo token may be a different pool for rate limits.
          GH_TOKEN: ${{ inputs.alternative-gh-token-secret-name && secrets[inputs.alternative-gh-token-secret-name] || github.token }} # zizmor: ignore[overprovisioned-secrets]
        run: |
          if type gh >/dev/null; then
              gh api /rate_limit | jq .
          else
              echo "'gh' CLI is not installed... skipping rate-limits check"
          fi
      # Run the caller-supplied build script.
      # The script first raises the soft open-file limit to the hard limit.
      # It then sources the 'script' input into the step's shell, so variables the
      # script sets remain visible afterwards.
      # If the script leaves RAPIDS_PACKAGE_NAME non-empty, that value is published as
      # the 'rapids-package-name' step output.
      - name: Python build
        id: python-build
        run: |
          ulimit -n "$(ulimit -Hn)"
          # shellcheck disable=SC1090
          source "${INPUTS_SCRIPT}"

          # Capture RAPIDS_PACKAGE_NAME if set by the build script
          if [[ -n "${RAPIDS_PACKAGE_NAME:-}" ]]; then
            echo "rapids-package-name=${RAPIDS_PACKAGE_NAME}" >> "${GITHUB_OUTPUT}"
          fi
        env:
          # NEEDS alternative-gh-token-secret-name - may require a token with more permissions
          GH_TOKEN: ${{ inputs.alternative-gh-token-secret-name && secrets[inputs.alternative-gh-token-secret-name] || github.token }} # zizmor: ignore[overprovisioned-secrets]
          # The script input is passed through the environment rather than interpolated into the shell text.
          INPUTS_SCRIPT: ${{ inputs.script }}
      # Decide the artifact name and the directory to upload, and expose both as
      # step outputs (RAPIDS_PACKAGE_NAME, CONDA_OUTPUT_DIR). Runs only when uploads are enabled.
      - name: Get Package Name and Location
        id: package-name
        if: ${{ inputs.upload-artifacts }}
        env:
          # Pass RAPIDS_PACKAGE_NAME from python-build step if available (empty otherwise)
          RAPIDS_PACKAGE_NAME: ${{ steps.python-build.outputs.rapids-package-name }}
        run: |
          # Use RAPIDS_PACKAGE_NAME from build step if available, otherwise generate default.
          # The default is computed in a plain assignment so that a failure of
          # 'rapids-package-name' aborts the step instead of writing an empty artifact name.
          if [[ -z "${RAPIDS_PACKAGE_NAME:-}" ]]; then
            RAPIDS_PACKAGE_NAME="$(rapids-package-name conda_python)"
          fi

          {
            echo "RAPIDS_PACKAGE_NAME=${RAPIDS_PACKAGE_NAME}"
            echo "CONDA_OUTPUT_DIR=${RAPIDS_CONDA_BLD_OUTPUT_DIR}"
          } >> "${GITHUB_OUTPUT}"
      # Log a recursive listing of the directory that the upload step will publish.
      # Runs only when uploads are enabled.
      - name: Show files to be uploaded
        if: ${{ inputs.upload-artifacts }}
        env:
          # Directory reported by the 'package-name' step.
          CONDA_OUTPUT_DIR: ${{ steps.package-name.outputs.CONDA_OUTPUT_DIR }}
        run: |
          echo "Contents of directory to be uploaded:"
          ls -R "${CONDA_OUTPUT_DIR}"
      # Upload the conda output directory to GitHub's artifact store under the
      # name chosen by the 'package-name' step. Runs only when uploads are enabled.
      # NOTE(review): referenced by a mutable tag (v6) rather than a commit SHA - consider pinning.
      - uses: actions/upload-artifact@v6
        if: ${{ inputs.upload-artifacts }}
        with:
          # Fail the step if the directory contains nothing to upload.
          if-no-files-found: 'error'
          name: ${{ steps.package-name.outputs.RAPIDS_PACKAGE_NAME }}
          path: ${{ steps.package-name.outputs.CONDA_OUTPUT_DIR }}
      # Runs even when earlier steps failed, but not when the run was cancelled.
      # The argument has the form 'cuda{major}_{arch}_py{digits}':
      #   ${RAPIDS_CUDA_VERSION%%.*} keeps only the text before the first '.',
      #   ${RAPIDS_PY_VERSION//.} removes every '.'.
      # NOTE(review): RAPIDS_CUDA_VERSION and RAPIDS_PY_VERSION are not set in this file;
      # presumably they come from the container image - confirm.
      - name: Upload additional artifacts
        if: "!cancelled()"
        run: rapids-upload-artifacts-dir "cuda${RAPIDS_CUDA_VERSION%%.*}_$(arch)_py${RAPIDS_PY_VERSION//.}"
      # Optional telemetry: runs only when the TELEMETRY_ENABLED variable is 'true'.
      # continue-on-error keeps a failure here from failing the job.
      - name: Telemetry upload attributes
        uses: rapidsai/shared-actions/telemetry-dispatch-stash-job-artifacts@main
        continue-on-error: true
        if: ${{ vars.TELEMETRY_ENABLED == 'true' }}
        env:
          # DOES NOT NEED alternative-gh-token-secret-name - github.token is enough and more limited
          GH_TOKEN: ${{ github.token }}