Skip to content

Commit caeef80

Browse files
simplify gpu_tests.yml
Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com>
1 parent 76ba585 commit caeef80

1 file changed

Lines changed: 36 additions & 31 deletions

File tree

.github/workflows/gpu_tests.yml

Lines changed: 36 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -40,56 +40,61 @@ jobs:
4040
include:
4141
- example: gpu
4242
timeout: 75
43-
container_image: pytorch:26.04-py3
43+
container_image: nvcr.io/nvidia/pytorch:26.04-py3
4444
- example: gpu_megatron
4545
timeout: 45
46-
container_image: nemo:26.04
46+
container_image: nvcr.io/nvidia/nemo:26.04
4747
- example: gpu_trtllm
4848
timeout: 30
49-
container_image: tensorrt-llm/release:1.3.0rc16
49+
container_image: nvcr.io/nvidia/tensorrt-llm/release:1.3.0rc16
50+
- example: gpu_vllm
51+
timeout: 30
52+
container_image: docker.io/vllm/vllm-openai:v0.20.0
5053
runs-on: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && 'linux-amd64-gpu-rtxpro6000-latest-1' || 'linux-amd64-gpu-rtxpro6000-latest-2' }}
5154
timeout-minutes: ${{ matrix.timeout }}
5255
container:
53-
image: nvcr.io/nvidia/${{ matrix.container_image }}
56+
image: ${{ matrix.container_image }}
57+
# nvcr.io images require NGC auth; for public docker.io images (e.g. vllm) the credentials
58+
# below evaluate to empty, so the runner skips docker login and pulls them anonymously.
5459
credentials:
55-
username: $oauthtoken
56-
password: ${{ secrets.NGC_API_KEY }}
57-
env:
58-
GIT_DEPTH: 1000 # For correct version
59-
PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
60-
HF_TOKEN: ${{ secrets.HF_TOKEN }}
61-
steps:
62-
- uses: actions/checkout@v6
63-
- uses: ./.github/actions/gpu-test-run
64-
with:
65-
example: ${{ matrix.example }}
66-
codecov_token: ${{ secrets.CODECOV_TOKEN }}
67-
68-
# Docker Hub image: anonymous pull (no ``credentials:``) and no coverage
69-
gpu-tests-vllm:
70-
needs: [pr-gate]
71-
if: needs.pr-gate.outputs.any_changed == 'true'
72-
runs-on: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && 'linux-amd64-gpu-rtxpro6000-latest-1' || 'linux-amd64-gpu-rtxpro6000-latest-2' }}
73-
timeout-minutes: 30
74-
container:
75-
image: docker.io/vllm/vllm-openai:v0.20.0
60+
username: ${{ startsWith(matrix.container_image, 'nvcr.io') && '$oauthtoken' || '' }}
61+
password: ${{ startsWith(matrix.container_image, 'nvcr.io') && secrets.NGC_API_KEY || '' }}
7662
env:
7763
GIT_DEPTH: 1000 # For correct version
7864
PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
7965
HF_TOKEN: ${{ secrets.HF_TOKEN }}
8066
steps:
67+
- name: Install git
68+
# The vllm container ships without git, which is required for a real checkout
69+
# (correct setuptools-scm version) and for the Codecov upload below.
70+
if: matrix.example == 'gpu_vllm'
71+
run: apt-get update && apt-get install -y git
8172
- uses: actions/checkout@v6
82-
- uses: ./.github/actions/gpu-test-run
73+
- uses: nv-gha-runners/setup-proxy-cache@main
74+
- name: Setup environment variables
75+
run: |
76+
echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu" >> $GITHUB_ENV
77+
- name: Run gpu tests
78+
env:
79+
COVERAGE_PROCESS_START: ${{ github.workspace }}/pyproject.toml
80+
COVERAGE_FILE: ${{ github.workspace }}/.coverage
81+
run: |
82+
python -m pip install nox && nox -s ${{ matrix.example }}
83+
- name: Upload GPU coverage to Codecov
84+
uses: codecov/codecov-action@v5
8385
with:
84-
example: gpu_vllm
85-
with_coverage: "false"
86+
token: ${{ secrets.CODECOV_TOKEN }}
87+
files: coverage.xml
88+
flags: gpu
89+
fail_ci_if_error: false # test may be skipped if relevant file changes are not detected
90+
verbose: true
8691

8792
gpu-pr-required-check:
88-
# Run even if any of the gpu jobs is skipped
93+
# Run even if gpu-tests is skipped
8994
if: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && always() }}
90-
needs: [pr-gate, gpu-tests, gpu-tests-vllm]
95+
needs: [pr-gate, gpu-tests]
9196
runs-on: ubuntu-latest
9297
steps:
9398
- name: Required GPU tests did not succeed
94-
if: ${{ needs.pr-gate.result != 'success' || (needs.pr-gate.outputs.any_changed == 'true' && (needs.gpu-tests.result != 'success' || needs.gpu-tests-vllm.result != 'success')) }}
99+
if: ${{ needs.pr-gate.result != 'success' || (needs.pr-gate.outputs.any_changed == 'true' && needs.gpu-tests.result != 'success') }}
95100
run: exit 1

0 commit comments

Comments
 (0)