@@ -40,56 +40,61 @@ jobs:
4040 include :
4141 - example : gpu
4242 timeout : 75
43- container_image : pytorch:26.04-py3
43+ container_image : nvcr.io/nvidia/pytorch:26.04-py3
4444 - example : gpu_megatron
4545 timeout : 45
46- container_image : nemo:26.04
46+ container_image : nvcr.io/nvidia/nemo:26.04
4747 - example : gpu_trtllm
4848 timeout : 30
49- container_image : tensorrt-llm/release:1.3.0rc16
49+ container_image : nvcr.io/nvidia/tensorrt-llm/release:1.3.0rc16
50+ - example : gpu_vllm
51+ timeout : 30
52+ container_image : docker.io/vllm/vllm-openai:v0.20.0
5053 runs-on : ${{ startsWith(github.ref, 'refs/heads/pull-request/') && 'linux-amd64-gpu-rtxpro6000-latest-1' || 'linux-amd64-gpu-rtxpro6000-latest-2' }}
5154 timeout-minutes : ${{ matrix.timeout }}
5255 container :
53- image : nvcr.io/nvidia/${{ matrix.container_image }}
56+ image : ${{ matrix.container_image }}
57+ # nvcr.io images require NGC auth; public docker.io images (e.g. vllm) are pulled
58+ # anonymously (the runner skips docker login when username/password are empty).
5459 credentials :
55- username : $oauthtoken
56- password : ${{ secrets.NGC_API_KEY }}
57- env :
58- GIT_DEPTH : 1000 # For correct version
59- PIP_CONSTRAINT : " " # Disable pip constraint for upgrading packages
60- HF_TOKEN : ${{ secrets.HF_TOKEN }}
61- steps :
62- - uses : actions/checkout@v6
63- - uses : ./.github/actions/gpu-test-run
64- with :
65- example : ${{ matrix.example }}
66- codecov_token : ${{ secrets.CODECOV_TOKEN }}
67-
68- # Docker Hub image: anonymous pull (no ``credentials:``) and no coverage
69- gpu-tests-vllm :
70- needs : [pr-gate]
71- if : needs.pr-gate.outputs.any_changed == 'true'
72- runs-on : ${{ startsWith(github.ref, 'refs/heads/pull-request/') && 'linux-amd64-gpu-rtxpro6000-latest-1' || 'linux-amd64-gpu-rtxpro6000-latest-2' }}
73- timeout-minutes : 30
74- container :
75- image : docker.io/vllm/vllm-openai:v0.20.0
60+ username : ${{ startsWith(matrix.container_image, 'nvcr.io') && '$oauthtoken' || '' }}
61+ password : ${{ startsWith(matrix.container_image, 'nvcr.io') && secrets.NGC_API_KEY || '' }}
7662 env :
7763 GIT_DEPTH : 1000 # For correct version
7864 PIP_CONSTRAINT : " " # Disable pip constraint for upgrading packages
7965 HF_TOKEN : ${{ secrets.HF_TOKEN }}
8066 steps :
67+ - name : Install git
68+ # The vllm container ships without git; needed for a real checkout (correct
69+ # setuptools-scm version) and for the Codecov upload below.
70+ if : matrix.example == 'gpu_vllm'
71+ run : apt-get update && apt-get install -y git
8172 - uses : actions/checkout@v6
82- - uses : ./.github/actions/gpu-test-run
73+ - uses : nv-gha-runners/setup-proxy-cache@main
74+ - name : Setup environment variables
75+ run : |
76+ echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu" >> $GITHUB_ENV
77+ - name : Run gpu tests
78+ env :
79+ COVERAGE_PROCESS_START : ${{ github.workspace }}/pyproject.toml
80+ COVERAGE_FILE : ${{ github.workspace }}/.coverage
81+ run : |
82+ python -m pip install nox && nox -s ${{ matrix.example }}
83+ - name : Upload GPU coverage to Codecov
84+ uses : codecov/codecov-action@v5
8385 with :
84- example : gpu_vllm
85- with_coverage : "false"
86+ token : ${{ secrets.CODECOV_TOKEN }}
87+ files : coverage.xml
88+ flags : gpu
89+ fail_ci_if_error : false # test may be skipped if relevant file changes are not detected
90+ verbose : true
8691
8792 gpu-pr-required-check :
88- # Run even if any of the gpu jobs is skipped
93+ # Run even if gpu-tests is skipped
8994 if : ${{ startsWith(github.ref, 'refs/heads/pull-request/') && always() }}
90- needs : [pr-gate, gpu-tests, gpu-tests-vllm]
95+ needs : [pr-gate, gpu-tests]
9196 runs-on : ubuntu-latest
9297 steps :
9398 - name : Required GPU tests did not succeed
94- if : ${{ needs.pr-gate.result != 'success' || (needs.pr-gate.outputs.any_changed == 'true' && (needs.gpu-tests.result != 'success' || needs.gpu-tests-vllm.result != 'success')) }}
99+ if : ${{ needs.pr-gate.result != 'success' || (needs.pr-gate.outputs.any_changed == 'true' && needs.gpu-tests.result != 'success') }}
95100 run : exit 1
0 commit comments