Skip to content

Commit 6830ff7

Browse files
authored
Add CUDA Plugin EP CI and fix Windows plugin build support (#27959)
## Summary

This PR improves CUDA Plugin EP development and validation in three areas:

1. Fixes the Windows CUDA Plugin EP build so the plugin can be compiled successfully with MSVC.
2. Adds dedicated Windows and Linux GitHub Actions workflows for building and testing the CUDA Plugin EP.
3. Expands the quick start documentation with instructions for running the CUDA Plugin EP Python tests locally.

## Changes

### Windows build fixes

- Update the CUDA plugin CMake configuration to use the correct forced-include flags on Windows/MSVC.
- Keep the existing forced-include behavior for non-MSVC toolchains.
- Add the missing `GetEnvironmentVar(const std::string&)` forward declaration needed by plugin builds on Windows.

### CI coverage for CUDA Plugin EP

Add a Windows CUDA Plugin EP workflow that:

- builds ONNX Runtime with `onnxruntime_BUILD_CUDA_EP_AS_PLUGIN=ON`
- uploads build artifacts
- installs the built wheel
- sets `ORT_CUDA_PLUGIN_PATH`
- runs `test_cuda_plugin_ep.py`

Add a similar Linux CUDA Plugin EP workflow.

### Documentation updates

- Add a Running Tests section to the CUDA Plugin EP quick start.
- Document test prerequisites, dependency installation, and `ORT_CUDA_PLUGIN_PATH`.
- Clarify that CPU-only PyTorch is sufficient for `test_cuda_plugin_ep.py` because it is used for CPU-side reference computations.
1 parent c37c8b8 commit 6830ff7

15 files changed

Lines changed: 505 additions & 51 deletions

File tree

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
name: CUDA Plugin Linux CI
2+
3+
on:
4+
push:
5+
branches: [main, 'rel-*']
6+
pull_request:
7+
branches: [main, 'rel-*']
8+
workflow_dispatch:
9+
10+
concurrency:
11+
group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.ref || github.sha }}
12+
cancel-in-progress: true
13+
14+
permissions:
15+
contents: read
16+
packages: write
17+
attestations: write
18+
id-token: write
19+
20+
jobs:
21+
build-linux-cuda-plugin-x64-release:
22+
name: Build Linux CUDA Plugin EP x64 Release
23+
uses: ./.github/workflows/reusable_linux_build.yml
24+
with:
25+
pool_name: "onnxruntime-github-Ubuntu2204-AMD-CPU"
26+
build_config: Release
27+
architecture: x64
28+
dockerfile_path: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
29+
docker_build_args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1'
30+
docker_image_repo: onnxruntimecuda12manylinuxbuild
31+
extra_build_flags: >-
32+
--use_binskim_compliant_compile_flags
33+
--build_wheel
34+
--parallel
35+
--nvcc_threads 1
36+
--cuda_version=12.8
37+
--cuda_home=/usr/local/cuda-12.8
38+
--cudnn_home=/usr/local/cuda-12.8
39+
--cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86
40+
--cmake_extra_defines onnxruntime_BUILD_CUDA_EP_AS_PLUGIN=ON
41+
python_path_prefix: 'PATH=/opt/python/cp312-cp312/bin:$PATH'
42+
run_tests: false
43+
upload_build_output: true
44+
execution_providers: 'cuda'
45+
job_identifier: build-linux-cuda-plugin-x64-release
46+
secrets:
47+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
48+
49+
test-linux-cuda-plugin-x64-release:
50+
name: Test Linux CUDA Plugin EP x64 Release
51+
needs: build-linux-cuda-plugin-x64-release
52+
runs-on:
53+
- self-hosted
54+
- "1ES.Pool=onnxruntime-github-linux-a10"
55+
- "JobId=test-linux-cuda-plugin-x64-release-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}"
56+
permissions:
57+
contents: read
58+
packages: read
59+
steps:
60+
- name: Checkout code
61+
uses: actions/checkout@v6
62+
63+
- uses: microsoft/onnxruntime-github-actions/build-docker-image@8bad63a3c05d448311dfa8e5f531171c97471aa1 # v0.0.12
64+
id: build_docker_image_step
65+
with:
66+
dockerfile: ${{ github.workspace }}/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
67+
image-name: ghcr.io/microsoft/onnxruntime/onnxruntimecuda12manylinuxbuild
68+
build-args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1'
69+
push: true
70+
azure-container-registry-name: onnxruntimebuildcache
71+
env:
72+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
73+
74+
# --- Download Build Artifact to Runner Temp Directory ---
75+
- name: Download Build Artifact
76+
uses: actions/download-artifact@v7
77+
with:
78+
name: build-output-x64-Release
79+
path: ${{ runner.temp }}/Release
80+
81+
# --- Restore Permissions in the Temp Directory ---
82+
- name: Restore Executable Permissions
83+
if: success()
84+
working-directory: ${{ runner.temp }}/Release
85+
run: |
86+
if [ -f perms.txt ]; then
87+
echo "Restoring executable permissions in ${{ runner.temp }}/Release ..."
88+
while IFS= read -r file; do
89+
if [ -f "$file" ]; then
90+
chmod +x "$file"
91+
else
92+
echo "Warning: File '$file' listed in perms.txt not found."
93+
fi
94+
done < perms.txt
95+
echo "Permissions restored."
96+
else
97+
echo "Warning: perms.txt not found in artifact."
98+
fi
99+
100+
# --- Install the ORT wheel and run CUDA plugin EP tests ---
101+
- name: Run CUDA Plugin EP Python Tests
102+
run: |
103+
docker run --rm --gpus all \
104+
-v ${{ github.workspace }}:/onnxruntime_src \
105+
-v ${{ runner.temp }}/Release:/build/Release \
106+
-e NVIDIA_VISIBLE_DEVICES=all \
107+
${{ steps.build_docker_image_step.outputs.full-image-name }} \
108+
bash -c "
109+
set -ex
110+
export PATH=/opt/python/cp312-cp312/bin:\$PATH
111+
112+
# Install the ORT wheel
113+
python -m pip install /build/Release/Release/dist/onnxruntime*.whl
114+
115+
# Install test dependencies
116+
python -m pip install numpy onnx
117+
python -m pip install torch --index-url https://download.pytorch.org/whl/cpu
118+
119+
# Set plugin path and run tests
120+
export ORT_CUDA_PLUGIN_PATH=/build/Release/Release/libonnxruntime_providers_cuda_plugin.so
121+
echo \"ORT_CUDA_PLUGIN_PATH=\$ORT_CUDA_PLUGIN_PATH\"
122+
ls -la \$ORT_CUDA_PLUGIN_PATH
123+
124+
cd /onnxruntime_src/onnxruntime/test/python/transformers
125+
python test_cuda_plugin_ep.py
126+
"
Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
name: CUDA Plugin Windows CI
2+
3+
on:
4+
push:
5+
branches:
6+
- main
7+
- rel-*
8+
pull_request:
9+
branches:
10+
- main
11+
- rel-*
12+
workflow_dispatch:
13+
14+
concurrency:
15+
group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.ref || github.sha }}
16+
cancel-in-progress: true
17+
18+
jobs:
19+
build:
20+
name: Windows CUDA Plugin EP Build
21+
runs-on: [
22+
"self-hosted",
23+
"1ES.Pool=onnxruntime-github-vs2022-latest",
24+
"JobId=windows-cuda-plugin-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}"
25+
]
26+
steps:
27+
- uses: actions/checkout@v6
28+
with:
29+
fetch-depth: 0
30+
submodules: 'none'
31+
32+
- uses: actions/setup-python@v6
33+
with:
34+
python-version: '3.14'
35+
architecture: x64
36+
37+
- name: Locate vcvarsall and Setup Env
38+
uses: ./.github/actions/locate-vcvarsall-and-setup-env
39+
with:
40+
architecture: x64
41+
42+
- name: Install python modules
43+
run: python -m pip install -r .\tools\ci_build\github\windows\python\requirements.txt
44+
working-directory: ${{ github.workspace }}
45+
shell: cmd
46+
47+
- name: Download CUDA SDK v12.8
48+
working-directory: ${{ runner.temp }}
49+
run: |
50+
azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v12.8" .
51+
dir
52+
shell: pwsh
53+
54+
- name: Add CUDA to PATH
55+
shell: powershell
56+
run: |
57+
Write-Host "Adding CUDA to PATH"
58+
Write-Host "CUDA Path: $env:RUNNER_TEMP\v12.8\bin"
59+
Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v12.8\bin"
60+
Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v12.8\extras\CUPTI\lib64"
61+
62+
- name: Set OnnxRuntimeBuildDirectory
63+
shell: pwsh
64+
run: |
65+
$buildDir = Join-Path ${{ runner.temp }} "build"
66+
echo "OnnxRuntimeBuildDirectory=$buildDir" >> $env:GITHUB_ENV
67+
68+
- name: Build ONNX Runtime with CUDA Plugin EP
69+
working-directory: ${{ runner.temp }}
70+
run: |
71+
python.exe ${{ github.workspace }}\tools\ci_build\build.py `
72+
--update --build --config Release `
73+
--build_dir build `
74+
--skip_submodule_sync `
75+
--parallel `
76+
--nvcc_threads 1 `
77+
--use_binskim_compliant_compile_flags `
78+
--cmake_generator "Visual Studio 17 2022" `
79+
--build_shared_lib `
80+
--build_wheel `
81+
--use_cuda `
82+
--cuda_home="$env:RUNNER_TEMP\v12.8" `
83+
--skip_tests `
84+
--use_vcpkg `
85+
--use_vcpkg_ms_internal_asset_cache `
86+
--cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86 `
87+
--cmake_extra_defines onnxruntime_BUILD_CUDA_EP_AS_PLUGIN=ON
88+
89+
if ($lastExitCode -ne 0) {
90+
exit $lastExitCode
91+
}
92+
93+
# Clean up intermediate files before uploading artifacts
94+
$outputDir = "${{ runner.temp }}\build\Release"
95+
Write-Host "Cleaning up files from $outputDir..."
96+
97+
Remove-Item -Path "$outputDir\onnxruntime" -Recurse -Force -ErrorAction SilentlyContinue
98+
Remove-Item -Path "$outputDir\pybind11" -Recurse -Force -ErrorAction SilentlyContinue
99+
Remove-Item -Path "$outputDir\models" -Recurse -Force -ErrorAction SilentlyContinue
100+
Remove-Item -Path "$outputDir\vcpkg_installed" -Recurse -Force -ErrorAction SilentlyContinue
101+
Remove-Item -Path "$outputDir\_deps" -Recurse -Force -ErrorAction SilentlyContinue
102+
Remove-Item -Path "$outputDir\CMakeCache.txt" -Force -ErrorAction SilentlyContinue
103+
Remove-Item -Path "$outputDir\CMakeFiles" -Recurse -Force -ErrorAction SilentlyContinue
104+
Remove-Item -Path $outputDir -Include "*.obj" -Recurse
105+
shell: pwsh
106+
107+
- name: Upload build artifacts
108+
uses: actions/upload-artifact@v6
109+
with:
110+
name: cuda-plugin-build-artifacts
111+
path: ${{ runner.temp }}\build
112+
env:
113+
DOTNET_SKIP_FIRST_TIME_EXPERIENCE: true
114+
setVcvars: true
115+
ALLOW_RELEASED_ONNX_OPSET_ONLY: '0'
116+
ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'
117+
AZCOPY_AUTO_LOGIN_TYPE: MSI
118+
AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
119+
120+
test:
121+
name: Windows CUDA Plugin EP Test
122+
needs: build
123+
timeout-minutes: 120
124+
runs-on: [
125+
"self-hosted",
126+
"1ES.Pool=onnxruntime-github-Win2022-GPU-A10",
127+
"JobId=windows-cuda-plugin-test-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}"
128+
]
129+
steps:
130+
- uses: actions/checkout@v6
131+
with:
132+
fetch-depth: 0
133+
submodules: 'none'
134+
135+
- name: Download build artifacts
136+
uses: actions/download-artifact@v7
137+
with:
138+
name: cuda-plugin-build-artifacts
139+
path: ${{ runner.temp }}\build
140+
141+
- uses: actions/setup-python@v6
142+
with:
143+
python-version: '3.14'
144+
architecture: x64
145+
146+
- name: Locate vcvarsall and Setup Env
147+
uses: ./.github/actions/locate-vcvarsall-and-setup-env
148+
with:
149+
architecture: x64
150+
151+
- name: Install python modules
152+
run: python -m pip install -r .\tools\ci_build\github\windows\python\requirements.txt
153+
working-directory: ${{ github.workspace }}
154+
shell: cmd
155+
156+
- name: Install torch for CPU only
157+
run: python -m pip install torch
158+
working-directory: ${{ github.workspace }}
159+
shell: cmd
160+
161+
- name: Download CUDA SDK v12.8
162+
working-directory: ${{ runner.temp }}
163+
run: |
164+
azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v12.8" .
165+
dir
166+
shell: pwsh
167+
168+
- name: Add CUDA to PATH
169+
shell: powershell
170+
run: |
171+
Write-Host "Adding CUDA to PATH"
172+
Write-Host "CUDA Path: $env:RUNNER_TEMP\v12.8\bin"
173+
Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v12.8\bin"
174+
Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v12.8\extras\CUPTI\lib64"
175+
176+
- name: Set OnnxRuntimeBuildDirectory
177+
shell: pwsh
178+
run: |
179+
$buildDir = Join-Path ${{ runner.temp }} "build"
180+
echo "OnnxRuntimeBuildDirectory=$buildDir" >> $env:GITHUB_ENV
181+
182+
- name: Install ONNX Runtime Wheel
183+
uses: ./.github/actions/install-onnxruntime-wheel
184+
with:
185+
whl-directory: ${{ runner.temp }}\build\Release\Release\dist
186+
187+
- name: Run CUDA Plugin EP Python Tests
188+
working-directory: ${{ github.workspace }}\onnxruntime\test\python\transformers
189+
shell: pwsh
190+
run: |
191+
$env:ORT_CUDA_PLUGIN_PATH = "${{ runner.temp }}\build\Release\Release\onnxruntime_providers_cuda_plugin.dll"
192+
Write-Host "ORT_CUDA_PLUGIN_PATH=$env:ORT_CUDA_PLUGIN_PATH"
193+
if (-not (Test-Path $env:ORT_CUDA_PLUGIN_PATH)) {
194+
Write-Error "CUDA plugin EP library not found at $env:ORT_CUDA_PLUGIN_PATH"
195+
exit 1
196+
}
197+
python test_cuda_plugin_ep.py
198+
if ($lastExitCode -ne 0) {
199+
exit $lastExitCode
200+
}
201+
env:
202+
DOTNET_SKIP_FIRST_TIME_EXPERIENCE: true
203+
setVcvars: true
204+
ALLOW_RELEASED_ONNX_OPSET_ONLY: '0'
205+
ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'
206+
AZCOPY_AUTO_LOGIN_TYPE: MSI
207+
AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4

cmake/onnxruntime_providers_cuda_plugin.cmake

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,10 +143,22 @@ target_compile_options(onnxruntime_providers_cuda_plugin PRIVATE
143143
"$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--std c++20>"
144144
"$<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr;-Xcudafe;--diag_suppress=550>"
145145
"$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcudafe --diag_suppress=2810>"
146-
"$<$<COMPILE_LANGUAGE:CXX>:-include;${REPO_ROOT}/include/onnxruntime/ep/adapters.h>"
147-
"$<$<COMPILE_LANGUAGE:CXX>:SHELL:-include ${CUDA_PLUGIN_EP_DIR}/cuda_kernel_adapter.h>"
148146
)
149147

148+
# Force-include adapter headers for CXX files.
149+
# MSVC uses /FI; GCC/Clang use -include.
150+
if (MSVC)
151+
target_compile_options(onnxruntime_providers_cuda_plugin PRIVATE
152+
"$<$<COMPILE_LANGUAGE:CXX>:SHELL:/FI \"${REPO_ROOT}/include/onnxruntime/ep/adapters.h\">"
153+
"$<$<COMPILE_LANGUAGE:CXX>:SHELL:/FI \"${CUDA_PLUGIN_EP_DIR}/cuda_kernel_adapter.h\">"
154+
)
155+
else()
156+
target_compile_options(onnxruntime_providers_cuda_plugin PRIVATE
157+
"$<$<COMPILE_LANGUAGE:CXX>:SHELL:-include ${REPO_ROOT}/include/onnxruntime/ep/adapters.h>"
158+
"$<$<COMPILE_LANGUAGE:CXX>:SHELL:-include ${CUDA_PLUGIN_EP_DIR}/cuda_kernel_adapter.h>"
159+
)
160+
endif()
161+
150162
if (MSVC)
151163
target_compile_options(onnxruntime_providers_cuda_plugin PRIVATE
152164
"$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler /permissive>"

0 commit comments

Comments
 (0)