Skip to content

Commit 365f588

Browse files
committed
Vulkan: support Linux/Windows desktop GPUs and opt-in wheel builds
The Vulkan backend was developed for Android GPUs. This makes it build and run on Linux/Windows desktop discrete GPUs (NVIDIA/AMD/Intel) and adds opt-in pre-built Vulkan wheels, with no change to Android behavior (build divergence is behind compile-time guards; runtime changes key off queried capabilities). Covers build portability, discrete-GPU correctness fixes (real-GPU device/ICD selection, shaderInt16/Int64/Float64 enablement, a blit queue guard, and texel-rounded buffer allocations to avoid out-of-bounds vec4 reads), and EXECUTORCH_BUILD_VULKAN-gated CI/packaging (a new vulkan.yml runs the real-GPU NVIDIA and Windows MSVC jobs, plus opt-in wheel plumbing). Tested on an NVIDIA A100; the SwiftShader CI path is unchanged. This change was authored with Claude.
1 parent e285edf commit 365f588

26 files changed

Lines changed: 746 additions & 86 deletions

.ci/scripts/setup-vulkan-linux-deps.sh

Lines changed: 121 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
#!/bin/bash
32
# Copyright (c) Meta Platforms, Inc. and affiliates.
43
# All rights reserved.
@@ -22,7 +21,7 @@ install_swiftshader() {
2221
tar -C "${_swiftshader_dir}" -xzf "${_tmp_archive}"
2322

2423
export VK_ICD_FILENAMES="${_swiftshader_dir}/swiftshader/build/Linux/vk_swiftshader_icd.json"
25-
export LD_LIBRARY_PATH="${_swiftshader_dir}/swiftshader/build/Linux/"
24+
export LD_LIBRARY_PATH="${_swiftshader_dir}/swiftshader/build/Linux/:${LD_LIBRARY_PATH:-}"
2625
export ETVK_USING_SWIFTSHADER=1
2726
}
2827

@@ -43,7 +42,125 @@ install_vulkan_sdk() {
4342
export PATH="${PATH}:${_vulkan_sdk_dir}/${VULKAN_SDK_VERSION}/x86_64/bin/"
4443
}
4544

45+
_maybe_sudo() {
  # Run the given command line with root privileges: through sudo when the
  # current uid (as reported by `id -u`) is not 0, directly otherwise. The
  # exit status is that of the command (or of sudo).
  if ! [ "$(id -u)" -eq 0 ]; then
    sudo "$@"
    return
  fi
  "$@"
}
52+
53+
install_glslc() {
  # Install glslc from conda-forge's shaderc package into an isolated conda
  # prefix and put that prefix's bin directory at the front of PATH.
  #
  # Why not the LunarG SDK: its glslc is dynamically linked against a newer
  # glibc/libstdc++ than the manylinux_2_28 / AlmaLinux 8 CUDA runner image
  # provides (glibc 2.28), where it fails to load with "GLIBC_2.29 not found".
  # conda-forge's shaderc is built against an old sysroot, runs there, and is
  # recent enough for the GL_EXT_integer_dot_product / GL_KHR_cooperative_matrix
  # extensions the Vulkan shaders use. A dedicated prefix keeps the base conda
  # env that builds ExecuTorch untouched.
  #
  # Returns: 0 on success; 1 (with a message on stderr, PATH left unchanged)
  # when the conda environment could not be created.
  _glslc_prefix=/tmp/shaderc
  if ! conda create -y -p "${_glslc_prefix}" -c conda-forge shaderc; then
    # Do not advertise a prefix that was never populated: a later "glslc not
    # found" would be much harder to trace back to this step.
    echo "ERROR: failed to install shaderc (glslc) into ${_glslc_prefix}" >&2
    return 1
  fi
  export PATH="${_glslc_prefix}/bin:${PATH}"
}
65+
66+
install_vulkan_loader() {
  # Install the Khronos Vulkan loader (libvulkan.so.1, which volk dlopen()s at
  # runtime) together with vulkan-tools, whose vulkaninfo is used for the
  # device sanity check. The loader is not part of the NVIDIA driver and is
  # absent from the CUDA builder image; both packages ship as native el8 RPMs.
  #
  # Nothing is installed (and 0 is returned) when dnf is not available.
  command -v dnf >/dev/null 2>&1 || return 0
  _maybe_sudo dnf install -y vulkan-loader vulkan-tools
}
74+
75+
_find_nvidia_vulkan_library() {
  # Print the path of libGLX_nvidia.so.0 (no trailing newline), or nothing if
  # it cannot be located.
  #
  # NVIDIA implements its Vulkan ICD inside libGLX_nvidia.so.0. The NVIDIA
  # container runtime mounts this library into the container (it is pulled from
  # the driver's ldcache when NVIDIA_DRIVER_CAPABILITIES includes graphics/all),
  # so prefer ldconfig and fall back to the usual mount locations.
  local lib cand
  # awk deliberately consumes all of ldconfig's output instead of exiting on
  # the first hit, and the pipeline status is neutralised with `|| true`:
  # otherwise, when the sourcing shell runs with `set -e -o pipefail`, an early
  # awk exit (SIGPIPE in ldconfig) or an ldconfig that is not on PATH would
  # abort the whole script before the fallback locations are even tried.
  lib="$(ldconfig -p 2>/dev/null |
    awk '/libGLX_nvidia\.so\.0/ && !seen {print $NF; seen = 1}' || true)"
  if [ -z "${lib}" ]; then
    for cand in \
      /usr/lib64/libGLX_nvidia.so.0 \
      /usr/lib/x86_64-linux-gnu/libGLX_nvidia.so.0 \
      /usr/lib/libGLX_nvidia.so.0; do
      if [ -e "${cand}" ]; then
        lib="${cand}"
        break
      fi
    done
  fi
  printf '%s' "${lib}"
}
94+
95+
_vulkan_has_real_device() {
  # Succeeds (returns 0) if the Vulkan loader enumerates a hardware GPU, i.e.
  # `vulkaninfo --summary` reports a DISCRETE, INTEGRATED or VIRTUAL GPU
  # deviceType.
  #
  # Also returns 0 when vulkaninfo is not installed, because nothing can be
  # probed in that case.
  # NOTE(review): presumably a deliberate optimistic default - confirm.
  local summary
  command -v vulkaninfo >/dev/null 2>&1 || return 0
  # vulkaninfo can exit non-zero for unrelated reasons (no display/WSI), so the
  # verdict keys off the reported deviceType only. Capture the output first
  # rather than piping into `grep -q`: under `set -o pipefail` a piped form
  # would inherit vulkaninfo's non-zero status (or a SIGPIPE caused by grep
  # exiting early) and report "no device" even though one was listed.
  summary="$(vulkaninfo --summary 2>/dev/null || true)"
  grep -qE 'PHYSICAL_DEVICE_TYPE_(DISCRETE|INTEGRATED|VIRTUAL)_GPU' \
    <<<"${summary}"
}
102+
103+
setup_real_gpu_icd() {
  # Point the Vulkan loader at the real NVIDIA GPU when one is usable, and fall
  # back to SwiftShader otherwise.
  #
  # Two quirks of the CUDA CI image make this non-trivial:
  #   1. The NVIDIA container runtime mounts the driver's Vulkan library but
  #      does not register its ICD manifest, so the loader never discovers the
  #      GPU on its own. A manifest is synthesized here and the loader is
  #      pinned to it.
  #   2. Installing vulkan-loader/vulkan-tools pulls in mesa-vulkan-drivers,
  #      which drop Intel/AMD/lavapipe manifests for absent hardware. lavapipe
  #      fails vkCreateInstance on this image and, because the loader walks
  #      every manifest in icd.d, that poisons device enumeration for the whole
  #      process. Pinning VK_ICD_FILENAMES makes the loader ignore icd.d, so
  #      the broken stubs cannot interfere.
  local driver_lib manifest
  driver_lib="$(_find_nvidia_vulkan_library)"

  # No driver library at all: nothing to pin, go straight to SwiftShader.
  if [ -z "${driver_lib}" ]; then
    echo "WARNING: no NVIDIA Vulkan driver library found; using SwiftShader."
    install_swiftshader
    return
  fi

  manifest=/tmp/nvidia_icd.json
  cat >"${manifest}" <<JSON
{
  "file_format_version": "1.0.0",
  "ICD": {
    "library_path": "${driver_lib}",
    "api_version": "1.3.0"
  }
}
JSON
  export VK_ICD_FILENAMES="${manifest}"
  unset ETVK_USING_SWIFTSHADER || true

  # Happy path: the pinned ICD enumerates a hardware device.
  if _vulkan_has_real_device; then
    echo "Real NVIDIA GPU selected; pinned Vulkan ICD to ${driver_lib}"
    return
  fi

  echo "WARNING: ${driver_lib} present but no GPU enumerated; using SwiftShader."
  # Surface why the NVIDIA driver did not enumerate (e.g. a missing dependency
  # of libGLX_nvidia, or no render node) so the fallback is diagnosable in CI.
  if command -v vulkaninfo >/dev/null 2>&1; then
    echo "--- NVIDIA Vulkan ICD diagnostic ---"
    VK_LOADER_DEBUG=warn vulkaninfo --summary 2>&1 | head -40 || true
    echo "--- end diagnostic ---"
  fi
  # Drop the pin before handing over to the SwiftShader installer.
  unset VK_ICD_FILENAMES
  install_swiftshader
}
148+
46149
VULKAN_SDK_VERSION="1.4.321.1"
47150

48-
install_swiftshader
49-
install_vulkan_sdk "${VULKAN_SDK_VERSION}"
151+
# Pick the provisioning flow from the first argument. Only "real-gpu" selects
# the real system ICD; every other value (including no argument at all) takes
# the SwiftShader path, so the existing CPU-runner CI is unchanged.
if [ "${1:-swiftshader}" = "real-gpu" ]; then
  # The LunarG SDK is intentionally not downloaded in this mode: its prebuilt
  # glslc cannot run on the old-glibc CUDA image. glslc comes from conda-forge
  # and the loader from the system package manager instead.
  install_vulkan_loader
  install_glslc
  setup_real_gpu_icd
else
  install_swiftshader
  install_vulkan_sdk "${VULKAN_SDK_VERSION}"
fi
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
# Install glslc (the Vulkan shader compiler) on Windows via conda-forge's
8+
# shaderc package, and make sure it is on PATH. glslc is the only build-time
9+
# Vulkan dependency -- the Vulkan headers and the volk loader come from the
10+
# in-tree submodules -- so this avoids depending on the heavyweight LunarG SDK
11+
# installer. Requires conda to be available (the callers create/activate an env).
12+
13+
$ErrorActionPreference = "Stop"

# Step 1: install shaderc (which ships glslc) into the active conda env.
Write-Host "Installing shaderc (provides glslc) from conda-forge..."
conda install -y -c conda-forge shaderc
if ($LASTEXITCODE -ne 0) {
    Write-Error "Failed to install shaderc from conda-forge (exit ${LASTEXITCODE})"
    exit 1
}

# Step 2: make sure the install actually put glslc on PATH.
$glslcCommand = Get-Command glslc -ErrorAction SilentlyContinue
if (-not $glslcCommand) {
    Write-Error "glslc not found on PATH after installing shaderc"
    exit 1
}

# Step 3: put glslc's directory at the front of PATH for this process and,
# when GITHUB_PATH is set (running as a GitHub Actions step), append it there
# so that subsequent steps see it too.
$glslcBinDir = Split-Path -Path $glslcCommand.Source -Parent
$env:PATH = "${glslcBinDir};${env:PATH}"
if ($env:GITHUB_PATH) {
    Add-Content -Path $env:GITHUB_PATH -Value $glslcBinDir
}

# Step 4: report what was found and print its version.
Write-Host "glslc available at $($glslcCommand.Source)"
& glslc --version
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
# Build-validation for the Vulkan backend under MSVC on Windows. Mirrors
8+
# setup-windows-msvc.ps1 but installs glslc (the Vulkan shader compiler) and
9+
# configures/builds the vulkan_backend target. This is a bring-up job: it exists
10+
# to surface MSVC portability issues in the Vulkan/volk/VMA code, so it may need
11+
# iteration.
12+
13+
# Create and activate the Python 3.12 conda env used for the build.
conda create --yes --quiet -n et python=3.12
conda activate et

# cmake comes from conda.
conda install -y cmake

# MSVC only works inside the Visual Studio developer environment, so enter it.
& "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\Common7\Tools\Launch-VsDevShell.ps1" -Arch amd64

# glslc (conda-forge shaderc) is installed and put on PATH for this process.
.ci/scripts/setup-vulkan-windows-deps.ps1

# Python requirements for CI.
pip install -r .ci/docker/requirements-ci.txt

# Always start from an empty build directory.
$vulkanBuildDir = "cmake-out-vulkan"
if (Test-Path -Path $vulkanBuildDir) {
    Remove-Item -Path $vulkanBuildDir -Recurse -Force
}
New-Item -Path $vulkanBuildDir -ItemType Directory

# Configure with the Vulkan backend enabled.
$configureArgs = @(
    "-S", ".",
    "-B", $vulkanBuildDir,
    "-DCMAKE_BUILD_TYPE=Release",
    "-DEXECUTORCH_BUILD_VULKAN=ON",
    "-DPYTHON_EXECUTABLE=python"
)
cmake @configureArgs

if ($LASTEXITCODE -ne 0) {
    Write-Host "CMake configuration failed. Exit code: $LASTEXITCODE."
    exit $LASTEXITCODE
}

# Build only the vulkan_backend target.
$buildArgs = @(
    "--build", $vulkanBuildDir,
    "--config", "Release",
    "--target", "vulkan_backend",
    "-j16"
)
cmake @buildArgs

if ($LASTEXITCODE -ne 0) {
    Write-Host "Vulkan backend MSVC build failed. Exit code: $LASTEXITCODE."
    exit $LASTEXITCODE
}

Write-Host "Vulkan backend MSVC build completed successfully!"

.ci/scripts/test_backend.sh

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,15 @@ if [[ "$FLOW" == *qnn* ]]; then
5151
fi
5252

5353
if [[ "$FLOW" == *vulkan* ]]; then
54-
# Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate.
55-
source .ci/scripts/setup-vulkan-linux-deps.sh
54+
# Set up the Vulkan SDK and select an ICD: use the real system GPU ICD when one
# is present (real-GPU runner), otherwise fall back to SwiftShader (CPU
# runner). The Vulkan loader searches both standard ICD directories, so a
# manifest in EITHER of them counts. Each directory is probed separately with
# `compgen -G` (succeeds when the glob matches at least one path); a single
# `ls dirA/*.json dirB/*.json` exits non-zero as soon as one of the two globs
# has no match, which would wrongly select SwiftShader on machines that only
# populate one directory.
if compgen -G '/etc/vulkan/icd.d/*.json' >/dev/null ||
  compgen -G '/usr/share/vulkan/icd.d/*.json' >/dev/null; then
  source .ci/scripts/setup-vulkan-linux-deps.sh "real-gpu"
else
  source .ci/scripts/setup-vulkan-linux-deps.sh "swiftshader"
fi
5663

5764
EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_VULKAN=ON"
5865
fi

.ci/scripts/wheel/pre_build_script.sh

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,3 +69,34 @@ if [[ "$(uname -s)" == "Linux" && "$(uname -m)" == "x86_64" ]]; then
6969
echo "QNN_SDK_ROOT=${QNN_SDK_ROOT}" >> "${GITHUB_ENV}"
7070
echo "QNN SDK downloaded to ${QNN_SDK_ROOT}"
7171
fi
72+
73+
# Provision the Vulkan SDK (glslc) and submodules ONLY when explicitly requested
74+
# via EXECUTORCH_BUILD_VULKAN. The default wheel build leaves this unset, so it
75+
# does no extra work (no submodule fetch, no SDK download) and is unaffected.
76+
case "${EXECUTORCH_BUILD_VULKAN:-0}" in
  0 | OFF)
    # Unset, empty, "0" or "OFF": Vulkan was not requested, do nothing.
    ;;
  *)
    echo "Initializing Vulkan backend third-party submodules..."
    VULKAN_SUBMODULES=(
      backends/vulkan/third-party/Vulkan-Headers
      backends/vulkan/third-party/volk
      backends/vulkan/third-party/VulkanMemoryAllocator
    )
    case "${UNAME_S}" in
      *MINGW* | *MSYS*)
        # Windows (Git Bash / MSYS): fetch the submodules with the OpenSSL
        # backend for git, then let the PowerShell helper install glslc.
        git -c http.sslBackend=openssl submodule update --init "${VULKAN_SUBMODULES[@]}"
        echo "Installing Vulkan SDK for Windows wheel build..."
        powershell -ExecutionPolicy Bypass -File .ci/scripts/setup-vulkan-windows-deps.ps1
        ;;
      *)
        git submodule update --init "${VULKAN_SUBMODULES[@]}"
        # glslc is taken from conda-forge rather than the LunarG SDK: the
        # manylinux wheel image uses an old glibc where the SDK's prebuilt
        # glslc cannot run ("GLIBC_2.29 not found"), whereas conda-forge's
        # shaderc is built against an old sysroot and runs there. The Vulkan
        # headers come from the submodules above and volk dlopen()s the loader
        # at runtime, so glslc is the only thing needed to build.
        echo "Installing glslc (conda-forge shaderc) for Linux wheel build..."
        _glslc_prefix="${HOME}/.shaderc"
        conda create -y -p "${_glslc_prefix}" -c conda-forge shaderc
        # Expose glslc to this script and, via GITHUB_PATH, to later steps.
        export PATH="${_glslc_prefix}/bin:${PATH}"
        echo "${_glslc_prefix}/bin" >> "${GITHUB_PATH}"
        echo "glslc installed: $(command -v glslc)"
        ;;
    esac
    ;;
esac

.ci/scripts/wheel/test_linux.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,13 @@
3131
), f"OpenvinoBackend not found in registered backends: {registered}"
3232
print("✓ OpenvinoBackend is registered")
3333

34+
# Vulkan backend is optional: only present when the wheel was built with
35+
# EXECUTORCH_BUILD_VULKAN=1 and the Vulkan SDK (glslc) was available.
36+
if "VulkanBackend" in registered:
37+
print("✓ VulkanBackend is registered")
38+
else:
39+
print("⚠ VulkanBackend not registered (expected for the default wheel)")
40+
3441
test_base.run_tests(
3542
model_tests=[
3643
test_base.ModelTest(

.ci/scripts/wheel/test_windows.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
# This source code is licensed under the BSD-style license found in the
66
# LICENSE file in the root directory of this source tree.
77

8+
import platform
89
from typing import List
910

1011
import torch
@@ -15,6 +16,7 @@
1516
from executorch.examples.xnnpack.quantization.utils import quantize as quantize_xnn
1617
from executorch.exir import EdgeCompileConfig, to_edge_transform_and_lower
1718
from executorch.extension.pybindings.portable_lib import (
19+
_get_registered_backend_names,
1820
_load_for_executorch_from_buffer,
1921
)
2022
from test_base import ModelTest
@@ -63,6 +65,15 @@ def run_tests(model_tests: List[ModelTest]) -> None:
6365

6466

6567
if __name__ == "__main__":
68+
if platform.system() == "Windows":
69+
registered = _get_registered_backend_names()
70+
# Vulkan backend is optional: only present when the wheel was built with
71+
# EXECUTORCH_BUILD_VULKAN=1 and the Vulkan SDK (glslc) was available.
72+
if "VulkanBackend" in registered:
73+
print("✓ VulkanBackend is registered")
74+
else:
75+
print("⚠ VulkanBackend not registered (expected for the default wheel)")
76+
6677
run_tests(
6778
model_tests=[
6879
ModelTest(

.github/workflows/test-backend-vulkan.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ concurrency:
1717
cancel-in-progress: true
1818

1919
jobs:
20+
# Default coverage: builds + runs on SwiftShader (software Vulkan) on CPU
21+
# runners. Runs on every PR and nightly.
2022
test-vulkan:
2123
uses: ./.github/workflows/_test_backend.yml
2224
with:
@@ -28,3 +30,6 @@ jobs:
2830
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
2931
timeout: 120
3032
run-linux: true
33+
34+
# Real-GPU (NVIDIA) and Windows MSVC coverage live in vulkan.yml, which gates
35+
# those scarce/expensive runners behind path filtering.

0 commit comments

Comments
 (0)