Skip to content

Commit ea8037c

Browse files
[ExecuTorch][WebGPU] Enable backend test suite + x86 CI
Pull Request resolved: pytorch#19964 Wires the WebGPU backend into the standard ExecuTorch backend test suite and adds an x86 Linux CI job, mirroring the Vulkan delegate: `backends/test/suite/flows/webgpu.py` plus a `WebGPUTester`, run by `oss/.github/workflows/test-backend-webgpu.yml` on SwiftShader (a software Vulkan adapter, via `wgpu-native`, minimal dependencies, no GPU). Two fixes were needed for SwiftShader's downlevel limits: request the adapter's full `requiredLimits` at device creation (software adapters default storage-buffer limits to 0), and make the `add` op's workgroup size dynamic instead of a hardcoded constant. The WGSL now declares a pipeline-overridable `override wg_size: u32 = 256` and the host clamps it to the device's `maxComputeInvocationsPerWorkgroup` (256 on real GPUs and lavapipe, 128 on SwiftShader), so SwiftShader's 128-invocation cap no longer forces a smaller workgroup size on real hardware. This mirrors the dynamic-workgroup-sizing approach in D107259348 and opens the door to selecting device/algorithm-optimal sizes later. The `add` op also validates its 1D dispatch count before allocating any GPU objects, against the device's queried `maxComputeWorkgroupsPerDimension` (falling back to the WebGPU spec-default floor of 65535 only when the limit query fails). Per Stephen's review, the workgroup-size clamp and the dispatch-count computation are factored into reusable `inline` helpers in `runtime/WebGPUUtils.h` (`clamp_workgroup_size` and `compute_1d_workgroup_count`, mirroring the Vulkan delegate's `utils::div_up`) so the other ops can share them rather than re-inlining the logic. The editable CMake build additionally marks the `vulkan_schema` subdirectory `EXCLUDE_FROM_ALL` so the WebGPU `ALL` build does not pull in targets that need glslc. ghstack-source-id: 389636486 @exported-using-ghexport Differential Revision: [D107288999](https://our.internmc.facebook.com/intern/diff/D107288999/)
1 parent c036150 commit ea8037c

16 files changed

Lines changed: 272 additions & 8 deletions

File tree

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#!/bin/bash
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
set -ex
9+
10+
# SwiftShader: software Vulkan adapter for GPU-less CI (LunarG SDK not needed).
11+
install_swiftshader() {
12+
_https_amazon_aws=https://ossci-android.s3.amazonaws.com
13+
_swiftshader_archive=swiftshader-abe07b943-prebuilt.tar.gz
14+
_swiftshader_dir=/tmp/swiftshader
15+
mkdir -p $_swiftshader_dir
16+
17+
_tmp_archive="/tmp/${_swiftshader_archive}"
18+
19+
curl --silent --show-error --location --fail --retry 3 --retry-all-errors \
20+
--output "${_tmp_archive}" "$_https_amazon_aws/${_swiftshader_archive}"
21+
22+
tar -C "${_swiftshader_dir}" -xzf "${_tmp_archive}"
23+
24+
export VK_ICD_FILENAMES="${_swiftshader_dir}/swiftshader/build/Linux/vk_swiftshader_icd.json"
25+
export LD_LIBRARY_PATH="${_swiftshader_dir}/swiftshader/build/Linux/:${LD_LIBRARY_PATH}"
26+
export ETVK_USING_SWIFTSHADER=1
27+
}
28+
29+
install_swiftshader
30+
bash backends/webgpu/scripts/setup-wgpu-native.sh

.ci/scripts/test_backend.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,14 @@ if [[ "$FLOW" == *vulkan* ]]; then
5757
EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_VULKAN=ON"
5858
fi
5959

60+
if [[ "$FLOW" == *webgpu* ]]; then
61+
# Setup swiftshader (software Vulkan adapter for GPU-less runners) and wgpu-native,
62+
# which are required to build and run the WebGPU delegate.
63+
source .ci/scripts/setup-webgpu-linux-deps.sh
64+
65+
EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_WEBGPU=ON"
66+
fi
67+
6068
if [[ "$FLOW" == *arm* ]]; then
6169
if [[ "$SUITE" == "operators" ]]; then
6270
PYTEST_RETRY_ARGS=(--reruns 2 --reruns-delay 1)
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
name: Test WebGPU Backend
2+
3+
on:
4+
schedule:
5+
- cron: 0 2 * * *
6+
push:
7+
branches:
8+
- main
9+
- release/*
10+
tags:
11+
- ciflow/nightly/*
12+
pull_request:
13+
workflow_dispatch:
14+
15+
concurrency:
16+
group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
17+
cancel-in-progress: true
18+
19+
jobs:
20+
test-webgpu:
21+
uses: ./.github/workflows/_test_backend.yml
22+
with:
23+
backend: webgpu
24+
flows: '["webgpu"]'
25+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
26+
timeout: 120
27+
run-linux: true

CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1061,6 +1061,10 @@ if(EXECUTORCH_BUILD_PYBIND)
10611061
list(APPEND _dep_libs vulkan_backend)
10621062
endif()
10631063

1064+
if(EXECUTORCH_BUILD_WEBGPU)
1065+
list(APPEND _dep_libs webgpu_backend)
1066+
endif()
1067+
10641068
# compile options for pybind
10651069
set(_pybind_compile_options
10661070
$<$<CXX_COMPILER_ID:MSVC>:/EHsc

backends/test/suite/flow.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,12 @@ def _load_vulkan() -> list[TestFlow]:
117117
return [VULKAN_TEST_FLOW, VULKAN_STATIC_INT8_PER_CHANNEL_TEST_FLOW]
118118

119119

120+
def _load_webgpu() -> list[TestFlow]:
121+
from executorch.backends.test.suite.flows.webgpu import WEBGPU_TEST_FLOW
122+
123+
return [WEBGPU_TEST_FLOW]
124+
125+
120126
def _load_openvino() -> list[TestFlow]:
121127
from executorch.backends.test.suite.flows.openvino import (
122128
OPENVINO_INT8_TEST_FLOW,
@@ -178,6 +184,7 @@ def all_flows() -> dict[str, TestFlow]:
178184
+ _register_flow(_load_xnnpack, "XNNPACK")
179185
+ _register_flow(_load_coreml, "Core ML")
180186
+ _register_flow(_load_vulkan, "Vulkan")
187+
+ _register_flow(_load_webgpu, "WebGPU")
181188
+ _register_flow(_load_openvino, "OpenVINO")
182189
+ _register_flow(_load_qnn, "QNN")
183190
+ _register_flow(_load_arm, "ARM")
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
from executorch.backends.test.suite.flow import TestFlow
8+
from executorch.backends.webgpu.test.tester import WebGPUTester
9+
10+
11+
def _create_webgpu_flow() -> TestFlow:
12+
return TestFlow(
13+
"webgpu",
14+
backend="webgpu",
15+
tester_factory=WebGPUTester,
16+
skip_patterns=["float16", "float64"], # Not supported in swiftshader
17+
)
18+
19+
20+
WEBGPU_TEST_FLOW = _create_webgpu_flow()

backends/webgpu/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ if(NOT TARGET vulkan_schema)
2121
# target), but vulkan_schema is unconditionally defined.
2222
add_subdirectory(
2323
${CMAKE_CURRENT_SOURCE_DIR}/../vulkan
24-
${CMAKE_CURRENT_BINARY_DIR}/_vulkan_schema
24+
${CMAKE_CURRENT_BINARY_DIR}/_vulkan_schema EXCLUDE_FROM_ALL
2525
)
2626
endif()
2727

backends/webgpu/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.

backends/webgpu/runtime/WebGPUDevice.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,13 @@ WebGPUContext create_webgpu_context() {
121121
device_cb.callback = on_device_request;
122122
device_cb.userdata1 = &device_result;
123123

124+
// Request the adapter's full limits; software adapters default many to 0.
125+
WGPULimits supported_limits = {};
124126
WGPUDeviceDescriptor device_desc = {};
127+
if (wgpuAdapterGetLimits(ctx.adapter, &supported_limits) ==
128+
WGPUStatus_Success) {
129+
device_desc.requiredLimits = &supported_limits;
130+
}
125131
device_desc.uncapturedErrorCallbackInfo.callback = on_device_error;
126132

127133
wgpuAdapterRequestDevice(ctx.adapter, &device_desc, device_cb);
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#pragma once
10+
11+
#include <webgpu/webgpu.h>
12+
13+
#include <algorithm>
14+
#include <cstdint>
15+
#include <stdexcept>
16+
#include <string>
17+
18+
namespace executorch::backends::webgpu::utils {
19+
20+
// Clamp workgroup size to device limit (SwiftShader caps at 128).
21+
inline uint32_t clamp_workgroup_size(WGPUDevice device, uint32_t desired) {
22+
WGPULimits limits = {};
23+
if (wgpuDeviceGetLimits(device, &limits) == WGPUStatus_Success &&
24+
limits.maxComputeInvocationsPerWorkgroup > 0) {
25+
return std::min(desired, limits.maxComputeInvocationsPerWorkgroup);
26+
}
27+
return desired;
28+
}
29+
30+
// 1D dispatch count (mirrors Vulkan div_up); throws if > device limit.
31+
inline uint32_t compute_1d_workgroup_count(
32+
WGPUDevice device,
33+
uint32_t num_threads,
34+
uint32_t workgroup_size,
35+
const char* op_name) {
36+
uint32_t count = (num_threads + workgroup_size - 1) / workgroup_size;
37+
WGPULimits limits = {};
38+
uint32_t max_count =
39+
wgpuDeviceGetLimits(device, &limits) == WGPUStatus_Success &&
40+
limits.maxComputeWorkgroupsPerDimension > 0
41+
? limits.maxComputeWorkgroupsPerDimension
42+
: 65535u; // WebGPU spec-default floor
43+
if (count > max_count) {
44+
throw std::runtime_error(
45+
std::string("WebGPU ") + op_name +
46+
": workgroup count exceeds the 1D dispatch limit");
47+
}
48+
return count;
49+
}
50+
51+
} // namespace executorch::backends::webgpu::utils

0 commit comments

Comments
 (0)