Skip to content

Commit 173c9e2

Browse files
authored
[ETVK] WebGPU runtime (#18808)
wgpu prototype
1 parent eb19f24 commit 173c9e2

26 files changed

Lines changed: 1971 additions & 1 deletion

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ cmake-android-out/
1616
cmake-ios-out/
1717
cmake-out*
1818
cmake-out-android/
19+
backends/webgpu/third-party/
1920
build-android/
2021
build-x86/
2122
build-hexagon/

CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1156,6 +1156,11 @@ if(EXECUTORCH_BUILD_VULKAN)
11561156
list(APPEND _executorch_backends vulkan_backend vulkan_schema)
11571157
endif()
11581158

1159+
if(EXECUTORCH_BUILD_WEBGPU)
1160+
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/webgpu)
1161+
list(APPEND _executorch_backends webgpu_backend)
1162+
endif()
1163+
11591164
if(EXECUTORCH_BUILD_VGF)
11601165
list(APPEND _executorch_backends vgf_backend)
11611166
endif()

backends/vulkan/cmake/ShaderLibrary.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ endif()
2626

2727
find_program(GLSLC_PATH glslc PATHS $ENV{PATH})
2828

29-
if(NOT GLSLC_PATH)
29+
if(NOT GLSLC_PATH AND EXECUTORCH_BUILD_VULKAN)
3030
message(
3131
FATAL_ERROR
3232
"glslc from the Vulkan SDK must be installed to build the Vulkan backend. "

backends/webgpu/CMakeLists.txt

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

cmake_minimum_required(VERSION 3.19)

if(NOT EXECUTORCH_ROOT)
  set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..)
endif()

include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)

# Ensure vulkan_schema is available even when EXECUTORCH_BUILD_VULKAN is OFF.
# The WebGPU backend reuses the Vulkan FlatBuffer serialization format.
if(NOT TARGET vulkan_schema)
  # We need the schema generation from the Vulkan backend. Build only the
  # schema target by including the Vulkan CMakeLists.txt. The full Vulkan
  # backend will only build if EXECUTORCH_BUILD_VULKAN is ON (which gates the
  # vulkan_backend target), but vulkan_schema is unconditionally defined.
  add_subdirectory(
    ${CMAKE_CURRENT_SOURCE_DIR}/../vulkan
    ${CMAKE_CURRENT_BINARY_DIR}/_vulkan_schema
  )
endif()

set(WEBGPU_SRCS
    runtime/WebGPUBackend.cpp
    runtime/WebGPUGraph.cpp
    runtime/WebGPUDelegateHeader.cpp
    runtime/WebGPUDevice.cpp
    runtime/ops/OperatorRegistry.cpp
    runtime/ops/add/BinaryOp.cpp
)

add_library(webgpu_backend ${WEBGPU_SRCS})

target_include_directories(
  webgpu_backend PRIVATE $<BUILD_INTERFACE:${EXECUTORCH_ROOT}/..>
)

target_link_libraries(webgpu_backend PRIVATE vulkan_schema executorch_core)

# Native build: link against wgpu-native
set(WGPU_NATIVE_DIR
    "${CMAKE_CURRENT_SOURCE_DIR}/third-party/wgpu-native"
    CACHE PATH "Path to wgpu-native installation"
)

if(NOT EXISTS "${WGPU_NATIVE_DIR}/lib/libwgpu_native.a")
  message(
    FATAL_ERROR
      "wgpu-native not found at ${WGPU_NATIVE_DIR}. "
      "Run: bash backends/webgpu/scripts/setup-wgpu-native.sh"
  )
endif()

add_library(wgpu_native STATIC IMPORTED)
set_target_properties(
  wgpu_native PROPERTIES IMPORTED_LOCATION
                         "${WGPU_NATIVE_DIR}/lib/libwgpu_native.a"
)

target_include_directories(
  webgpu_backend PUBLIC $<BUILD_INTERFACE:${WGPU_NATIVE_DIR}/include>
)
target_link_libraries(webgpu_backend PRIVATE wgpu_native)

# Platform link dependencies, shared by the backend library and the native
# test binary so the two lists cannot drift apart.
if(APPLE)
  # System frameworks required by wgpu-native's Metal path.
  set(_webgpu_platform_libs
      "-framework Metal" "-framework QuartzCore" "-framework CoreGraphics"
      "-framework Foundation"
  )
else()
  # Portable spellings instead of bare -ldl / -lpthread flags.
  find_package(Threads REQUIRED)
  set(_webgpu_platform_libs ${CMAKE_DL_LIBS} m Threads::Threads)
endif()

target_link_libraries(webgpu_backend PRIVATE ${_webgpu_platform_libs})

target_compile_options(webgpu_backend PRIVATE -fexceptions)

# Link with --whole-archive for static registration of backend + ops
executorch_target_link_options_shared_lib(webgpu_backend)

# cxx_std_17 is a hard requirement (no silent decay to an older standard).
target_compile_features(webgpu_backend PRIVATE cxx_std_17)

install(
  TARGETS webgpu_backend
  EXPORT ExecuTorchTargets
  DESTINATION ${CMAKE_INSTALL_LIBDIR}
)

# Native test target
if(EXECUTORCH_BUILD_WEBGPU_TEST)
  add_executable(webgpu_native_test test/test_webgpu_native.cpp)

  target_include_directories(
    webgpu_native_test PRIVATE $<BUILD_INTERFACE:${EXECUTORCH_ROOT}/..>
                               "${WGPU_NATIVE_DIR}/include"
  )

  target_link_libraries(
    webgpu_native_test
    PRIVATE webgpu_backend
            wgpu_native
            executorch_core
            extension_module_static
            extension_data_loader
            extension_tensor
            portable_kernels
            portable_ops_lib
            ${_webgpu_platform_libs}
  )

  target_compile_options(webgpu_native_test PRIVATE -fexceptions)
  target_compile_features(webgpu_native_test PRIVATE cxx_std_17)
endif()

backends/webgpu/README.md

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
# WebGPU Backend
2+
3+
Run ExecuTorch models on the GPU via [WebGPU](https://www.w3.org/TR/webgpu/). The backend compiles delegated subgraphs into WGSL compute shaders executed natively through [wgpu-native](https://github.com/gfx-rs/wgpu-native) (Metal on macOS, Vulkan on Linux/Windows).
4+
5+
> **Status: Prototype.** The backend supports a single operator today and is under active development. See [TODO.md](TODO.md) for the roadmap.
6+
7+
## Architecture
8+
9+
```
PyTorch model
      │ torch.export
      ▼
Exported Program
      │ VulkanPartitioner (tags supported fp32 ops)
      ▼
Edge Dialect IR
      │ VulkanBackend.preprocess (builds Vulkan FlatBuffer, buffer-only storage)
      ▼
.pte file (with VH00/VK00 delegate blob)
      │
      ▼
Native runtime (wgpu-native → Metal / Vulkan)
      │ WebGPUGraph::build → creates GPU buffers, pipelines, bind groups
      │ WebGPUGraph::execute → encodes + submits compute passes
      ▼
GPU output (mapped back to CPU via wgpuDevicePoll)
```
28+
29+
Key design choices:
30+
- **Reuses Vulkan serialization** — the delegate blob is a Vulkan FlatBuffer (`VK00`) with a `VH00` header. All tensor storage is forced to `BUFFER` (WebGPU has no 3D storage textures).
31+
- **Built-in WGSL shaders** — shader source is compiled as C++ string constants. Future work will embed fused shaders in the FlatBuffer for compile-time mega-kernel fusion.
32+
- **No Python AOT code** — directly consumes .pte files exported via `VulkanPartitioner`.
33+
34+
## Operator Support
35+
36+
| Operator | WGSL Shader | Notes |
37+
|---|---|---|
38+
| `aten.add.Tensor` | `binary_add.wgsl` | Element-wise with alpha: `out = in1 + alpha * in2` |
39+
40+
**Planned:** `sub`, `mul`, `relu`, `linear` (matmul), `softmax`, `layer_norm`
41+
42+
## Quick Start
43+
44+
### 1. Setup
45+
46+
```bash
47+
bash backends/webgpu/scripts/setup-wgpu-native.sh
48+
```
49+
50+
This downloads prebuilt wgpu-native binaries for your platform.
51+
52+
### 2. Export a model
53+
54+
```python
55+
import torch
56+
from executorch.backends.vulkan import VulkanPartitioner
57+
from executorch.exir import to_edge_transform_and_lower
58+
59+
class AddModule(torch.nn.Module):
60+
def forward(self, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
61+
return a + b
62+
63+
ep = torch.export.export(AddModule(), (torch.randn(4, 4), torch.randn(4, 4)))
64+
et_program = to_edge_transform_and_lower(
65+
ep, partitioner=[VulkanPartitioner()]
66+
).to_executorch()
67+
68+
with open("add.pte", "wb") as f:
69+
f.write(et_program.buffer)
70+
```
71+
72+
### 3. Build and run
73+
74+
```bash
75+
bash backends/webgpu/test/test_build_webgpu.sh
76+
```
77+
78+
This runs Python export tests, exports a .pte, builds the native runtime, and validates GPU output.
79+
80+
## Directory Structure
81+
82+
```
83+
backends/webgpu/
84+
├── CMakeLists.txt
85+
├── README.md
86+
├── TODO.md
87+
├── runtime/
88+
│ ├── WebGPUBackend.h/cpp # BackendInterface (init/execute)
89+
│ ├── WebGPUGraph.h/cpp # GPU graph: buffers, pipelines, dispatch
90+
│ ├── WebGPUDelegateHeader.h/cpp # VH00 header parser
91+
│ ├── WebGPUDevice.h/cpp # wgpu-native device abstraction
92+
│ └── ops/
93+
│ ├── OperatorRegistry.h/cpp # Op dispatch table
94+
│ └── add/
95+
│ ├── BinaryOp.cpp # aten.add.Tensor implementation
96+
│ ├── binary_add.wgsl # WGSL shader source
97+
│ └── binary_add_wgsl.h # Shader as C++ string constant
98+
├── scripts/
99+
│ └── setup-wgpu-native.sh # Download wgpu-native binaries
100+
└── test/
101+
├── conftest.py
102+
├── test_build_webgpu.sh # End-to-end build + test
103+
├── test_webgpu_native.cpp # C++ native test runner
104+
└── ops/
105+
└── add/
106+
└── test_add.py # Python export tests
107+
```
108+
109+
## Requirements
110+
111+
- **macOS**: Metal-capable GPU
112+
- **Linux**: Vulkan-capable GPU + drivers
113+
- **Build**: CMake 3.19+, conda environment with ExecuTorch installed

backends/webgpu/TODO.md

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# WebGPU Backend — TODO
2+
3+
## Current State (Prototype)
4+
- Single op: `aten.add.Tensor` (fp32, buffer storage)
5+
- No Python AOT code — directly consumes Vulkan delegate (.pte exported via VulkanPartitioner)
6+
- Reuses Vulkan FlatBuffer format (VH00 header + VK00 payload)
7+
- Registers as `"VulkanBackend"` at runtime — mutually exclusive with Vulkan backend at link time
8+
- Built-in WGSL shaders (not embedded in .pte)
9+
10+
## Architecture
11+
```
12+
VulkanPartitioner (Python) → VkGraphBuilder → VK00 FlatBuffer → .pte
13+
→ WebGPU Runtime: registers as "VulkanBackend", parses VH00/VK00
14+
→ WebGPUGraph::build → GPU buffers/pipelines/bind groups
15+
→ WebGPUGraph::execute → encode + submit compute passes
16+
```
17+
18+
Adding a new op requires only C++ runtime work:
19+
1. WGSL shader + header
20+
2. C++ op implementation (read args from VkGraph, create pipeline, record dispatch)
21+
3. Register in CMakeLists.txt
22+
4. Test with VulkanPartitioner export
23+
24+
## Performance: Command Encoding Overhead
25+
WebGPU `GPUCommandBuffer` is single-use (no equivalent to Vulkan's cached command lists).
26+
Per-dispatch API call cost adds up for large graphs.
27+
28+
**Primary mitigation: mega-kernel fusion.** Generate fused WGSL shaders for chains of
29+
element-wise ops (add→relu→mul→clamp) at compile time. Embed via the existing
30+
`shaders: [VkBytes]` field in schema.fbs.
31+
32+
## Next Steps
33+
1. **More ops**: sub, mul, relu, linear (matmul), softmax, layer_norm
34+
2. **fp16 support**: Feature-detect `shader-f16`, fallback to fp32
35+
3. **Buffer pooling**: Reuse GPU buffers to avoid OOM at scale
36+
4. **Pipeline caching**: Cache compiled pipelines across runs
37+
5. **Profiling**: Wire WebGPU timestamp queries into ETDump/EventTracer
38+
6. **LLM support**: KV cache management, Flash Attention in WGSL, quantized ops (int4/int8)
39+
7. **Browser/JS runtime**: Emscripten build, JS harness, browser test page

0 commit comments

Comments
 (0)