-
Notifications
You must be signed in to change notification settings - Fork 968
[ETVK] WebGPU runtime #18808
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ETVK] WebGPU runtime #18808
Changes from all commits
b5e3ef9
ae3334a
35cc747
3787f17
048f8ee
9894298
b5dbe09
2f90d51
a1fab3d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,119 @@ | ||
| # Copyright (c) Meta Platforms, Inc. and affiliates. | ||
| # All rights reserved. | ||
| # | ||
| # This source code is licensed under the BSD-style license found in the | ||
| # LICENSE file in the root directory of this source tree. | ||
|
|
||
cmake_minimum_required(VERSION 3.19)

# Locate the ExecuTorch repository root (two levels up from backends/webgpu)
# unless the including project has already provided it.
if(NOT EXECUTORCH_ROOT)
  set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..)
endif()

# Shared CMake helpers; provides executorch_target_link_options_shared_lib
# used below for whole-archive linking.
include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)
|
|
||
# Ensure vulkan_schema is available even when EXECUTORCH_BUILD_VULKAN is OFF.
# The WebGPU backend reuses the Vulkan FlatBuffer serialization format.
if(NOT TARGET vulkan_schema)
  # We only need the schema generation from the Vulkan backend, but the only
  # way to get the vulkan_schema target today is to include the full Vulkan
  # CMakeLists.txt.  Mark the subdirectory EXCLUDE_FROM_ALL so the remaining
  # Vulkan targets (shader libraries, vulkan_backend, ...) are not built by
  # `all`; vulkan_schema itself still builds on demand because webgpu_backend
  # links against it.
  # NOTE(review): this still *configures* the full Vulkan CMakeLists.txt; a
  # standalone schema-only CMake include in backends/vulkan would avoid
  # requiring its toolchain (e.g. glslc) here — TODO confirm/refactor.
  add_subdirectory(
    ${CMAKE_CURRENT_SOURCE_DIR}/../vulkan
    ${CMAKE_CURRENT_BINARY_DIR}/_vulkan_schema EXCLUDE_FROM_ALL
  )
endif()
|
|
||
# WebGPU delegate runtime: backend entry point, graph build/execute, delegate
# header parsing, device management, and the operator registry plus the
# currently implemented ops.  Sources are listed directly rather than through
# a single-use variable.
add_library(
  webgpu_backend
  runtime/WebGPUBackend.cpp
  runtime/WebGPUGraph.cpp
  runtime/WebGPUDelegateHeader.cpp
  runtime/WebGPUDevice.cpp
  runtime/ops/OperatorRegistry.cpp
  runtime/ops/add/BinaryOp.cpp
)

# Headers are included as <executorch/...>, so expose the repository's parent
# directory (build tree only).
target_include_directories(
  webgpu_backend PRIVATE $<BUILD_INTERFACE:${EXECUTORCH_ROOT}/..>
)

# vulkan_schema supplies the generated FlatBuffer headers; executorch_core
# supplies the backend runtime interfaces.
target_link_libraries(webgpu_backend PRIVATE vulkan_schema executorch_core)
|
|
||
# Native build: link against a prebuilt wgpu-native installation.  The path
# is a cache variable so users can point at their own build.
set(WGPU_NATIVE_DIR
    "${CMAKE_CURRENT_SOURCE_DIR}/third-party/wgpu-native"
    CACHE PATH "Path to wgpu-native installation"
)

# Fail fast with setup instructions when the static archive is missing.
if(NOT EXISTS "${WGPU_NATIVE_DIR}/lib/libwgpu_native.a")
  message(
    FATAL_ERROR
      "wgpu-native not found at ${WGPU_NATIVE_DIR}. "
      "Run: bash backends/webgpu/scripts/setup-wgpu-native.sh"
  )
endif()

# Wrap the prebuilt archive in an imported target so it participates in the
# normal target-based link graph.
add_library(wgpu_native STATIC IMPORTED)
set_property(
  TARGET wgpu_native PROPERTY IMPORTED_LOCATION
                              "${WGPU_NATIVE_DIR}/lib/libwgpu_native.a"
)

# PUBLIC include path: consumers of webgpu_backend get the wgpu-native headers
# too (presumably the backend's public headers reference them — NOTE(review):
# verify; otherwise this could be PRIVATE).
target_include_directories(
  webgpu_backend PUBLIC $<BUILD_INTERFACE:${WGPU_NATIVE_DIR}/include>
)
target_link_libraries(webgpu_backend PRIVATE wgpu_native)
|
|
||
# Platform-specific system dependencies required by wgpu-native.
if(APPLE)
  # wgpu-native's Metal path needs these frameworks at link time.
  target_link_libraries(
    webgpu_backend PRIVATE "-framework Metal" "-framework QuartzCore"
                           "-framework CoreGraphics" "-framework Foundation"
  )
else()
  # Portable replacements for the raw `dl m pthread` link flags:
  # Threads::Threads carries the correct per-platform thread flags and
  # CMAKE_DL_LIBS is empty on platforms where dlopen needs no extra library.
  find_package(Threads REQUIRED)
  target_link_libraries(
    webgpu_backend PRIVATE Threads::Threads ${CMAKE_DL_LIBS} m
  )
endif()

# The runtime uses C++ exceptions internally.
target_compile_options(webgpu_backend PRIVATE -fexceptions)
|
|
||
|
Comment on lines
+65
to
+75
|
||
# Link with --whole-archive so the static registration of the backend and its
# ops (global constructors) is not dropped by the linker.
executorch_target_link_options_shared_lib(webgpu_backend)

# Require C++17; CXX_STANDARD_REQUIRED prevents a silent fallback to an older
# standard on compilers that lack support.
set_target_properties(
  webgpu_backend PROPERTIES CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON
)

# Export alongside the other ExecuTorch targets.
install(
  TARGETS webgpu_backend
  EXPORT ExecuTorchTargets
  DESTINATION ${CMAKE_INSTALL_LIBDIR}
)
|
|
||
# Native test target: standalone executable exercising the WebGPU runtime.
if(EXECUTORCH_BUILD_WEBGPU_TEST)
  add_executable(webgpu_native_test test/test_webgpu_native.cpp)

  target_include_directories(
    webgpu_native_test PRIVATE $<BUILD_INTERFACE:${EXECUTORCH_ROOT}/..>
                               "${WGPU_NATIVE_DIR}/include"
  )

  # The test links the backend plus the ExecuTorch extensions used to load
  # and run a model, and the portable kernels for any non-delegated ops.
  target_link_libraries(
    webgpu_native_test
    PRIVATE webgpu_backend
            wgpu_native
            executorch_core
            extension_module_static
            extension_data_loader
            extension_tensor
            portable_kernels
            portable_ops_lib
  )

  if(APPLE)
    target_link_libraries(
      webgpu_native_test PRIVATE "-framework Metal" "-framework QuartzCore"
                                 "-framework CoreGraphics"
    )
  else()
    # Portable replacements for the raw `dl m pthread` link flags (matches
    # the webgpu_backend target above).
    find_package(Threads REQUIRED)
    target_link_libraries(
      webgpu_native_test PRIVATE Threads::Threads ${CMAKE_DL_LIBS} m
    )
  endif()

  target_compile_options(webgpu_native_test PRIVATE -fexceptions)
  # Same standard requirement as the library: C++17, no silent decay.
  set_target_properties(
    webgpu_native_test PROPERTIES CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON
  )
endif()
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,113 @@ | ||||||
| # WebGPU Backend | ||||||
|
|
||||||
| Run ExecuTorch models on the GPU via [WebGPU](https://www.w3.org/TR/webgpu/). The backend compiles delegated subgraphs into WGSL compute shaders executed natively through [wgpu-native](https://github.com/gfx-rs/wgpu-native) (Metal on macOS, Vulkan on Linux/Windows). | ||||||
|
||||||
| Run ExecuTorch models on the GPU via [WebGPU](https://www.w3.org/TR/webgpu/). The backend compiles delegated subgraphs into WGSL compute shaders executed natively through [wgpu-native](https://github.com/gfx-rs/wgpu-native) (Metal on macOS, Vulkan on Linux/Windows). | |
| Run ExecuTorch models on the GPU via [WebGPU](https://www.w3.org/TR/webgpu/). The backend compiles delegated subgraphs into WGSL compute shaders executed natively through [wgpu-native](https://github.com/gfx-rs/wgpu-native) (Metal on macOS, Vulkan on Linux). Windows is not supported yet in this prototype. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,39 @@ | ||
| # WebGPU Backend — TODO | ||
|
|
||
| ## Current State (Prototype) | ||
| - Single op: `aten.add.Tensor` (fp32, buffer storage) | ||
| - No Python AOT code — directly consumes Vulkan delegate (.pte exported via VulkanPartitioner) | ||
| - Reuses Vulkan FlatBuffer format (VH00 header + VK00 payload) | ||
| - Registers as `"VulkanBackend"` at runtime — mutually exclusive with Vulkan backend at link time | ||
| - Built-in WGSL shaders (not embedded in .pte) | ||
|
|
||
| ## Architecture | ||
| ``` | ||
| VulkanPartitioner (Python) → VkGraphBuilder → VK00 FlatBuffer → .pte | ||
| → WebGPU Runtime: registers as "VulkanBackend", parses VH00/VK00 | ||
| → WebGPUGraph::build → GPU buffers/pipelines/bind groups | ||
| → WebGPUGraph::execute → encode + submit compute passes | ||
| ``` | ||
|
|
||
| Adding a new op requires only C++ runtime work: | ||
| 1. WGSL shader + header | ||
| 2. C++ op implementation (read args from VkGraph, create pipeline, record dispatch) | ||
| 3. Register in CMakeLists.txt | ||
| 4. Test with VulkanPartitioner export | ||
|
|
||
| ## Performance: Command Encoding Overhead | ||
| WebGPU `GPUCommandBuffer` is single-use (no equivalent to Vulkan's cached command lists). | ||
| Per-dispatch API call cost adds up for large graphs. | ||
|
|
||
| **Primary mitigation: mega-kernel fusion.** Generate fused WGSL shaders for chains of | ||
| element-wise ops (add→relu→mul→clamp) at compile time. Embed via the existing | ||
| `shaders: [VkBytes]` field in schema.fbs. | ||
|
|
||
| ## Next Steps | ||
| 1. **More ops**: sub, mul, relu, linear (matmul), softmax, layer_norm | ||
| 2. **fp16 support**: Feature-detect `shader-f16`, fallback to fp32 | ||
| 3. **Buffer pooling**: Reuse GPU buffers to avoid OOM at scale | ||
| 4. **Pipeline caching**: Cache compiled pipelines across runs | ||
| 5. **Profiling**: Wire WebGPU timestamp queries into ETDump/EventTracer | ||
| 6. **LLM support**: KV cache management, Flash Attention in WGSL, quantized ops (int4/int8) | ||
| 7. **Browser/JS runtime**: Emscripten build, JS harness, browser test page |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`add_subdirectory(../vulkan ...)` pulls in the full Vulkan CMakeLists.txt, which currently unconditionally builds shader libraries and the `vulkan_backend` target (not just `vulkan_schema`). This makes `EXECUTORCH_BUILD_WEBGPU=ON` effectively require the full Vulkan toolchain (e.g., glslc) and can also introduce duplicate backend registration. Consider factoring `vulkan_schema` into a standalone CMake include, or adding a schema-only mode/guards in `backends/vulkan/CMakeLists.txt` so including it here does not build the full Vulkan backend.