[ExecuTorch][WebGPU] Upload named-data constants in WebGPUGraph

JulianCloudNTH · JulianCloudNTH · commit 5dd66add3313 · 2026-06-03T18:37:55.000-07:00
The Vulkan serializer that the WebGPU backend reuses stores every non-empty constant in the PTE's named-data map with `offset == UINT64_MAX` and a `named_key`, rather than inline in the VK00 blob. `WebGPUGraph::build` previously handled only inline constants, so a delegated op's constant weights were never uploaded and the op produced all zeros.

`build` now also fetches named-data constants via `NamedDataMap::get_data`, mirroring the path `VulkanBackend` already uses. `aten.add` was unaffected since it has no constant tensors; the first consumer is the `rms_norm` op in the child diff.

Differential Revision: [D107288998](https://our.internmc.facebook.com/intern/diff/D107288998/)
ghstack-source-id: 389182397
Pull-Request: pytorch#19962
diff --git a/backends/webgpu/runtime/WebGPUBackend.cpp b/backends/webgpu/runtime/WebGPUBackend.cpp
@@ -76,7 +76,7 @@ Result<DelegateHandle*> WebGPUBackend::init(
   }
 
   try {
-    graph->build(flatbuffer_data, constant_data);
+    graph->build(flatbuffer_data, constant_data, context.get_named_data_map());
   } catch (const std::exception& e) {
     ET_LOG(Error, "WebGPU graph build failed: %s", e.what());
     graph->~WebGPUGraph();
diff --git a/backends/webgpu/runtime/WebGPUGraph.cpp b/backends/webgpu/runtime/WebGPUGraph.cpp
@@ -10,6 +10,7 @@
 #include <executorch/backends/webgpu/runtime/ops/OperatorRegistry.h>
 
 #include <executorch/backends/vulkan/serialization/schema_generated.h>
+#include <executorch/runtime/core/named_data_map.h>
 
 #include <executorch/backends/webgpu/runtime/WebGPUDevice.h>
 #include <webgpu/wgpu.h>
@@ -93,7 +94,8 @@ WebGPUGraph::~WebGPUGraph() {
 
 void WebGPUGraph::build(
     const void* flatbuffer_data,
-    const uint8_t* constant_data) {
+    const uint8_t* constant_data,
+    const executorch::runtime::NamedDataMap* named_data_map) {
   if (!device_) {
     auto* ctx = get_default_webgpu_context();
     if (ctx) {
@@ -165,6 +167,31 @@ void WebGPUGraph::build(
                 const uint8_t* src = constant_data + vk_bytes->offset();
                 wgpuQueueWriteBuffer(
                     queue_, tensor.buffer, 0, src, tensor.nbytes);
+              } else if (
+                  vk_bytes->named_key() != nullptr &&
+                  named_data_map != nullptr) {
+                // Constant stored in the PTE named-data map.
+                auto buf =
+                    named_data_map->get_data(vk_bytes->named_key()->c_str());
+                if (!buf.ok()) {
+                  throw std::runtime_error(
+                      std::string("WebGPU: named constant '") +
+                      vk_bytes->named_key()->c_str() +
+                      "' not found in NamedDataMap");
+                }
+                if (buf->size() < tensor.nbytes) {
+                  throw std::runtime_error(
+                      std::string("WebGPU: named constant '") +
+                      vk_bytes->named_key()->c_str() + "' undersized: have " +
+                      std::to_string(buf->size()) + " bytes, need " +
+                      std::to_string(tensor.nbytes));
+                }
+                wgpuQueueWriteBuffer(
+                    queue_, tensor.buffer, 0, buf->data(), tensor.nbytes);
+                buf->Free();
+              } else {
+                throw std::runtime_error(
+                    "WebGPU: constant has no inline offset and no named-data key");
               }
             }
           }
diff --git a/backends/webgpu/runtime/WebGPUGraph.h b/backends/webgpu/runtime/WebGPUGraph.h
@@ -15,6 +15,8 @@
 #include <unordered_map>
 #include <vector>
 
+#include <executorch/runtime/core/named_data_map.h>
+
 namespace executorch {
 namespace backends {
 namespace webgpu {
@@ -66,7 +68,10 @@ class WebGPUGraph {
 
   // Build the graph from a deserialized VkGraph flatbuffer and constant data.
   // The flatbuffer_data pointer must remain valid during build().
-  void build(const void* flatbuffer_data, const uint8_t* constant_data);
+  void build(
+      const void* flatbuffer_data,
+      const uint8_t* constant_data,
+      const executorch::runtime::NamedDataMap* named_data_map = nullptr);
 
   // Copy input tensor data from host pointers into GPU buffers.
   void copy_inputs(const std::vector<std::pair<const void*, size_t>>& inputs);

Original file line number	Diff line number	Diff line change
`@@ -76,7 +76,7 @@ Result<DelegateHandle*> WebGPUBackend::init(`
`76`	`76`	`}`
`77`	`77`
`78`	`78`	`try {`
`79`		`- graph->build(flatbuffer_data, constant_data);`
	`79`	`+ graph->build(flatbuffer_data, constant_data, context.get_named_data_map());`
`80`	`80`	`} catch (const std::exception& e) {`
`81`	`81`	`ET_LOG(Error, "WebGPU graph build failed: %s", e.what());`
`82`	`82`	`graph->~WebGPUGraph();`