diff --git a/packages/skia/android/CMakeLists.txt b/packages/skia/android/CMakeLists.txt
index 0f4f24235c..61b71abadc 100644
--- a/packages/skia/android/CMakeLists.txt
+++ b/packages/skia/android/CMakeLists.txt
@@ -120,9 +120,8 @@ if(SK_GRAPHITE)
         "${PROJECT_SOURCE_DIR}/../cpp/jsi2/Promise.cpp"
 
         # WebGPU async system
-        "${PROJECT_SOURCE_DIR}/../cpp/rnwgpu/async/AsyncRunner.cpp"
         "${PROJECT_SOURCE_DIR}/../cpp/rnwgpu/async/AsyncTaskHandle.cpp"
-        "${PROJECT_SOURCE_DIR}/../cpp/rnwgpu/async/JSIMicrotaskDispatcher.cpp"
+        "${PROJECT_SOURCE_DIR}/../cpp/rnwgpu/async/RuntimeContext.cpp"
 
         # WebGPU API
         "${PROJECT_SOURCE_DIR}/../cpp/rnwgpu/api/GPU.cpp"
diff --git a/packages/skia/cpp/api/JsiSkApi.h b/packages/skia/cpp/api/JsiSkApi.h
index e9d55c6e43..2962b61b2a 100644
--- a/packages/skia/cpp/api/JsiSkApi.h
+++ b/packages/skia/cpp/api/JsiSkApi.h
@@ -9,7 +9,7 @@
 #ifdef SK_GRAPHITE
 #include "rnskia/RNDawnContext.h"
 #include "rnwgpu/api/GPUDevice.h"
-#include "rnwgpu/async/AsyncRunner.h"
+#include "rnwgpu/async/RuntimeContext.h"
 #endif
 
 #include "JsiNativeBuffer.h"
@@ -164,12 +164,12 @@ class JsiSkApi : public JsiSkHostObject {
         "getDevice", JSI_HOST_FUNCTION_LAMBDA {
 #ifdef SK_GRAPHITE
           auto &dawnContext = DawnContext::getInstance();
-          auto asyncRunner = rnwgpu::async::AsyncRunner::get(runtime);
-          if (!asyncRunner) {
-            throw jsi::JSError(runtime, "AsyncRunner not initialized");
-          }
+          // Per-runtime context: async ops on this device resolve on the calling
+          // runtime's own thread (via its ProcessEvents pump).
+          auto context = rnwgpu::async::RuntimeContext::getOrCreate(
+              runtime, dawnContext.getWGPUInstance());
           auto device = std::make_shared<rnwgpu::GPUDevice>(
-              dawnContext.getWGPUDevice(), asyncRunner, "Skia Device");
+              dawnContext.getWGPUDevice(), context, "Skia Device");
           return rnwgpu::GPUDevice::create(runtime, device);
 #else
       throw jsi::JSError(runtime,
diff --git a/packages/skia/cpp/jsi2/NativeObject.h b/packages/skia/cpp/jsi2/NativeObject.h
index bf5651c0ce..e41ab7d3e4 100644
--- a/packages/skia/cpp/jsi2/NativeObject.h
+++ b/packages/skia/cpp/jsi2/NativeObject.h
@@ -432,6 +432,29 @@ class NativeObject : public jsi::NativeState,
     prototype.setProperty(runtime, name, func);
   }
 
+  /**
+   * Install a method whose native implementation needs the calling jsi::Runtime
+   * as its first parameter. Used by entry points that must act per-runtime
+   * (e.g. GPU::requestAdapter, which creates a per-runtime RuntimeContext).
+   */
+  template <typename ReturnType, typename... Args>
+  static void
+  installMethodWithRuntime(jsi::Runtime &runtime, jsi::Object &prototype,
+                           const char *name,
+                           ReturnType (Derived::*method)(jsi::Runtime &,
+                                                         Args...)) {
+    auto func = jsi::Function::createFromHostFunction(
+        runtime, jsi::PropNameID::forUtf8(runtime, name), sizeof...(Args),
+        [method](jsi::Runtime &rt, const jsi::Value &thisVal,
+                 const jsi::Value *args, size_t count) -> jsi::Value {
+          auto native = Derived::fromValue(rt, thisVal);
+          return callMethodWithRuntime(native.get(), method, rt, args,
+                                       std::index_sequence_for<Args...>{},
+                                       count);
+        });
+    prototype.setProperty(runtime, name, func);
+  }
+
   /**
    * Install a getter on the prototype.
    */
@@ -567,6 +590,22 @@ class NativeObject : public jsi::NativeState,
   }
 
 private:
+  // Helper to call a method that takes the calling jsi::Runtime as its first
+  // parameter, with JSI argument conversion for the rest and JSI conversion of
+  // the result.
+  template <typename ReturnType, typename... Args, size_t... Is>
+  static jsi::Value
+  callMethodWithRuntime(Derived *obj,
+                        ReturnType (Derived::*method)(jsi::Runtime &, Args...),
+                        jsi::Runtime &runtime, const jsi::Value *args,
+                        std::index_sequence<Is...>, size_t count) {
+    ReturnType result = (obj->*method)(
+        runtime, rnwgpu::JSIConverter<std::decay_t<Args>>::fromJSI(
+                     runtime, args[Is], Is >= count)...);
+    return rnwgpu::JSIConverter<std::decay_t<ReturnType>>::toJSI(
+        runtime, std::move(result));
+  }
+
   // Helper to call a method with JSI argument conversion
   template <typename ReturnType, typename... Args, size_t... Is>
   static jsi::Value callMethod(Derived *obj,
diff --git a/packages/skia/cpp/rnskia/RNSkManager.cpp b/packages/skia/cpp/rnskia/RNSkManager.cpp
index 8eb939b1e6..03b51a0246 100644
--- a/packages/skia/cpp/rnskia/RNSkManager.cpp
+++ b/packages/skia/cpp/rnskia/RNSkManager.cpp
@@ -23,6 +23,8 @@
 #include "rnwgpu/api/descriptors/GPUMapMode.h"
 #include "rnwgpu/api/descriptors/GPUShaderStage.h"
 #include "rnwgpu/api/descriptors/GPUTextureUsage.h"
+#include "rnwgpu/api/WebGPUConstants.h"
+#include "rnwgpu/async/RuntimeContext.h"
 #include "jsi2/Promise.h"
 
 #include "include/core/SkData.h"
@@ -82,6 +84,12 @@ void RNSkManager::installBindings() {
       jsi::Object::createFromHostObject(*_jsRuntime, _viewApi));
 
 #ifdef SK_GRAPHITE
+  // Register the main runtime + its CallInvoker so spontaneous events
+  // (device.lost / uncapturederror) on main-runtime devices can be delivered to
+  // the JS thread without the ProcessEvents pump. Worklet-runtime devices have
+  // no invoker (best-effort; see the RuntimeContext "Threading model" doc).
+  rnwgpu::async::RuntimeContext::registerMainRuntime(_jsRuntime, _jsCallInvoker);
+
   // Install WebGPU constructors
   rnwgpu::GPU::installConstructor(*_jsRuntime);
   rnwgpu::GPUUncapturedErrorEvent::installConstructor(*_jsRuntime);
@@ -104,18 +112,26 @@ void RNSkManager::installBindings() {
                                      std::move(navigator));
   }
 
-  // Install WebGPU constant objects as plain JS objects
-  _jsRuntime->global().setProperty(*_jsRuntime, "GPUBufferUsage",
-                                   rnwgpu::GPUBufferUsage::create(*_jsRuntime));
-  _jsRuntime->global().setProperty(*_jsRuntime, "GPUColorWrite",
-                                   rnwgpu::GPUColorWrite::create(*_jsRuntime));
-  _jsRuntime->global().setProperty(*_jsRuntime, "GPUMapMode",
-                                   rnwgpu::GPUMapMode::create(*_jsRuntime));
-  _jsRuntime->global().setProperty(*_jsRuntime, "GPUShaderStage",
-                                   rnwgpu::GPUShaderStage::create(*_jsRuntime));
+  // Install WebGPU constant objects as plain JS objects on the main runtime.
+  rnwgpu::installWebGPUConstants(*_jsRuntime);
+
+  // Install a global `installWebGPU()` host function so worklet runtimes can get
+  // the same constants. A host function captured into a worklet is serialized as
+  // a SerializableHostFunction and re-created on the worklet runtime, so the body
+  // runs there (its `rt` is the worklet runtime) and installs the constants on
+  // that runtime. The constants come from the native wgpu::*Usage enums, so the
+  // values stay a single source of truth across every runtime. Calling it on a
+  // runtime that already has the globals is a safe, idempotent no-op.
   _jsRuntime->global().setProperty(
-      *_jsRuntime, "GPUTextureUsage",
-      rnwgpu::GPUTextureUsage::create(*_jsRuntime));
+      *_jsRuntime, "installWebGPU",
+      jsi::Function::createFromHostFunction(
+          *_jsRuntime, jsi::PropNameID::forAscii(*_jsRuntime, "installWebGPU"),
+          0,
+          [](jsi::Runtime &rt, const jsi::Value & /*thisVal*/,
+             const jsi::Value * /*args*/, size_t /*count*/) -> jsi::Value {
+            rnwgpu::installWebGPUConstants(rt);
+            return jsi::Value::undefined();
+          }));
 
   // Install RNWebGPU global object for WebGPU Canvas support
   auto rnWebGPU = std::make_shared<rnwgpu::RNWebGPU>(gpu, nullptr);
diff --git a/packages/skia/cpp/rnwgpu/SurfaceRegistry.h b/packages/skia/cpp/rnwgpu/SurfaceRegistry.h
index e41de864a6..279dbb1a54 100644
--- a/packages/skia/cpp/rnwgpu/SurfaceRegistry.h
+++ b/packages/skia/cpp/rnwgpu/SurfaceRegistry.h
@@ -7,6 +7,12 @@
 
 #include "webgpu/webgpu_cpp.h"
 
+#ifdef __APPLE__
+namespace dawn::native::metal {
+void WaitForCommandsToBeScheduled(WGPUDevice device);
+} // namespace dawn::native::metal
+#endif
+
 namespace rnwgpu {
 
 struct NativeInfo {
@@ -112,13 +118,39 @@ class SurfaceInfo {
     height = newHeight;
   }
 
-  void present() {
+  // Present the current surface texture. Called synchronously from the thread
+  // that did getCurrentTexture / submit (via GPUCanvasContext::present), so it
+  // preserves Dawn surface thread-affinity. No-op when offscreen / unconfigured
+  // (no surface).
+  void presentFrame() {
+#ifdef __APPLE__
+    // Ensure command buffers are scheduled before presenting. Read the device
+    // under a shared lock, then wait without holding it (the wait can block).
+    // The device may be reconfigured between the two locks; that is safe because
+    // present() is called on the rendering thread right after submit(), the wait
+    // just flushes that thread's already-submitted work, and the Present() below
+    // re-checks `surface` under the unique lock before touching it.
+    wgpu::Device device;
+    {
+      std::shared_lock<std::shared_mutex> lock(_mutex);
+      device = config.device;
+    }
+    if (device) {
+      dawn::native::metal::WaitForCommandsToBeScheduled(device.Get());
+    }
+#endif
     std::unique_lock<std::shared_mutex> lock(_mutex);
     if (surface) {
       surface.Present();
     }
   }
 
+  // True when an on-screen wgpu::Surface is attached (vs offscreen texture).
+  bool hasSurface() {
+    std::shared_lock<std::shared_mutex> lock(_mutex);
+    return surface != nullptr;
+  }
+
   wgpu::Texture getCurrentTexture() {
     std::shared_lock<std::shared_mutex> lock(_mutex);
     if (surface) {
diff --git a/packages/skia/cpp/rnwgpu/api/GPU.cpp b/packages/skia/cpp/rnwgpu/api/GPU.cpp
index 258389ad98..bddc26cd0b 100644
--- a/packages/skia/cpp/rnwgpu/api/GPU.cpp
+++ b/packages/skia/cpp/rnwgpu/api/GPU.cpp
@@ -9,17 +9,15 @@
 
 #include "Convertors.h"
 #include "jsi2/JSIConverter.h"
-#include "rnwgpu/async/JSIMicrotaskDispatcher.h"
+#include "rnwgpu/async/RuntimeContext.h"
 
 namespace rnwgpu {
 
-GPU::GPU(jsi::Runtime &runtime, wgpu::Instance instance)
-    : NativeObject(CLASS_NAME), _instance(instance) {
-  auto dispatcher = std::make_shared<async::JSIMicrotaskDispatcher>(runtime);
-  _async = async::AsyncRunner::getOrCreate(runtime, _instance, dispatcher);
-}
+GPU::GPU(jsi::Runtime & /*runtime*/, wgpu::Instance instance)
+    : NativeObject(CLASS_NAME), _instance(instance) {}
 
 async::AsyncTaskHandle GPU::requestAdapter(
+    jsi::Runtime &runtime,
     std::optional<std::shared_ptr<GPURequestAdapterOptions>> options) {
   wgpu::RequestAdapterOptions aOptions;
   Convertor conv;
@@ -32,12 +30,17 @@ async::AsyncTaskHandle GPU::requestAdapter(
   constexpr auto kDefaultBackendType = wgpu::BackendType::Vulkan;
 #endif
   aOptions.backendType = kDefaultBackendType;
-  return _async->postTask(
-      [this, aOptions](const async::AsyncTaskHandle::ResolveFunction &resolve,
-                       const async::AsyncTaskHandle::RejectFunction &reject) {
+
+  // Per-runtime context: async ops requested on this runtime resolve on this
+  // runtime's own thread (via its ProcessEvents pump).
+  auto context = async::RuntimeContext::getOrCreate(runtime, _instance);
+  return context->postTask(
+      [this, aOptions,
+       context](const async::AsyncTaskHandle::ResolveFunction &resolve,
+                const async::AsyncTaskHandle::RejectFunction &reject) {
         _instance.RequestAdapter(
             &aOptions, wgpu::CallbackMode::AllowProcessEvents,
-            [asyncRunner = _async, resolve,
+            [context, resolve,
              reject](wgpu::RequestAdapterStatus status, wgpu::Adapter adapter,
                      wgpu::StringView message) {
               if (message.length) {
@@ -45,8 +48,8 @@ async::AsyncTaskHandle GPU::requestAdapter(
               }
 
               if (status == wgpu::RequestAdapterStatus::Success && adapter) {
-                auto adapterHost = std::make_shared<GPUAdapter>(
-                    std::move(adapter), asyncRunner);
+                auto adapterHost =
+                    std::make_shared<GPUAdapter>(std::move(adapter), context);
                 auto result =
                     std::variant<std::nullptr_t, std::shared_ptr<GPUAdapter>>(
                         adapterHost);
diff --git a/packages/skia/cpp/rnwgpu/api/GPU.h b/packages/skia/cpp/rnwgpu/api/GPU.h
index 93de777f70..d4ec87525f 100644
--- a/packages/skia/cpp/rnwgpu/api/GPU.h
+++ b/packages/skia/cpp/rnwgpu/api/GPU.h
@@ -9,8 +9,8 @@
 
 #include "jsi2/NativeObject.h"
 
-#include "rnwgpu/async/AsyncRunner.h"
 #include "rnwgpu/async/AsyncTaskHandle.h"
+#include "rnwgpu/async/RuntimeContext.h"
 
 #include "webgpu/webgpu_cpp.h"
 
@@ -32,7 +32,10 @@ class GPU : public NativeObject<GPU> {
 public:
   std::string getBrand() { return CLASS_NAME; }
 
+  // requestAdapter needs the calling runtime so each runtime gets its own
+  // RuntimeContext (and ProcessEvents pump on its own thread).
   async::AsyncTaskHandle requestAdapter(
+      jsi::Runtime &runtime,
       std::optional<std::shared_ptr<GPURequestAdapterOptions>> options);
   wgpu::TextureFormat getPreferredCanvasFormat();
 
@@ -40,7 +43,8 @@ class GPU : public NativeObject<GPU> {
 
   static void definePrototype(jsi::Runtime &runtime, jsi::Object &prototype) {
     installGetter(runtime, prototype, "__brand", &GPU::getBrand);
-    installMethod(runtime, prototype, "requestAdapter", &GPU::requestAdapter);
+    installMethodWithRuntime(runtime, prototype, "requestAdapter",
+                             &GPU::requestAdapter);
     installMethod(runtime, prototype, "getPreferredCanvasFormat",
                   &GPU::getPreferredCanvasFormat);
     installGetter(runtime, prototype, "wgslLanguageFeatures",
@@ -51,7 +55,6 @@ class GPU : public NativeObject<GPU> {
 
 private:
   wgpu::Instance _instance;
-  std::shared_ptr<async::AsyncRunner> _async;
 };
 
 } // namespace rnwgpu
diff --git a/packages/skia/cpp/rnwgpu/api/GPUAdapter.cpp b/packages/skia/cpp/rnwgpu/api/GPUAdapter.cpp
index 8d3610f9fa..1e6c5645c3 100644
--- a/packages/skia/cpp/rnwgpu/api/GPUAdapter.cpp
+++ b/packages/skia/cpp/rnwgpu/api/GPUAdapter.cpp
@@ -138,7 +138,7 @@ async::AsyncTaskHandle GPUAdapter::requestDevice(
         }
         _instance.RequestDevice(
             &deviceDesc, wgpu::CallbackMode::AllowProcessEvents,
-            [asyncRunner = _async, resolve, reject, label, creationRuntime,
+            [context = _async, resolve, reject, label, creationRuntime,
              deviceLostBinding](wgpu::RequestDeviceStatus status,
                                 wgpu::Device device,
                                 wgpu::StringView message) {
@@ -190,7 +190,7 @@ async::AsyncTaskHandle GPUAdapter::requestDevice(
                   creationRuntime);
 
               auto deviceHost = std::make_shared<GPUDevice>(std::move(device),
-                                                            asyncRunner, label);
+                                                            context, label);
               *deviceLostBinding = deviceHost;
               resolve([deviceHost = std::move(deviceHost)](
                           jsi::Runtime &runtime) mutable {
diff --git a/packages/skia/cpp/rnwgpu/api/GPUAdapter.h b/packages/skia/cpp/rnwgpu/api/GPUAdapter.h
index 593c2b7164..327c76419a 100644
--- a/packages/skia/cpp/rnwgpu/api/GPUAdapter.h
+++ b/packages/skia/cpp/rnwgpu/api/GPUAdapter.h
@@ -8,8 +8,8 @@
 
 #include "jsi2/NativeObject.h"
 
-#include "rnwgpu/async/AsyncRunner.h"
 #include "rnwgpu/async/AsyncTaskHandle.h"
+#include "rnwgpu/async/RuntimeContext.h"
 
 #include "webgpu/webgpu_cpp.h"
 
@@ -27,7 +27,7 @@ class GPUAdapter : public NativeObject<GPUAdapter> {
   static constexpr const char *CLASS_NAME = "GPUAdapter";
 
   explicit GPUAdapter(wgpu::Adapter instance,
-                      std::shared_ptr<async::AsyncRunner> async)
+                      std::shared_ptr<async::RuntimeContext> async)
       : NativeObject(CLASS_NAME), _instance(instance), _async(async) {}
 
 public:
@@ -53,7 +53,7 @@ class GPUAdapter : public NativeObject<GPUAdapter> {
 
 private:
   wgpu::Adapter _instance;
-  std::shared_ptr<async::AsyncRunner> _async;
+  std::shared_ptr<async::RuntimeContext> _async;
 };
 
 } // namespace rnwgpu
diff --git a/packages/skia/cpp/rnwgpu/api/GPUBuffer.h b/packages/skia/cpp/rnwgpu/api/GPUBuffer.h
index 2706a50490..c07504bcf5 100644
--- a/packages/skia/cpp/rnwgpu/api/GPUBuffer.h
+++ b/packages/skia/cpp/rnwgpu/api/GPUBuffer.h
@@ -9,8 +9,8 @@
 
 #include "jsi2/NativeObject.h"
 
-#include "rnwgpu/async/AsyncRunner.h"
 #include "rnwgpu/async/AsyncTaskHandle.h"
+#include "rnwgpu/async/RuntimeContext.h"
 
 #include "webgpu/webgpu_cpp.h"
 
@@ -25,7 +25,7 @@ class GPUBuffer : public NativeObject<GPUBuffer> {
   static constexpr const char *CLASS_NAME = "GPUBuffer";
 
   explicit GPUBuffer(wgpu::Buffer instance,
-                     std::shared_ptr<async::AsyncRunner> async,
+                     std::shared_ptr<async::RuntimeContext> async,
                      std::string label)
       : NativeObject(CLASS_NAME), _instance(instance), _async(async),
         _label(label) {}
@@ -71,7 +71,7 @@ class GPUBuffer : public NativeObject<GPUBuffer> {
 
 private:
   wgpu::Buffer _instance;
-  std::shared_ptr<async::AsyncRunner> _async;
+  std::shared_ptr<async::RuntimeContext> _async;
   std::string _label;
   struct Mapping {
     uint64_t start;
diff --git a/packages/skia/cpp/rnwgpu/api/GPUCanvasContext.cpp b/packages/skia/cpp/rnwgpu/api/GPUCanvasContext.cpp
index 0fb682531e..c47521d2d0 100644
--- a/packages/skia/cpp/rnwgpu/api/GPUCanvasContext.cpp
+++ b/packages/skia/cpp/rnwgpu/api/GPUCanvasContext.cpp
@@ -2,14 +2,6 @@
 #include "Convertors.h"
 #include <memory>
 
-#ifdef __APPLE__
-namespace dawn::native::metal {
-
-void WaitForCommandsToBeScheduled(WGPUDevice device);
-
-}
-#endif
-
 namespace rnwgpu {
 
 void GPUCanvasContext::configure(
@@ -46,19 +38,24 @@ std::shared_ptr<GPUTexture> GPUCanvasContext::getCurrentTexture() {
   if (sizeHasChanged) {
     _surfaceInfo->reconfigure(width, height);
   }
+
   auto texture = _surfaceInfo->getCurrentTexture();
-  return std::make_shared<GPUTexture>(texture, "", false);
-}
 
-void GPUCanvasContext::present() {
-#ifdef __APPLE__
-  dawn::native::metal::WaitForCommandsToBeScheduled(
-      _surfaceInfo->getDevice().Get());
-#endif
   auto size = _surfaceInfo->getSize();
   _canvas->setClientWidth(size.width);
   _canvas->setClientHeight(size.height);
-  _surfaceInfo->present();
+
+  return std::make_shared<GPUTexture>(texture, "", false);
+}
+
+void GPUCanvasContext::present() {
+  // Present runs synchronously on the calling thread (the one that did
+  // getCurrentTexture / submit), preserving Dawn surface thread-affinity.
+  // Required on every runtime (main JS, Reanimated UI, dedicated worklet);
+  // offscreen surfaces have no wgpu::Surface so they no-op.
+  if (_surfaceInfo->hasSurface()) {
+    _surfaceInfo->presentFrame();
+  }
 }
 
 } // namespace rnwgpu
diff --git a/packages/skia/cpp/rnwgpu/api/GPUCanvasContext.h b/packages/skia/cpp/rnwgpu/api/GPUCanvasContext.h
index 6923f95c0c..83eb349424 100644
--- a/packages/skia/cpp/rnwgpu/api/GPUCanvasContext.h
+++ b/packages/skia/cpp/rnwgpu/api/GPUCanvasContext.h
@@ -54,6 +54,9 @@ class GPUCanvasContext : public NativeObject<GPUCanvasContext> {
   void configure(std::shared_ptr<GPUCanvasConfiguration> configuration);
   void unconfigure();
   std::shared_ptr<GPUTexture> getCurrentTexture();
+  // Present is explicit on every runtime (main JS, Reanimated UI, and dedicated
+  // worklet runtimes). It runs synchronously on the calling thread, preserving
+  // Dawn surface thread-affinity; offscreen surfaces no-op.
   void present();
 
 private:
diff --git a/packages/skia/cpp/rnwgpu/api/GPUDevice.cpp b/packages/skia/cpp/rnwgpu/api/GPUDevice.cpp
index 436f1dd7ea..b62b71c53e 100644
--- a/packages/skia/cpp/rnwgpu/api/GPUDevice.cpp
+++ b/packages/skia/cpp/rnwgpu/api/GPUDevice.cpp
@@ -6,6 +6,8 @@
 #include <utility>
 #include <vector>
 
+#include <ReactCommon/CallInvoker.h>
+
 #include "Convertors.h"
 #include "NativeBufferUtils.h"
 #include "jsi2/JSIConverter.h"
@@ -19,23 +21,33 @@ namespace rnwgpu {
 
 void GPUDevice::notifyDeviceLost(wgpu::DeviceLostReason reason,
                                  std::string message) {
-  if (_lostSettled) {
-    return;
-  }
+  std::optional<async::AsyncTaskHandle::ResolveFunction> resolveToCall;
+  std::shared_ptr<GPUDeviceLostInfo> info;
+  {
+    std::lock_guard<std::mutex> lock(_lostMutex);
+    if (_lostSettled) {
+      return;
+    }
+
+    _lostSettled = true;
+    _lostInfo = std::make_shared<GPUDeviceLostInfo>(reason, std::move(message));
+    info = _lostInfo;
 
-  _lostSettled = true;
-  _lostInfo = std::make_shared<GPUDeviceLostInfo>(reason, std::move(message));
+    if (_lostResolve.has_value()) {
+      resolveToCall = std::move(*_lostResolve);
+      _lostResolve.reset();
+    }
 
-  if (_lostResolve.has_value()) {
-    auto resolve = std::move(*_lostResolve);
-    _lostResolve.reset();
-    resolve([info = _lostInfo](jsi::Runtime &runtime) mutable {
+    _lostHandle.reset();
+  }
+
+  // Settle outside the lock: resolve() only enqueues onto the JS thread.
+  if (resolveToCall.has_value()) {
+    (*resolveToCall)([info](jsi::Runtime &runtime) mutable {
       return JSIConverter<std::shared_ptr<GPUDeviceLostInfo>>::toJSI(runtime,
                                                                      info);
     });
   }
-
-  _lostHandle.reset();
 }
 
 void GPUDevice::forceLossForTesting() {
@@ -474,6 +486,11 @@ std::unordered_set<std::string> GPUDevice::getFeatures() {
 }
 
 async::AsyncTaskHandle GPUDevice::getLost() {
+  // Held across the whole body: the postTask callback below runs synchronously
+  // on this (JS) thread and touches the same _lost* fields, so it must not
+  // re-lock. notifyDeviceLost() takes the same lock from its (possibly worker)
+  // thread.
+  std::lock_guard<std::mutex> lock(_lostMutex);
   if (_lostHandle.has_value()) {
     return *_lostHandle;
   }
@@ -488,7 +505,7 @@ async::AsyncTaskHandle GPUDevice::getLost() {
                 runtime, info);
           });
         },
-        false);
+        /*keepPumping=*/false);
   }
 
   auto handle = _async->postTask(
@@ -502,9 +519,10 @@ async::AsyncTaskHandle GPUDevice::getLost() {
           return;
         }
 
+        // Resolved later from notifyDeviceLost().
         _lostResolve = resolve;
       },
-      false);
+      /*keepPumping=*/false);
 
   _lostHandle = handle;
   return handle;
@@ -529,6 +547,23 @@ void GPUDevice::removeEventListener(std::string type, jsi::Function callback) {
 }
 
 void GPUDevice::notifyUncapturedError(GPUErrorVariant error) {
+  // Dawn can surface an uncaptured error from any ProcessEvents pump (a worklet
+  // runtime sharing this instance may pump it on the wrong thread). Marshal to
+  // the owning runtime's JS thread via its CallInvoker before touching JSI. The
+  // invoker is wired only for the main JS runtime, so a device created on a
+  // worklet runtime does not deliver uncaptured errors to JS (best-effort; see
+  // the Threading model).
+  auto invoker = _async ? _async->callInvoker() : nullptr;
+  if (!invoker) {
+    return;
+  }
+  auto self = shared_from_this();
+  invoker->invokeAsync([self, error = std::move(error)]() mutable {
+    self->deliverUncapturedError(std::move(error));
+  });
+}
+
+void GPUDevice::deliverUncapturedError(GPUErrorVariant error) {
   auto runtime = getCreationRuntime();
   if (runtime == nullptr) {
     return;
diff --git a/packages/skia/cpp/rnwgpu/api/GPUDevice.h b/packages/skia/cpp/rnwgpu/api/GPUDevice.h
index 834f245ee7..6910f23cd7 100644
--- a/packages/skia/cpp/rnwgpu/api/GPUDevice.h
+++ b/packages/skia/cpp/rnwgpu/api/GPUDevice.h
@@ -15,8 +15,8 @@
 
 #include "jsi2/NativeObject.h"
 
-#include "rnwgpu/async/AsyncRunner.h"
 #include "rnwgpu/async/AsyncTaskHandle.h"
+#include "rnwgpu/async/RuntimeContext.h"
 
 #include "webgpu/webgpu_cpp.h"
 
@@ -99,7 +99,7 @@ class GPUDevice : public NativeObject<GPUDevice> {
   static constexpr const char *CLASS_NAME = "GPUDevice";
 
   explicit GPUDevice(wgpu::Device instance,
-                     std::shared_ptr<async::AsyncRunner> async,
+                     std::shared_ptr<async::RuntimeContext> async,
                      std::string label)
       : NativeObject(CLASS_NAME), _instance(instance), _async(async),
         _label(label) {
@@ -230,9 +230,18 @@ class GPUDevice : public NativeObject<GPUDevice> {
 private:
   friend class GPUAdapter;
 
+  // Runs the uncapturederror listeners on the creation runtime's JS thread.
+  // Invoked from notifyUncapturedError via the main CallInvoker.
+  void deliverUncapturedError(GPUErrorVariant error);
+
   wgpu::Device _instance;
-  std::shared_ptr<async::AsyncRunner> _async;
+  std::shared_ptr<async::RuntimeContext> _async;
   std::string _label;
+  // Guards the device-lost state below. In the ProcessEvents model both
+  // notifyDeviceLost() (fired by Dawn during ProcessEvents) and getLost() run on
+  // the owning runtime's own thread, but device destruction can also trigger
+  // notifyDeviceLost() synchronously, so the mutex keeps these fields safe.
+  std::mutex _lostMutex;
   std::optional<async::AsyncTaskHandle> _lostHandle;
   std::shared_ptr<GPUDeviceLostInfo> _lostInfo;
   bool _lostSettled = false;
diff --git a/packages/skia/cpp/rnwgpu/api/GPUQueue.h b/packages/skia/cpp/rnwgpu/api/GPUQueue.h
index 08eb5955cd..29f1282fff 100644
--- a/packages/skia/cpp/rnwgpu/api/GPUQueue.h
+++ b/packages/skia/cpp/rnwgpu/api/GPUQueue.h
@@ -8,8 +8,8 @@
 
 #include "jsi2/NativeObject.h"
 
-#include "rnwgpu/async/AsyncRunner.h"
 #include "rnwgpu/async/AsyncTaskHandle.h"
+#include "rnwgpu/async/RuntimeContext.h"
 
 #include "webgpu/webgpu_cpp.h"
 
@@ -31,7 +31,7 @@ class GPUQueue : public NativeObject<GPUQueue> {
   static constexpr const char *CLASS_NAME = "GPUQueue";
 
   explicit GPUQueue(wgpu::Queue instance,
-                    std::shared_ptr<async::AsyncRunner> async,
+                    std::shared_ptr<async::RuntimeContext> async,
                     std::string label)
       : NativeObject(CLASS_NAME), _instance(instance), _async(async),
         _label(label) {}
@@ -77,7 +77,7 @@ class GPUQueue : public NativeObject<GPUQueue> {
 
 private:
   wgpu::Queue _instance;
-  std::shared_ptr<async::AsyncRunner> _async;
+  std::shared_ptr<async::RuntimeContext> _async;
   std::string _label;
 };
 
diff --git a/packages/skia/cpp/rnwgpu/api/GPUShaderModule.h b/packages/skia/cpp/rnwgpu/api/GPUShaderModule.h
index f5bda33ffd..88cd353880 100644
--- a/packages/skia/cpp/rnwgpu/api/GPUShaderModule.h
+++ b/packages/skia/cpp/rnwgpu/api/GPUShaderModule.h
@@ -7,8 +7,8 @@
 
 #include "jsi2/NativeObject.h"
 
-#include "rnwgpu/async/AsyncRunner.h"
 #include "rnwgpu/async/AsyncTaskHandle.h"
+#include "rnwgpu/async/RuntimeContext.h"
 
 #include "webgpu/webgpu_cpp.h"
 
@@ -23,7 +23,7 @@ class GPUShaderModule : public NativeObject<GPUShaderModule> {
   static constexpr const char *CLASS_NAME = "GPUShaderModule";
 
   explicit GPUShaderModule(wgpu::ShaderModule instance,
-                           std::shared_ptr<async::AsyncRunner> async,
+                           std::shared_ptr<async::RuntimeContext> async,
                            std::string label)
       : NativeObject(CLASS_NAME), _instance(instance), _async(async),
         _label(label) {}
@@ -59,7 +59,7 @@ class GPUShaderModule : public NativeObject<GPUShaderModule> {
 
 private:
   wgpu::ShaderModule _instance;
-  std::shared_ptr<async::AsyncRunner> _async;
+  std::shared_ptr<async::RuntimeContext> _async;
   std::string _label;
 };
 
diff --git a/packages/skia/cpp/rnwgpu/api/WebGPUConstants.h b/packages/skia/cpp/rnwgpu/api/WebGPUConstants.h
new file mode 100644
index 0000000000..7068c88ce7
--- /dev/null
+++ b/packages/skia/cpp/rnwgpu/api/WebGPUConstants.h
@@ -0,0 +1,36 @@
+#pragma once
+
+#include <jsi/jsi.h>
+
+#include "descriptors/GPUBufferUsage.h"
+#include "descriptors/GPUColorWrite.h"
+#include "descriptors/GPUMapMode.h"
+#include "descriptors/GPUShaderStage.h"
+#include "descriptors/GPUTextureUsage.h"
+
+namespace rnwgpu {
+
+namespace jsi = facebook::jsi;
+
+// Installs the WebGPU flag constants (GPUBufferUsage, GPUColorWrite, GPUMapMode,
+// GPUShaderStage, GPUTextureUsage) as plain JS objects on `runtime`'s global.
+//
+// The numeric values are derived from the Dawn wgpu::*Usage enums (single source
+// of truth in C++), so this is safe to call on ANY runtime: the main JS runtime
+// at install time, and any worklet runtime (Reanimated UI, dedicated worklet
+// runtimes, Vision Camera frame processors) via the global `installWebGPU()`
+// host function. It is idempotent: re-installing overwrites the globals with
+// equal values.
+inline void installWebGPUConstants(jsi::Runtime &runtime) {
+  auto global = runtime.global();
+  global.setProperty(runtime, "GPUBufferUsage",
+                     GPUBufferUsage::create(runtime));
+  global.setProperty(runtime, "GPUColorWrite", GPUColorWrite::create(runtime));
+  global.setProperty(runtime, "GPUMapMode", GPUMapMode::create(runtime));
+  global.setProperty(runtime, "GPUShaderStage",
+                     GPUShaderStage::create(runtime));
+  global.setProperty(runtime, "GPUTextureUsage",
+                     GPUTextureUsage::create(runtime));
+}
+
+} // namespace rnwgpu
diff --git a/packages/skia/cpp/rnwgpu/async/AsyncDispatcher.h b/packages/skia/cpp/rnwgpu/async/AsyncDispatcher.h
deleted file mode 100644
index 0ec176824e..0000000000
--- a/packages/skia/cpp/rnwgpu/async/AsyncDispatcher.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#pragma once
-
-#include <functional>
-#include <memory>
-
-#include <jsi/jsi.h>
-
-namespace rnwgpu::async {
-
-namespace jsi = facebook::jsi;
-
-/**
- * Abstract dispatcher used by the AsyncRunner to enqueue work back onto the
- * JavaScript thread.
- */
-class AsyncDispatcher {
-public:
-  using Work = std::function<void(jsi::Runtime &)>;
-
-  virtual ~AsyncDispatcher() = default;
-
-  /**
-   * Enqueue a unit of work that will be executed on the JavaScript thread.
-   */
-  virtual void post(Work work) = 0;
-};
-
-} // namespace rnwgpu::async
diff --git a/packages/skia/cpp/rnwgpu/async/AsyncRunner.cpp b/packages/skia/cpp/rnwgpu/async/AsyncRunner.cpp
deleted file mode 100644
index 05f7e43ae4..0000000000
--- a/packages/skia/cpp/rnwgpu/async/AsyncRunner.cpp
+++ /dev/null
@@ -1,182 +0,0 @@
-#include "AsyncRunner.h"
-
-#include <chrono>
-#include <stdexcept>
-#include <utility>
-
-#include "AsyncTaskHandle.h"
-
-namespace rnwgpu::async {
-
-// Static member definitions
-std::mutex AsyncRunner::_runnersMutex;
-std::unordered_map<jsi::Runtime *, std::shared_ptr<AsyncRunner>>
-    AsyncRunner::_runners;
-
-AsyncRunner::AsyncRunner(wgpu::Instance instance,
-                         std::shared_ptr<AsyncDispatcher> dispatcher)
-    : _instance(std::move(instance)), _dispatcher(std::move(dispatcher)),
-      _pendingTasks(0), _pumpTasks(0), _tickScheduled(false),
-      _lastTickTimeNs(0) {
-  if (!_dispatcher) {
-    throw std::runtime_error("AsyncRunner requires a valid dispatcher.");
-  }
-}
-
-std::shared_ptr<AsyncRunner> AsyncRunner::get(jsi::Runtime &runtime) {
-  std::lock_guard<std::mutex> lock(_runnersMutex);
-  auto it = _runners.find(&runtime);
-  if (it == _runners.end()) {
-    return nullptr;
-  }
-  return it->second;
-}
-
-std::shared_ptr<AsyncRunner>
-AsyncRunner::getOrCreate(jsi::Runtime &runtime, wgpu::Instance instance,
-                         std::shared_ptr<AsyncDispatcher> dispatcher) {
-  std::lock_guard<std::mutex> lock(_runnersMutex);
-  auto it = _runners.find(&runtime);
-  if (it != _runners.end()) {
-    return it->second;
-  }
-
-  auto runner =
-      std::make_shared<AsyncRunner>(std::move(instance), std::move(dispatcher));
-  _runners[&runtime] = runner;
-  return runner;
-}
-
-AsyncTaskHandle AsyncRunner::postTask(const TaskCallback &callback,
-                                      bool keepPumping) {
-  auto handle = AsyncTaskHandle::create(shared_from_this(), keepPumping);
-  if (!handle.valid()) {
-    throw std::runtime_error("Failed to create AsyncTaskHandle.");
-  }
-
-  _pendingTasks.fetch_add(1, std::memory_order_acq_rel);
-  if (keepPumping) {
-    _pumpTasks.fetch_add(1, std::memory_order_acq_rel);
-  }
-  requestTick();
-
-  auto resolve = handle.createResolveFunction();
-  auto reject = handle.createRejectFunction();
-
-  try {
-    callback(resolve, reject);
-  } catch (const std::exception &exception) {
-    reject(exception.what());
-  } catch (...) {
-    reject("Unknown native error in AsyncRunner::postTask.");
-  }
-
-  return handle;
-}
-
-void AsyncRunner::requestTick() {
-  bool expected = false;
-  if (!_tickScheduled.compare_exchange_strong(expected, true,
-                                              std::memory_order_acq_rel)) {
-    return;
-  }
-
-  auto self = shared_from_this();
-  _dispatcher->post([self](jsi::Runtime &runtime) {
-    auto tickCallback = jsi::Function::createFromHostFunction(
-        runtime, jsi::PropNameID::forAscii(runtime, "AsyncRunnerTick"), 0,
-        [self](jsi::Runtime &runtime, const jsi::Value & /*thisValue*/,
-               const jsi::Value * /*args*/, size_t /*count*/) -> jsi::Value {
-          self->tick(runtime);
-          return jsi::Value::undefined();
-        });
-
-#if defined(ANDROID) || defined(__ANDROID__)
-    auto global = runtime.global();
-    auto setImmediateValue = global.getProperty(runtime, "setImmediate");
-    constexpr auto kMinTickInterval = std::chrono::milliseconds(4);
-    const int64_t nowNs =
-        std::chrono::duration_cast<std::chrono::nanoseconds>(
-            std::chrono::steady_clock::now().time_since_epoch())
-            .count();
-    const int64_t lastNs =
-        self->_lastTickTimeNs.load(std::memory_order_acquire);
-    int delayMs = 0;
-    if (lastNs > 0) {
-      const int64_t elapsedNs = nowNs - lastNs;
-      const int64_t minIntervalNs = kMinTickInterval.count() * 1000000LL;
-      if (elapsedNs < minIntervalNs) {
-        const int64_t remainingNs = minIntervalNs - elapsedNs;
-        delayMs = static_cast<int>((remainingNs + 999999) / 1000000);
-      }
-    }
-
-    auto tryScheduleTimeout = [&](int ms) {
-      auto setTimeoutValue = global.getProperty(runtime, "setTimeout");
-      if (!setTimeoutValue.isObject()) {
-        return false;
-      }
-      auto setTimeoutObj = setTimeoutValue.asObject(runtime);
-      if (!setTimeoutObj.isFunction(runtime)) {
-        return false;
-      }
-      auto setTimeoutFn = setTimeoutObj.asFunction(runtime);
-      jsi::Value callbackArg(runtime, tickCallback);
-      jsi::Value delayArg(static_cast<double>(ms));
-      setTimeoutFn.call(runtime, callbackArg, delayArg);
-      return true;
-    };
-
-    if (delayMs > 0) {
-      if (tryScheduleTimeout(delayMs)) {
-        return;
-      }
-      // If setTimeout unavailable fall through to immediate scheduling.
-    }
-
-    if (setImmediateValue.isObject()) {
-      auto setImmediateObj = setImmediateValue.asObject(runtime);
-      if (setImmediateObj.isFunction(runtime)) {
-        auto setImmediateFn = setImmediateObj.asFunction(runtime);
-        jsi::Value callbackArg(runtime, tickCallback);
-        setImmediateFn.call(runtime, callbackArg);
-        return;
-      }
-    }
-
-    int timeoutDelayMs = delayMs > 0 ? delayMs : 0;
-    if (tryScheduleTimeout(timeoutDelayMs)) {
-      return;
-    }
-
-    runtime.queueMicrotask(std::move(tickCallback));
-#else
-    runtime.queueMicrotask(std::move(tickCallback));
-#endif
-  });
-}
-
-void AsyncRunner::tick(jsi::Runtime & /*runtime*/) {
-  _tickScheduled.store(false, std::memory_order_release);
-  _instance.ProcessEvents();
-  const auto nowNs = std::chrono::duration_cast<std::chrono::nanoseconds>(
-                         std::chrono::steady_clock::now().time_since_epoch())
-                         .count();
-  _lastTickTimeNs.store(nowNs, std::memory_order_release);
-  if (_pumpTasks.load(std::memory_order_acquire) > 0) {
-    requestTick();
-  }
-}
-
-void AsyncRunner::onTaskSettled(bool keepPumping) {
-  _pendingTasks.fetch_sub(1, std::memory_order_acq_rel);
-  if (keepPumping) {
-    _pumpTasks.fetch_sub(1, std::memory_order_acq_rel);
-  }
-}
-
-std::shared_ptr<AsyncDispatcher> AsyncRunner::dispatcher() const {
-  return _dispatcher;
-}
-
-} // namespace rnwgpu::async
diff --git a/packages/skia/cpp/rnwgpu/async/AsyncRunner.h b/packages/skia/cpp/rnwgpu/async/AsyncRunner.h
deleted file mode 100644
index 0bf461cbdf..0000000000
--- a/packages/skia/cpp/rnwgpu/async/AsyncRunner.h
+++ /dev/null
@@ -1,57 +0,0 @@
-#pragma once
-
-#include <atomic>
-#include <cstdint>
-#include <functional>
-#include <memory>
-#include <mutex>
-#include <unordered_map>
-
-#include <jsi/jsi.h>
-
-#include "AsyncDispatcher.h"
-#include "AsyncTaskHandle.h"
-
-#include "webgpu/webgpu_cpp.h"
-
-namespace jsi = facebook::jsi;
-
-namespace rnwgpu::async {
-
-class AsyncRunner : public std::enable_shared_from_this<AsyncRunner> {
-public:
-  using TaskCallback =
-      std::function<void(const AsyncTaskHandle::ResolveFunction &,
-                         const AsyncTaskHandle::RejectFunction &)>;
-
-  AsyncRunner(wgpu::Instance instance,
-              std::shared_ptr<AsyncDispatcher> dispatcher);
-
-  static std::shared_ptr<AsyncRunner> get(jsi::Runtime &runtime);
-  static std::shared_ptr<AsyncRunner>
-  getOrCreate(jsi::Runtime &runtime, wgpu::Instance instance,
-              std::shared_ptr<AsyncDispatcher> dispatcher);
-
-  AsyncTaskHandle postTask(const TaskCallback &callback,
-                           bool keepPumping = true);
-
-  void requestTick();
-  void tick(jsi::Runtime &runtime);
-  void onTaskSettled(bool keepPumping);
-
-  std::shared_ptr<AsyncDispatcher> dispatcher() const;
-
-private:
-  static std::mutex _runnersMutex;
-  static std::unordered_map<jsi::Runtime *, std::shared_ptr<AsyncRunner>>
-      _runners;
-
-  wgpu::Instance _instance;
-  std::shared_ptr<AsyncDispatcher> _dispatcher;
-  std::atomic<size_t> _pendingTasks;
-  std::atomic<size_t> _pumpTasks;
-  std::atomic<bool> _tickScheduled;
-  std::atomic<int64_t> _lastTickTimeNs;
-};
-
-} // namespace rnwgpu::async
diff --git a/packages/skia/cpp/rnwgpu/async/AsyncTaskHandle.cpp b/packages/skia/cpp/rnwgpu/async/AsyncTaskHandle.cpp
index 63515021e4..c62a6675e2 100644
--- a/packages/skia/cpp/rnwgpu/async/AsyncTaskHandle.cpp
+++ b/packages/skia/cpp/rnwgpu/async/AsyncTaskHandle.cpp
@@ -1,11 +1,14 @@
 #include "AsyncTaskHandle.h"
 
+#include <memory>
 #include <string>
 #include <utility>
 
+#include <ReactCommon/CallInvoker.h>
+
 #include "jsi2/Promise.h"
 
-#include "AsyncRunner.h"
+#include "RuntimeContext.h"
 
 namespace rnwgpu::async {
 
@@ -13,8 +16,8 @@ using Action = std::function<void(jsi::Runtime &, rnwgpu::Promise &)>;
 
 struct AsyncTaskHandle::State
     : public std::enable_shared_from_this<AsyncTaskHandle::State> {
-  State(std::shared_ptr<AsyncRunner> runner, bool keepPumping)
-      : runner(std::move(runner)), keepPumping(keepPumping) {}
+  State(std::shared_ptr<RuntimeContext> context, bool keepPumping)
+      : context(std::move(context)), keepPumping(keepPumping) {}
 
   void settle(Action action);
   void attachPromise(const std::shared_ptr<rnwgpu::Promise> &promise);
@@ -26,12 +29,12 @@ struct AsyncTaskHandle::State
   std::shared_ptr<rnwgpu::Promise> currentPromise();
 
   std::mutex mutex;
-  std::weak_ptr<AsyncRunner> runner;
+  std::shared_ptr<RuntimeContext> context;
+  bool keepPumping;
   std::shared_ptr<rnwgpu::Promise> promise;
   std::optional<Action> pendingAction;
   bool settled = false;
   std::shared_ptr<State> keepAlive;
-  bool keepPumping;
 };
 
 // MARK: - State helpers
@@ -77,30 +80,60 @@ void AsyncTaskHandle::State::attachPromise(
 }
 
 void AsyncTaskHandle::State::schedule(Action action) {
-  auto runnerRef = runner.lock();
-  if (!runnerRef) {
+  auto promiseRef = currentPromise();
+  if (!promiseRef) {
     return;
   }
 
-  auto promiseRef = currentPromise();
-  if (!promiseRef) {
-    runnerRef->onTaskSettled(keepPumping);
+  if (!context) {
+    // No context (shouldn't happen): best-effort inline settle.
+    action(promiseRef->runtime, *promiseRef);
+    std::lock_guard<std::mutex> lock(mutex);
+    keepAlive.reset();
     return;
   }
 
-  auto dispatcherRef = runnerRef->dispatcher();
-  if (!dispatcherRef) {
-    runnerRef->onTaskSettled(keepPumping);
+  auto self = shared_from_this();
+
+  if (!keepPumping) {
+    // Spontaneous task (e.g. device.lost): not driven by the ProcessEvents pump.
+    // Settle on the owning runtime's JS thread via its CallInvoker, which is
+    // wired only for the main JS runtime. A device created on a worklet runtime
+    // has no invoker, so its device.lost is dropped (best-effort; see the
+    // Threading model). invokeAsync runs the closure on the main JS thread,
+    // where promiseRef->runtime lives for a main-runtime device.
+    auto invoker = context->callInvoker();
+    if (invoker) {
+      invoker->invokeAsync(
+          [self, action = std::move(action), promiseRef]() mutable {
+            action(promiseRef->runtime, *promiseRef);
+            std::lock_guard<std::mutex> lock(self->mutex);
+            self->keepAlive.reset();
+          });
+    } else {
+      std::lock_guard<std::mutex> lock(mutex);
+      keepAlive.reset();
+    }
     return;
   }
 
-  dispatcherRef->post([self = shared_from_this(), action = std::move(action),
-                       runnerRef, promiseRef](jsi::Runtime &runtime) mutable {
-    runnerRef->onTaskSettled(self->keepPumping);
-    action(runtime, *promiseRef);
-    std::lock_guard<std::mutex> lock(self->mutex);
-    self->keepAlive.reset();
-  });
+  // Pumping task (request/response op). The resolve/reject callback may fire on
+  // a thread that is NOT the owning runtime's thread: with a shared
+  // wgpu::Instance, another runtime's ProcessEvents() pump can consume this Dawn
+  // event. Touching the Promise's runtime off-thread would corrupt Hermes. So we
+  // deposit the actual settle (the only JSI-touching work) into the owning
+  // context's mailbox; the context drains it on its own thread during its next
+  // tick. The deposited closure captures only C++ state and runs no JSI until
+  // drained, so depositing from any thread is safe.
+  context->postSettle(
+      [self, action = std::move(action), promiseRef]() mutable {
+        action(promiseRef->runtime, *promiseRef);
+        if (self->context) {
+          self->context->onTaskSettled(/*keepPumping=*/true);
+        }
+        std::lock_guard<std::mutex> lock(self->mutex);
+        self->keepAlive.reset();
+      });
 }
 
 AsyncTaskHandle::ResolveFunction
@@ -149,9 +182,9 @@ AsyncTaskHandle::AsyncTaskHandle(std::shared_ptr<State> state)
 bool AsyncTaskHandle::valid() const { return _state != nullptr; }
 
 AsyncTaskHandle
-AsyncTaskHandle::create(const std::shared_ptr<AsyncRunner> &runner,
+AsyncTaskHandle::create(const std::shared_ptr<RuntimeContext> &context,
                         bool keepPumping) {
-  auto state = std::make_shared<State>(runner, keepPumping);
+  auto state = std::make_shared<State>(context, keepPumping);
   state->keepAlive = state;
   return AsyncTaskHandle(std::move(state));
 }
diff --git a/packages/skia/cpp/rnwgpu/async/AsyncTaskHandle.h b/packages/skia/cpp/rnwgpu/async/AsyncTaskHandle.h
index cb6c7a2a4c..fea16c0f63 100644
--- a/packages/skia/cpp/rnwgpu/async/AsyncTaskHandle.h
+++ b/packages/skia/cpp/rnwgpu/async/AsyncTaskHandle.h
@@ -8,19 +8,22 @@
 
 #include <jsi/jsi.h>
 
-#include "AsyncDispatcher.h"
-
 namespace rnwgpu {
 class Promise;
 }
 
 namespace rnwgpu::async {
 
-class AsyncRunner;
+class RuntimeContext;
 
 /**
  * Represents a pending asynchronous WebGPU operation that can be converted into
  * a JavaScript Promise.
+ *
+ * In the ProcessEvents model the resolve/reject callbacks are invoked on the
+ * owning runtime's own thread (synchronously from instance.ProcessEvents()
+ * during the RuntimeContext tick, or synchronously from postTask), so the
+ * Promise is settled directly without any thread marshalling.
  */
 class AsyncTaskHandle {
 public:
@@ -34,7 +37,7 @@ class AsyncTaskHandle {
   AsyncTaskHandle();
 
   /**
-   * Internal constructor used by AsyncRunner.
+   * Internal constructor used by RuntimeContext.
    */
   explicit AsyncTaskHandle(std::shared_ptr<State> state);
 
@@ -45,7 +48,7 @@ class AsyncTaskHandle {
 
   void attachPromise(const std::shared_ptr<rnwgpu::Promise> &promise) const;
 
-  static AsyncTaskHandle create(const std::shared_ptr<AsyncRunner> &runner,
+  static AsyncTaskHandle create(const std::shared_ptr<RuntimeContext> &context,
                                 bool keepPumping);
 
 private:
diff --git a/packages/skia/cpp/rnwgpu/async/JSIMicrotaskDispatcher.cpp b/packages/skia/cpp/rnwgpu/async/JSIMicrotaskDispatcher.cpp
deleted file mode 100644
index 6231a833ca..0000000000
--- a/packages/skia/cpp/rnwgpu/async/JSIMicrotaskDispatcher.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-#include "JSIMicrotaskDispatcher.h"
-
-#include <utility>
-
-namespace rnwgpu::async {
-
-JSIMicrotaskDispatcher::JSIMicrotaskDispatcher(jsi::Runtime &runtime)
-    : _runtime(runtime) {}
-
-void JSIMicrotaskDispatcher::post(Work work) {
-  auto microtask = jsi::Function::createFromHostFunction(
-      _runtime, jsi::PropNameID::forAscii(_runtime, "AsyncMicrotask"), 0,
-      [work = std::move(work)](
-          jsi::Runtime &runtime, const jsi::Value & /*thisValue*/,
-          const jsi::Value * /*args*/, size_t /*count*/) -> jsi::Value {
-        work(runtime);
-        return jsi::Value::undefined();
-      });
-
-  _runtime.queueMicrotask(std::move(microtask));
-}
-
-} // namespace rnwgpu::async
diff --git a/packages/skia/cpp/rnwgpu/async/JSIMicrotaskDispatcher.h b/packages/skia/cpp/rnwgpu/async/JSIMicrotaskDispatcher.h
deleted file mode 100644
index bae208c5d2..0000000000
--- a/packages/skia/cpp/rnwgpu/async/JSIMicrotaskDispatcher.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#pragma once
-
-#include "AsyncDispatcher.h"
-
-namespace rnwgpu::async {
-
-/**
- * Dispatcher implementation backed by `jsi::Runtime::queueMicrotask`.
- */
-class JSIMicrotaskDispatcher final
-    : public AsyncDispatcher,
-      public std::enable_shared_from_this<JSIMicrotaskDispatcher> {
-public:
-  explicit JSIMicrotaskDispatcher(jsi::Runtime &runtime);
-
-  void post(Work work) override;
-
-private:
-  jsi::Runtime &_runtime;
-};
-
-} // namespace rnwgpu::async
diff --git a/packages/skia/cpp/rnwgpu/async/RuntimeContext.cpp b/packages/skia/cpp/rnwgpu/async/RuntimeContext.cpp
new file mode 100644
index 0000000000..41bb6048bc
--- /dev/null
+++ b/packages/skia/cpp/rnwgpu/async/RuntimeContext.cpp
@@ -0,0 +1,194 @@
+#include "RuntimeContext.h"
+
+#include <memory>
+#include <stdexcept>
+#include <utility>
+
+#include <ReactCommon/CallInvoker.h>
+
+#include "AsyncTaskHandle.h"
+
+namespace rnwgpu::async {
+
+namespace {
+struct RuntimeData {
+  std::shared_ptr<RuntimeContext> context;
+};
+
+// The main JS runtime and its CallInvoker, registered once on install. The
+// context created for sMainRuntime gets sMainInvoker; spontaneous events
+// (device.lost) on a main-runtime device are delivered through it without the
+// pump. Worklet runtimes have no invoker (best-effort, see the header doc).
+jsi::Runtime *sMainRuntime = nullptr;
+std::shared_ptr<facebook::react::CallInvoker> sMainInvoker;
+
+// Serializes ProcessEvents() across all runtimes that share a wgpu::Instance.
+// Held only across the ProcessEvents call itself, never while running JS /
+// mailbox settle-actions, so it cannot deadlock against the per-context mailbox
+// mutex.
+std::mutex &processEventsMutex() {
+  static std::mutex mutex;
+  return mutex;
+}
+} // namespace
+
+void RuntimeContext::registerMainRuntime(
+    jsi::Runtime *runtime,
+    std::shared_ptr<facebook::react::CallInvoker> invoker) {
+  sMainRuntime = runtime;
+  sMainInvoker = std::move(invoker);
+}
+
+RuntimeContext::RuntimeContext(jsi::Runtime &runtime, wgpu::Instance instance)
+    : _runtime(runtime), _instance(std::move(instance)) {}
+
+std::shared_ptr<RuntimeContext> RuntimeContext::get(jsi::Runtime &runtime) {
+  auto data = runtime.getRuntimeData(runtimeDataUUID());
+  if (!data) {
+    return nullptr;
+  }
+  return std::static_pointer_cast<RuntimeData>(data)->context;
+}
+
+std::shared_ptr<RuntimeContext>
+RuntimeContext::getOrCreate(jsi::Runtime &runtime, wgpu::Instance instance) {
+  if (auto existing = get(runtime)) {
+    return existing;
+  }
+  auto context = std::make_shared<RuntimeContext>(runtime, std::move(instance));
+  // Only the main JS runtime's context carries the CallInvoker; it is used to
+  // deliver spontaneous events (device.lost) without the pump.
+  if (&runtime == sMainRuntime) {
+    context->_callInvoker = sMainInvoker;
+  }
+  auto data = std::make_shared<RuntimeData>();
+  data->context = context;
+  runtime.setRuntimeData(runtimeDataUUID(), data);
+  return context;
+}
+
+AsyncTaskHandle RuntimeContext::postTask(const TaskCallback &callback,
+                                         bool keepPumping) {
+  auto handle = AsyncTaskHandle::create(shared_from_this(), keepPumping);
+  if (!handle.valid()) {
+    throw std::runtime_error("Failed to create AsyncTaskHandle.");
+  }
+
+  // Only pumping tasks (request/response ops) drive the ProcessEvents pump.
+  // Spontaneous tasks (keepPumping == false, e.g. device.lost) never touch the
+  // pump: they settle via the CallInvoker (see AsyncTaskHandle::State::schedule).
+  if (keepPumping) {
+    _pumpTasks.fetch_add(1, std::memory_order_acq_rel);
+    requestTick();
+  }
+
+  auto resolve = handle.createResolveFunction();
+  auto reject = handle.createRejectFunction();
+  try {
+    callback(resolve, reject);
+  } catch (const std::exception &exception) {
+    reject(exception.what());
+  } catch (...) {
+    reject("Unknown native error in RuntimeContext::postTask.");
+  }
+  return handle;
+}
+
+void RuntimeContext::onTaskSettled(bool keepPumping) {
+  if (keepPumping) {
+    _pumpTasks.fetch_sub(1, std::memory_order_acq_rel);
+  }
+}
+
+void RuntimeContext::postSettle(std::function<void()> job) {
+  if (!job) {
+    return;
+  }
+  std::lock_guard<std::mutex> lock(_mailboxMutex);
+  _mailbox.push_back(std::move(job));
+}
+
+void RuntimeContext::drainMailbox() {
+  std::vector<std::function<void()>> jobs;
+  {
+    std::lock_guard<std::mutex> lock(_mailboxMutex);
+    jobs.swap(_mailbox);
+  }
+  // Run settle-actions on this (the owning) thread, NOT under the ProcessEvents
+  // mutex, so JS continuations never execute while the pump lock is held.
+  for (auto &job : jobs) {
+    job();
+  }
+}
+
+void RuntimeContext::requestTick() {
+  bool expected = false;
+  if (!_tickScheduled.compare_exchange_strong(expected, true,
+                                              std::memory_order_acq_rel)) {
+    return;
+  }
+
+  // The pump only ever runs while a request/response op is outstanding, so it
+  // always schedules as soon as possible (delay 0). postTask and tick both run
+  // on the owning runtime's thread, so we schedule the next tick directly via
+  // that runtime's own timer. setTimeout is available on the main RN runtime and
+  // on worklet runtimes (backed by the worklets EventLoop); setImmediate /
+  // queueMicrotask are fallbacks. We do NOT use queueMicrotask as the primary
+  // mechanism: a self-rescheduling microtask never yields the microtask
+  // checkpoint, starving the runtime's task loop.
+  auto self = shared_from_this();
+  jsi::Runtime &rt = _runtime;
+  auto tickCallback = jsi::Function::createFromHostFunction(
+      rt, jsi::PropNameID::forAscii(rt, "RNWGPUAsyncTick"), 0,
+      [self](jsi::Runtime & /*runtime*/, const jsi::Value & /*thisVal*/,
+             const jsi::Value * /*args*/, size_t /*count*/) -> jsi::Value {
+        self->tick();
+        return jsi::Value::undefined();
+      });
+
+  auto global = rt.global();
+  auto setTimeoutValue = global.getProperty(rt, "setTimeout");
+  if (setTimeoutValue.isObject() &&
+      setTimeoutValue.asObject(rt).isFunction(rt)) {
+    setTimeoutValue.asObject(rt).asFunction(rt).call(
+        rt, jsi::Value(rt, tickCallback), jsi::Value(0.0));
+    return;
+  }
+  auto setImmediateValue = global.getProperty(rt, "setImmediate");
+  if (setImmediateValue.isObject() &&
+      setImmediateValue.asObject(rt).isFunction(rt)) {
+    setImmediateValue.asObject(rt).asFunction(rt).call(
+        rt, jsi::Value(rt, tickCallback));
+    return;
+  }
+  rt.queueMicrotask(std::move(tickCallback));
+}
+
+void RuntimeContext::tick() {
+  _tickScheduled.store(false, std::memory_order_release);
+  {
+    // Serialize ProcessEvents across runtimes sharing this instance. Callbacks
+    // fired here only deposit into mailboxes (postSettle), they do not run JS.
+    std::lock_guard<std::mutex> lock(processEventsMutex());
+    _instance.ProcessEvents();
+  }
+  // Settle this runtime's ready promises on this thread, outside the pump lock.
+  drainMailbox();
+  // Keep pumping only while a "pumping" task (active async work) is outstanding.
+  // Non-pumping tasks (e.g. device.lost) intentionally do NOT keep the pump
+  // alive: we prioritise battery over catching a device.lost fired while idle.
+  if (_pumpTasks.load(std::memory_order_acquire) > 0) {
+    requestTick();
+  }
+}
+
+jsi::UUID RuntimeContext::runtimeDataUUID() {
+  // Fixed, unique key for storing the RuntimeContext in the runtime's
+  // runtimeData. Must not collide with other runtimeData consumers (e.g.
+  // react-native-worklets' weakRuntimeUUID).
+  static const jsi::UUID uuid{0x7b9a3c10, 0x4d2e, 0x4f8a, 0x9c3d,
+                              0x1f6e5a2b8c40};
+  return uuid;
+}
+
+} // namespace rnwgpu::async
diff --git a/packages/skia/cpp/rnwgpu/async/RuntimeContext.h b/packages/skia/cpp/rnwgpu/async/RuntimeContext.h
new file mode 100644
index 0000000000..c98d04d6f1
--- /dev/null
+++ b/packages/skia/cpp/rnwgpu/async/RuntimeContext.h
@@ -0,0 +1,121 @@
+#pragma once
+
+#include <atomic>
+#include <cstddef>
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <vector>
+
+#include <jsi/jsi.h>
+
+#include "AsyncTaskHandle.h"
+
+#include "webgpu/webgpu_cpp.h"
+
+namespace jsi = facebook::jsi;
+
+namespace facebook::react {
+class CallInvoker;
+} // namespace facebook::react
+
+namespace rnwgpu::async {
+
+/**
+ * Per-runtime coordinator for asynchronous WebGPU operations.
+ *
+ * Each JS runtime that uses WebGPU gets its own RuntimeContext, stored in the
+ * runtime's runtimeData. Async Dawn operations are registered with
+ * CallbackMode::AllowProcessEvents and driven to completion by pumping
+ * `instance.ProcessEvents()` on the runtime's OWN thread via a self-
+ * rescheduling tick (scheduled through that runtime's setTimeout). Because
+ * ProcessEvents invokes the Dawn callbacks synchronously on the pumping thread,
+ * the JS Promise is settled directly on the owning runtime, with no background
+ * thread and no cross-thread hop.
+ *
+ * The pump only runs while at least one "pumping" task is outstanding, so it
+ * costs nothing when idle and stops cleanly.
+ *
+ * Spontaneous events (keepPumping = false): events that may fire at any time,
+ * independent of any request/response op (today only GPUDevice::getLost, whose
+ * Dawn callback is registered AllowSpontaneous). These are NOT driven by the
+ * pump. Instead their settle is marshalled onto the owning runtime's JS thread
+ * via that runtime's CallInvoker, which is wired only for the MAIN JS runtime
+ * (callInvoker()). A device created on a worklet runtime has no invoker, so its
+ * device.lost is best-effort and may never fire.
+ *
+ * Shared-instance safety (mailbox): multiple runtimes may share one
+ * wgpu::Instance. ProcessEvents() drains the whole instance queue and fires
+ * callbacks on the calling thread, which may NOT be the owning runtime's thread
+ * for a given promise. So a settled callback never touches JSI inline; it
+ * deposits a settle-action (a plain C++ closure, no JSI) into the OWNING
+ * context's thread-safe mailbox via postSettle(), and each context drains its
+ * own mailbox on its own thread during tick(). ProcessEvents() itself is
+ * serialized across runtimes by a process-wide mutex, since concurrent
+ * ProcessEvents on one instance is not guaranteed reentrant.
+ *
+ * Threading contract: a RuntimeContext must only be pumped from the runtime it
+ * was created for. Create and use a GPUDevice (and the buffers/queues derived
+ * from it) on the same runtime that requested the adapter.
+ */
+class RuntimeContext : public std::enable_shared_from_this<RuntimeContext> {
+public:
+  using TaskCallback =
+      std::function<void(const AsyncTaskHandle::ResolveFunction &,
+                         const AsyncTaskHandle::RejectFunction &)>;
+
+  RuntimeContext(jsi::Runtime &runtime, wgpu::Instance instance);
+
+  static std::shared_ptr<RuntimeContext> get(jsi::Runtime &runtime);
+  static std::shared_ptr<RuntimeContext> getOrCreate(jsi::Runtime &runtime,
+                                                     wgpu::Instance instance);
+
+  // Register the main JS runtime and its CallInvoker. The RuntimeContext created
+  // for this runtime gets the invoker (callInvoker() returns it); every other
+  // runtime's context returns null. Called once from RNSkManager on install.
+  static void
+  registerMainRuntime(jsi::Runtime *runtime,
+                      std::shared_ptr<facebook::react::CallInvoker> invoker);
+
+  // CallInvoker for this runtime's JS thread, or null. Non-null only for the
+  // main JS runtime; used to deliver spontaneous events (device.lost) without
+  // the pump. See the class doc.
+  const std::shared_ptr<facebook::react::CallInvoker> &callInvoker() const {
+    return _callInvoker;
+  }
+
+  // The wgpu::Instance bound to this runtime.
+  wgpu::Instance instance() const { return _instance; }
+
+  AsyncTaskHandle postTask(const TaskCallback &callback,
+                           bool keepPumping = true);
+
+  // Deposit a settle-action to run on THIS context's runtime thread. Thread-safe
+  // (callable from any thread, e.g. another runtime that pumped ProcessEvents).
+  // The job must not touch JSI until it runs (it runs during drainMailbox on the
+  // owning thread).
+  void postSettle(std::function<void()> job);
+
+  // Invoked by a drained settle-action when its task settles. Runs on the owning
+  // runtime's thread.
+  void onTaskSettled(bool keepPumping);
+
+private:
+  static jsi::UUID runtimeDataUUID();
+
+  void requestTick();
+  void tick();
+  void drainMailbox();
+
+  jsi::Runtime &_runtime;
+  wgpu::Instance _instance;
+  // Non-null only for the main JS runtime's context (see registerMainRuntime).
+  std::shared_ptr<facebook::react::CallInvoker> _callInvoker;
+  std::atomic<std::size_t> _pumpTasks{0};
+  std::atomic<bool> _tickScheduled{false};
+
+  std::mutex _mailboxMutex;
+  std::vector<std::function<void()>> _mailbox;
+};
+
+} // namespace rnwgpu::async
diff --git a/packages/skia/src/skia/types/WebGPU.ts b/packages/skia/src/skia/types/WebGPU.ts
index 225f67de22..38ebd17368 100644
--- a/packages/skia/src/skia/types/WebGPU.ts
+++ b/packages/skia/src/skia/types/WebGPU.ts
@@ -1,5 +1,48 @@
+// Ensure the native bindings (which install the global `installWebGPU` host
+// function) are set up before we capture it below. `src/index.ts` already
+// imports this first; this makes the capture robust for direct deep imports too.
+import "../NativeSetup";
+
 import type { NativeBuffer } from "./NativeBuffer";
 
+/**
+ * Make the WebGPU flag constants (`GPUBufferUsage`, `GPUColorWrite`,
+ * `GPUMapMode`, `GPUShaderStage`, `GPUTextureUsage`) available on the runtime
+ * that calls this.
+ *
+ * The native module installs these globals on the main JS runtime, but worklet
+ * runtimes (Reanimated UI, dedicated worklet runtimes, Vision Camera frame
+ * processors) start without them, so referencing the bare global inside a
+ * worklet yields `undefined`. Call `installWebGPU()` once at the top of a
+ * worklet to install them there:
+ *
+ * ```tsx
+ * import { installWebGPU } from "@shopify/react-native-skia";
+ *
+ * const work = (device: GPUDevice) => {
+ *   "worklet";
+ *   installWebGPU();
+ *   device.createBuffer({
+ *     usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ,
+ *   });
+ * };
+ * ```
+ *
+ * `installWebGPU` is a native host function. When captured into a worklet, the
+ * Worklets serializer re-creates it on the worklet runtime, so calling it there
+ * installs the constants on that runtime. The values come from the native
+ * `wgpu::*Usage` enums, so they stay a single source of truth across runtimes.
+ * Calling it on a runtime that already has the constants is a safe no-op, and on
+ * web (where the constants are always global) it is a no-op too.
+ */
+export const installWebGPU: () => void = (() => {
+  const g =
+    typeof global !== "undefined"
+      ? (global as unknown as { installWebGPU?: () => void })
+      : undefined;
+  return g && typeof g.installWebGPU === "function" ? g.installWebGPU : () => {};
+})();
+
 // Skia's Graphite/Dawn backend extends the standard WebGPU API (typed by
 // @webgpu/types) with a few Skia- and Dawn-specific entry points. These are
 // only available on native (SK_GRAPHITE) builds, reachable through