@@ -79,7 +79,7 @@ static inline void compute_2d_workgroups(uint32_t total_wg, uint32_t max_per_dim
7979
8080/* Constants */
8181
82- #define WEBGPU_DEFAULT_COMMAND_SUBMIT_BATCH_SIZE 32u
82+ #define WEBGPU_DEFAULT_COMMAND_SUBMIT_BATCH_SIZE 64u
8383#define WEBGPU_NUM_PARAM_SLOT_SAFETY_MARGIN 10u
8484#define WEBGPU_RUNTIME_WAIT_TIMEOUT_MS 30000u
8585#define WEBGPU_RUNTIME_WAIT_TIMEOUT_NS (WEBGPU_RUNTIME_WAIT_TIMEOUT_MS * 1e6 )
@@ -97,14 +97,6 @@ static inline void compute_2d_workgroups(uint32_t total_wg, uint32_t max_per_dim
9797
9898/* End Constants */
9999
100- static inline wgpu::CallbackMode ggml_webgpu_callback_mode () {
101- #ifdef __EMSCRIPTEN__
102- return wgpu::CallbackMode::AllowProcessEvents;
103- #else
104- return wgpu::CallbackMode::AllowSpontaneous;
105- #endif
106- }
107-
108100// This is a "fake" base pointer, since WebGPU buffers do not have pointers to
109101// their locations.
110102static void * const webgpu_ptr_base = (void *) (uintptr_t ) 0x1000 ; // NOLINT
@@ -445,34 +437,25 @@ static void ggml_backend_webgpu_check_wait_status(wgpu::WaitStatus wait_status,
445437}
446438
447439#ifdef __EMSCRIPTEN__
448- // iOS browsers seem to have very strict limits on the number of in-flight GPU commands, so we need to throttle to avoid failures.
449440EM_JS (int , ggml_webgpu_is_ios_browser, (), {
450441 const ua = navigator.userAgent ;
451442 return (ua.includes (' iPhone' ) || ua.includes (' iPad' )) ? 1 : 0 ;
452443});
453444#endif
454445
455- static uint32_t ggml_backend_webgpu_get_max_inflight_batches (const wgpu::AdapterInfo & info) {
446+ // TODO: these next two functions may want tuning across different platforms and workloads.
447+ static uint32_t ggml_backend_webgpu_get_max_inflight_batches () {
456448#ifdef __EMSCRIPTEN__
449+ // iOS has very strict limits on the number of in-flight GPU commands,
450+ // so we need to throttle to avoid failures.
457451 if (ggml_webgpu_is_ios_browser ()) {
458452 return 1 ;
459453 }
460- #else
461- GGML_UNUSED (info);
462454#endif
463-
464455 return UINT32_MAX;
465456}
466457
467- static uint32_t ggml_backend_webgpu_get_command_submit_batch_size (const wgpu::AdapterInfo & info) {
468- #ifdef __EMSCRIPTEN__
469- if (ggml_webgpu_is_ios_browser ()) {
470- return 16 ;
471- }
472- #else
473- GGML_UNUSED (info);
474- #endif
475-
458+ static uint32_t ggml_backend_webgpu_get_command_submit_batch_size () {
476459 return WEBGPU_DEFAULT_COMMAND_SUBMIT_BATCH_SIZE;
477460}
478461
@@ -482,7 +465,7 @@ static void ggml_backend_webgpu_wait_queue(webgpu_global_context & ctx) {
482465
483466 const wgpu::WaitStatus wait_status = ctx->instance .WaitAny (
484467 ctx->queue .OnSubmittedWorkDone (
485- ggml_webgpu_callback_mode () ,
468+ wgpu::CallbackMode::AllowSpontaneous ,
486469 [&callback_status, &callback_message](wgpu::QueueWorkDoneStatus status, wgpu::StringView message) {
487470 callback_status = status;
488471 callback_message = std::string (message);
@@ -502,7 +485,7 @@ static void ggml_backend_webgpu_map_buffer(webgpu_global_context & ctx,
502485 std::string callback_message;
503486
504487 const wgpu::WaitStatus wait_status = ctx->instance .WaitAny (
505- buffer.MapAsync (mode, offset, size, ggml_webgpu_callback_mode () ,
488+ buffer.MapAsync (mode, offset, size, wgpu::CallbackMode::AllowSpontaneous ,
506489 [&callback_status, &callback_message](wgpu::MapAsyncStatus status, wgpu::StringView message) {
507490 callback_status = status;
508491 callback_message = std::string (message);
@@ -542,15 +525,15 @@ static void ggml_backend_webgpu_debug(webgpu_global_context & ctx) {
542525#endif
543526
544527#ifdef GGML_WEBGPU_GPU_PROFILE
545- static void ggml_backend_webgpu_collect_profile_futures (webgpu_global_context & ctx,
546- const std::vector<webgpu_command > & commands,
547- std::vector<wgpu::FutureWaitInfo> & futures) {
528+ static void ggml_backend_webgpu_collect_profile_futures (webgpu_global_context & ctx,
529+ const std::vector<webgpu_encoded_op > & commands,
530+ std::vector<wgpu::FutureWaitInfo> & futures) {
548531 for (const auto & command : commands) {
549532 auto label = command.pipeline_name ;
550533 auto ts_bufs = command.timestamp_query_bufs ;
551534
552535 wgpu::Future f = ts_bufs.host_buf .MapAsync (
553- wgpu::MapMode::Read, 0 , ts_bufs.host_buf .GetSize (), ggml_webgpu_callback_mode () ,
536+ wgpu::MapMode::Read, 0 , ts_bufs.host_buf .GetSize (), wgpu::CallbackMode::AllowSpontaneous ,
554537 [ctx, ts_bufs, label](wgpu::MapAsyncStatus status, wgpu::StringView message) {
555538 if (status != wgpu::MapAsyncStatus::Success) {
556539 GGML_LOG_ERROR (" ggml_webgpu: Failed to map timestamp buffer: %s\n " , std::string (message).c_str ());
@@ -3428,7 +3411,7 @@ static bool create_webgpu_device(ggml_backend_webgpu_reg_context * ctx) {
34283411
34293412 ctx->webgpu_global_ctx ->instance .WaitAny (
34303413 ctx->webgpu_global_ctx ->instance .RequestAdapter (
3431- &options, ggml_webgpu_callback_mode () ,
3414+ &options, wgpu::CallbackMode::AllowSpontaneous ,
34323415 [&ctx](wgpu::RequestAdapterStatus status, wgpu::Adapter adapter, const char * message) {
34333416 if (status != wgpu::RequestAdapterStatus::Success) {
34343417 GGML_LOG_ERROR (" ggml_webgpu: Failed to get an adapter: %s\n " , message);
@@ -3449,8 +3432,8 @@ static bool create_webgpu_device(ggml_backend_webgpu_reg_context * ctx) {
34493432 }
34503433#endif
34513434 ctx->webgpu_global_ctx ->adapter .GetInfo (&info);
3452- ctx->webgpu_global_ctx ->command_submit_batch_size = ggml_backend_webgpu_get_command_submit_batch_size (info );
3453- ctx->webgpu_global_ctx ->max_inflight_batches = ggml_backend_webgpu_get_max_inflight_batches (info );
3435+ ctx->webgpu_global_ctx ->command_submit_batch_size = ggml_backend_webgpu_get_command_submit_batch_size ();
3436+ ctx->webgpu_global_ctx ->max_inflight_batches = ggml_backend_webgpu_get_max_inflight_batches ();
34543437 wgpu::SupportedFeatures features;
34553438 ctx->webgpu_global_ctx ->adapter .GetFeatures (&features);
34563439 // we require f16 support
@@ -3501,7 +3484,7 @@ static bool create_webgpu_device(ggml_backend_webgpu_reg_context * ctx) {
35013484 dev_desc.requiredFeatures = required_features.data ();
35023485 dev_desc.requiredFeatureCount = required_features.size ();
35033486 dev_desc.SetDeviceLostCallback (
3504- ggml_webgpu_callback_mode () ,
3487+ wgpu::CallbackMode::AllowSpontaneous ,
35053488 [ctx](const wgpu::Device & device, wgpu::DeviceLostReason reason, wgpu::StringView message) {
35063489 if (reason == wgpu::DeviceLostReason::Destroyed) {
35073490 return ;
@@ -3535,7 +3518,7 @@ static bool create_webgpu_device(ggml_backend_webgpu_reg_context * ctx) {
35353518
35363519 ctx->webgpu_global_ctx ->instance .WaitAny (
35373520 ctx->webgpu_global_ctx ->adapter .RequestDevice (
3538- &dev_desc, ggml_webgpu_callback_mode () ,
3521+ &dev_desc, wgpu::CallbackMode::AllowSpontaneous ,
35393522 [ctx](wgpu::RequestDeviceStatus status, wgpu::Device device, wgpu::StringView message) {
35403523 if (status != wgpu::RequestDeviceStatus::Success) {
35413524 GGML_LOG_ERROR (" ggml_webgpu: Failed to get a device: %s\n " , std::string (message).c_str ());
0 commit comments