Skip to content

Commit a5317b1

Browse files
authored
Merge pull request #3 from leehack/fix/publish-assets-release-target
fix(webgpu): harden multimodal worker runtime and tuning
2 parents 5431bbf + f940706 commit a5317b1

5 files changed

Lines changed: 2262 additions & 221 deletions

File tree

CMakeLists.txt

Lines changed: 21 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,9 @@ set(LLAMA_WASM_MEM64 OFF CACHE BOOL "" FORCE)
3131
option(LLAMADART_WEBGPU_MEM64 "Build WebGPU bridge core with wasm64/memory64" OFF)
3232
set(LLAMADART_WEBGPU_MEM64_MAX_MEMORY "12884901888" CACHE STRING "Max wasm64 linear memory in bytes")
3333
option(LLAMADART_WEBGPU_PTHREADS "Enable pthread support for bridge runtime" ON)
34-
set(LLAMADART_WEBGPU_PTHREAD_POOL_SIZE "2" CACHE STRING "PThread pool size for bridge runtime")
34+
set(LLAMADART_WEBGPU_PTHREAD_POOL_SIZE "4" CACHE STRING "PThread pool size for bridge runtime")
35+
option(LLAMADART_WEBGPU_ALLOW_MEMORY_GROWTH "Allow wasm linear memory growth" ON)
36+
set(LLAMADART_WEBGPU_INITIAL_MEMORY "0" CACHE STRING "Initial wasm linear memory in bytes when growth is disabled")
3537

3638
if (LLAMADART_WEBGPU_PTHREADS)
3739
add_compile_options("-pthread")
@@ -135,22 +137,37 @@ target_include_directories(llama_webgpu_core PRIVATE
135137
target_link_libraries(llama_webgpu_core PRIVATE llama llamadart_mtmd)
136138

137139
set(LLAMADART_WEBGPU_LINK_OPTIONS
138-
"-sALLOW_MEMORY_GROWTH=1"
139140
"-sWASMFS=1"
140141
"-sASYNCIFY=1"
141142
"-sFORCE_FILESYSTEM=1"
142-
"-sASSERTIONS=1"
143+
"-sASSERTIONS=0"
143144
"-sMODULARIZE=1"
144145
"-sEXPORT_ES6=1"
145146
"-sEXPORT_NAME=createLlamaWebGpuCoreModule"
146147
"-sENVIRONMENT=web,worker"
147148
"-sEXPORTED_RUNTIME_METHODS=['FS','ccall','UTF8ToString']"
148-
"-sEXPORTED_FUNCTIONS=['_main','_llamadart_webgpu_probe','_llamadart_webgpu_backends_json','_llamadart_webgpu_last_error','_llamadart_webgpu_set_log_level','_llamadart_webgpu_load_model','_llamadart_webgpu_load_model_from_url','_llamadart_webgpu_mmproj_load','_llamadart_webgpu_mmproj_free','_llamadart_webgpu_mmproj_supports_vision','_llamadart_webgpu_mmproj_supports_audio','_llamadart_webgpu_media_clear_pending','_llamadart_webgpu_media_add_file','_llamadart_webgpu_media_add_encoded','_llamadart_webgpu_media_add_rgb','_llamadart_webgpu_media_add_audio_f32','_llamadart_webgpu_tokenize_to_json','_llamadart_webgpu_last_tokens_json','_llamadart_webgpu_detokenize_from_json','_llamadart_webgpu_last_detokenized','_llamadart_webgpu_embed_to_json','_llamadart_webgpu_last_embedding_json','_llamadart_webgpu_generate','_llamadart_webgpu_begin_generation','_llamadart_webgpu_next_token','_llamadart_webgpu_last_piece','_llamadart_webgpu_end_generation','_llamadart_webgpu_request_cancel','_llamadart_webgpu_last_output','_llamadart_webgpu_get_context_size','_llamadart_webgpu_model_meta_json','_llamadart_webgpu_shutdown']"
149+
"-sEXPORTED_FUNCTIONS=['_main','_llamadart_webgpu_probe','_llamadart_webgpu_supports_pthreads','_llamadart_webgpu_backends_json','_llamadart_webgpu_last_error','_llamadart_webgpu_set_log_level','_llamadart_webgpu_load_model','_llamadart_webgpu_load_model_from_url','_llamadart_webgpu_mmproj_load','_llamadart_webgpu_mmproj_free','_llamadart_webgpu_mmproj_supports_vision','_llamadart_webgpu_mmproj_supports_audio','_llamadart_webgpu_media_clear_pending','_llamadart_webgpu_media_add_file','_llamadart_webgpu_media_add_encoded','_llamadart_webgpu_media_add_rgb','_llamadart_webgpu_media_add_audio_f32','_llamadart_webgpu_tokenize_to_json','_llamadart_webgpu_last_tokens_json','_llamadart_webgpu_detokenize_from_json','_llamadart_webgpu_last_detokenized','_llamadart_webgpu_embed_to_json','_llamadart_webgpu_last_embedding_json','_llamadart_webgpu_generate','_llamadart_webgpu_begin_generation','_llamadart_webgpu_next_token','_llamadart_webgpu_last_piece','_llamadart_webgpu_end_generation','_llamadart_webgpu_request_cancel','_llamadart_webgpu_last_output','_llamadart_webgpu_get_context_size','_llamadart_webgpu_model_meta_json','_llamadart_webgpu_shutdown']"
149150
"-lwasmfs_fetch.js"
150151
)
151152

153+
if (LLAMADART_WEBGPU_ALLOW_MEMORY_GROWTH)
154+
list(APPEND LLAMADART_WEBGPU_LINK_OPTIONS "-sALLOW_MEMORY_GROWTH=1")
155+
else()
156+
list(APPEND LLAMADART_WEBGPU_LINK_OPTIONS "-sALLOW_MEMORY_GROWTH=0")
157+
if (LLAMADART_WEBGPU_INITIAL_MEMORY)
158+
list(APPEND LLAMADART_WEBGPU_LINK_OPTIONS "-sINITIAL_MEMORY=${LLAMADART_WEBGPU_INITIAL_MEMORY}")
159+
list(APPEND LLAMADART_WEBGPU_LINK_OPTIONS "-sMAXIMUM_MEMORY=${LLAMADART_WEBGPU_INITIAL_MEMORY}")
160+
endif()
161+
endif()
162+
163+
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
164+
list(REMOVE_ITEM LLAMADART_WEBGPU_LINK_OPTIONS "-sASSERTIONS=0")
165+
list(APPEND LLAMADART_WEBGPU_LINK_OPTIONS "-sASSERTIONS=1")
166+
endif()
167+
152168
if (LLAMADART_WEBGPU_PTHREADS)
153169
list(APPEND LLAMADART_WEBGPU_LINK_OPTIONS
170+
"-sPTHREAD_POOL_SIZE_STRICT=2"
154171
"-sPTHREAD_POOL_SIZE=${LLAMADART_WEBGPU_PTHREAD_POOL_SIZE}"
155172
)
156173
endif()

README.md

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,13 +29,15 @@ Useful environment variables:
2929
- `WEBGPU_BRIDGE_BUILD_MEM64` (`1` to also build optional wasm64 core assets)
3030
- `WEBGPU_BRIDGE_MEM64_MAX_MEMORY` (optional wasm64 max linear memory bytes)
3131
- `WEBGPU_BRIDGE_PTHREADS` (`1`/`0`, defaults to `1`)
32-
- `WEBGPU_BRIDGE_PTHREAD_POOL_SIZE` (defaults to `2`)
32+
- `WEBGPU_BRIDGE_PTHREAD_POOL_SIZE` (defaults to `4`)
3333

3434
Notes:
3535

3636
- wasm64 builds default to `WEBGPU_BRIDGE_MEM64_MAX_MEMORY=12884901888` (12 GiB).
3737
- Large single-file remote model loading requires a cross-origin isolated page
3838
(`COOP`/`COEP`) so worker-thread runtime paths are available.
39+
- pthread builds enable `-sPTHREAD_POOL_SIZE_STRICT=2` so pool exhaustion
40+
throws explicit errors instead of risking deadlock.
3941

4042
Build outputs:
4143

0 commit comments

Comments
 (0)