Skip to content

Commit 2f9d355

Browse files
committed
feat: stabilize large-model web bridge loading
Enable pthread/memory64 bridge builds with a higher wasm64 memory ceiling and add fetch-backed stream resume handling for large Hugging Face model downloads. This keeps unsplit multi-GB model loads reliable in cross-origin-isolated browser runtimes.
1 parent c0f9596 commit 2f9d355

7 files changed

Lines changed: 2454 additions & 284 deletions

File tree

CMakeLists.txt

Lines changed: 49 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,22 @@ set(LLAMA_BUILD_HTML OFF CACHE BOOL "" FORCE)
2828
set(LLAMA_WASM_SINGLE_FILE OFF CACHE BOOL "" FORCE)
2929
set(LLAMA_WASM_MEM64 OFF CACHE BOOL "" FORCE)
3030

31+
option(LLAMADART_WEBGPU_MEM64 "Build WebGPU bridge core with wasm64/memory64" OFF)
32+
set(LLAMADART_WEBGPU_MEM64_MAX_MEMORY "12884901888" CACHE STRING "Max wasm64 linear memory in bytes")
33+
option(LLAMADART_WEBGPU_PTHREADS "Enable pthread support for bridge runtime" ON)
34+
set(LLAMADART_WEBGPU_PTHREAD_POOL_SIZE "2" CACHE STRING "PThread pool size for bridge runtime")
35+
36+
if (LLAMADART_WEBGPU_PTHREADS)
37+
add_compile_options("-pthread")
38+
add_link_options("-pthread")
39+
endif()
40+
41+
if (LLAMADART_WEBGPU_MEM64)
42+
set(LLAMA_WASM_MEM64 ON CACHE BOOL "" FORCE)
43+
add_compile_options("-sMEMORY64=1")
44+
add_link_options("-sMEMORY64=1" "-sWASM_BIGINT=1")
45+
endif()
46+
3147
set(GGML_NATIVE OFF CACHE BOOL "" FORCE)
3248
set(GGML_OPENMP OFF CACHE BOOL "" FORCE)
3349
set(GGML_WEBGPU ON CACHE BOOL "" FORCE)
@@ -103,19 +119,48 @@ target_include_directories(llama_webgpu_core PRIVATE
103119

104120
target_link_libraries(llama_webgpu_core PRIVATE llama llamadart_mtmd)
105121

106-
target_link_options(llama_webgpu_core PRIVATE
122+
set(LLAMADART_WEBGPU_LINK_OPTIONS
107123
"-sALLOW_MEMORY_GROWTH=1"
124+
"-sWASMFS=1"
125+
"-sASYNCIFY=1"
126+
"-sFORCE_FILESYSTEM=1"
108127
"-sASSERTIONS=1"
109-
"-sMIN_SAFARI_VERSION=170400"
110128
"-sMODULARIZE=1"
111129
"-sEXPORT_ES6=1"
112130
"-sEXPORT_NAME=createLlamaWebGpuCoreModule"
113131
"-sENVIRONMENT=web,worker"
114132
"-sEXPORTED_RUNTIME_METHODS=['FS','ccall','UTF8ToString']"
115-
"-sEXPORTED_FUNCTIONS=['_main','_llamadart_webgpu_probe','_llamadart_webgpu_backends_json','_llamadart_webgpu_last_error','_llamadart_webgpu_set_log_level','_llamadart_webgpu_load_model','_llamadart_webgpu_mmproj_load','_llamadart_webgpu_mmproj_free','_llamadart_webgpu_mmproj_supports_vision','_llamadart_webgpu_mmproj_supports_audio','_llamadart_webgpu_media_clear_pending','_llamadart_webgpu_media_add_file','_llamadart_webgpu_media_add_encoded','_llamadart_webgpu_media_add_rgb','_llamadart_webgpu_media_add_audio_f32','_llamadart_webgpu_tokenize_to_json','_llamadart_webgpu_last_tokens_json','_llamadart_webgpu_detokenize_from_json','_llamadart_webgpu_last_detokenized','_llamadart_webgpu_generate','_llamadart_webgpu_begin_generation','_llamadart_webgpu_next_token','_llamadart_webgpu_last_piece','_llamadart_webgpu_end_generation','_llamadart_webgpu_request_cancel','_llamadart_webgpu_last_output','_llamadart_webgpu_get_context_size','_llamadart_webgpu_model_meta_json','_llamadart_webgpu_shutdown']"
133+
"-sEXPORTED_FUNCTIONS=['_main','_llamadart_webgpu_probe','_llamadart_webgpu_backends_json','_llamadart_webgpu_last_error','_llamadart_webgpu_set_log_level','_llamadart_webgpu_load_model','_llamadart_webgpu_load_model_from_url','_llamadart_webgpu_mmproj_load','_llamadart_webgpu_mmproj_free','_llamadart_webgpu_mmproj_supports_vision','_llamadart_webgpu_mmproj_supports_audio','_llamadart_webgpu_media_clear_pending','_llamadart_webgpu_media_add_file','_llamadart_webgpu_media_add_encoded','_llamadart_webgpu_media_add_rgb','_llamadart_webgpu_media_add_audio_f32','_llamadart_webgpu_tokenize_to_json','_llamadart_webgpu_last_tokens_json','_llamadart_webgpu_detokenize_from_json','_llamadart_webgpu_last_detokenized','_llamadart_webgpu_generate','_llamadart_webgpu_begin_generation','_llamadart_webgpu_next_token','_llamadart_webgpu_last_piece','_llamadart_webgpu_end_generation','_llamadart_webgpu_request_cancel','_llamadart_webgpu_last_output','_llamadart_webgpu_get_context_size','_llamadart_webgpu_model_meta_json','_llamadart_webgpu_shutdown']"
134+
"-lwasmfs_fetch.js"
116135
)
117136

137+
if (LLAMADART_WEBGPU_PTHREADS)
138+
list(APPEND LLAMADART_WEBGPU_LINK_OPTIONS
139+
"-sPTHREAD_POOL_SIZE=${LLAMADART_WEBGPU_PTHREAD_POOL_SIZE}"
140+
)
141+
endif()
142+
143+
if (LLAMADART_WEBGPU_MEM64)
144+
list(APPEND LLAMADART_WEBGPU_LINK_OPTIONS
145+
"-sMEMORY64=1"
146+
"-sWASM_BIGINT=1"
147+
"-sMAXIMUM_MEMORY=${LLAMADART_WEBGPU_MEM64_MAX_MEMORY}"
148+
)
149+
else()
150+
list(APPEND LLAMADART_WEBGPU_LINK_OPTIONS
151+
"-sMIN_SAFARI_VERSION=170400"
152+
"-sMAXIMUM_MEMORY=4294967296"
153+
)
154+
endif()
155+
156+
target_link_options(llama_webgpu_core PRIVATE ${LLAMADART_WEBGPU_LINK_OPTIONS})
157+
158+
set(LLAMADART_WEBGPU_CORE_OUTPUT_NAME "llama_webgpu_core")
159+
if (LLAMADART_WEBGPU_MEM64)
160+
set(LLAMADART_WEBGPU_CORE_OUTPUT_NAME "llama_webgpu_core_mem64")
161+
endif()
162+
118163
set_target_properties(llama_webgpu_core PROPERTIES
119-
OUTPUT_NAME "llama_webgpu_core"
164+
OUTPUT_NAME "${LLAMADART_WEBGPU_CORE_OUTPUT_NAME}"
120165
SUFFIX ".js"
121166
)

README.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,29 @@ Useful environment variables:
2626
- `LLAMA_CPP_DIR` (path to llama.cpp source)
2727
- `BUILD_DIR` (cmake build dir)
2828
- `OUT_DIR` (output directory; defaults to `dist/`)
29+
- `WEBGPU_BRIDGE_BUILD_MEM64` (`1` to also build optional wasm64 core assets)
30+
- `WEBGPU_BRIDGE_MEM64_MAX_MEMORY` (optional wasm64 max linear memory bytes)
31+
- `WEBGPU_BRIDGE_PTHREADS` (`1`/`0`, defaults to `1`)
32+
- `WEBGPU_BRIDGE_PTHREAD_POOL_SIZE` (defaults to `2`)
33+
34+
Notes:
35+
36+
- wasm64 builds default to `WEBGPU_BRIDGE_MEM64_MAX_MEMORY=12884901888` (12 GiB).
37+
- Loading large single-file remote models requires a cross-origin-isolated page
38+
(served with `COOP`/`COEP` response headers) so worker-thread runtime paths are available.
2939

3040
Build outputs:
3141

3242
- `dist/llama_webgpu_bridge.js`
43+
- `dist/llama_webgpu_bridge_worker.js`
3344
- `dist/llama_webgpu_core.js`
3445
- `dist/llama_webgpu_core.wasm`
3546

47+
Optional outputs (when `WEBGPU_BRIDGE_BUILD_MEM64=1`):
48+
49+
- `dist/llama_webgpu_core_mem64.js`
50+
- `dist/llama_webgpu_core_mem64.wasm`
51+
3652
## CI
3753

3854
This repo includes a wasm build gate in:
@@ -67,6 +83,7 @@ Example publish:
6783
After publish, assets are CDN-available at:
6884

6985
- `https://cdn.jsdelivr.net/gh/leehack/llama-web-bridge-assets@v0.1.1/llama_webgpu_bridge.js`
86+
- `https://cdn.jsdelivr.net/gh/leehack/llama-web-bridge-assets@v0.1.1/llama_webgpu_bridge_worker.js`
7087
- `https://cdn.jsdelivr.net/gh/leehack/llama-web-bridge-assets@v0.1.1/llama_webgpu_core.js`
7188
- `https://cdn.jsdelivr.net/gh/leehack/llama-web-bridge-assets@v0.1.1/llama_webgpu_core.wasm`
7289

0 commit comments

Comments
 (0)