Skip to content

Commit 618845b

Browse files
unamedkr and claude
committed
S5: WASM demo — one-click "Try Demo" with auto-download model
The WASM demo (189 KB, GitHub Pages) now has a prominent "Try Demo" button that auto-downloads SmolLM2-135M (~135 MB) from HuggingFace directly in the browser. No install, no API key, no server.

Flow: click → download with progress bar → load into WASM → chat. Previously users had to find and drag-drop a GGUF file manually.

Implementation:
- loadDemoModel(): fetch() from HuggingFace with streaming progress
- loadModelFromBytes(): shared path for both demo and file-drop
- loadModel(file): refactored to delegate to loadModelFromBytes

README: added "Try in your browser" link to the WASM demo page.

Strategy S5 from docs/strategy_progressive_kv.md: COMPLETE.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent cc74de2 commit 618845b

2 files changed

Lines changed: 78 additions & 19 deletions

File tree

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ m = Model("llama-3b.gguf", context_length=32768) # fits in 8GB; FP32 would OOM
6565

6666
Pre-built wheels for Linux x86_64/aarch64, macOS arm64 (Python 3.9-3.13). Other platforms compile from source automatically.
6767

68+
**Try in your browser (no install):** [WASM Demo](https://quantumaikr.github.io/quant.cpp/) — 189 KB engine, click "Try Demo" to auto-load a model.
69+
6870
---
6971

7072
## The Problem

wasm/index.html

Lines changed: 76 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -97,10 +97,16 @@ <h1>quant<span>.cpp</span></h1>
9797
</div>
9898

9999
<div class="main">
100-
<div class="dropzone" id="dropzone" onclick="document.getElementById('fileInput').click()">
101-
<h2>Drop a GGUF model here</h2>
102-
<p>Or click to browse. Recommended: SmolLM2-135M-Instruct (270MB) for browser use.</p>
103-
<p style="margin-top:8px; color:#444">Runs entirely in your browser. Nothing uploaded to any server.</p>
100+
<div class="dropzone" id="dropzone">
101+
<h2>LLM in Your Browser — 189 KB</h2>
102+
<p style="margin-bottom:16px; color:#6ee7b7; font-size:15px">No install. No API key. No server. Just click.</p>
103+
<button id="demoBtn" onclick="loadDemoModel()" style="
104+
padding: 12px 32px; font-size: 16px; font-weight: 600;
105+
background: #059669; color: white; border: none; border-radius: 8px;
106+
cursor: pointer; margin-bottom: 12px;
107+
">▶ Try with SmolLM2-135M (~135 MB download)</button>
108+
<p style="color:#555; font-size:13px">Or <a href="#" onclick="document.getElementById('fileInput').click(); return false" style="color:#6ee7b7">drop your own GGUF</a> file.</p>
109+
<p style="margin-top:8px; color:#333; font-size:12px">Runs entirely in your browser. Nothing uploaded to any server.</p>
104110
<input type="file" id="fileInput" accept=".gguf" style="display:none">
105111
</div>
106112

@@ -158,6 +164,52 @@ <h2>Drop a GGUF model here</h2>
158164
document.getElementById('loading').classList.remove('active');
159165
}
160166

167+
// Demo model auto-download from HuggingFace.
// Streams the SmolLM2-135M GGUF with a live progress readout, then hands the
// assembled bytes to loadModelFromBytes(). On any failure the button is
// restored so the user can retry, and the local file-drop fallback is suggested.
async function loadDemoModel() {
  const url = 'https://huggingface.co/Felladrin/gguf-Q8_0-SmolLM2-135M-Instruct/resolve/main/smollm2-135m-instruct-q8_0.gguf';
  const btn = document.getElementById('demoBtn');
  btn.disabled = true;
  btn.textContent = 'Downloading...';
  showLoading('Downloading SmolLM2-135M (~135 MB)...');

  try {
    const response = await fetch(url);
    if (!response.ok) throw new Error(`HTTP ${response.status}`);

    // Content-Length may be absent (e.g. chunked transfer); total === 0
    // simply disables the percentage display below.
    const total = Number.parseInt(response.headers.get('content-length') || '0', 10);
    const reader = response.body.getReader();
    const chunks = [];
    let received = 0;

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      chunks.push(value);
      received += value.length;
      if (total > 0) {
        const pct = Math.floor(received * 100 / total);
        const mb = (received / 1048576).toFixed(0);
        const totalMb = (total / 1048576).toFixed(0);
        document.getElementById('loadingText').textContent =
          `Downloading SmolLM2-135M... ${pct}% (${mb}/${totalMb} MB)`;
      }
    }

    // Combine the streamed chunks into one contiguous buffer via a Blob.
    const blob = new Blob(chunks);
    const arrayBuffer = await blob.arrayBuffer();
    const data = new Uint8Array(arrayBuffer);

    document.getElementById('loadingText').textContent = 'Loading model into WASM...';
    loadModelFromBytes(data, 'smollm2-135m-instruct-q8_0.gguf');
  } catch (err) {
    // Restore the idle button state so the demo can be retried.
    hideLoading();
    btn.disabled = false;
    btn.textContent = '▶ Try with SmolLM2-135M (~135 MB download)';
    alert(`Download failed: ${err.message}\n\nTry dropping a local GGUF file instead.`);
  }
}
212+
161213
function addMessage(role, text) {
162214
const chat = document.getElementById('chat');
163215
const div = document.createElement('div');
@@ -176,40 +228,45 @@ <h2>Drop a GGUF model here</h2>
176228
.replace(/\*\*([^*]+)\*\*/g, '<strong>$1</strong>');
177229
}
178230

179-
async function loadModel(file) {
180-
showLoading(`Loading ${file.name} (${(file.size/1024/1024).toFixed(0)} MB)...`);
181-
addMessage('system', `Loading ${file.name}...`);
182-
231+
function loadModelFromBytes(bytes, name) {
  // Shared model loading from Uint8Array (used by both file drop and demo download).
  // Writes the bytes into the WASM virtual filesystem, asks the C side to load
  // the model, and on success (rc === 0) switches the UI into chat mode.
  try {
    Module.FS.writeFile('/model.gguf', bytes);
    showLoading('Initializing model...');
    // allocateUTF8 heap-allocates the C path string; free it once the call
    // returns so repeated loads do not leak WASM heap memory.
    const pathPtr = Module.allocateUTF8('/model.gguf');
    let rc;
    try {
      rc = Module._wasm_load_model(pathPtr);
    } finally {
      Module._free?.(pathPtr); // NOTE(review): assumes _free is exported — confirm Emscripten build flags
    }
    if (rc === 0) {
      modelLoaded = true;
      const dropzone = document.getElementById('dropzone');
      dropzone.classList.add('loaded');
      dropzone.innerHTML = `<h2>✓ ${name} (${(bytes.length/1048576).toFixed(0)} MB)</h2>
<p style="color:#6ee7b7">KV compression active — 3x longer context</p>`;
      document.getElementById('kvBadge').style.display = '';
      // Enable the chat input now that generation is possible.
      document.getElementById('prompt').disabled = false;
      document.getElementById('sendBtn').disabled = false;
      document.getElementById('prompt').focus();
      addMessage('system', `Model loaded! ${name} (${(bytes.length/1048576).toFixed(0)} MB). Ask anything.`);
    } else {
      addMessage('system', 'Failed to load model.');
    }
  } catch(e) {
    addMessage('system', `Error: ${e.message}`);
  }
  hideLoading();
}
212256

257+
// Load a user-supplied GGUF File (from drag-drop or the file picker) by
// reading it into memory and delegating to the shared byte-loading path.
async function loadModel(file) {
  const sizeMb = (file.size / 1024 / 1024).toFixed(0);
  showLoading(`Loading ${file.name} (${sizeMb} MB)...`);
  addMessage('system', `Loading ${file.name}...`);
  try {
    // Materialize the file contents, then reuse the demo-download code path.
    const bytes = new Uint8Array(await file.arrayBuffer());
    loadModelFromBytes(bytes, file.name);
  } catch (e) {
    addMessage('system', `Error: ${e.message}`);
  }
  hideLoading();
}
269+
213270
async function generate() {
214271
if (!modelLoaded || generating) return;
215272
const input = document.getElementById('prompt');

0 commit comments

Comments
 (0)