@@ -61,8 +61,9 @@ function generateReport(resultsDir = RESULTS_DIR, opts = {}) {
6161 } ) . filter ( r => r . data ) ;
6262
6363 // Load fixture images for Vision tab (base64)
64+ // Skip in live mode — saves ~43MB of base64 per regeneration, making per-test updates instant
6465 const fixtureImages = { } ;
65- if ( fs . existsSync ( FIXTURES_DIR ) ) {
66+ if ( ! liveMode && fs . existsSync ( FIXTURES_DIR ) ) {
6667 try {
6768 const frames = fs . readdirSync ( FIXTURES_DIR ) . filter ( f => / \. ( p n g | j p g | j p e g ) $ / i. test ( f ) ) ;
6869 for ( const f of frames ) {
@@ -131,8 +132,8 @@ function buildHTML(allResults, fixtureImages, { liveMode = false, liveStatus = n
131132
132133 const fixtureJSON = JSON . stringify ( fixtureImages ) ;
133134
134- // Live mode: auto-refresh meta tag
135- const refreshMeta = liveMode ? '<meta http-equiv="refresh" content="5">' : '' ;
135+ // Live mode: JS-based reload (stateful, preserves active tab + scroll)
136+ const refreshMeta = '' ;
136137 const liveBannerHTML = liveMode ? buildLiveBanner ( liveStatus ) : '' ;
137138
138139 return `<!DOCTYPE html>
@@ -434,7 +435,7 @@ function buildSidebar() {
434435 let html = '';
435436 for (const [family, runs] of Object.entries(groups)) {
436437 html += '<div class="model-group">';
437- html += '<div class="model-group-label" onclick="this.parentElement.classList.toggle(\' collapsed\' )"><span class="arrow">▾</span> ' + esc(family) + ' <span style="color:var(--text-muted);font-weight:400">(' + runs.length + ')</span></div>';
438+ html += '<div class="model-group-label" onclick="this.parentElement.classList.toggle(' collapsed' )"><span class="arrow">▾</span> ' + esc(family) + ' <span style="color:var(--text-muted);font-weight:400">(' + runs.length + ')</span></div>';
438439 html += '<div class="run-list">';
439440 for (const r of runs.reverse()) {
440441 const sel = selectedIndices.has(r._idx);
@@ -508,6 +509,14 @@ function renderPerformance() {
508509 html += statCard('Server Decode', fmt(srvDecode), 'tok/s', 'From llama-server /metrics');
509510 html += statCard('Total Time', fmt(totalTime / 1000), 's', run.total + ' tests');
510511 html += statCard('Throughput', fmt(tokPerSec), 'tok/s', fmtK(run.tokens || 0) + ' total tokens');
512+
513+ // GPU & Memory cards (from resource samples)
514+ const res = perf?.resource;
515+ if (res) {
516+ html += statCard('GPU Utilization', res.gpu ? res.gpu.util + '' : '—', '%', res.gpu ? 'Renderer: ' + res.gpu.renderer + '% · Tiler: ' + res.gpu.tiler + '%' : 'MPS not available');
517+ html += statCard('GPU Memory', res.gpu?.memUsedGB != null ? fmt(res.gpu.memUsedGB) : '—', 'GB', res.gpu?.memAllocGB != null ? 'Alloc: ' + fmt(res.gpu.memAllocGB) + ' GB' : 'MPS not available');
518+ html += statCard('System Memory', fmt(res.sys?.usedGB), 'GB', 'of ' + fmt(res.sys?.totalGB) + ' GB total · Free: ' + fmt(res.sys?.freeGB) + ' GB');
519+ }
511520 html += '</div>';
512521
513522 // Comparison table if multiple selected
@@ -611,7 +620,36 @@ function renderQuality() {
611620
612621 // Multi-run comparison
613622 if (sel.length > 1) {
614- html += '<div class="section-title">Quality Comparison</div>';
623+ // High-level summary comparison
624+ html += '<div class="section-title">Overall Comparison</div>';
625+ html += '<div class="table-wrap"><table class="compare-table"><thead><tr><th>Metric</th>';
626+ for (const r of sel) html += '<th class="model-col">' + esc(modelShort(r.model)) + '<br><span style="font-weight:400;font-size:0.68rem">' + shortDate(r.timestamp) + '</span></th>';
627+ html += '</tr></thead><tbody>';
628+ const hasVlm = sel.some(r => r.vlmTotal > 0);
629+ const hiRows = [
630+ ['Pass Rate', r => r.total > 0 ? pct(r.passed, r.total) + '%' : '—'],
631+ ['Score', r => r.passed + '/' + r.total],
632+ ['LLM Score', r => r.llmTotal > 0 ? (r.llmPassed || 0) + '/' + (r.llmTotal || 0) : '—'],
633+ ...(hasVlm ? [['VLM Score', r => r.vlmTotal > 0 ? (r.vlmPassed || 0) + '/' + (r.vlmTotal || 0) : '—']] : []),
634+ ['Failed', r => String(r.failed)],
635+ ['Time', r => fmt(r.timeMs / 1000) + 's'],
636+ ['Throughput', r => r.timeMs > 0 && r.tokens ? fmt(r.tokens / (r.timeMs / 1000)) + ' tok/s' : '—'],
637+ ];
638+ for (const [label, fn] of hiRows) {
639+ html += '<tr><td>' + label + '</td>';
640+ // Find best value for highlighting
641+ const vals = sel.map(fn);
642+ for (let i = 0; i < sel.length; i++) {
643+ const isBest = label === 'Failed' ? vals[i] === String(Math.min(...sel.map(r => r.failed))) :
644+ label === 'Pass Rate' ? vals[i] === pct(Math.max(...sel.map(r => r.passed)), sel[0].total) + '%' : false;
645+ html += '<td' + (isBest && sel.length > 1 ? ' style="color:var(--green);font-weight:600"' : '') + '>' + vals[i] + '</td>';
646+ }
647+ html += '</tr>';
648+ }
649+ html += '</tbody></table></div>';
650+
651+ // Per-suite breakdown
652+ html += '<div class="section-title">Suite Comparison</div>';
615653 html += '<div class="table-wrap"><table class="compare-table"><thead><tr><th>Suite</th>';
616654 for (const r of sel) html += '<th class="model-col">' + esc(modelShort(r.model)) + '</th>';
617655 html += '</tr></thead><tbody>';
@@ -823,9 +861,15 @@ function getActiveTab() {
823861
/**
 * Render whichever tab getActiveTab() reports as active.
 *
 * An exception thrown by a renderer is caught here: it is logged to the
 * console and shown inside that tab's panel (element id 'tab-' + tab).
 */
function renderActiveTab() {
  const tab = getActiveTab();
  try {
    if (tab === 'performance') renderPerformance();
    else if (tab === 'quality') renderQuality();
    else if (tab === 'vision') renderVision();
  } catch (e) {
    // Anything can be thrown, not only Error instances, so normalise first.
    const message = e instanceof Error ? e.message : String(e);
    const stack = e instanceof Error && e.stack ? e.stack : '';
    const panel = document.getElementById('tab-' + tab);
    if (panel) {
      // Escape before assigning to innerHTML: error text may contain '<' or '&'
      // (for example when it quotes report data) and must not become markup.
      panel.innerHTML = '<div style="color:var(--red);padding:2rem"><strong>Render error:</strong> ' + esc(message) + '<br><pre>' + esc(stack) + '</pre></div>';
    }
    console.error('Tab render error:', e);
  }
}
830874
831875// ═══════════════════════════════════════════════════════════════════════════════
@@ -837,6 +881,52 @@ function refresh() {
837881 renderActiveTab();
838882}
839883
884+ // ═══════════════════════════════════════════════════════════════════════════════
885+ // LIVE RELOAD (stateful — preserves tab + scroll)
886+ // ═══════════════════════════════════════════════════════════════════════════════
887+ const IS_LIVE = ${ liveMode ? 'true' : 'false' } ;
888+
/**
 * Store the current view state in sessionStorage: the active tab, the
 * vertical scroll offset, the selected run indices and the primary index.
 * restoreState() reads the same four keys.
 *
 * Saving is best-effort. A storage failure is logged as a warning and never
 * thrown to the caller.
 */
function saveState() {
  try {
    sessionStorage.setItem('_bench_tab', getActiveTab());
    sessionStorage.setItem('_bench_scroll', String(window.scrollY));
    // selectedIndices is spread into an array because a Set serialises as {}.
    sessionStorage.setItem('_bench_selected', JSON.stringify([...selectedIndices]));
    sessionStorage.setItem('_bench_primary', String(primaryIndex));
  } catch (err) {
    console.warn('Could not save view state:', err);
  }
}
897+
/**
 * Re-apply the view state that saveState() stored in sessionStorage:
 * selected run indices, primary index, active tab and scroll offset.
 *
 * Every saved value is validated against the current page before it is
 * applied, since ALL_RUNS and the set of tabs may differ from when the state
 * was saved. Restoring is best-effort: a failure is logged as a warning and
 * never thrown to the caller.
 */
function restoreState() {
  try {
    // Restore selection, keeping only indices that still address a run.
    const savedSel = sessionStorage.getItem('_bench_selected');
    if (savedSel) {
      const parsed = JSON.parse(savedSel);
      const arr = Array.isArray(parsed)
        ? parsed.filter(i => Number.isInteger(i) && i >= 0 && i < ALL_RUNS.length)
        : [];
      if (arr.length > 0) { selectedIndices = new Set(arr); }
    }

    const savedPrimary = sessionStorage.getItem('_bench_primary');
    if (savedPrimary != null) {
      const pi = Number.parseInt(savedPrimary, 10);
      // NaN fails both comparisons, so an unparsable value is ignored.
      if (pi >= 0 && pi < ALL_RUNS.length) primaryIndex = pi;
    }

    // Restore tab. 'performance' is skipped here, as in the original code.
    const tab = sessionStorage.getItem('_bench_tab');
    if (tab && tab !== 'performance') {
      const tabEl = document.querySelector('.tab[data-tab="' + tab + '"]');
      const panel = document.getElementById('tab-' + tab);
      // Only switch when the saved tab still exists; otherwise clearing the
      // current 'active' classes would leave the page with no visible tab.
      if (tabEl && panel) {
        document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
        document.querySelectorAll('.tab-panel').forEach(p => p.classList.remove('active'));
        tabEl.classList.add('active');
        panel.classList.add('active');
      }
    }

    const scroll = Number.parseInt(sessionStorage.getItem('_bench_scroll') || '0', 10);
    // Scrolling is deferred by 50 ms rather than applied immediately;
    // presumably so the tab content is rendered first (confirm against caller).
    if (scroll > 0) setTimeout(() => window.scrollTo(0, scroll), 50);
  } catch (err) {
    console.warn('Could not restore view state:', err);
  }
}
925+
926+ if (IS_LIVE) {
927+ setTimeout(() => { saveState(); location.reload(); }, 5000);
928+ }
929+
840930// ═══════════════════════════════════════════════════════════════════════════════
841931// INIT
842932// ═══════════════════════════════════════════════════════════════════════════════
@@ -846,6 +936,7 @@ document.getElementById('btn-compare').addEventListener('click', () => {
846936 if (selectedIndices.size > 1) renderActiveTab();
847937});
848938
939+ restoreState();
849940refresh();
850941</script>
851942</body>
@@ -865,15 +956,17 @@ function buildLiveBanner(status) {
865956 if ( ! status ) {
866957 return `<div class="live-banner"><span class="live-dot"></span> Benchmark starting\u2026</div>` ;
867958 }
868- const { suitesCompleted = 0 , totalSuites = 0 , currentSuite = '' , startedAt = '' } = status ;
959+ const { suitesCompleted = 0 , totalSuites = 0 , currentSuite = '' , currentTest = '' , testsCompleted = 0 , startedAt = '' } = status ;
869960 const pct = totalSuites > 0 ? Math . round ( ( suitesCompleted / totalSuites ) * 100 ) : 0 ;
870961 const elapsed = startedAt ? Math . round ( ( Date . now ( ) - new Date ( startedAt ) . getTime ( ) ) / 1000 ) : 0 ;
871962 const elapsedStr = elapsed > 60 ? Math . floor ( elapsed / 60 ) + 'm ' + ( elapsed % 60 ) + 's' : elapsed + 's' ;
963+ const testInfo = currentTest ? ` — ✅ <em>${ escHtml ( currentTest ) } </em>` : '' ;
872964 return `<div class="live-banner">
873965 <span class="live-dot"></span>
874966 <strong>LIVE</strong> — Suite ${ suitesCompleted } /${ totalSuites } (${ pct } %)
875- ${ currentSuite ? ' — <em>' + currentSuite + '</em>' : '' }
876- <span style="margin-left:auto;font-size:0.78rem">${ elapsedStr } elapsed</span>
967+ ${ currentSuite ? ' — 🔧 <em>' + escHtml ( currentSuite ) + '</em>' : '' }
968+ ${ testInfo }
969+ <span style="margin-left:auto;font-size:0.78rem">${ testsCompleted } tests · ${ elapsedStr } elapsed</span>
877970 <div class="live-progress"><div class="live-progress-bar" style="width:${ pct } %"></div></div>
878971 </div>` ;
879972}
0 commit comments