Skip to content

Commit a0b3feb

Browse files
authored
Merge pull request #170 from SharpAI/feature/benchmark-operations-center
Feature/benchmark operations center
2 parents 884e270 + b5bc285 commit a0b3feb

File tree

2 files changed

+234
-29
lines changed

2 files changed

+234
-29
lines changed

skills/analysis/home-security-benchmark/scripts/generate-report.cjs

Lines changed: 104 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,9 @@ function generateReport(resultsDir = RESULTS_DIR, opts = {}) {
6161
}).filter(r => r.data);
6262

6363
// Load fixture images for Vision tab (base64)
64+
// Skip in live mode — saves ~43MB of base64 per regeneration, making per-test updates instant
6465
const fixtureImages = {};
65-
if (fs.existsSync(FIXTURES_DIR)) {
66+
if (!liveMode && fs.existsSync(FIXTURES_DIR)) {
6667
try {
6768
const frames = fs.readdirSync(FIXTURES_DIR).filter(f => /\.(png|jpg|jpeg)$/i.test(f));
6869
for (const f of frames) {
@@ -131,8 +132,8 @@ function buildHTML(allResults, fixtureImages, { liveMode = false, liveStatus = n
131132

132133
const fixtureJSON = JSON.stringify(fixtureImages);
133134

134-
// Live mode: auto-refresh meta tag
135-
const refreshMeta = liveMode ? '<meta http-equiv="refresh" content="5">' : '';
135+
// Live mode: JS-based reload (stateful, preserves active tab + scroll)
136+
const refreshMeta = '';
136137
const liveBannerHTML = liveMode ? buildLiveBanner(liveStatus) : '';
137138

138139
return `<!DOCTYPE html>
@@ -434,7 +435,7 @@ function buildSidebar() {
434435
let html = '';
435436
for (const [family, runs] of Object.entries(groups)) {
436437
html += '<div class="model-group">';
437-
html += '<div class="model-group-label" onclick="this.parentElement.classList.toggle(\'collapsed\')"><span class="arrow">▾</span> ' + esc(family) + ' <span style="color:var(--text-muted);font-weight:400">(' + runs.length + ')</span></div>';
438+
html += '<div class="model-group-label" onclick="this.parentElement.classList.toggle(&#39;collapsed&#39;)"><span class="arrow">▾</span> ' + esc(family) + ' <span style="color:var(--text-muted);font-weight:400">(' + runs.length + ')</span></div>';
438439
html += '<div class="run-list">';
439440
for (const r of runs.reverse()) {
440441
const sel = selectedIndices.has(r._idx);
@@ -508,6 +509,14 @@ function renderPerformance() {
508509
html += statCard('Server Decode', fmt(srvDecode), 'tok/s', 'From llama-server /metrics');
509510
html += statCard('Total Time', fmt(totalTime / 1000), 's', run.total + ' tests');
510511
html += statCard('Throughput', fmt(tokPerSec), 'tok/s', fmtK(run.tokens || 0) + ' total tokens');
512+
513+
// GPU & Memory cards (from resource samples)
514+
const res = perf?.resource;
515+
if (res) {
516+
html += statCard('GPU Utilization', res.gpu ? res.gpu.util + '' : '—', '%', res.gpu ? 'Renderer: ' + res.gpu.renderer + '% · Tiler: ' + res.gpu.tiler + '%' : 'MPS not available');
517+
html += statCard('GPU Memory', res.gpu?.memUsedGB != null ? fmt(res.gpu.memUsedGB) : '—', 'GB', res.gpu?.memAllocGB != null ? 'Alloc: ' + fmt(res.gpu.memAllocGB) + ' GB' : 'MPS not available');
518+
html += statCard('System Memory', fmt(res.sys?.usedGB), 'GB', 'of ' + fmt(res.sys?.totalGB) + ' GB total · Free: ' + fmt(res.sys?.freeGB) + ' GB');
519+
}
511520
html += '</div>';
512521
513522
// Comparison table if multiple selected
@@ -611,7 +620,36 @@ function renderQuality() {
611620
612621
// Multi-run comparison
613622
if (sel.length > 1) {
614-
html += '<div class="section-title">Quality Comparison</div>';
623+
// High-level summary comparison
624+
html += '<div class="section-title">Overall Comparison</div>';
625+
html += '<div class="table-wrap"><table class="compare-table"><thead><tr><th>Metric</th>';
626+
for (const r of sel) html += '<th class="model-col">' + esc(modelShort(r.model)) + '<br><span style="font-weight:400;font-size:0.68rem">' + shortDate(r.timestamp) + '</span></th>';
627+
html += '</tr></thead><tbody>';
628+
const hasVlm = sel.some(r => r.vlmTotal > 0);
629+
const hiRows = [
630+
['Pass Rate', r => r.total > 0 ? pct(r.passed, r.total) + '%' : '—'],
631+
['Score', r => r.passed + '/' + r.total],
632+
['LLM Score', r => r.llmTotal > 0 ? (r.llmPassed || 0) + '/' + (r.llmTotal || 0) : '—'],
633+
...(hasVlm ? [['VLM Score', r => r.vlmTotal > 0 ? (r.vlmPassed || 0) + '/' + (r.vlmTotal || 0) : '—']] : []),
634+
['Failed', r => String(r.failed)],
635+
['Time', r => fmt(r.timeMs / 1000) + 's'],
636+
['Throughput', r => r.timeMs > 0 && r.tokens ? fmt(r.tokens / (r.timeMs / 1000)) + ' tok/s' : '—'],
637+
];
638+
for (const [label, fn] of hiRows) {
639+
html += '<tr><td>' + label + '</td>';
640+
// Find best value for highlighting
641+
const vals = sel.map(fn);
642+
for (let i = 0; i < sel.length; i++) {
643+
const isBest = label === 'Failed' ? vals[i] === String(Math.min(...sel.map(r => r.failed))) :
644+
label === 'Pass Rate' ? vals[i] === pct(Math.max(...sel.map(r => r.passed)), sel[0].total) + '%' : false;
645+
html += '<td' + (isBest && sel.length > 1 ? ' style="color:var(--green);font-weight:600"' : '') + '>' + vals[i] + '</td>';
646+
}
647+
html += '</tr>';
648+
}
649+
html += '</tbody></table></div>';
650+
651+
// Per-suite breakdown
652+
html += '<div class="section-title">Suite Comparison</div>';
615653
html += '<div class="table-wrap"><table class="compare-table"><thead><tr><th>Suite</th>';
616654
for (const r of sel) html += '<th class="model-col">' + esc(modelShort(r.model)) + '</th>';
617655
html += '</tr></thead><tbody>';
@@ -823,9 +861,15 @@ function getActiveTab() {
823861
824862
/**
 * Render whichever tab is currently active.
 *
 * Dispatches on the value returned by getActiveTab(). If the tab renderer
 * throws, the error is logged and an error box is shown inside that tab's
 * panel (element id 'tab-' + tab) instead of leaving the panel half-rendered.
 *
 * The error box is built from DOM nodes with textContent rather than an
 * innerHTML string, so characters such as '<' or '&' in the error message or
 * stack are shown literally and are never parsed as markup.
 */
function renderActiveTab() {
  const tab = getActiveTab();
  try {
    if (tab === 'performance') renderPerformance();
    else if (tab === 'quality') renderQuality();
    else if (tab === 'vision') renderVision();
  } catch (e) {
    // Log first so the failure is recorded even if the DOM work below fails.
    console.error('Tab render error:', e);

    const panel = document.getElementById('tab-' + tab);
    if (!panel) return;

    // Anything can be thrown, not only Error instances.
    const message = String(e && e.message != null ? e.message : e);
    const stack = String((e && e.stack) || '');

    const box = document.createElement('div');
    box.style.cssText = 'color:var(--red);padding:2rem';

    const label = document.createElement('strong');
    label.textContent = 'Render error:';

    const pre = document.createElement('pre');
    pre.textContent = stack;

    box.append(label, ' ' + message, document.createElement('br'), pre);

    // Drop whatever the failed renderer left behind, then show the error box.
    panel.textContent = '';
    panel.appendChild(box);
  }
}
830874
831875
// ═══════════════════════════════════════════════════════════════════════════════
@@ -837,6 +881,52 @@ function refresh() {
837881
renderActiveTab();
838882
}
839883
884+
// ═══════════════════════════════════════════════════════════════════════════════
885+
// LIVE RELOAD (stateful — preserves tab + scroll)
886+
// ═══════════════════════════════════════════════════════════════════════════════
887+
const IS_LIVE = ${liveMode ? 'true' : 'false'};
888+
889+
/**
 * Persist the current view state to sessionStorage so it can be re-applied
 * after a live-mode page reload.
 *
 * Keys written:
 *   _bench_tab       value returned by getActiveTab()
 *   _bench_scroll    window.scrollY, as a string
 *   _bench_selected  JSON array of the entries in selectedIndices
 *   _bench_primary   primaryIndex, as a string
 */
function saveState() {
  const put = (key, value) => sessionStorage.setItem(key, value);
  try {
    put('_bench_tab', getActiveTab());
    put('_bench_scroll', String(window.scrollY));
    put('_bench_selected', JSON.stringify(Array.from(selectedIndices)));
    put('_bench_primary', String(primaryIndex));
  } catch {
    // Best effort: if storage is unavailable or full the page simply reloads
    // without its previous tab, selection and scroll position.
  }
}
897+
898+
/**
 * Re-apply the view state written by saveState(): run selection, primary run,
 * active tab and scroll position.
 *
 * Every stored value is validated before it is applied:
 *   - a corrupt selection entry is ignored without blocking the other values;
 *   - indices must be integers within the bounds of ALL_RUNS;
 *   - the active tab is only switched when both the tab button and its panel
 *     exist, so a stale or unknown tab name keeps the default tab visible
 *     instead of leaving the page with no active tab at all.
 *
 * Restoration is best effort; any unexpected failure leaves the defaults.
 */
function restoreState() {
  try {
    const isRunIndex = (i) => Number.isInteger(i) && i >= 0 && i < ALL_RUNS.length;

    // Restore selection
    const savedSel = sessionStorage.getItem('_bench_selected');
    if (savedSel) {
      let parsed = null;
      try {
        parsed = JSON.parse(savedSel);
      } catch {
        // Corrupt entry: keep the default selection.
      }
      const arr = Array.isArray(parsed) ? parsed.filter(isRunIndex) : [];
      if (arr.length > 0) { selectedIndices = new Set(arr); }
    }

    // Restore primary run
    const savedPrimary = sessionStorage.getItem('_bench_primary');
    if (savedPrimary != null) {
      const pi = Number.parseInt(savedPrimary, 10);
      if (isRunIndex(pi)) primaryIndex = pi;
    }

    // Restore tab ('performance' is the default, so nothing to do for it)
    const tab = sessionStorage.getItem('_bench_tab');
    if (tab && tab !== 'performance') {
      // Match on the attribute value instead of building a selector string,
      // so an unexpected stored value cannot produce an invalid selector.
      const tabEl = Array.from(document.querySelectorAll('.tab'))
        .find((t) => t.getAttribute('data-tab') === tab);
      const panel = document.getElementById('tab-' + tab);
      if (tabEl && panel) {
        document.querySelectorAll('.tab').forEach((t) => t.classList.remove('active'));
        document.querySelectorAll('.tab-panel').forEach((p) => p.classList.remove('active'));
        tabEl.classList.add('active');
        panel.classList.add('active');
      }
    }

    // Restore scroll after a short delay, as the original code does.
    const scroll = Number.parseInt(sessionStorage.getItem('_bench_scroll') || '0', 10);
    if (scroll > 0) setTimeout(() => window.scrollTo(0, scroll), 50);
  } catch {
    // Best effort: storage or DOM unavailable, keep the default view.
  }
}
925+
926+
// Live mode: reload the page after a fixed delay. The view state is saved
// immediately before the reload so it can be re-applied once the page loads.
if (IS_LIVE) {
  const RELOAD_DELAY_MS = 5000;
  setTimeout(() => {
    saveState();
    location.reload();
  }, RELOAD_DELAY_MS);
}
929+
840930
// ═══════════════════════════════════════════════════════════════════════════════
841931
// INIT
842932
// ═══════════════════════════════════════════════════════════════════════════════
@@ -846,6 +936,7 @@ document.getElementById('btn-compare').addEventListener('click', () => {
846936
if (selectedIndices.size > 1) renderActiveTab();
847937
});
848938
939+
restoreState();
849940
refresh();
850941
</script>
851942
</body>
@@ -865,15 +956,17 @@ function buildLiveBanner(status) {
865956
if (!status) {
866957
return `<div class="live-banner"><span class="live-dot"></span> Benchmark starting\u2026</div>`;
867958
}
868-
const { suitesCompleted = 0, totalSuites = 0, currentSuite = '', startedAt = '' } = status;
959+
const { suitesCompleted = 0, totalSuites = 0, currentSuite = '', currentTest = '', testsCompleted = 0, startedAt = '' } = status;
869960
const pct = totalSuites > 0 ? Math.round((suitesCompleted / totalSuites) * 100) : 0;
870961
const elapsed = startedAt ? Math.round((Date.now() - new Date(startedAt).getTime()) / 1000) : 0;
871962
const elapsedStr = elapsed > 60 ? Math.floor(elapsed / 60) + 'm ' + (elapsed % 60) + 's' : elapsed + 's';
963+
const testInfo = currentTest ? ` — ✅ <em>${escHtml(currentTest)}</em>` : '';
872964
return `<div class="live-banner">
873965
<span class="live-dot"></span>
874966
<strong>LIVE</strong> — Suite ${suitesCompleted}/${totalSuites} (${pct}%)
875-
${currentSuite ? ' — <em>' + currentSuite + '</em>' : ''}
876-
<span style="margin-left:auto;font-size:0.78rem">${elapsedStr} elapsed</span>
967+
${currentSuite ? ' — 🔧 <em>' + escHtml(currentSuite) + '</em>' : ''}
968+
${testInfo}
969+
<span style="margin-left:auto;font-size:0.78rem">${testsCompleted} tests · ${elapsedStr} elapsed</span>
877970
<div class="live-progress"><div class="live-progress-bar" style="width:${pct}%"></div></div>
878971
</div>`;
879972
}

0 commit comments

Comments
 (0)