diff --git a/skills/analysis/home-security-benchmark/scripts/generate-report.cjs b/skills/analysis/home-security-benchmark/scripts/generate-report.cjs index d5dda66d..9bddd296 100644 --- a/skills/analysis/home-security-benchmark/scripts/generate-report.cjs +++ b/skills/analysis/home-security-benchmark/scripts/generate-report.cjs @@ -109,6 +109,7 @@ function buildHTML(allResults, fixtureImages, { liveMode = false, liveStatus = n tokens: r.tokens || r.data?.tokenTotals?.total, perfSummary: r.perfSummary || r.data?.perfSummary || null, system: r.data?.system || {}, + serverParams: r.data?.serverParams || {}, tokenTotals: r.data?.tokenTotals || {}, suites: (r.data?.suites || []).map(s => ({ name: s.name, @@ -492,6 +493,15 @@ function renderPerformance() { let html = '
⚡ Performance
'; html += '
' + esc(run.model || '?') + ' — ' + shortDate(run.timestamp) + '
'; + if (run.serverParams && typeof run.serverParams === 'object' && Object.keys(run.serverParams).length > 0) { + let paramStr = ''; + for (const k in run.serverParams) { + if (paramStr) paramStr += ' | '; + paramStr += '' + esc(k) + ': ' + esc(String(run.serverParams[k])); + } + html += '
[Server Params] ' + paramStr + '
'; + } + // Hero cards html += '
'; const ttftAvg = perf?.ttft?.avgMs; diff --git a/skills/analysis/home-security-benchmark/scripts/run-benchmark.cjs b/skills/analysis/home-security-benchmark/scripts/run-benchmark.cjs index bf7969d8..e6f3b0b0 100644 --- a/skills/analysis/home-security-benchmark/scripts/run-benchmark.cjs +++ b/skills/analysis/home-security-benchmark/scripts/run-benchmark.cjs @@ -230,10 +230,14 @@ function suite(name, fn) { suites.push({ name, fn, tests: [] }); } +let targetServerParams = {}; +try { targetServerParams = JSON.parse(process.env.AEGIS_SERVER_PARAMS || '{}'); } catch { } + const results = { timestamp: new Date().toISOString(), gateway: GATEWAY_URL, vlm: VLM_URL || null, + serverParams: targetServerParams, system: {}, model: {}, suites: [], @@ -333,6 +337,9 @@ async function llmCall(messages, opts = {}) { ...(model && { model }), ...(temperature !== undefined && { temperature }), ...(opts.expectJSON && { top_p: 0.8 }), + // For JSON-expected tests on local servers, enable server-side JSON mode + // which activates prefix buffering to strip hallucinated artifacts + ...(opts.expectJSON && !isCloudApi && { response_format: { type: 'json_object' } }), ...(opts.tools && { tools: opts.tools }), // Model-family-specific params (e.g. reasoning_effort:'none' for Mistral). // These are merged last so they take precedence over defaults.