diff --git a/skills/analysis/home-security-benchmark/scripts/generate-report.cjs b/skills/analysis/home-security-benchmark/scripts/generate-report.cjs
index d5dda66d..9bddd296 100644
--- a/skills/analysis/home-security-benchmark/scripts/generate-report.cjs
+++ b/skills/analysis/home-security-benchmark/scripts/generate-report.cjs
@@ -109,6 +109,7 @@ function buildHTML(allResults, fixtureImages, { liveMode = false, liveStatus = n
tokens: r.tokens || r.data?.tokenTotals?.total,
perfSummary: r.perfSummary || r.data?.perfSummary || null,
system: r.data?.system || {},
+ serverParams: r.data?.serverParams || {},
tokenTotals: r.data?.tokenTotals || {},
suites: (r.data?.suites || []).map(s => ({
name: s.name,
@@ -492,6 +493,15 @@ function renderPerformance() {
let html = '
';
+ if (run.serverParams && typeof run.serverParams === 'object' && Object.keys(run.serverParams).length > 0) {
+ let paramStr = '';
+ for (const k in run.serverParams) {
+ if (paramStr) paramStr += ' | ';
+ paramStr += '' + esc(k) + ': ' + esc(String(run.serverParams[k]));
+ }
+ html += '[Server Params] ' + paramStr + '
';
+ }
+
// Hero cards
html += '';
const ttftAvg = perf?.ttft?.avgMs;
diff --git a/skills/analysis/home-security-benchmark/scripts/run-benchmark.cjs b/skills/analysis/home-security-benchmark/scripts/run-benchmark.cjs
index bf7969d8..e6f3b0b0 100644
--- a/skills/analysis/home-security-benchmark/scripts/run-benchmark.cjs
+++ b/skills/analysis/home-security-benchmark/scripts/run-benchmark.cjs
@@ -230,10 +230,14 @@ function suite(name, fn) {
suites.push({ name, fn, tests: [] });
}
+let targetServerParams = {}; // Parsed from the AEGIS_SERVER_PARAMS env var (JSON); stays {} when unset or unparsable.
+try { targetServerParams = JSON.parse(process.env.AEGIS_SERVER_PARAMS || '{}'); } catch (err) { console.warn(`[benchmark] Ignoring invalid AEGIS_SERVER_PARAMS (expected JSON): ${err.message}`); }
+
const results = {
timestamp: new Date().toISOString(),
gateway: GATEWAY_URL,
vlm: VLM_URL || null,
+ serverParams: targetServerParams,
system: {},
model: {},
suites: [],
@@ -333,6 +337,9 @@ async function llmCall(messages, opts = {}) {
...(model && { model }),
...(temperature !== undefined && { temperature }),
...(opts.expectJSON && { top_p: 0.8 }),
+ // For JSON-expected tests on local servers, enable server-side JSON mode
+ // which activates prefix buffering to strip hallucinated artifacts
+ ...(opts.expectJSON && !isCloudApi && { response_format: { type: 'json_object' } }),
...(opts.tools && { tools: opts.tools }),
// Model-family-specific params (e.g. reasoning_effort:'none' for Mistral).
// These are merged last so they take precedence over defaults.