Skip to content

Commit 953b9a4

Browse files
authored
Merge pull request #15 from SebaSOFT/growth/njs-growth-07-benchmark-harness-results
Build NJS-GROWTH-07 benchmark harness, measured result charts, and AI-rule-safety carousel
2 parents d4f8c5a + 3954bbf commit 953b9a4

47 files changed

Lines changed: 3441 additions & 48 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

README.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,22 @@ Do not use `neuron-js` when a simple hardcoded condition is clearer and rarely c
4343

4444
## Public proof assets
4545

46-
Neuron-JS proof material is published as methodology and inspectability artifacts first. Benchmark result claims remain blocked until the benchmark harness emits real measured output with `claims_allowed: true`.
46+
Neuron-JS proof material is published as methodology, measured benchmarks, and inspectability artifacts. Benchmark numbers come from a real `actual_benchmark` harness run only — reproduce them with `yarn benchmark`.
4747

4848
<p align="center">
4949
<img src="docs/benchmarks/assets/generated/explainability-trace-diagram.svg" alt="Neuron-JS diagram showing rule JSON and business input flowing through schema validation, developer registry, deterministic Synapse evaluation, result output, explanation trace, and audit-ready decision." width="760">
5050
</p>
5151

52+
### Benchmark results
53+
54+
Measured throughput, cold-start, bundle-size, validation, and explanation overhead across the pricing, eligibility, and workflow-routing scenarios, compared against `json-rules-engine`, `json-logic-js`, a hand-coded TypeScript baseline, and `rule-engine-js`. See the full charts, results table, and provenance:
55+
56+
- Benchmark results (charts + data + provenance): [`docs/benchmarks/results.md`](docs/benchmarks/results.md)
57+
- Reproduce locally: `yarn benchmark` then `yarn benchmark:charts`
58+
- Raw measured output: [`benchmarks/results/latest.actual.json`](benchmarks/results/latest.actual.json)
59+
60+
### Methodology and inspectability
61+
5262
- Benchmark methodology and result contract: [`docs/benchmarks/methodology.md`](docs/benchmarks/methodology.md)
5363
- Explainability proof metadata and alt text: [`docs/benchmarks/assets/generated/explainability-trace-diagram.md`](docs/benchmarks/assets/generated/explainability-trace-diagram.md)
5464
- Visual proof system and publication guardrails: [`docs/benchmarks/visual-proof-system.md`](docs/benchmarks/visual-proof-system.md)

benchmarks/charts/generate.ts

Lines changed: 293 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,293 @@
1+
// Renders the five benchmark charts from real harness output.
2+
//
3+
// Every visible number is read directly from benchmarks/results/latest.actual.json
4+
// (no averaging/derivation; throughput is only rounded to a whole number for display), so charts cannot drift from measured data. Comparison
5+
// metrics use the pricing-discount / medium row; cold-start and bundle size are
6+
// engine-level constants. Run: yarn benchmark:charts
7+
import { readFileSync, writeFileSync } from "node:fs";
8+
import { fileURLToPath } from "node:url";
9+
10+
const RESULTS_PATH = fileURLToPath(
11+
new URL("../results/latest.actual.json", import.meta.url),
12+
);
13+
const OUT_DIR = fileURLToPath(
14+
new URL("../../docs/benchmarks/assets/generated/", import.meta.url),
15+
);
16+
const SOURCE_REF = "benchmarks/results/latest.actual.json";
17+
18+
interface Row {
19+
engine: string;
20+
scenario: string;
21+
input_size: string;
22+
throughput_decisions_per_second: number;
23+
p50_ms: number;
24+
p95_ms: number;
25+
cold_start_ms: number;
26+
bundle_size_minified_bytes: number;
27+
validation_overhead_ms: number;
28+
explanation_overhead_ms: number;
29+
node_version: string;
30+
package_version: string;
31+
commit_sha: string;
32+
}
33+
34+
interface ResultsFile {
35+
generated_at: string;
36+
results: Row[];
37+
}
38+
39+
const ENGINE_COLOR: Record<string, string> = {
40+
"@sebasoft/neuron-js": "#22d3ee",
41+
"json-rules-engine": "#a78bfa",
42+
"json-logic-js": "#fbbf24",
43+
"hand-coded-typescript": "#34d399",
44+
"rule-engine-js": "#94a3b8",
45+
};
46+
const ENGINES = Object.keys(ENGINE_COLOR);
47+
48+
const data = JSON.parse(readFileSync(RESULTS_PATH, "utf8")) as ResultsFile;
49+
50+
/**
 * Looks up the result row for one engine / scenario / input-size combination.
 *
 * @param engine - Engine identifier as stored in the results file.
 * @param scenario - Scenario identifier.
 * @param size - Input-size label (e.g. "medium").
 * @returns The first row in `data.results` matching all three fields.
 * @throws Error when no row matches.
 */
function rowFor(engine: string, scenario: string, size: string): Row {
  for (const candidate of data.results) {
    const isMatch =
      candidate.engine === engine &&
      candidate.scenario === scenario &&
      candidate.input_size === size;
    if (isMatch) {
      return candidate;
    }
  }
  throw new Error(`Missing row ${engine}/${scenario}/${size}`);
}
60+
61+
const provenance = rowFor(ENGINES[0], "pricing-discount", "medium");
62+
63+
/**
 * Escapes the characters `<`, `>` and `&` so a string can be placed inside
 * XML/SVG text content. Quotes are left untouched.
 *
 * @param value - Raw text.
 * @returns The text with `<`, `>` and `&` replaced by entity references.
 */
function escapeXml(value: string): string {
  return value.replace(/[<>&]/g, (char) => {
    switch (char) {
      case "<":
        return "&lt;";
      case ">":
        return "&gt;";
      default:
        return "&amp;";
    }
  });
}
68+
69+
/** Formats a number with en-US digit grouping, e.g. 1234567 -> "1,234,567". */
const groupSep = (n: number): string => {
  const formatter = new Intl.NumberFormat("en-US");
  return formatter.format(n);
};
70+
71+
interface ChartSpec {
72+
file: string;
73+
title: string;
74+
metric: keyof Row & string;
75+
unit: string;
76+
direction: "higher is better" | "lower is better";
77+
subtitle: string;
78+
format: (value: number) => string;
79+
/** Note rendered when most engines are zero (differentiator metrics). */
80+
differentiator?: string;
81+
}
82+
83+
const charts: ChartSpec[] = [
84+
{
85+
file: "benchmark-chart-throughput.svg",
86+
title: "Throughput comparison",
87+
metric: "throughput_decisions_per_second",
88+
unit: "decisions / second",
89+
direction: "higher is better",
90+
subtitle: "pricing-discount · medium (10,000 decisions)",
91+
format: (v) => `${groupSep(Math.round(v))} dec/s`,
92+
},
93+
{
94+
file: "benchmark-chart-cold-start.svg",
95+
title: "Cold-start comparison",
96+
metric: "cold_start_ms",
97+
unit: "milliseconds",
98+
direction: "lower is better",
99+
subtitle: "engine import + first decision, fresh process (median of 5)",
100+
format: (v) => `${v} ms`,
101+
},
102+
{
103+
file: "benchmark-chart-bundle-size.svg",
104+
title: "Bundle-size comparison",
105+
metric: "bundle_size_minified_bytes",
106+
unit: "minified bytes (esbuild, node platform)",
107+
direction: "lower is better",
108+
subtitle: "minified bundle of the engine's public surface",
109+
format: (v) =>
110+
v === 0 ? "0 B (no engine dependency)" : `${groupSep(v)} B`,
111+
},
112+
{
113+
file: "benchmark-chart-validation-overhead.svg",
114+
title: "Validation overhead",
115+
metric: "validation_overhead_ms",
116+
unit: "milliseconds per decision",
117+
direction: "lower is better",
118+
subtitle: "pricing-discount · validateScript delta · medium",
119+
format: (v) => `${v} ms`,
120+
differentiator:
121+
"Neuron-JS differentiator: competitors provide no schema-validation step (0).",
122+
},
123+
{
124+
file: "benchmark-chart-explanation-overhead.svg",
125+
title: "Explanation overhead",
126+
metric: "explanation_overhead_ms",
127+
unit: "milliseconds per decision",
128+
direction: "lower is better",
129+
subtitle: "pricing-discount · explainExecution delta · medium",
130+
format: (v) => `${v} ms`,
131+
differentiator:
132+
"Neuron-JS differentiator: competitors provide no explanation trace (0).",
133+
},
134+
];
135+
136+
const W = 1200;
137+
const H = 675;
138+
const BAR_X = 250;
139+
const BAR_W = 760;
140+
const FIRST_Y = 196;
141+
const ROW_H = 70;
142+
const BAR_H = 40;
143+
144+
// Comparison row: pricing-discount / medium. cold-start and bundle-size are
145+
// engine-level constants, so this row carries their values too.
146+
const CHART_SCENARIO = "pricing-discount";
147+
148+
/**
 * Renders one horizontal bar chart (one bar per engine) as a standalone SVG string.
 *
 * Values are taken from the `CHART_SCENARIO` / "medium" row of every engine in
 * `ENGINES`; each bar is labelled with `spec.format(value)`.
 *
 * @param spec - Chart definition: output file name, metric key, labels and value formatter.
 * @returns The complete SVG document, terminated by a newline.
 * @throws Error when a row is missing (raised by `rowFor`) or when the selected
 *   metric is not a finite number.
 */
function render(spec: ChartSpec): string {
  const rows = ENGINES.map((engine) => {
    // `spec.metric` can name any Row key (including string fields), so verify the
    // value at runtime instead of asserting `as number`.
    const value: unknown = rowFor(engine, CHART_SCENARIO, "medium")[spec.metric];
    if (typeof value !== "number" || !Number.isFinite(value)) {
      throw new Error(
        `Metric ${spec.metric} for ${engine} is not a finite number`,
      );
    }
    return { engine, value };
  });
  // The floor of 1 guarantees a non-zero divisor.
  // NOTE(review): it also means metrics whose values are all below 1 are drawn
  // against a fixed 1-unit scale rather than the largest measured value — confirm
  // this is intended.
  const max = Math.max(...rows.map((r) => r.value), 1);

  const bars = rows
    .map((r, i) => {
      const y = FIRST_Y + i * ROW_H;
      // Clamp at 0: a negative measured delta would otherwise produce a negative
      // <rect> width, which is invalid SVG.
      const width = (Math.max(r.value, 0) / max) * BAR_W;
      const color = ENGINE_COLOR[r.engine];
      // Wide bars carry the value label inside (dark text); narrow ones place it
      // to the right of the bar (light text).
      const labelInside = width > 220;
      const valueX = labelInside ? BAR_X + width - 12 : BAR_X + width + 12;
      const valueAnchor = labelInside ? "end" : "start";
      const valueFill = labelInside ? "#020617" : "#e5e7eb";
      return ` <g>
<text x="${BAR_X - 16}" y="${y + BAR_H / 2 + 6}" text-anchor="end" class="mono engine">${escapeXml(r.engine)}</text>
<rect x="${BAR_X}" y="${y}" width="${BAR_W}" height="${BAR_H}" rx="8" fill="#0f172a" stroke="#1e293b"/>
<rect x="${BAR_X}" y="${y}" width="${width.toFixed(1)}" height="${BAR_H}" rx="8" fill="${color}"/>
<text x="${valueX.toFixed(1)}" y="${y + BAR_H / 2 + 6}" text-anchor="${valueAnchor}" class="mono value" fill="${valueFill}">${escapeXml(spec.format(r.value))}</text>
</g>`;
    })
    .join("\n");

  // Optional note rendered below the last bar.
  const note = spec.differentiator
    ? `<text x="${BAR_X}" y="${FIRST_Y + ENGINES.length * ROW_H + 24}" class="mono note" fill="#22d3ee">${escapeXml(spec.differentiator)}</text>`
    : "";

  const footer = `Source: ${SOURCE_REF} · commit ${provenance.commit_sha.slice(0, 10)} · ${provenance.node_version} · ${data.generated_at.slice(0, 10)}`;
  const altDesc = `${spec.title} (${spec.unit}, ${spec.direction}) by engine from measured Neuron-JS benchmark output: ${rows.map((r) => `${r.engine} ${spec.format(r.value)}`).join(", ")}.`;

  // Every interpolated string placed in element content is escaped, including the
  // <metadata> text, because the footer carries values read from the results file.
  return `<svg xmlns="http://www.w3.org/2000/svg" width="${W}" height="${H}" viewBox="0 0 ${W} ${H}" role="img" aria-labelledby="t d">
<title id="t">${escapeXml(spec.title)}</title>
<desc id="d">${escapeXml(altDesc)}</desc>
<metadata>Asset: NJS-GROWTH-07 ${escapeXml(spec.file)}. Source: ${escapeXml(SOURCE_REF)}. Generated by benchmarks/charts/generate.ts from measured actual_benchmark output. ${escapeXml(footer)}</metadata>
<defs>
<pattern id="grid" width="40" height="40" patternUnits="userSpaceOnUse"><path d="M40 0H0v40" fill="none" stroke="#1e293b" stroke-width="0.7"/></pattern>
<style><![CDATA[
.text{font-family:Inter,system-ui,-apple-system,"Segoe UI",sans-serif;fill:#e5e7eb}
.mono{font-family:"JetBrains Mono",SFMono-Regular,Menlo,Consolas,monospace}
.title{font-size:38px;font-weight:750;letter-spacing:-.03em;fill:#e5e7eb}
.sub{font-size:19px;font-weight:500;fill:#94a3b8}
.engine{font-size:17px;fill:#cbd5e1}
.value{font-size:17px;font-weight:600}
.note{font-size:16px}
.foot{font-size:15px;fill:#64748b}
.metric{font-size:16px;fill:#94a3b8}
]]></style>
</defs>
<rect width="${W}" height="${H}" fill="#020617"/>
<rect width="${W}" height="${H}" fill="url(#grid)" opacity=".6"/>
<rect x="24" y="24" width="${W - 48}" height="${H - 48}" rx="22" fill="rgba(15,23,42,.55)" stroke="#1e293b" stroke-width="2"/>
<text x="${BAR_X - 16}" y="92" text-anchor="end" class="text title">${escapeXml(spec.title)}</text>
<text x="${BAR_X - 16}" y="124" text-anchor="end" class="text sub">${escapeXml(spec.subtitle)}</text>
<text x="${BAR_X}" y="92" class="metric">metric: ${escapeXml(spec.metric)}</text>
<text x="${BAR_X}" y="124" class="metric">unit: ${escapeXml(spec.unit)} · ${spec.direction}</text>
${bars}
${note}
<text x="${BAR_X - 16}" y="${H - 40}" text-anchor="end" class="mono foot">Measured · no fabricated values</text>
<text x="${BAR_X}" y="${H - 40}" class="mono foot">${escapeXml(footer)}</text>
</svg>
`;
}
212+
213+
// Render each chart, write it into the generated-assets directory, and log the
// file name to stdout.
charts.forEach((spec) => {
  const target = `${OUT_DIR}${spec.file}`;
  writeFileSync(target, render(spec), "utf8");
  process.stdout.write(`wrote ${spec.file}\n`);
});
218+
219+
// Generated showcase page (docs/benchmarks/results.md), built from the same data
220+
// so chart and table values can never drift from the measured source.
221+
const PAGE_PATH = fileURLToPath(
222+
new URL("../../docs/benchmarks/results.md", import.meta.url),
223+
);
224+
225+
/**
 * Builds the body of the Markdown results table: one line per engine and
 * scenario, always using the "medium" input-size row.
 *
 * @returns The table rows joined with newlines (no header, no trailing newline).
 * @throws Error when a row is missing (raised by `rowFor`).
 */
function tableRows(): string {
  const scenarios = [
    "pricing-discount",
    "eligibility-approval",
    "workflow-routing",
  ];
  return ENGINES.flatMap((engine) =>
    scenarios.map((scenario) => {
      const r = rowFor(engine, scenario, "medium");
      return `| \`${r.engine}\` | ${r.scenario} | ${groupSep(Math.round(r.throughput_decisions_per_second))} | ${r.p50_ms} | ${r.p95_ms} | ${r.cold_start_ms} | ${groupSep(r.bundle_size_minified_bytes)} | ${r.validation_overhead_ms} | ${r.explanation_overhead_ms} |`;
    }),
  ).join("\n");
}
241+
242+
const chartSection = charts
243+
.map(
244+
(spec) => `## ${spec.title}
245+
246+
metric: \`${spec.metric}\` · ${spec.unit} · _${spec.direction}_
247+
248+
![${spec.title} by engine, measured Neuron-JS benchmark output.](./assets/generated/${spec.file})
249+
${spec.differentiator ? `\n${spec.differentiator}\n` : ""}`,
250+
)
251+
.join("\n");
252+
253+
const page = `<!-- GENERATED by benchmarks/charts/generate.ts from benchmarks/results/latest.actual.json. Do not edit by hand; run \`yarn benchmark && yarn benchmark:charts\`. -->
254+
# Benchmark results
255+
256+
These are **measured** results produced by the Neuron-JS benchmark harness
257+
(\`yarn benchmark\`). They compare \`@sebasoft/neuron-js\` against \`json-rules-engine\`,
258+
\`json-logic-js\`, a hand-coded TypeScript baseline, and \`rule-engine-js\` across the
259+
pricing, eligibility, and workflow-routing scenarios. See the
260+
[methodology](./methodology) for how each metric is collected.
261+
262+
Numbers reflect a single machine, Node version, and commit; reproduce locally before
263+
citing. No value on this page is hand-entered — charts and the table below are generated
264+
from the same source file.
265+
266+
## Provenance
267+
268+
| Field | Value |
269+
| --- | --- |
270+
| Generated | \`${data.generated_at}\` |
271+
| Node | \`${provenance.node_version}\` |
272+
| Commit | \`${provenance.commit_sha}\` |
273+
| Neuron-JS version | \`${provenance.package_version}\` |
274+
| Command | \`yarn benchmark\` |
275+
| Raw source | \`${SOURCE_REF}\` |
276+
277+
${chartSection}
278+
## Full results (medium · 10,000 decisions)
279+
280+
Throughput is decisions/second (higher is better); all latency, cold-start, and overhead
281+
columns are milliseconds (lower is better); bundle size is minified bytes.
282+
283+
| Engine | Scenario | Throughput | p50 ms | p95 ms | Cold start ms | Bundle B | Validation ms | Explanation ms |
284+
| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
285+
${tableRows()}
286+
287+
Validation and explanation overhead are Neuron-JS capabilities (\`validateScript\`,
288+
\`explainExecution\`); the other engines provide no equivalent step, so their measured
289+
delta is \`0\`.
290+
`;
291+
292+
writeFileSync(PAGE_PATH, page, "utf8");
293+
process.stdout.write(`wrote ${PAGE_PATH}\n`);
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import { readPath } from "../read-path.ts";
2+
import type { Adapter, Decision, Runner, ScenarioDef } from "../types.ts";
3+
4+
/**
 * Hand-coded TypeScript baseline. No engine: a direct `>=` comparison, the
 * floor cost any rules engine is measured against.
 *
 * `prepare` returns a synchronous runner that, on every call, reads
 * `scenario.factPath` from `scenario.data`, compares it with
 * `scenario.threshold`, and hands the boolean outcome to `scenario.decide`.
 */
export const handCodedAdapter: Adapter = {
  engine: "hand-coded-typescript",
  prepare(scenario: ScenarioDef): Runner {
    return (): Decision => {
      // NOTE(review): the fact is resolved through the `readPath` helper on each
      // run, so this baseline includes that lookup in addition to the `>=`
      // comparison — confirm that is the intended floor.
      const value = readPath(scenario.data, scenario.factPath);
      // A non-number fact never matches.
      const matched = typeof value === "number" && value >= scenario.threshold;
      return scenario.decide(matched);
    };
  },
};
18+
19+
/** Canonical export consumed by the cold-start probe. */
20+
export const adapter = handCodedAdapter;
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import jsonLogic from "json-logic-js";
2+
import type { Adapter, Decision, Runner, ScenarioDef } from "../types.ts";
3+
4+
/**
 * JsonLogic (json-logic-js) adapter. Expresses each scenario as a `>=` predicate
 * over the nested data and derives the canonical decision from the boolean result.
 *
 * The rule object is built once in `prepare`; the returned synchronous runner
 * applies it to `scenario.data` on every call.
 */
export const jsonLogicAdapter: Adapter = {
  engine: "json-logic-js",
  prepare(scenario: ScenarioDef): Runner {
    // `{ var: path }` reads the fact from the data passed to `jsonLogic.apply`.
    const rule = {
      ">=": [{ var: scenario.factPath }, scenario.threshold],
    };

    return (): Decision => {
      // Only a strict boolean `true` result counts as a match.
      const matched = jsonLogic.apply(rule, scenario.data) === true;
      return scenario.decide(matched);
    };
  },
};
21+
22+
/** Canonical export consumed by the cold-start probe. */
23+
export const adapter = jsonLogicAdapter;
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import { Engine } from "json-rules-engine";
2+
import type { Adapter, Decision, Runner, ScenarioDef } from "../types.ts";
3+
4+
/**
 * json-rules-engine adapter. Builds one Engine with a single `>=` rule per
 * scenario, then resolves the canonical decision from the fired events. The
 * engine API is promise-based, so the runner is async.
 *
 * NOTE(review): this adapter evaluates `scenario.flatFacts` / `scenario.flatFactName`,
 * whereas the hand-coded and json-logic adapters read `scenario.factPath` from the
 * nested `scenario.data` — confirm the inputs are equivalent for comparison purposes.
 */
export const jsonRulesEngineAdapter: Adapter = {
  engine: "json-rules-engine",
  prepare(scenario: ScenarioDef): Runner {
    // The engine and its single rule are created once per scenario, outside the runner.
    const engine = new Engine();
    engine.addRule({
      conditions: {
        all: [
          {
            fact: scenario.flatFactName,
            operator: "greaterThanInclusive",
            value: scenario.threshold,
          },
        ],
      },
      event: { type: scenario.id },
    });

    return async (): Promise<Decision> => {
      const { events } = await engine.run(scenario.flatFacts);
      // At least one fired event means the rule's condition matched.
      return scenario.decide(events.length > 0);
    };
  },
};
32+
33+
/** Canonical export consumed by the cold-start probe. */
34+
export const adapter = jsonRulesEngineAdapter;

0 commit comments

Comments
 (0)