Skip to content

Commit 542fbd9

Browse files
JohnMcLear and claude authored
fix(sim): align Scraper allowlist + Reporter CSV with what core actually emits (#99)
Confirmed from the first real dive run (25934713423): core's /stats/prometheus uses prom-client's collectDefaultMetrics output (process_cpu_user_seconds_total, nodejs_eventloop_lag_p95_seconds, process_resident_memory_bytes, ...) — not the nodejs_cpu_gauge / nodejs_eventloop_latency_gauge names that src/node/metrics.ts defines but never registers. The Scraper's default allowlist was filtering EVERYTHING out, so all dive reports had empty cpu_user / evloop_p95_ms / rss_mb columns.

Two changes:

1. Update DEFAULT_KEEP prefixes to match real prom-client names. Includes 'etherpad_' as a single prefix that covers all current and future etherpad_ rows (including the three added in ether/etherpad#7762).
2. Update Reporter CSV column mapping to read process_cpu_user_seconds_total, nodejs_eventloop_lag_p95_seconds, and process_resident_memory_bytes (converting seconds -> ms and bytes -> MB as before). CSV column names stay stable; only the underlying lookup keys change.

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 72588f4 commit 542fbd9

3 files changed

Lines changed: 31 additions & 16 deletions

File tree

src/sim/config.ts

Lines changed: 17 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -29,16 +29,24 @@ export interface ConfigInput {
2929
force?: boolean;
3030
}
3131

32+
// Default allowlist. Names come from what etherpad core's /stats/prometheus
33+
// actually emits: a few etherpad_-prefixed custom rows plus the prom-client
34+
// default metrics (process_cpu_*, process_resident_memory_bytes,
35+
// nodejs_eventloop_lag_*). Names are matched as prefixes against the base
36+
// metric name (before `{labels}`).
3237
const DEFAULT_KEEP = [
33-
'nodejs_cpu_gauge',
34-
'nodejs_eventloop_latency_gauge',
35-
'nodejs_memory_process_gauge',
36-
'nodejs_gc_gauge',
37-
'etherpad_total_users',
38-
'etherpad_active_pads',
39-
'etherpad_pad_users',
40-
'etherpad_changeset_apply_duration_seconds',
41-
'etherpad_socket_emits_total',
38+
// etherpad custom (active + planned via ether/etherpad#7762)
39+
'etherpad_',
40+
// prom-client defaults
41+
'process_cpu_', // process_cpu_user_seconds_total, _system_seconds_total
42+
'process_resident_memory_bytes', // RSS
43+
'process_heap_bytes',
44+
'nodejs_eventloop_lag', // nodejs_eventloop_lag_seconds + _p50/_p95/_p99/_max
45+
'nodejs_heap_size',
46+
'nodejs_active_handles',
47+
'nodejs_gc_duration_seconds',
48+
// ueberdb
49+
'ueberdb_stats',
4250
];
4351

4452
const requireNonNeg = (name: string, v: number | undefined): void => {

src/sim/reporter.ts

Lines changed: 13 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -69,17 +69,22 @@ export class Reporter {
6969
};
7070
const rows = this.steps.map((s) => {
7171
const g = s.snapshot.gauges;
72-
const rssBytes = g['nodejs_memory_process_gauge{type=rss}'];
72+
// CSV column → /stats/prometheus row mapping. Names come from prom-client's
73+
// collectDefaultMetrics output that etherpad core actually emits.
74+
const rssBytes = g['process_resident_memory_bytes'];
7375
const rssMb = rssBytes !== undefined ? Math.round(rssBytes / 1_048_576) : undefined;
76+
const evloopP95s = g['nodejs_eventloop_lag_p95_seconds'];
77+
const evloopP95Ms = evloopP95s !== undefined ? Math.round(evloopP95s * 1000) : undefined;
78+
const cpuUserS = g['process_cpu_user_seconds_total'];
7479
return [
7580
s.step,
7681
fmt(s.latencyMs.p50),
7782
fmt(s.latencyMs.p95),
7883
fmt(s.latencyMs.p99),
7984
fmt(s.latencyMs.max),
8085
fmt(s.throughputCsps),
81-
cell(g, 'nodejs_cpu_gauge{type=user}'),
82-
cell(g, 'nodejs_eventloop_latency_gauge{type=p95}'),
86+
fmt(cpuUserS),
87+
fmt(evloopP95Ms),
8388
fmt(rssMb),
8489
cell(g, 'etherpad_total_users'),
8590
s.errors,
@@ -101,9 +106,11 @@ export class Reporter {
101106
];
102107
const rows = this.steps.map((s) => {
103108
const g = s.snapshot.gauges;
104-
const el = g['nodejs_eventloop_latency_gauge{type=p95}'] ?? '';
105-
const cpu = g['nodejs_cpu_gauge{type=user}'] ?? '';
106-
return `| ${s.step} | ${s.latencyMs.p50} | ${s.latencyMs.p95} | ${s.latencyMs.p99} | ${el} | ${cpu} | ${s.errors} | ${s.breakageFlags.join('|')} |`;
109+
const elS = g['nodejs_eventloop_lag_p95_seconds'];
110+
const elMs = elS !== undefined ? Math.round(elS * 1000) : '';
111+
const cpuS = g['process_cpu_user_seconds_total'];
112+
const cpu = cpuS !== undefined ? cpuS.toFixed(2) : '';
113+
return `| ${s.step} | ${s.latencyMs.p50} | ${s.latencyMs.p95} | ${s.latencyMs.p99} | ${elMs} | ${cpu} | ${s.errors} | ${s.breakageFlags.join('|')} |`;
107114
});
108115

109116
// Sparkline of p95

tests/sim/config.test.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ describe('makeConfig defaults', () => {
1414
expect(c.break.action).toBe('continue');
1515
expect(c.scrape.intervalMs).toBe(1000);
1616
expect(c.scrape.url).toBe('http://127.0.0.1:9001/stats/prometheus');
17-
expect(c.scrape.keep).toContain('nodejs_eventloop_latency_gauge');
17+
expect(c.scrape.keep).toContain('nodejs_eventloop_lag');
1818
});
1919

2020
it('derives scrape url from sutUrl when not overridden', () => {

0 commit comments

Comments
 (0)