Skip to content

Commit e0bed7a

Browse files
authored
Merge pull request #152 from KubaO/staging
Improve render performance and reduce runtime heap use.
2 parents fa0decd + 1d81fad commit e0bed7a

9 files changed

Lines changed: 2730 additions & 1012 deletions

docs/lib/paged.browser.js

Lines changed: 468 additions & 974 deletions
Large diffs are not rendered by default.

perf/README.md

Lines changed: 1682 additions & 0 deletions
Large diffs are not rendered by default.

perf/analyze-heap-profile.mjs

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
// Bottom-up heap sampling profile analyzer.
2+
//
3+
// Reads a V8 .heapprofile (the JSON returned by CDP's
4+
// HeapProfiler.stopSampling) and prints the top allocation sites by
5+
// self-bytes, aggregated by (function name + source location). Same
6+
// shape as Chrome DevTools' Memory tab "Allocation sampling"
7+
// bottom-up view, but in the terminal.
8+
//
9+
// Usage:
10+
// node analyze-heap-profile.mjs <path/to/render.heapprofile> [--top N] [--min-pct P]
11+
//
12+
// Defaults: --top 30, --min-pct 0.1 (hide rows under 0.1% self-bytes).
13+
//
14+
// .heapprofile schema:
15+
// head: { callFrame, selfSize, id, children: [...] } (tree of nodes)
16+
// samples: [{ size, nodeId, ordinal }] (allocation events)
17+
// Each node's `selfSize` is the sum of bytes from samples whose
18+
// nodeId targeted that node directly (i.e. that node was the top of
19+
// the allocation stack). Same shape as cpuprofile self-time.
20+
21+
import { readFileSync } from 'node:fs';
22+
import { resolve } from 'node:path';
23+
24+
// ---- Command-line parsing -------------------------------------------------
// Accepted forms: <path> [--top N] [--min-pct P]. The first bare argument is
// taken as the profile path; any further bare arguments are ignored.
const args = process.argv.slice(2);
let profilePath = null;
let topN = 30;
let minPct = 0.1;
for (let i = 0; i < args.length; i++) {
  const a = args[i];
  // `++i` consumes the option's value so it is not mistaken for the path.
  if (a === '--top') topN = Number.parseInt(args[++i], 10);
  else if (a === '--min-pct') minPct = Number.parseFloat(args[++i]);
  else if (!profilePath) profilePath = a;
}
// Reject a missing path as well as option values that did not parse: a NaN
// --top or --min-pct would otherwise silently produce an empty report, and a
// negative --top would make slice() drop rows from the end instead.
const argsAreValid =
  Boolean(profilePath) &&
  !Number.isNaN(topN) && topN >= 1 &&
  !Number.isNaN(minPct);
if (!argsAreValid) {
  console.error('usage: node analyze-heap-profile.mjs <path> [--top N] [--min-pct P]');
  process.exit(2);
}
// Resolve relative to the current working directory so the path echoed in
// the report is absolute.
profilePath = resolve(process.cwd(), profilePath);
39+
40+
const profile = JSON.parse(readFileSync(profilePath, 'utf8'));
// The call tree of a sampling heap profile lives under `head`. Without it
// there is nothing to aggregate (for example when a different profile type
// was passed), so fail with a readable message instead of a TypeError.
if (!profile.head) {
  console.error(`${profilePath}: no "head" node found; expected a .heapprofile`);
  process.exit(1);
}

// Flatten the tree into a list of nodes, keyed by call-frame.
// Nodes that share function name + url + line are merged into one entry, so
// the same function reached through different call paths is reported once.
const byKey = new Map();
let totalBytes = 0;
const walk = (node) => {
  const cf = node.callFrame || {};
  const fn = cf.functionName || '(anonymous)';
  const url = cf.url || '';
  // callFrame.lineNumber is 0-based; shift to the 1-based numbering editors use.
  const line = cf.lineNumber != null ? cf.lineNumber + 1 : '?';
  const key = `${fn} @ ${url || '(no url)'}:${line}`;
  const cur = byKey.get(key) || { bytes: 0, fn, url, line };
  cur.bytes += node.selfSize || 0;
  byKey.set(key, cur);
  totalBytes += node.selfSize || 0;
  for (const c of node.children || []) walk(c);
};
walk(profile.head);
58+
59+
// Report rows: every aggregated call site with its share of total self-bytes,
// largest first, dropping sites below the --min-pct threshold and keeping at
// most --top entries.
const rows = [...byKey.values()]
  .map((r) => ({
    ...r,
    // A profile with zero sampled bytes would give 0/0 = NaN; report 0% instead.
    pct: totalBytes > 0 ? (100 * r.bytes) / totalBytes : 0,
  }))
  .sort((a, b) => b.bytes - a.bytes)
  .filter((r) => r.pct >= minPct)
  .slice(0, topN);
67+
68+
// Format a non-negative byte count with a binary-scaled unit:
// >= 1 MiB as "x.xx MB", >= 1 KiB as "x.x KB", otherwise "n B".
const fmtBytes = (n) => {
  if (n >= 1024 * 1024) return (n / 1024 / 1024).toFixed(2) + ' MB';
  if (n >= 1024) return (n / 1024).toFixed(1) + ' KB';
  return n + ' B';
};

// Format a percentage with two decimals, left-padded with spaces to width `w`.
const fmtPct = (n, w) => n.toFixed(2).padStart(w);
74+
// ---- Report ---------------------------------------------------------------
// Header: which file was analysed, how many samples it holds (if the profile
// carries a `samples` array) and the total of all node selfSize values.
console.log(`profile: ${profilePath}`);
console.log(`samples: ${profile.samples ? profile.samples.length : '?'} total selfSize: ${fmtBytes(totalBytes)}`);
console.log(`top ${topN} by self-bytes (min ${minPct}%):`);
console.log('');
console.log(' self_bytes self_% function @ source');
console.log(' ---------- ------ ----------------------------------------------');
for (const r of rows) {
  // Strip the file:/// scheme so local paths read like plain filesystem paths.
  const where = `${r.url ? r.url.replace(/^file:\/\/\//, '') : '(no url)'}:${r.line}`;
  const fn = r.fn || '(anonymous)';
  console.log(` ${fmtBytes(r.bytes).padStart(11)} ${fmtPct(r.pct, 5)}% ${fn} @ ${where}`);
}

perf/diff-heap-profile.mjs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import { readFileSync } from 'node:fs';
2+
3+
// Load a sampling heap profile from path `p` and aggregate self-bytes per
// call site.
//
// Returns { byKey, total, samples } where
//   byKey   - Map from "<function> @ <file basename>:<line>" (or just
//             "<function>" when the frame has no url) to summed selfSize,
//   total   - sum of selfSize over every node in the tree,
//   samples - length of the profile's `samples` array, or 0 if absent.
//
// Only the basename of the url is used in the key, so two profiles of the
// same code loaded from different directories still line up.
function loadByFn(p) {
  const profile = JSON.parse(readFileSync(p, 'utf8'));
  const byKey = new Map();
  let total = 0;
  const walk = (n) => {
    const cf = n.callFrame || {};
    const fn = cf.functionName || '(anonymous)';
    // Shift the 0-based callFrame.lineNumber to 1-based for display.
    const line = cf.lineNumber != null ? cf.lineNumber + 1 : '?';
    const url = (cf.url || '').replace(/^file:\/\/\//, '');
    // Split on either separator so Windows and POSIX style paths both work.
    const tail = url ? url.split(/[\\/]/).pop() : '';
    const key = tail ? fn + ' @ ' + tail + ':' + line : fn;
    byKey.set(key, (byKey.get(key) || 0) + (n.selfSize || 0));
    total += n.selfSize || 0;
    for (const c of n.children || []) walk(c);
  };
  walk(profile.head);
  return { byKey, total, samples: profile.samples ? profile.samples.length : 0 };
}
21+
22+
// ---- Main: compare two heap profiles --------------------------------------
const [prePath, postPath] = process.argv.slice(2);
// Both paths are required; without this check a missing argument surfaces as
// a TypeError from readFileSync(undefined).
if (!prePath || !postPath) {
  console.error('usage: node diff-heap-profile.mjs <pre.heapprofile> <post.heapprofile>');
  process.exit(2);
}
const pre = loadByFn(prePath);
const post = loadByFn(postPath);
// Union of call sites seen in either profile; a site missing on one side
// counts as 0 bytes there.
const keys = new Set([...pre.byKey.keys(), ...post.byKey.keys()]);
const rows = [];
for (const k of keys) {
  const preB = pre.byKey.get(k) || 0;
  const postB = post.byKey.get(k) || 0;
  rows.push({ k, pre: preB, post: postB, delta: postB - preB });
}
32+
33+
// Format a (possibly negative) byte count. The unit is chosen from the
// magnitude, while the sign of `b` is preserved in the printed number.
const fmtB = (b) => {
  const a = Math.abs(b);
  if (a >= 1024 * 1024) return (b / 1024 / 1024).toFixed(2) + ' MB';
  if (a >= 1024) return (b / 1024).toFixed(1) + ' KB';
  return b + ' B';
};

// Right-align string `s` in a field of width `w`.
const pad = (s, w) => s.padStart(w);
40+
41+
// ---- Report ---------------------------------------------------------------
// Totals for each profile, then the 20 call sites whose self-bytes changed
// the most in either direction.
console.log('pre samples=' + pre.samples + ', total=' + fmtB(pre.total));
console.log('post samples=' + post.samples + ', total=' + fmtB(post.total));
console.log('total delta : ' + fmtB(post.total - pre.total));
console.log();
console.log('top 20 by |delta|:');
console.log(' PRE POST Δ function');
console.log(' ---------- ---------- ---------- ------------------------');
// Rank by absolute change so large reductions rank alongside large growth.
rows.sort((a, b) => Math.abs(b.delta) - Math.abs(a.delta));
for (const r of rows.slice(0, 20)) {
  // fmtB already prints '-' for negatives; add an explicit '+' for growth.
  const sign = r.delta > 0 ? '+' : '';
  console.log(' ' + pad(fmtB(r.pre), 10) + ' ' + pad(fmtB(r.post), 10) + ' ' + pad(sign + fmtB(r.delta), 10) + ' ' + r.k);
}

perf/find-callees.mjs

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
// Companion to find-callers.mjs: shows where a function spends its time
2+
// across direct callees. Reports self-time + per-callee subtree totals so
3+
// you can see whether the cost lives in the function body or in what it
4+
// calls.
5+
//
6+
// Usage:
7+
// node perf/find-callees.mjs <profile> <calleeName>
8+
//
9+
// Example:
10+
// node perf/find-callees.mjs results/.../render.cpuprofile removeOverflow
11+
12+
import { readFileSync } from 'node:fs';
13+
14+
// ---- Arguments and profile loading ----------------------------------------
const [profilePath, targetName] = process.argv.slice(2);
if (!profilePath || !targetName) {
  console.error('usage: node find-callees.mjs <profile> <calleeName>');
  process.exit(2);
}

const profile = JSON.parse(readFileSync(profilePath, 'utf8'));
// The time-per-sample estimate below divides by the sample count, and the
// rest of the script iterates profile.nodes; bail out early if either is
// unusable rather than printing NaN/Infinity timings.
if (!Array.isArray(profile.nodes) || !profile.samples?.length) {
  console.error(`${profilePath}: expected a .cpuprofile with a "nodes" array and at least one sample`);
  process.exit(1);
}
// Average wall time represented by one sample, in the unit of
// startTime/endTime (microseconds, going by the variable name and the
// /1000 -> ms conversions used for output).
const usPerSample = (profile.endTime - profile.startTime) / profile.samples.length;

// Index nodes by id; `children` arrays in the profile hold ids, not objects.
const byId = new Map();
for (const n of profile.nodes) byId.set(n.id, n);
25+
26+
// Sum hitCount over the node `rootId` and all of its descendants.
// Walks iteratively with an explicit stack (no recursion-depth limit) and
// tracks visited ids so a node is never counted twice. Ids that are not
// present in `byId` contribute nothing instead of throwing.
const subtreeHits = (rootId) => {
  const stack = [rootId];
  const seen = new Set();
  let hits = 0;
  while (stack.length) {
    const id = stack.pop();
    if (seen.has(id)) continue;
    seen.add(id);
    const n = byId.get(id);
    if (!n) continue; // dangling child id: skip rather than crash
    hits += n.hitCount || 0;
    for (const c of n.children || []) stack.push(c);
  }
  return hits;
};
40+
41+
// ---- Accumulate self / total / per-callee hits for the target function ----
let selfHits = 0;
let totalHits = 0;
const calleeHits = new Map();

// When the target function is recursive, one target node sits inside another
// target node's subtree. Summing each target subtree independently would
// count the inner subtree twice, so remember which node ids have already
// been added: once for the overall total, and once per callee key.
const countedInTotal = new Set();
const countedPerCallee = new Map();

// Sum hitCount over the subtree rooted at `rootId`, skipping (and not
// descending into) any node id already present in `counted`. A counted node
// always had its whole subtree counted in the same pass, so skipping it is safe.
const uncountedSubtreeHits = (rootId, counted) => {
  const pending = [rootId];
  let hits = 0;
  while (pending.length) {
    const id = pending.pop();
    if (counted.has(id)) continue;
    counted.add(id);
    const node = byId.get(id);
    if (!node) continue;
    hits += node.hitCount || 0;
    for (const childId of node.children || []) pending.push(childId);
  }
  return hits;
};

for (const n of profile.nodes) {
  const fn = n.callFrame?.functionName || '';
  if (fn !== targetName) continue;
  selfHits += n.hitCount || 0;
  totalHits += uncountedSubtreeHits(n.id, countedInTotal);
  for (const cid of n.children || []) {
    const c = byId.get(cid);
    if (!c) continue;
    const fnC = c.callFrame?.functionName || '(anon)';
    const url = (c.callFrame?.url || '').replace(/^file:\/\/\//, '');
    // lineNumber is 0-based; a missing value becomes 0 after the +1 shift.
    const line = (c.callFrame?.lineNumber ?? -1) + 1;
    const key = `${fnC} @ ${url || '(native)'}:${line}`;
    let counted = countedPerCallee.get(key);
    if (!counted) {
      counted = new Set();
      countedPerCallee.set(key, counted);
    }
    calleeHits.set(key, (calleeHits.get(key) || 0) + uncountedSubtreeHits(cid, counted));
  }
}
59+
60+
// Convert a hit count to milliseconds (two decimals) using the average time
// per sample.
const ms = (hits) => (hits * usPerSample / 1000).toFixed(2);

// ---- Report ---------------------------------------------------------------
console.log(`${targetName}: self=${ms(selfHits)}ms, total=${ms(totalHits)}ms (callees combined=${ms(totalHits - selfHits)}ms)`);
console.log('per direct callee (subtree total ms):');
[...calleeHits.entries()]
  .sort((a, b) => b[1] - a[1])
  .forEach(([k, h]) => {
    const v = h * usPerSample / 1000;
    // Hide callees that account for less than half a millisecond.
    if (v >= 0.5) console.log(` ${ms(h).padStart(8)} ms ${k}`);
  });

perf/grep-profile.mjs

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// One-off: list every node in a .cpuprofile whose functionName matches
2+
// the given regex, with self-time and source location. Helpful for
3+
// "is this frame in the profile at all, and what's it called?"
4+
5+
import { readFileSync } from 'node:fs';
6+
7+
// ---- Arguments and profile loading ----------------------------------------
const [profilePath, pattern] = process.argv.slice(2);
if (!profilePath || !pattern) {
  console.error('usage: node grep-profile.mjs <profile> <regex>');
  process.exit(2);
}

// Compile the pattern before touching the file so a typo in the regex is
// reported as a usage error rather than an uncaught SyntaxError.
let re;
try {
  re = new RegExp(pattern);
} catch (err) {
  console.error(`invalid regex ${JSON.stringify(pattern)}: ${err.message}`);
  process.exit(2);
}

const profile = JSON.parse(readFileSync(profilePath, 'utf8'));
// The per-sample time below divides by the sample count; refuse profiles
// where that would yield NaN/Infinity or where there are no nodes to scan.
if (!Array.isArray(profile.nodes) || !profile.samples?.length) {
  console.error(`${profilePath}: expected a .cpuprofile with a "nodes" array and at least one sample`);
  process.exit(1);
}
// Average time represented by one sample, in the unit of startTime/endTime.
const usPerSample = (profile.endTime - profile.startTime) / profile.samples.length;
16+
17+
// Collect one row per profile node whose function name matches the regex.
// Nodes are NOT merged: the same function reached via different call paths
// appears as several rows, each with its own self-time.
const rows = [];
for (const n of profile.nodes) {
  const fn = n.callFrame?.functionName || '';
  if (!re.test(fn)) continue;
  const ms = (n.hitCount || 0) * usPerSample / 1000;
  rows.push({
    ms,
    fn,
    url: (n.callFrame?.url || '').replace(/^file:\/\/\//, '') || '(native)',
    // lineNumber is 0-based; a missing value becomes 0 after the +1 shift.
    line: (n.callFrame?.lineNumber ?? -1) + 1,
    hits: n.hitCount || 0,
  });
}
// Largest self-time first.
rows.sort((a, b) => b.ms - a.ms);

// Print every matching node, then the combined self-time across all of them.
let total = 0;
for (const r of rows) {
  total += r.ms;
  console.log(` ${r.ms.toFixed(2).padStart(8)} ms ${r.fn} @ ${r.url}:${r.line} hits=${r.hits}`);
}
console.log(` -------- ${total.toFixed(2)} ms total across ${rows.length} matching nodes`);

perf/instrument-clones.js

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
// One-off probe: count how many Layout.append clones survive into the
2+
// finalized page wrapper vs. how many get rolled back by removeOverflow.
3+
//
4+
// Mechanism:
5+
// - Wrap Layout.prototype.append to (a) count calls and (b) tag every
6+
// returned clone with an expando __pagedjs_clone_tag = true.
7+
// - Wrap Node.prototype.cloneNode globally so we can also report the
8+
// gross cloneNode call count (which includes rebuildAncestors and
9+
// anything else outside Layout.append).
10+
// - At finalizePage, walk the just-finalized page wrapper counting
11+
// tagged survivors. (removeOverflow has already fired by this point.)
12+
// - At afterRendered, summarise totals + per-page distribution.
13+
//
14+
// Cost: O(1) per append + one tree walk per finalized page. Run with
15+
// --detach-pages --no-timing --additional-script ..\perf\instrument-clones.js
16+
// from a measure.mjs invocation. Numbers are reported via console.log
17+
// which measure.mjs forwards to stdout.
18+
19+
(() => {
  // The probe needs the Layout class exposed on window; report and bail out
  // if the bundle under test does not provide it.
  const Layout = window.PagedLayout;
  if (!Layout) {
    console.log('[clone-count] ERROR: window.PagedLayout not exposed; bundle patch missing.');
    return;
  }

  // Wrap Layout.prototype.append: count every call and tag whatever it
  // returns with an expando so survivors can be recognised later.
  const origAppend = Layout.prototype.append;
  let appendCalls = 0;
  Layout.prototype.append = function (...args) {
    const clone = origAppend.apply(this, args);
    appendCalls++;
    if (clone) clone.__pagedjs_clone_tag = true;
    return clone;
  };

  // Wrap Node.prototype.cloneNode globally to get the gross clone count,
  // regardless of who asked for the clone.
  const origCloneNode = Node.prototype.cloneNode;
  let cloneNodeCalls = 0;
  Node.prototype.cloneNode = function (deep) {
    cloneNodeCalls++;
    return origCloneNode.call(this, deep);
  };

  const perPage = []; // { appended, kept }
  let appendAtPageStart = 0;

  class CloneCountHandler extends Paged.Handler {
    // Snapshot the running append counter so finalizePage can compute how
    // many appends happened while this page was being laid out.
    beforePageLayout() {
      appendAtPageStart = appendCalls;
    }

    // Count how many tagged nodes are still present under the finalized page
    // element and record it next to the number appended for this page.
    finalizePage(pageElement) {
      const appendedThisPage = appendCalls - appendAtPageStart;
      let kept = 0;
      const walker = document.createTreeWalker(
        pageElement,
        NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_TEXT
      );
      let n;
      while ((n = walker.nextNode())) {
        if (n.__pagedjs_clone_tag) kept++;
      }
      perPage.push({ appended: appendedThisPage, kept });
    }

    // Print totals, the worst page, a histogram of per-page overshoot
    // percentages and percentile cut points. "Overshoot" is the number of
    // nodes appended for a page minus the tagged nodes found in it at
    // finalizePage time.
    afterRendered(pages) {
      let totalAppended = 0;
      let totalKept = 0;
      let pagesWithOvershoot = 0;
      let maxOvershoot = 0;
      let maxOvershootPage = -1;
      const pcts = [];
      perPage.forEach((entry, idx) => {
        totalAppended += entry.appended;
        totalKept += entry.kept;
        const over = entry.appended - entry.kept;
        if (over > 0) pagesWithOvershoot++;
        if (over > maxOvershoot) {
          maxOvershoot = over;
          maxOvershootPage = idx;
        }
        // Pages with no appends count as 0% rather than dividing by zero.
        pcts.push(entry.appended > 0 ? (over / entry.appended) * 100 : 0);
      });
      const totalOvershoot = totalAppended - totalKept;
      const pct = totalAppended > 0
        ? (totalOvershoot / totalAppended) * 100
        : 0;

      console.log(`[clone-count] pages=${pages.length}`);
      console.log(`[clone-count] Layout.append calls (source-walker leaf clones): ${totalAppended}`);
      console.log(`[clone-count] survivors in finalized pages: ${totalKept}`);
      console.log(`[clone-count] overshoot (appended-then-removed): ${totalOvershoot} (${pct.toFixed(1)}%)`);
      console.log(`[clone-count] pages with any overshoot: ${pagesWithOvershoot}/${pages.length}`);
      console.log(`[clone-count] max overshoot on one page: ${maxOvershoot} (page index ${maxOvershootPage}, appended=${perPage[maxOvershootPage]?.appended ?? 0})`);
      console.log(`[clone-count] gross Node.cloneNode calls (incl. rebuildAncestors, handlers, etc.): ${cloneNodeCalls}`);
      console.log(`[clone-count] non-Layout.append clones: ${cloneNodeCalls - totalAppended}`);

      // Per-page overshoot % buckets. Each bucket is [lo, hi); the last one
      // uses hi=101 so that exactly 100% still lands in it.
      const buckets = [
        { lo: 0, hi: 1 },
        { lo: 1, hi: 5 },
        { lo: 5, hi: 10 },
        { lo: 10, hi: 20 },
        { lo: 20, hi: 30 },
        { lo: 30, hi: 50 },
        { lo: 50, hi: 101 },
      ];
      const counts = buckets.map(() => 0);
      for (const p of pcts) {
        for (let i = 0; i < buckets.length; i++) {
          if (p >= buckets[i].lo && p < buckets[i].hi) {
            counts[i]++;
            break;
          }
        }
      }
      console.log(`[clone-count] per-page overshoot % distribution:`);
      for (let i = 0; i < buckets.length; i++) {
        const b = buckets[i];
        const hi = b.hi === 101 ? '100' : String(b.hi);
        console.log(`[clone-count] ${String(b.lo).padStart(3)} - ${hi.padStart(3)}%: ${counts[i]} pages`);
      }

      // Cumulative percentile cutpoints. Skipped when no page was finalized,
      // because there is no value to pick and .toFixed() on undefined throws.
      const sortedPcts = pcts.slice().sort((a, b) => a - b);
      if (sortedPcts.length > 0) {
        const pickPct = (q) => sortedPcts[Math.min(sortedPcts.length - 1, Math.floor(q * sortedPcts.length))];
        // "max" is the true maximum (last element of the ascending sort), not
        // a high quantile, so it stays correct for runs of 1000+ pages.
        const maxPct = sortedPcts[sortedPcts.length - 1];
        console.log(`[clone-count] per-page overshoot %: p50=${pickPct(0.5).toFixed(1)}% p90=${pickPct(0.9).toFixed(1)}% p99=${pickPct(0.99).toFixed(1)}% max=${maxPct.toFixed(1)}%`);
      }
    }
  }

  Paged.registerHandlers(CloneCountHandler);
  console.log('[clone-count] handler registered');
})();

0 commit comments

Comments
 (0)