Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
462539f
Speed up the PDF render by another ~2.5s by getting rid of idle time.
KubaO May 21, 2026
5b148ff
Hook fast-path: skip await microtask when no async handlers.
KubaO May 21, 2026
f3a042f
Make the per-page chunker chain synchronous.
KubaO May 21, 2026
2839d9c
Use a Set for chunker loop-detection (O(n^2) -> O(n)).
KubaO May 21, 2026
58db1e9
Use Range.deleteContents over extractContents when no footnotes.
KubaO May 21, 2026
9ba62c7
Guard afterOverflowRemoved trigger with _assertSync.
KubaO May 21, 2026
ce62694
Guard all sync-only Hook.trigger sites with _assertSync.
KubaO May 21, 2026
41cce85
Add --no-timing flag to measure.mjs.
KubaO May 21, 2026
858a728
Drop unused requestIdleCallback const and ResizeObserver/UUID typeof …
KubaO May 21, 2026
ac38737
Drop SafeUint32Array fallback and source-map hasNativeMap branches.
KubaO May 21, 2026
64034db
Drop es5-ext Object.assign/keys/String.contains shim machinery.
KubaO May 21, 2026
3084d80
Drop es6-symbol bundle: Symbol, Array.from, Math.sign, Number.isNaN, …
KubaO May 21, 2026
e8da0f8
Collapse leftover polyfill aliases (sign, from, isNan, toInteger$1, i…
KubaO May 21, 2026
9474784
Replace UUID() with a base36 counter (~1s render saving, 1.5MB less s…
KubaO May 21, 2026
e29c724
Inline getBoundingClientRect/getClientRects wrappers and delete them.
KubaO May 21, 2026
5ac609d
Cheaper BreakToken.toJSON: skip indexOf scan + JSON.stringify (~64ms …
KubaO May 21, 2026
7898bc8
Fix Footnotes.renderNode always-truthy NodeList condition.
KubaO May 21, 2026
a5880f1
Hook.triggerSync empty-handlers fast-path (~490ms saved).
KubaO May 21, 2026
80c3b4f
Footnotes unregisters its hooks when no footnotes in source (~370ms s…
KubaO May 21, 2026
08289a1
perf/README: document the append() investigation and three landings.
KubaO May 21, 2026
3ac5ec0
Layout.append: cache last-seen (srcParent, dest) -> destParent across…
KubaO May 21, 2026
d37d191
Layout.wrapContent: skip the innerHTML round-trip (~870ms saved).
KubaO May 21, 2026
9616548
Fix recoredCharLength typo (recoredCharLength -> recordCharLength).
KubaO May 21, 2026
d79e976
Chunker.maxChars: propagate per page, track running max not avg (~2.9…
KubaO May 21, 2026
b7d6f36
Document the canonical way we profile paged.browser.js.
KubaO May 22, 2026
3ee19de
Add --clone-count and --render-only flags to the per measurement tool.
KubaO May 22, 2026
d73e098
Document the move-not-clone approach that had negative fallout.
KubaO May 22, 2026
9dfe97a
Add heap profiling for render.
KubaO May 22, 2026
e40aa3e
Cache attributes.
KubaO May 22, 2026
61d60dd
Use decimal map indices and use an array for indexOfRefs. Saves ~5MB …
KubaO May 22, 2026
421fd96
Don't use long decimal strings where shorter base-36 ones will do.
KubaO May 22, 2026
2f4f08e
Remove dead writes.
KubaO May 22, 2026
7b49369
Cache and reuse a string.
KubaO May 22, 2026
3cb4858
Add a heap profile differ.
KubaO May 22, 2026
1d81fad
Presize arrays to save heap pressure.
KubaO May 22, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,442 changes: 468 additions & 974 deletions docs/lib/paged.browser.js

Large diffs are not rendered by default.

1,682 changes: 1,682 additions & 0 deletions perf/README.md

Large diffs are not rendered by default.

84 changes: 84 additions & 0 deletions perf/analyze-heap-profile.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// Bottom-up heap sampling profile analyzer.
//
// Reads a V8 .heapprofile (the JSON returned by CDP's
// HeapProfiler.stopSampling) and prints the top allocation sites by
// self-bytes, aggregated by (function name + source location). Same
// shape as Chrome DevTools' Memory tab "Allocation sampling"
// bottom-up view, but in the terminal.
//
// Usage:
// node analyze-heap-profile.mjs <path/to/render.heapprofile> [--top N] [--min-pct P]
//
// Defaults: --top 30, --min-pct 0.1 (hide rows under 0.1% self-bytes).
//
// .heapprofile schema:
// head: { callFrame, selfSize, id, children: [...] } (tree of nodes)
// samples: [{ size, nodeId, ordinal }] (allocation events)
// Each node's `selfSize` is the sum of bytes from samples whose
// nodeId targeted that node directly (i.e. that node was the top of
// the allocation stack). Same shape as cpuprofile self-time.

import { readFileSync } from 'node:fs';
import { resolve } from 'node:path';

// Command-line parsing.
//
// Accepts one positional argument (the .heapprofile path) plus two optional
// numeric flags:
//   --top N       maximum number of rows to print (default 30)
//   --min-pct P   hide rows whose self-bytes share is below P percent (default 0.1)
//
// A flag whose value is missing or not numeric is rejected up front.
// Otherwise the value becomes NaN, and NaN silently empties the report
// (`slice(0, NaN)` keeps nothing and `pct >= NaN` is always false).
const args = process.argv.slice(2);
let profilePath = null;
let topN = 30;
let minPct = 0.1;

// Print an optional problem description plus the usage line, then exit with
// status 2.
const usageAndExit = (problem) => {
  if (problem) console.error(`error: ${problem}`);
  console.error('usage: node analyze-heap-profile.mjs <path> [--top N] [--min-pct P]');
  process.exit(2);
};

for (let i = 0; i < args.length; i++) {
  const a = args[i];
  if (a === '--top') {
    topN = Number.parseInt(args[++i], 10);
    if (!Number.isInteger(topN) || topN < 0) {
      usageAndExit('--top expects a non-negative integer');
    }
  } else if (a === '--min-pct') {
    minPct = Number.parseFloat(args[++i]);
    if (!Number.isFinite(minPct)) {
      usageAndExit('--min-pct expects a number');
    }
  } else if (!profilePath) {
    // First non-flag argument is the profile path; later ones are ignored.
    profilePath = a;
  }
}
if (!profilePath) usageAndExit();
// Resolve against the current working directory so the path echoed in the
// report is absolute.
profilePath = resolve(process.cwd(), profilePath);

// Parse the sampling heap profile from disk.
const profile = JSON.parse(readFileSync(profilePath, 'utf8'));

// Collapse the call tree into one entry per call site. A call site is
// identified by function name, script URL and 1-based line number; every
// tree node that maps to the same site contributes its selfSize.
const byKey = new Map();
let totalBytes = 0;
const walk = (node) => {
  const { functionName, url: rawUrl, lineNumber } = node.callFrame || {};
  const fn = functionName || '(anonymous)';
  const url = rawUrl || '';
  // lineNumber is shifted by one for display; '?' when it is absent.
  const line = lineNumber == null ? '?' : lineNumber + 1;
  const self = node.selfSize || 0;

  const key = `${fn} @ ${url || '(no url)'}:${line}`;
  let entry = byKey.get(key);
  if (!entry) {
    entry = { bytes: 0, fn, url, line };
    byKey.set(key, entry);
  }
  entry.bytes += self;
  totalBytes += self;

  (node.children || []).forEach((child) => walk(child));
};
walk(profile.head);

// Rank call sites by self-bytes (largest first), attach each site's share of
// the total, drop sites below the --min-pct threshold and keep at most
// --top rows.
const ranked = Array.from(byKey.values(), (entry) => ({
  ...entry,
  pct: (100 * entry.bytes) / totalBytes,
}));
ranked.sort((left, right) => right.bytes - left.bytes);
const rows = ranked
  .filter((entry) => entry.pct >= minPct)
  .slice(0, topN);

// Render a byte count with a binary unit: two decimals for MB, one for KB,
// and the raw number for anything below 1024.
const fmtBytes = (n) => {
  if (n >= 1024 * 1024) {
    return `${(n / 1024 / 1024).toFixed(2)} MB`;
  }
  if (n >= 1024) {
    return `${(n / 1024).toFixed(1)} KB`;
  }
  return `${n} B`;
};

// Render a percentage with two decimals, left-padded with spaces to width w.
const fmtPct = (n, w) => {
  const text = n.toFixed(2);
  return text.padStart(w);
};
// Report header: profile path, sample count (or '?' when the profile has no
// samples array), overall self-size, and the active row limits.
const sampleCount = profile.samples ? profile.samples.length : '?';
console.log(`profile: ${profilePath}`);
console.log(`samples: ${sampleCount} total selfSize: ${fmtBytes(totalBytes)}`);
console.log(`top ${topN} by self-bytes (min ${minPct}%):`);
console.log('');
console.log(' self_bytes self_% function @ source');
console.log(' ---------- ------ ----------------------------------------------');

// One line per ranked call site; a leading file:/// is stripped from the URL.
rows.forEach(({ bytes, pct, fn: rowFn, url, line }) => {
  const source = url ? url.replace(/^file:\/\/\//, '') : '(no url)';
  const where = `${source}:${line}`;
  const fn = rowFn || '(anonymous)';
  console.log(` ${fmtBytes(bytes).padStart(11)} ${fmtPct(pct, 5)}% ${fn} @ ${where}`);
});
52 changes: 52 additions & 0 deletions perf/diff-heap-profile.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import { readFileSync } from 'node:fs';

/**
 * Load a sampling heap profile and total its self-bytes per call site.
 *
 * A call site is keyed as "<function> @ <file basename>:<1-based line>", or
 * by the bare function name when the frame has no URL. Only the basename of
 * the URL is used, so the full directory path does not affect the key.
 *
 * @param {string} p - Path to the .heapprofile JSON file.
 * @returns {{ byKey: Map<string, number>, total: number, samples: number }}
 *   Self-bytes per call site (in first-seen, pre-order sequence), the sum of
 *   all self-bytes, and the number of entries in the profile's `samples`
 *   array (0 when it is absent).
 */
function loadByFn(p) {
  const { head, samples } = JSON.parse(readFileSync(p, 'utf8'));
  const byKey = new Map();
  let total = 0;

  // Explicit-stack pre-order traversal. Children are pushed in reverse so
  // they are popped, and therefore first inserted into the Map, in their
  // original order.
  const pending = [head];
  while (pending.length > 0) {
    const node = pending.pop();
    const frame = node.callFrame || {};
    const name = frame.functionName || '(anonymous)';
    const line = frame.lineNumber == null ? '?' : frame.lineNumber + 1;
    const path = (frame.url || '').replace(/^file:\/\/\//, '');
    const tail = path ? path.split(/[\\/]/).pop() : '';
    const key = tail ? `${name} @ ${tail}:${line}` : name;

    const self = node.selfSize || 0;
    byKey.set(key, (byKey.get(key) || 0) + self);
    total += self;

    const kids = node.children || [];
    for (let i = kids.length - 1; i >= 0; i--) pending.push(kids[i]);
  }

  return { byKey, total, samples: samples ? samples.length : 0 };
}

// Usage:
//   node diff-heap-profile.mjs <pre.heapprofile> <post.heapprofile>
//
// Both paths are required. Without this check a missing argument reaches
// readFileSync as `undefined` and fails with an unrelated-looking error.
const [prePath, postPath] = process.argv.slice(2);
if (!prePath || !postPath) {
  console.error('usage: node diff-heap-profile.mjs <pre.heapprofile> <post.heapprofile>');
  process.exit(2);
}
const pre = loadByFn(prePath);
const post = loadByFn(postPath);

// One row per call site seen in either profile. A site missing from one side
// counts as 0 bytes there, so sites that appeared or vanished still show up.
const keys = new Set([...pre.byKey.keys(), ...post.byKey.keys()]);
const rows = [];
for (const k of keys) {
  const preB = pre.byKey.get(k) || 0;
  const postB = post.byKey.get(k) || 0;
  rows.push({ k, pre: preB, post: postB, delta: postB - preB });
}

// Render a possibly negative byte count. The unit is chosen from the
// magnitude while the sign of the input is kept in the printed number.
const fmtB = (bytes) => {
  const magnitude = Math.abs(bytes);
  if (magnitude >= 1024 * 1024) {
    return `${(bytes / 1024 / 1024).toFixed(2)} MB`;
  }
  if (magnitude >= 1024) {
    return `${(bytes / 1024).toFixed(1)} KB`;
  }
  return `${bytes} B`;
};

// Left-pad a string with spaces up to the given width.
const pad = (text, width) => text.padStart(width);

// Summary: sample counts and total self-bytes for each profile, then the
// overall change.
console.log(`pre samples=${pre.samples}, total=${fmtB(pre.total)}`);
console.log(`post samples=${post.samples}, total=${fmtB(post.total)}`);
console.log(`total delta : ${fmtB(post.total - pre.total)}`);
console.log();
console.log('top 20 by |delta|:');
console.log(' PRE POST Δ function');
console.log(' ---------- ---------- ---------- ------------------------');

// Largest absolute change first, regardless of direction.
const byMagnitude = (a, b) => Math.abs(b.delta) - Math.abs(a.delta);
rows.sort(byMagnitude);
rows.slice(0, 20).forEach(({ k, pre: before, post: after, delta }) => {
  // Growth gets an explicit '+'; fmtB already prints '-' for shrinkage.
  const signed = (delta > 0 ? '+' : '') + fmtB(delta);
  console.log(` ${pad(fmtB(before), 10)} ${pad(fmtB(after), 10)} ${pad(signed, 10)} ${k}`);
});
69 changes: 69 additions & 0 deletions perf/find-callees.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// Companion to find-callers.mjs: shows where a function spends its time
// across direct callees. Reports self-time + per-callee subtree totals so
// you can see whether the cost lives in the function body or in what it
// calls.
//
// Usage:
// node perf/find-callees.mjs <profile> <calleeName>
//
// Example:
// node perf/find-callees.mjs results/.../render.cpuprofile removeOverflow

import { readFileSync } from 'node:fs';

// Both the profile path and the function name to inspect are required.
const cliArgs = process.argv.slice(2);
const profilePath = cliArgs[0];
const targetName = cliArgs[1];
if (!(profilePath && targetName)) {
  console.error('usage: node find-callees.mjs <profile> <calleeName>');
  process.exit(2);
}

const profile = JSON.parse(readFileSync(profilePath, 'utf8'));
// Average time covered by one sample, in the same unit as startTime/endTime.
const usPerSample = (profile.endTime - profile.startTime) / profile.samples.length;

// Index nodes by id; `children` arrays refer to nodes by id.
const byId = new Map();
profile.nodes.forEach((node) => {
  byId.set(node.id, node);
});

// Sum hitCount over the node with the given id and everything reachable
// through `children`. Each id is counted at most once per call.
const subtreeHits = (rootId) => {
  const visited = new Set();
  const pending = [rootId];
  let sum = 0;
  while (pending.length > 0) {
    const id = pending.pop();
    if (!visited.has(id)) {
      visited.add(id);
      const node = byId.get(id);
      sum += node.hitCount || 0;
      (node.children || []).forEach((childId) => {
        pending.push(childId);
      });
    }
  }
  return sum;
};

// Aggregate over every profile node whose function name equals targetName.
//   selfHits   - hitCount summed over the target nodes themselves
//   totalHits  - hitCount summed over the union of all target subtrees
//   calleeHits - subtree hitCount per direct callee, keyed "name @ url:line"
let selfHits = 0;
let totalHits = 0;
const calleeHits = new Map();

// Ids already folded into totalHits. When the target is recursive, a target
// node sits inside another target node's subtree. Adding each target's full
// subtree independently would count the nested part once per enclosing
// frame, inflating "total" and "callees combined".
const countedInTotal = new Set();
const addSubtreeToTotal = (rootId) => {
  const stack = [rootId];
  while (stack.length) {
    const id = stack.pop();
    // An id counted earlier was counted together with its whole subtree, so
    // there is no need to descend into it again.
    if (countedInTotal.has(id)) continue;
    countedInTotal.add(id);
    const node = byId.get(id);
    totalHits += node.hitCount || 0;
    for (const childId of node.children || []) stack.push(childId);
  }
};

for (const n of profile.nodes) {
  const fn = n.callFrame?.functionName || '';
  if (fn !== targetName) continue;
  selfHits += n.hitCount || 0;
  addSubtreeToTotal(n.id);
  // Per-callee rows are independent subtree totals. For a recursive target
  // they can overlap: the target appears as its own callee and that row
  // contains the nested callees too.
  for (const cid of n.children || []) {
    const c = byId.get(cid);
    const fnC = c.callFrame?.functionName || '(anon)';
    const url = (c.callFrame?.url || '').replace(/^file:\/\/\//, '');
    const line = (c.callFrame?.lineNumber ?? -1) + 1;
    const key = `${fnC} @ ${url || '(native)'}:${line}`;
    calleeHits.set(key, (calleeHits.get(key) || 0) + subtreeHits(cid));
  }
}

// Convert a sample count to milliseconds, formatted with two decimals.
const ms = (hits) => (hits * usPerSample / 1000).toFixed(2);

console.log(`${targetName}: self=${ms(selfHits)}ms, total=${ms(totalHits)}ms (callees combined=${ms(totalHits - selfHits)}ms)`);
console.log('per direct callee (subtree total ms):');

// Most expensive callee first; callees under 0.5 ms are not listed.
const ranked = [...calleeHits.entries()].sort((a, b) => b[1] - a[1]);
for (const [label, hits] of ranked) {
  const millis = hits * usPerSample / 1000;
  if (millis >= 0.5) {
    console.log(` ${ms(hits).padStart(8)} ms ${label}`);
  }
}
37 changes: 37 additions & 0 deletions perf/grep-profile.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// One-off: list every node in a .cpuprofile whose functionName matches
// the given regex, with self-time and source location. Helpful for
// "is this frame in the profile at all, and what's it called?"

import { readFileSync } from 'node:fs';

// Both the profile path and the function-name regex are required.
const [profilePath, pattern] = process.argv.slice(2);
if (!(profilePath && pattern)) {
  console.error('usage: node grep-profile.mjs <profile> <regex>');
  process.exit(2);
}

const profile = JSON.parse(readFileSync(profilePath, 'utf8'));
// Average time covered by one sample, in the same unit as startTime/endTime.
const usPerSample = (profile.endTime - profile.startTime) / profile.samples.length;
const re = new RegExp(pattern);

// Collect one row per node whose function name matches the pattern.
const rows = [];
for (const { callFrame, hitCount } of profile.nodes) {
  const fn = callFrame?.functionName || '';
  if (re.test(fn)) {
    const hits = hitCount || 0;
    rows.push({
      ms: hits * usPerSample / 1000,
      fn,
      url: (callFrame?.url || '').replace(/^file:\/\/\//, '') || '(native)',
      line: (callFrame?.lineNumber ?? -1) + 1,
      hits,
    });
  }
}
// Highest self-time first.
rows.sort((a, b) => b.ms - a.ms);

// Print each match, then the combined self-time of all matches.
let total = 0;
for (const { ms, fn, url, line, hits } of rows) {
  total += ms;
  console.log(` ${ms.toFixed(2).padStart(8)} ms ${fn} @ ${url}:${line} hits=${hits}`);
}
console.log(` -------- ${total.toFixed(2)} ms total across ${rows.length} matching nodes`);
128 changes: 128 additions & 0 deletions perf/instrument-clones.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
// One-off probe: count how many Layout.append clones survive into the
// finalized page wrapper vs. how many get rolled back by removeOverflow.
//
// Mechanism:
// - Wrap Layout.prototype.append to (a) count calls and (b) tag every
// returned clone with an expando __pagedjs_clone_tag = true.
// - Wrap Node.prototype.cloneNode globally so we can also report the
// gross cloneNode call count (which includes rebuildAncestors and
// anything else outside Layout.append).
// - At finalizePage, walk the just-finalized page wrapper counting
// tagged survivors. (removeOverflow has already fired by this point.)
// - At afterRendered, summarise totals + per-page distribution.
//
// Cost: O(1) per append + one tree walk per finalized page. Run with
// --detach-pages --no-timing --additional-script ..\perf\instrument-clones.js
// from a measure.mjs invocation. Numbers are reported via console.log
// which measure.mjs forwards to stdout.

(() => {
  const Layout = window.PagedLayout;
  if (!Layout) {
    console.log('[clone-count] ERROR: window.PagedLayout not exposed; bundle patch missing.');
    return;
  }

  // Wrap Layout.prototype.append: count every call and tag whatever it
  // returns so finalizePage can recognise the nodes that are still attached.
  const origAppend = Layout.prototype.append;
  let appendCalls = 0;
  Layout.prototype.append = function (...args) {
    const clone = origAppend.apply(this, args);
    appendCalls++;
    if (clone) clone.__pagedjs_clone_tag = true;
    return clone;
  };

  // Wrap Node.prototype.cloneNode to get the gross clone count, whoever the
  // caller is.
  const origCloneNode = Node.prototype.cloneNode;
  let cloneNodeCalls = 0;
  Node.prototype.cloneNode = function (deep) {
    cloneNodeCalls++;
    return origCloneNode.call(this, deep);
  };

  const perPage = []; // { appended, kept }
  let appendAtPageStart = 0;

  class CloneCountHandler extends Paged.Handler {
    // Remember the append counter so finalizePage can compute this page's
    // share.
    beforePageLayout() {
      appendAtPageStart = appendCalls;
    }

    // Count the tagged element/text nodes still present under the finalized
    // page and record them next to the number of appends made for the page.
    finalizePage(pageElement) {
      const appendedThisPage = appendCalls - appendAtPageStart;
      let kept = 0;
      const walker = document.createTreeWalker(
        pageElement,
        NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_TEXT
      );
      let n;
      while ((n = walker.nextNode())) {
        if (n.__pagedjs_clone_tag) kept++;
      }
      perPage.push({ appended: appendedThisPage, kept });
    }

    // Print totals, the per-page overshoot distribution and percentile
    // cutpoints. "Overshoot" is appended minus kept.
    afterRendered(pages) {
      let totalAppended = 0;
      let totalKept = 0;
      let pagesWithOvershoot = 0;
      let maxOvershoot = 0;
      let maxOvershootPage = -1;
      const pcts = [];
      perPage.forEach((entry, idx) => {
        totalAppended += entry.appended;
        totalKept += entry.kept;
        const over = entry.appended - entry.kept;
        if (over > 0) pagesWithOvershoot++;
        if (over > maxOvershoot) {
          maxOvershoot = over;
          maxOvershootPage = idx;
        }
        pcts.push(entry.appended > 0 ? (over / entry.appended) * 100 : 0);
      });
      const totalOvershoot = totalAppended - totalKept;
      const pct = totalAppended > 0
        ? (totalOvershoot / totalAppended) * 100
        : 0;

      console.log(`[clone-count] pages=${pages.length}`);
      console.log(`[clone-count] Layout.append calls (source-walker leaf clones): ${totalAppended}`);
      console.log(`[clone-count] survivors in finalized pages: ${totalKept}`);
      console.log(`[clone-count] overshoot (appended-then-removed): ${totalOvershoot} (${pct.toFixed(1)}%)`);
      console.log(`[clone-count] pages with any overshoot: ${pagesWithOvershoot}/${pages.length}`);
      console.log(`[clone-count] max overshoot on one page: ${maxOvershoot} (page index ${maxOvershootPage}, appended=${perPage[maxOvershootPage]?.appended ?? 0})`);
      console.log(`[clone-count] gross Node.cloneNode calls (incl. rebuildAncestors, handlers, etc.): ${cloneNodeCalls}`);
      console.log(`[clone-count] non-Layout.append clones: ${cloneNodeCalls - totalAppended}`);

      // Per-page overshoot % buckets, half-open [lo, hi). The last bucket
      // ends at 101 so that exactly 100% is included.
      const buckets = [
        { lo: 0, hi: 1 },
        { lo: 1, hi: 5 },
        { lo: 5, hi: 10 },
        { lo: 10, hi: 20 },
        { lo: 20, hi: 30 },
        { lo: 30, hi: 50 },
        { lo: 50, hi: 101 },
      ];
      const counts = buckets.map(() => 0);
      for (const p of pcts) {
        for (let i = 0; i < buckets.length; i++) {
          if (p >= buckets[i].lo && p < buckets[i].hi) {
            counts[i]++;
            break;
          }
        }
      }
      console.log(`[clone-count] per-page overshoot % distribution:`);
      for (let i = 0; i < buckets.length; i++) {
        const b = buckets[i];
        const hi = b.hi === 101 ? '100' : String(b.hi);
        console.log(`[clone-count] ${String(b.lo).padStart(3)} - ${hi.padStart(3)}%: ${counts[i]} pages`);
      }

      // Cumulative percentile cutpoints.
      const sortedPcts = pcts.slice().sort((a, b) => a - b);
      // With no finalized pages there is nothing to index; report 0 instead
      // of calling .toFixed on undefined.
      const pickPct = (q) => (sortedPcts.length === 0
        ? 0
        : sortedPcts[Math.min(sortedPcts.length - 1, Math.floor(q * sortedPcts.length))]);
      // The maximum is the last sorted element. A 0.999 quantile stops short
      // of it once there are more than 1000 pages.
      const maxPct = sortedPcts.length === 0 ? 0 : sortedPcts[sortedPcts.length - 1];
      console.log(`[clone-count] per-page overshoot %: p50=${pickPct(0.5).toFixed(1)}% p90=${pickPct(0.9).toFixed(1)}% p99=${pickPct(0.99).toFixed(1)}% max=${maxPct.toFixed(1)}%`);
    }
  }

  Paged.registerHandlers(CloneCountHandler);
  console.log('[clone-count] handler registered');
})();
Loading
Loading