diff --git a/docs/lib/paged.browser.js b/docs/lib/paged.browser.js
index 533bf298..8d212d9e 100644
--- a/docs/lib/paged.browser.js
+++ b/docs/lib/paged.browser.js
@@ -8,51 +8,30 @@
(global = typeof globalThis !== 'undefined' ? globalThis : global || self, global.PagedPolyfill = factory());
})(this, (function () { 'use strict';
- function getBoundingClientRect(element) {
- if (!element) {
- return;
- }
- let rect;
- if (typeof element.getBoundingClientRect !== "undefined") {
- rect = element.getBoundingClientRect();
- } else {
- let range = document.createRange();
- range.selectNode(element);
- rect = range.getBoundingClientRect();
- }
- return rect;
- }
-
- function getClientRects(element) {
- if (!element) {
- return;
- }
- let rect;
- if (typeof element.getClientRects !== "undefined") {
- rect = element.getClientRects();
- } else {
- let range = document.createRange();
- range.selectNode(element);
- rect = range.getClientRects();
- }
- return rect;
- }
-
/**
- * Generates a UUID
- * based on: http://stackoverflow.com/questions/105034/how-to-create-a-guid-uuid-in-javascript
- * @returns {string} uuid
+ * Returns a unique-within-render id as a base36 string.
+ * Replaced the prior RFC 4122 v4 UUID generator -- our pipeline only
+ * needs uniqueness within a single render (data-ref attributes,
+ * generated CSS variable / selector names, internal object identity),
+ * not globally. Counter + base36 shaves the per-call cost from
+ * ~3us (Date.now + per-char replace closure) to ~50ns and keeps IDs
+ * short (max ~5 chars for the ~50k DOM nodes in a typical book).
+ *
+ * UUIDDecimal() below shares the same counter but returns the
+ * decimal representation -- needed at addRefs (the data-ref
+ * writer) so V8 auto-coerces the ref string to an integer index
+ * when used against `source.indexOfRefs` (an Array). Base36 strings
+ * like "1z" are not array indices and would be stored as named
+ * properties instead; decimal keeps entries in fast (holey, as
+ * ids start at 1) elements, saving ~2-3 MB vs the previous
+ * because their consumers don't index a JS array with the result.
*/
+ var __pagedjsCounter = 0;
function UUID() {
- var d = new Date().getTime();
- if (typeof performance !== "undefined" && typeof performance.now === "function") {
- d += performance.now(); //use high-precision timer if available
- }
- return "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".replace(/[xy]/g, function (c) {
- var r = (d + Math.random() * 16) % 16 | 0;
- d = Math.floor(d / 16);
- return (c === "x" ? r : (r & 0x3 | 0x8)).toString(16);
- });
+ return (++__pagedjsCounter).toString(36);
+ }
+ function UUIDDecimal() {
+ return (++__pagedjsCounter).toString();
}
function attr(element, attributes) {
@@ -169,8 +148,6 @@
this.reject = null;
- this.id = UUID();
-
this.promise = new Promise((resolve, reject) => {
this.resolve = resolve;
this.reject = reject;
@@ -178,8 +155,6 @@
Object.freeze(this);
}
- const requestIdleCallback = typeof window !== "undefined" && ("requestIdleCallback" in window ? window.requestIdleCallback : window.requestAnimationFrame);
-
function CSSValueToString(obj) {
return obj.value + (obj.unit || "");
}
@@ -903,28 +878,21 @@
return true;
}
+ // Cheap loop-detection key for the chunker's per-page Set.
+ // Common case (Element with its own data-ref): "ref|offset". Falls
+ // back to "parentRef|siblingIndex|offset" for Text/Comment nodes
+ // where the ref lives on the parent. The string is opaque to all
+ // callers other than chunker.flow's loop guard, so the format only
+ // needs to be unique-per-break-point, not human-readable.
toJSON(hash) {
- let node;
- let index = 0;
- if (!this.node) {
- return {};
- }
+ if (!this.node) return "";
if (isElement(this.node) && this.node.dataset.ref) {
- node = this.node.dataset.ref;
- } else if (hash) {
- node = this.node.parentElement.dataset.ref;
+ return this.node.dataset.ref + "|" + (this.offset || 0);
}
-
- if (this.node.parentElement) {
- const children = Array.from(this.node.parentElement.childNodes);
- index = children.indexOf(this.node);
- }
-
- return JSON.stringify({
- "node": node,
- "index" : index,
- "offset": this.offset
- });
+ const parent = this.node.parentElement;
+ const parentRef = parent ? parent.dataset.ref : "";
+ const index = parent ? Array.prototype.indexOf.call(parent.childNodes, this.node) : 0;
+ return parentRef + "|" + index + "|" + (this.offset || 0);
}
}
@@ -1011,32 +979,6 @@
return true;
};
- var isImplemented$7 = function () {
- var assign = Object.assign, obj;
- if (typeof assign !== "function") return false;
- obj = { foo: "raz" };
- assign(obj, { bar: "dwa" }, { trzy: "trzy" });
- return obj.foo + obj.bar + obj.trzy === "razdwatrzy";
- };
-
- var isImplemented$6;
- var hasRequiredIsImplemented$2;
-
- function requireIsImplemented$2 () {
- if (hasRequiredIsImplemented$2) return isImplemented$6;
- hasRequiredIsImplemented$2 = 1;
-
- isImplemented$6 = function () {
- try {
- Object.keys("primitive");
- return true;
- } catch (e) {
- return false;
- }
- };
- return isImplemented$6;
- }
-
// eslint-disable-next-line no-empty-function
var noop$4 = function () {};
@@ -1044,32 +986,6 @@
var isValue$4 = function (val) { return val !== _undefined && val !== null; };
- var shim$5;
- var hasRequiredShim$5;
-
- function requireShim$5 () {
- if (hasRequiredShim$5) return shim$5;
- hasRequiredShim$5 = 1;
-
- var isValue = isValue$4;
-
- var keys = Object.keys;
-
- shim$5 = function (object) { return keys(isValue(object) ? Object(object) : object); };
- return shim$5;
- }
-
- var keys;
- var hasRequiredKeys;
-
- function requireKeys () {
- if (hasRequiredKeys) return keys;
- hasRequiredKeys = 1;
-
- keys = requireIsImplemented$2()() ? Object.keys : requireShim$5();
- return keys;
- }
-
var isValue$3 = isValue$4;
var validValue = function (value) {
@@ -1077,39 +993,6 @@
return value;
};
- var shim$4;
- var hasRequiredShim$4;
-
- function requireShim$4 () {
- if (hasRequiredShim$4) return shim$4;
- hasRequiredShim$4 = 1;
-
- var keys = requireKeys()
- , value = validValue
- , max = Math.max;
-
- shim$4 = function (dest, src /*, …srcn*/) {
- var error, i, length = max(arguments.length, 2), assign;
- dest = Object(value(dest));
- assign = function (key) {
- try {
- dest[key] = src[key];
- } catch (e) {
- if (!error) error = e;
- }
- };
- for (i = 1; i < length; ++i) {
- src = arguments[i];
- keys(src).forEach(assign);
- }
- if (error !== undefined) throw error;
- return dest;
- };
- return shim$4;
- }
-
- var assign$2 = isImplemented$7() ? Object.assign : requireShim$4();
-
var isValue$2 = isValue$4;
var forEach$1 = Array.prototype.forEach, create$5 = Object.create;
@@ -1129,35 +1012,9 @@
return result;
};
- var str = "razdwatrzy";
-
- var isImplemented$5 = function () {
- if (typeof str.contains !== "function") return false;
- return str.contains("dwa") === true && str.contains("foo") === false;
- };
-
- var shim$3;
- var hasRequiredShim$3;
-
- function requireShim$3 () {
- if (hasRequiredShim$3) return shim$3;
- hasRequiredShim$3 = 1;
-
- var indexOf = String.prototype.indexOf;
-
- shim$3 = function (searchString /*, position*/) {
- return indexOf.call(this, searchString, arguments[1]) > -1;
- };
- return shim$3;
- }
-
- var contains$1 = isImplemented$5() ? String.prototype.contains : requireShim$3();
-
var isValue$1 = is$4
, isPlainFunction = is
- , assign$1 = assign$2
- , normalizeOpts = normalizeOptions
- , contains = contains$1;
+ , normalizeOpts = normalizeOptions;
var d$1 = (d$2.exports = function (dscr, value/*, options*/) {
var c, e, w, options, desc;
@@ -1169,16 +1026,16 @@
options = arguments[2];
}
if (isValue$1(dscr)) {
- c = contains.call(dscr, "c");
- e = contains.call(dscr, "e");
- w = contains.call(dscr, "w");
+ c = dscr.includes("c");
+ e = dscr.includes("e");
+ w = dscr.includes("w");
} else {
c = w = true;
e = false;
}
desc = { value: value, configurable: c, enumerable: e, writable: w };
- return !options ? desc : assign$1(normalizeOpts(options), desc);
+ return !options ? desc : Object.assign(normalizeOpts(options), desc);
});
d$1.gs = function (dscr, get, set/*, options*/) {
@@ -1203,15 +1060,15 @@
set = undefined;
}
if (isValue$1(dscr)) {
- c = contains.call(dscr, "c");
- e = contains.call(dscr, "e");
+ c = dscr.includes("c");
+ e = dscr.includes("e");
} else {
c = true;
e = false;
}
desc = { get: get, set: set, configurable: c, enumerable: e };
- return !options ? desc : assign$1(normalizeOpts(options), desc);
+ return !options ? desc : Object.assign(normalizeOpts(options), desc);
};
var dExports = d$2.exports;
@@ -1366,6 +1223,25 @@
* @param {any} context scope of this
* @example this.content = new Hook(this);
*/
+ // [PATCH: sync-chain] Used by the chunker hot path to confirm that
+ // Hook.trigger() returned the sync sentinel (undefined). If a handler
+ // returned a thenable, the chunker dropping it here would silently
+ // lose async work -- so we throw instead. Limitation of this fork:
+ // the per-page hooks (beforePageLayout / afterPageLayout /
+ // finalizePage / handleBreaks / *layout / page.layout etc.) must
+ // have all-synchronous handlers. Bundle ships with no async handlers
+ // for these on our pipeline; document and assert.
+ function _assertSync(triggerResult, hookName) {
+ if (triggerResult && typeof triggerResult.then === "function") {
+ throw new Error(
+ "paged.js (forked): async handler registered for hook '" + hookName + "'. " +
+ "This bundle's per-page hot path is synchronous; async handlers " +
+ "must be registered for the once-per-render hooks (beforeParsed, " +
+ "afterParsed, afterRendered) instead, or the chain re-asyncified."
+ );
+ }
+ }
+
class Hook {
constructor(context){
this.context = context || this;
@@ -1391,39 +1267,61 @@
}
/**
- * Triggers a hook to run all functions
- * @example this.content.trigger(args).then(function(){...});
- * @return {Promise} results
+ * Triggers a hook to run all functions.
+ * @return {Promise|undefined} A Promise that resolves when all
+ * thenable-returning handlers settle, OR `undefined` if no
+ * handler returned a thenable (the all-synchronous fast path).
+ *
+ * [PATCH: hook-fast-path] Upstream always wrapped sync handler
+ * results in `new Promise(resolve => resolve(...))` and returned
+ * `Promise.all(promises)`, so callers' `await trigger(...)` was a
+ * mandatory microtask boundary even when every handler resolved
+ * synchronously. We return `undefined` on the all-sync path so
+ * callers can write:
+ *
+ * let p = hook.trigger(...);
+ * if (p) await p;
+ *
+ * and skip the microtask boundary entirely. Per-page hot-loop
+ * sites such as renderTo and handleBreaks go further and assert
+ * the sync path with _assertSync(); one-shot callers can keep
+ * `await trigger(...)` (`await undefined` works, one extra tick).
*/
trigger(){
var args = arguments;
var context = this.context;
- var promises = [];
-
- this.hooks.forEach(function(task) {
- var executing = task.apply(context, args);
+ var promises;
- if(executing && typeof executing["then"] === "function") {
- // Task is a function that returns a promise
- promises.push(executing);
- } else {
- // Otherwise Task resolves immediately, add resolved promise with result
- promises.push(new Promise((resolve, reject) => {
- resolve(executing);
- }));
+ for (var i = 0; i < this.hooks.length; i++) {
+ var executing = this.hooks[i].apply(context, args);
+ if (executing && typeof executing["then"] === "function") {
+ (promises = promises || []).push(executing);
}
- });
-
+ }
- return Promise.all(promises);
+ return promises ? Promise.all(promises) : undefined;
}
/**
- * Triggers a hook to run all functions synchronously
- * @example this.content.trigger(args).then(function(){...});
- * @return {Array} results
+ * Triggers a hook to run all functions synchronously.
+ * @return {Array|undefined} results array, or undefined when no
+ * handlers are registered (callers can skip their reducer
+ * forEach with a simple truthy check).
+ *
+ * [PATCH: hook-fast-path-sync] Mirrors the async `trigger()` patch:
+ * skip the results-array alloc and the empty-forEach indirection
+ * when this.hooks is empty. In the per-page hot path, onOverflow
+ * and onBreakToken have zero registered handlers in this build,
+ * so every call to those two hooks via triggerSync was pure
+ * dispatch overhead -- ~3300 calls per render on the 1650-page
+ * book. Callers in those reducer sites now read:
+ *
+ * let r = hook.triggerSync(...);
+ * if (r) r.forEach(...);
*/
triggerSync(){
+ if (this.hooks.length === 0) return undefined;
+
var args = arguments;
var context = this.context;
var results = [];
@@ -1488,10 +1386,35 @@
this.settings = options || {};
this.maxChars = this.settings.maxChars || MAX_CHARS_PER_BREAK;
+
+ // [PATCH: parent-lookup-cache] One-entry memo of the last
+ // (sourceParent, dest) -> destParent resolution from append().
+ // Consecutive siblings in the source tree all resolve to the
+ // same destParent, so caching the previous result lets the
+ // per-call findElement / indexOfRefs lookup short-circuit.
+ // Invalidated at the start of every renderTo; safe within a
+ // single renderTo loop because append() never detaches DOM
+ // from dest (removeOverflow only fires after the loop exits).
+ this._lastSrcParent = null;
+ this._lastDest = null;
+ this._lastDestParent = null;
this.forceRenderBreak = false;
}
- async renderTo(wrapper, source, breakToken, bounds = this.bounds) {
+ // [PATCH: sync-chain] renderTo no longer needs to be async because
+ // waitForImages is now sync (see its comment). Removing `async`
+ // removes the per-page Promise allocation that was returned from
+ // page.layout / chunker.layout up the chain.
+ renderTo(wrapper, source, breakToken, bounds = this.bounds) {
+ // [PATCH: parent-lookup-cache] Invalidate the per-Layout
+ // parent memo. The previous renderTo on this Layout instance
+ // (same Page, multiple renderTo calls) may have run
+ // findBreakToken -> removeOverflow at exit, leaving the
+ // cached destParent detached.
+ this._lastSrcParent = null;
+ this._lastDest = null;
+ this._lastDestParent = null;
+
let start = this.getStart(source, breakToken);
let walker = walk$2(start, source);
@@ -1518,7 +1441,7 @@
let prevBreakToken = breakToken || new BreakToken(start);
- this.hooks && this.hooks.onPageLayout.trigger(wrapper, prevBreakToken, this);
+ if (this.hooks) _assertSync(this.hooks.onPageLayout.trigger(wrapper, prevBreakToken, this), "onPageLayout");
while (!done && !newBreakToken) {
next = walker.next();
@@ -1527,36 +1450,36 @@
done = next.done;
if (!node) {
- this.hooks && this.hooks.layout.trigger(wrapper, this);
+ if (this.hooks) _assertSync(this.hooks.layout.trigger(wrapper, this), "layout");
let imgs = wrapper.querySelectorAll("img");
if (imgs.length) {
- await this.waitForImages(imgs);
+ this.waitForImages(imgs);
}
newBreakToken = this.findBreakToken(wrapper, source, bounds, prevBreakToken);
if (newBreakToken && newBreakToken.equals(prevBreakToken)) {
console.warn("Unable to layout item: ", prevNode);
- this.hooks && this.hooks.beforeRenderResult.trigger(undefined, wrapper, this);
+ if (this.hooks) _assertSync(this.hooks.beforeRenderResult.trigger(undefined, wrapper, this), "beforeRenderResult");
return new RenderResult(undefined, new OverflowContentError("Unable to layout item", [prevNode]));
}
this.rebuildTableFromBreakToken(newBreakToken, wrapper);
- this.hooks && this.hooks.beforeRenderResult.trigger(newBreakToken, wrapper, this);
+ if (this.hooks) _assertSync(this.hooks.beforeRenderResult.trigger(newBreakToken, wrapper, this), "beforeRenderResult");
return new RenderResult(newBreakToken);
}
- this.hooks && this.hooks.layoutNode.trigger(node);
+ if (this.hooks) _assertSync(this.hooks.layoutNode.trigger(node), "layoutNode");
// Check if the rendered element has a break set
if (hasRenderedContent && this.shouldBreak(node, start)) {
- this.hooks && this.hooks.layout.trigger(wrapper, this);
+ if (this.hooks) _assertSync(this.hooks.layout.trigger(wrapper, this), "layout");
let imgs = wrapper.querySelectorAll("img");
if (imgs.length) {
- await this.waitForImages(imgs);
+ this.waitForImages(imgs);
}
newBreakToken = this.findBreakToken(wrapper, source, bounds, prevBreakToken);
@@ -1611,7 +1534,7 @@
}
if (this.forceRenderBreak) {
- this.hooks && this.hooks.layout.trigger(wrapper, this);
+ if (this.hooks) _assertSync(this.hooks.layout.trigger(wrapper, this), "layout");
newBreakToken = this.findBreakToken(wrapper, source, bounds, prevBreakToken);
@@ -1630,11 +1553,11 @@
// Only check overflow once per maxChars of new content.
if (length - lengthAtLastCheck >= this.maxChars) {
- this.hooks && this.hooks.layout.trigger(wrapper, this);
+ if (this.hooks) _assertSync(this.hooks.layout.trigger(wrapper, this), "layout");
let imgs = wrapper.querySelectorAll("img");
if (imgs.length) {
- await this.waitForImages(imgs);
+ this.waitForImages(imgs);
}
newBreakToken = this.findBreakToken(wrapper, source, bounds, prevBreakToken);
@@ -1653,7 +1576,7 @@
if (after) {
newBreakToken = new BreakToken(after);
} else {
- this.hooks && this.hooks.beforeRenderResult.trigger(undefined, wrapper, this);
+ if (this.hooks) _assertSync(this.hooks.beforeRenderResult.trigger(undefined, wrapper, this), "beforeRenderResult");
return new RenderResult(undefined, new OverflowContentError("Unable to layout item", [node]));
}
}
@@ -1661,7 +1584,7 @@
}
- this.hooks && this.hooks.beforeRenderResult.trigger(newBreakToken, wrapper, this);
+ if (this.hooks) _assertSync(this.hooks.beforeRenderResult.trigger(newBreakToken, wrapper, this), "beforeRenderResult");
return new RenderResult(newBreakToken);
}
@@ -1671,7 +1594,7 @@
offset
);
let breakHooks = this.hooks.onBreakToken.triggerSync(newBreakToken, undefined, node, this);
- breakHooks.forEach((newToken) => {
+ if (breakHooks) breakHooks.forEach((newToken) => {
if (typeof newToken != "undefined") {
newBreakToken = newToken;
}
@@ -1715,7 +1638,16 @@
let clone = cloneNode(node, !shallow);
if (node.parentNode && isElement(node.parentNode)) {
- let parent = findElement(node.parentNode, dest);
+ const srcParent = node.parentNode;
+ // [PATCH: parent-lookup-cache] Consecutive sibling appends
+ // share the same source parent; reuse the prior result
+ // instead of walking dest.indexOfRefs again.
+ let parent;
+ if (srcParent === this._lastSrcParent && dest === this._lastDest) {
+ parent = this._lastDestParent;
+ } else {
+ parent = findElement(srcParent, dest);
+ }
// Rebuild chain
if (parent) {
parent.appendChild(clone);
@@ -1747,20 +1679,38 @@
dest.appendChild(clone);
}
+ // [PATCH: parent-lookup-cache] Cache the resolved (or
+ // rebuilt-and-attached) parent so the next sibling can
+ // skip the lookup. Skip on the no-rebuild fall-through
+ // where parent stayed null -- a later call with the same
+ // srcParent should still attempt the lookup.
+ if (parent) {
+ this._lastSrcParent = srcParent;
+ this._lastDest = dest;
+ this._lastDestParent = parent;
+ }
} else {
dest.appendChild(clone);
}
- if (clone.dataset && clone.dataset.ref) {
+ // [PATCH: append-ref-local] Cache clone.dataset.ref in a
+ // local. Each .ref access goes through getAttribute and
+ // allocates a fresh JS string; the existence check + dict
+ // write were two reads of the same value. Saves one string
+ // allocation per ~50k append calls on the book (~1.5 MB
+ // heap per paired heap-sampling A/B at 512 B sampling).
+ // Same shape as PATCH: addRefs-uuid-local further down (addRefs).
+ const ref = clone.dataset && clone.dataset.ref;
+ if (ref) {
if (!dest.indexOfRefs) {
dest.indexOfRefs = {};
}
- dest.indexOfRefs[clone.dataset.ref] = clone;
+ dest.indexOfRefs[ref] = clone;
}
let nodeHooks = this.hooks.renderNode.triggerSync(clone, node, this);
- nodeHooks.forEach((newNode) => {
+ if (nodeHooks) nodeHooks.forEach((newNode) => {
if (typeof newNode != "undefined") {
clone = newNode;
}
@@ -1786,29 +1736,27 @@
}
}
- async waitForImages(imgs) {
- let results = Array.from(imgs).map(async (img) => {
- return this.awaitImageLoaded(img);
- });
- await Promise.all(results);
- }
-
- async awaitImageLoaded(image) {
- return new Promise(resolve => {
- if (image.complete !== true) {
- image.onload = function () {
- let {width, height} = window.getComputedStyle(image);
- resolve(width, height);
- };
- image.onerror = function (e) {
- let {width, height} = window.getComputedStyle(image);
- resolve(width, height, e);
- };
- } else {
- let {width, height} = window.getComputedStyle(image);
- resolve(width, height);
+ // [PATCH: sync-chain] waitForImages used to wrap every image in
+ // `new Promise(resolve => ...)` and await `Promise.all(...)`, so
+ // `renderTo` was forced to be async even when every image was
+ // already loaded (which is our case -- page.goto(url, {
+ // waitUntil: "load" }) settles before paged.js starts rendering).
+ //
+ // In our headless pipeline image.complete is always true at this
+ // point. If a future caller hits this with a not-yet-loaded
+ // image, that's a pipeline bug and we throw immediately rather
+ // than silently making the rest of the layout chain async again.
+ waitForImages(imgs) {
+ for (const img of imgs) {
+ if (img.complete !== true) {
+ throw new Error(
+ "paged.js (forked): image not loaded at render time. " +
+ "This branch dropped async image-loading support; the " +
+ "render pipeline must finish loading all images before " +
+ "calling paged.js. Image: " + (img.src || img.outerHTML)
+ );
}
- });
+ }
}
avoidBreakInside(node, limiter) {
@@ -1927,7 +1875,7 @@
let breakToken, breakLetter;
let overflowHooks = this.hooks.onOverflow.triggerSync(overflow, rendered, bounds, this);
- overflowHooks.forEach((newOverflow) => {
+ if (overflowHooks) overflowHooks.forEach((newOverflow) => {
if (typeof newOverflow != "undefined") {
overflow = newOverflow;
}
@@ -1937,7 +1885,7 @@
breakToken = this.createBreakToken(overflow, rendered, source);
// breakToken is nullable
let breakHooks = this.hooks.onBreakToken.triggerSync(breakToken, overflow, rendered, this);
- breakHooks.forEach((newToken) => {
+ if (breakHooks) breakHooks.forEach((newToken) => {
if (typeof newToken != "undefined") {
breakToken = newToken;
}
@@ -1956,7 +1904,12 @@
if (breakToken && breakToken.node && extract) {
let removed = this.removeOverflow(overflow, breakLetter);
- this.hooks && this.hooks.afterOverflowRemoved.trigger(removed, rendered, this);
+ // [PATCH: assert-sync] Guard against silent async-handler
+ // drop. Upstream fired the trigger without `await`, so any
+ // async handler's work would have been lost. _assertSync
+ // throws instead if a handler returns a thenable -- the
+ // fork's per-page hot path is synchronous, see Hook.trigger.
+ if (this.hooks) _assertSync(this.hooks.afterOverflowRemoved.trigger(removed, rendered, this), "afterOverflowRemoved");
}
}
@@ -1995,7 +1948,14 @@
br = undefined;
if (node) {
- let pos = getBoundingClientRect(node);
+ let pos;
+ if (node.nodeType === 1) {
+ pos = node.getBoundingClientRect();
+ } else {
+ let range = document.createRange();
+ range.selectNode(node);
+ pos = range.getBoundingClientRect();
+ }
let left = Math.round(pos.left);
let right = Math.floor(pos.right);
let top = Math.round(pos.top);
@@ -2092,7 +2052,9 @@
node.textContent.trim().length &&
!breakInsideAvoidParentNode(node.parentNode)) {
- let rects = getClientRects(node);
+ let textRange = document.createRange();
+ textRange.selectNode(node);
+ let rects = textRange.getClientRects();
let rect;
left = 0;
top = 0;
@@ -2199,7 +2161,7 @@
break;
}
- pos = getBoundingClientRect(word);
+ pos = word.getBoundingClientRect();
left = Math.floor(pos.left);
right = Math.floor(pos.right);
@@ -2224,7 +2186,7 @@
break;
}
- pos = getBoundingClientRect(letter);
+ pos = letter.getBoundingClientRect();
left = Math.floor(pos.left);
top = Math.floor(pos.top);
@@ -2244,7 +2206,25 @@
removeOverflow(overflow, breakLetter) {
let {startContainer} = overflow;
- let extracted = overflow.extractContents();
+
+ // [PATCH: extract-vs-delete] Range.extractContents() moves the
+ // removed nodes into a new DocumentFragment and returns it;
+ // Range.deleteContents() just removes. The only consumer of
+ // the returned fragment is Footnotes.afterOverflowRemoved,
+ // which iterates the rendered area's footnotes and for each
+ // looks up its [data-footnote-call=...] in the removed fragment.
+ // So extractContents is only useful if the rendered area
+ // contained any footnote-call elements. Check via a cheap
+ // querySelector on `this.element` (the page content area --
+ // `.pagedjs_page_content`); when no calls are present we
+ // skip the fragment build entirely.
+ let extracted;
+ if (this.element && this.element.querySelector("[data-footnote-call]")) {
+ extracted = overflow.extractContents();
+ } else {
+ overflow.deleteContents();
+ extracted = null;
+ }
this.hyphenateAtBreak(startContainer, breakLetter);
@@ -2403,20 +2383,33 @@
}
*/
- async layout(contents, breakToken, maxChars) {
+ // [PATCH: sync-chain] page.layout / append no longer await
+ // renderTo (which is now sync). Removing `async` removes the
+ // Promise allocation around each return.
+ layout(contents, breakToken, maxChars) {
this.clear();
this.startToken = breakToken;
let settings = this.settings;
- if (!settings.maxChars && maxChars) {
+ // [PATCH: maxChars-propagate] Upstream gated this on
+ // `!settings.maxChars`, which froze the chunker's running
+ // estimate at whatever value the first non-empty page produced.
+ // On a book whose first real page happens to be short, the
+ // estimate locked in tiny (e.g. 177 chars) and every later page
+ // fell back to checking overflow every 177 chars of new
+ // content -- ~5 hasOverflow / gBCR layout flushes per page on
+ // average, where 1-2 would suffice. Always propagate so each
+ // page picks up the most recent estimate; the chunker's
+ // recordCharLength still drives that value.
+ if (maxChars) {
settings.maxChars = maxChars;
}
this.layoutMethod = new Layout(this.area, this.hooks, settings);
- let renderResult = await this.layoutMethod.renderTo(this.wrapper, contents, breakToken);
+ let renderResult = this.layoutMethod.renderTo(this.wrapper, contents, breakToken);
let newBreakToken = renderResult.breakToken;
this.addListeners(contents);
@@ -2426,13 +2419,13 @@
return newBreakToken;
}
- async append(contents, breakToken) {
+ append(contents, breakToken) {
if (!this.layoutMethod) {
return this.layout(contents, breakToken);
}
- let renderResult = await this.layoutMethod.renderTo(this.wrapper, contents, breakToken);
+ let renderResult = this.layoutMethod.renderTo(this.wrapper, contents, breakToken);
let newBreakToken = renderResult.breakToken;
this.endToken = newBreakToken;
@@ -2465,14 +2458,7 @@
}
addListeners(contents) {
- if (typeof ResizeObserver !== "undefined") {
- this.addResizeObserver(contents);
- } else {
- this._checkOverflowAfterResize = this.checkOverflowAfterResize.bind(this, contents);
- this.element.addEventListener("overflow", this._checkOverflowAfterResize, false);
- this.element.addEventListener("underflow", this._checkOverflowAfterResize, false);
- }
- // TODO: fall back to mutation observer?
+ this.addResizeObserver(contents);
this._onScroll = function () {
if (this.listening) {
@@ -2491,11 +2477,8 @@
removeListeners() {
this.listening = false;
- if (typeof ResizeObserver !== "undefined" && this.ro) {
+ if (this.ro) {
this.ro.disconnect();
- } else if (this.element) {
- this.element.removeEventListener("overflow", this._checkOverflowAfterResize, false);
- this.element.removeEventListener("underflow", this._checkOverflowAfterResize, false);
}
this.element && this.element.removeEventListener("scroll", this._onScroll);
@@ -2620,13 +2603,40 @@
// which scans the entire source DOM (thousands of nodes). Measured
// as 848 + 42 noDict calls in createBreakToken ≈ 1+ s of render on
// the 1651-page book.
- if (!content.indexOfRefs) content.indexOfRefs = {};
+ //
+ // [PATCH: source-indexOfRefs-array] Use an Array (dense, sequential
+ // integer keys via the decimal UUID counter -- see UUID()) instead
+ // of a dict. V8 keeps it in fast (holey) elements: ~8 B per slot vs
+ // ~40-50 B per dict entry. dest/fragment.indexOfRefs (sparse) stay
+ // dicts at their own init sites. `findRef` does `arr[ref]` either
+ // way -- V8 coerces the decimal-string ref to an array index
+ // transparently, so no caller-side branch is needed.
+ //
+ // [PATCH: source-indexOfRefs-presize] Size the array up front
+ // from the live HTMLCollection's .length. V8 grows arrays
+ // geometrically -- writing slots 1..N via doubling does
+ // log2(N) backing-store reallocations, each allocating the
+ // new store and orphaning the old (transient bytes ~= 2x the
+ // final size). Pre-sizing skips all of that.
+ if (!content.indexOfRefs) {
+ const elementCount = content.getElementsByTagName ? content.getElementsByTagName("*").length : 0;
+ content.indexOfRefs = new Array(elementCount + 1);
+ }
let node = treeWalker.nextNode();
while(node) {
- if (!node.hasAttribute("data-ref")) {
- let uuid = UUID();
+ // [PATCH: addRefs-uuid-local] Read data-ref once via
+ // getAttribute (null-tested as the existence check),
+ // reuse the local string for indexOfRefs. Previously
+ // hasAttribute + setAttribute + getAttribute on the
+ // new-uuid branch caused one extra DOM read and one
+ // duplicate string allocation per ~50k source nodes
+ // (~460 KB heap on the book per paired heap-sampling
+ // A/B at 4 KB sampling).
+ let uuid = node.getAttribute("data-ref");
+ if (!uuid) {
+ uuid = UUIDDecimal();
node.setAttribute("data-ref", uuid);
}
@@ -2638,8 +2648,7 @@
// node.setAttribute("data-text", node.textContent.trim().length);
- // [PATCH: findRef fast-path] record after data-ref is guaranteed.
- content.indexOfRefs[node.getAttribute("data-ref")] = node;
+ content.indexOfRefs[uuid] = node;
node = treeWalker.nextNode();
}
@@ -2664,7 +2673,13 @@
constructor(context){
this._q = [];
this.context = context;
- this.tick = requestAnimationFrame;
+ // [PATCH: queue-tick] Upstream uses requestAnimationFrame as the
+ // per-task tick, which on a headless puppeteer render still waits
+ // per frame even with no compositor. Across 1651 pages that's
+ // ~700 ms of V8 (idle). queueMicrotask schedules on the microtask
+ // queue and fires before the next event-loop iteration, dropping
+ // the per-page wait to microsecond-scale.
+ this.tick = (cb) => queueMicrotask(cb);
this.running = false;
this.paused = false;
}
@@ -3064,14 +3079,20 @@
// }
// }
+ // [PATCH: sync-chain] *layout is a sync generator now, so
+ // renderer.next() returns synchronously -- no per-page await.
+ // render() itself stays `async` because callers (flow()) await
+ // it and other once-per-render awaits in flow() (loadFonts,
+ // beforeParsed / afterParsed / afterRendered) still need it.
async render(parsed, startAt) {
let renderer = this.layout(parsed, startAt);
- let done = false;
let result;
- while (!done) {
- result = await this.q.enqueue(() => { return this.renderAsync(renderer); });
- done = result.done;
+ while (true) {
+ if (this.stopped) return { done: true, canceled: true };
+ result = renderer.next();
+ if (this.stopped) return { done: true, canceled: true };
+ if (result.done) break;
}
return result;
@@ -3087,35 +3108,18 @@
// this.q.clear();
}
- renderOnIdle(renderer) {
- return new Promise(resolve => {
- requestIdleCallback(async () => {
- if (this.stopped) {
- return resolve({ done: true, canceled: true });
- }
- let result = await renderer.next();
- if (this.stopped) {
- resolve({ done: true, canceled: true });
- } else {
- resolve(result);
- }
- });
- });
- }
-
- async renderAsync(renderer) {
- if (this.stopped) {
- return { done: true, canceled: true };
- }
- let result = await renderer.next();
- if (this.stopped) {
- return { done: true, canceled: true };
- } else {
- return result;
- }
- }
+ // [PATCH: sync-chain] renderOnIdle and renderAsync removed --
+ // both wrapped renderer.next() (now sync) in async machinery,
+ // and the only caller (render() via this.q.enqueue) was already
+ // removed in the drop-queue change.
- async handleBreaks(node, force) {
+ // [PATCH: sync-chain] handleBreaks no longer awaits hook triggers
+ // (Hook.trigger returns undefined on the all-sync path, which is
+ // our only path). If a future caller registers an async handler
+ // for any of these hooks, Hook.trigger will return a Promise and
+ // dropping it here will silently lose the work -- we assert that
+ // instead. The `_assertSync` helper is defined above, before class Hook.
+ handleBreaks(node, force) {
let currentPage = this.total + 1;
let currentPosition = currentPage % 2 === 0 ? "left" : "right";
// TODO: Recto and Verso should reverse for rtl languages
@@ -3161,52 +3165,62 @@
}
if (page) {
- await this.hooks.beforePageLayout.trigger(page, undefined, undefined, this);
+ _assertSync(this.hooks.beforePageLayout.trigger(page, undefined, undefined, this), "beforePageLayout");
this.emit("page", page);
- // await this.hooks.layout.trigger(page.element, page, undefined, this);
- await this.hooks.afterPageLayout.trigger(page.element, page, undefined, this);
- await this.hooks.finalizePage.trigger(page.element, page, undefined, this);
+ _assertSync(this.hooks.afterPageLayout.trigger(page.element, page, undefined, this), "afterPageLayout");
+ _assertSync(this.hooks.finalizePage.trigger(page.element, page, undefined, this), "finalizePage");
this.emit("renderedPage", page);
}
}
- async *layout(content, startAt) {
+ // [PATCH: sync-chain] *layout is now a sync generator, not an
+ // async generator. With handleBreaks, page.layout, renderTo, and
+ // every per-page hook trigger all synchronous in our pipeline,
+ // nothing inside this generator needs to await. The sync form
+ // avoids ~1651 Promise allocations per render (one per
+ // `renderer.next()` call) and the matching microtask boundaries.
+ *layout(content, startAt) {
let breakToken = startAt || false;
- let tokens = [];
+ // [PATCH: tokens-set] Loop-detection used `tokens.lastIndexOf(...)`
+ // on an array, which scans up to N entries per page -- O(n^2)
+ // across a render. A Set gives O(1) lookup per page. The absolute
+ // saving on our 1651-page book is small (~80 us per late page);
+ // the point is removing the quadratic growth, not the constant.
+ let tokens = new Set();
while (breakToken !== undefined && (true)) {
if (breakToken && breakToken.node) {
- await this.handleBreaks(breakToken.node);
+ this.handleBreaks(breakToken.node);
} else {
- await this.handleBreaks(content.firstChild);
+ this.handleBreaks(content.firstChild);
}
let page = this.addPage();
- await this.hooks.beforePageLayout.trigger(page, content, breakToken, this);
+ _assertSync(this.hooks.beforePageLayout.trigger(page, content, breakToken, this), "beforePageLayout");
this.emit("page", page);
// Layout content in the page, starting from the breakToken
- breakToken = await page.layout(content, breakToken, this.maxChars);
+ breakToken = page.layout(content, breakToken, this.maxChars);
if (breakToken) {
let newToken = breakToken.toJSON(true);
- if (tokens.lastIndexOf(newToken) > -1) {
+ if (tokens.has(newToken)) {
// loop
let err = new OverflowContentError("Layout repeated", [breakToken.node]);
console.error("Layout repeated at: ", breakToken.node);
return err;
} else {
- tokens.push(newToken);
+ tokens.add(newToken);
}
}
- await this.hooks.afterPageLayout.trigger(page.element, page, breakToken, this);
- await this.hooks.finalizePage.trigger(page.element, page, undefined, this);
+ _assertSync(this.hooks.afterPageLayout.trigger(page.element, page, breakToken, this), "afterPageLayout");
+ _assertSync(this.hooks.finalizePage.trigger(page.element, page, undefined, this), "finalizePage");
this.emit("renderedPage", page);
- this.recoredCharLength(page.wrapper.textContent.length);
+ this.recordCharLength(page.wrapper.textContent.length);
yield breakToken;
@@ -3216,19 +3230,33 @@
}
- recoredCharLength(length) {
+ recordCharLength(length) {
if (length === 0) {
return;
}
this.charsPerBreak.push(length);
- // Keep the length of the last few breaks
- if (this.charsPerBreak.length > 4) {
+ // [PATCH: maxChars-running-max] Upstream tracked the running
+ // average over the last 4 page text-content lengths and used
+ // it as `maxChars`, the renderTo overflow-check period.
+ // Average is the wrong statistic: short pages (chapter ends,
+ // part dividers) get recorded alongside full pages, dragging
+ // the estimate well below true page capacity. The check then
+ // fires several times per full page when one call would have
+ // sufficed -- each call forces a layout flush (hasOverflow /
+ // getBoundingClientRect). The max over a wider window (the last
+ // 16 pages) biases toward true capacity (the largest page
+ // recently seen), so overflow pages typically resolve in one check.
+ if (this.charsPerBreak.length > 16) {
this.charsPerBreak.shift();
}
- this.maxChars = this.charsPerBreak.reduce((a, b) => a + b, 0) / (this.charsPerBreak.length);
+ let m = 0;
+ for (let i = 0; i < this.charsPerBreak.length; i++) {
+ if (this.charsPerBreak[i] > m) m = this.charsPerBreak[i];
+ }
+ this.maxChars = m;
}
removePages(fromIndex=0) {
@@ -5199,11 +5227,10 @@
};
var MIN_SIZE = 16 * 1024;
- var SafeUint32Array = typeof Uint32Array !== 'undefined' ? Uint32Array : Array; // fallback on Array when TypedArray is not supported
var adoptBuffer$2 = function adoptBuffer(buffer, size) {
if (buffer === null || buffer.length < size) {
- return new SafeUint32Array(Math.max(size + 1024, MIN_SIZE));
+ return new Uint32Array(Math.max(size + 1024, MIN_SIZE));
}
return buffer;
@@ -10599,10 +10626,6 @@
* http://opensource.org/licenses/BSD-3-Clause
*/
- var util$2 = util$3;
- var has$1 = Object.prototype.hasOwnProperty;
- var hasNativeMap = typeof Map !== "undefined";
-
/**
* A data structure which is a combination of an array and a set. Adding a new
* member is O(1), testing for membership is O(1), and finding the index of an
@@ -10611,7 +10634,7 @@
*/
function ArraySet$1() {
this._array = [];
- this._set = hasNativeMap ? new Map() : Object.create(null);
+ this._set = new Map();
}
/**
@@ -10632,7 +10655,7 @@
* @returns Number
*/
ArraySet$1.prototype.size = function ArraySet_size() {
- return hasNativeMap ? this._set.size : Object.getOwnPropertyNames(this._set).length;
+ return this._set.size;
};
/**
@@ -10641,18 +10664,13 @@
* @param String aStr
*/
ArraySet$1.prototype.add = function ArraySet_add(aStr, aAllowDuplicates) {
- var sStr = hasNativeMap ? aStr : util$2.toSetString(aStr);
- var isDuplicate = hasNativeMap ? this.has(aStr) : has$1.call(this._set, sStr);
+ var isDuplicate = this.has(aStr);
var idx = this._array.length;
if (!isDuplicate || aAllowDuplicates) {
this._array.push(aStr);
}
if (!isDuplicate) {
- if (hasNativeMap) {
- this._set.set(aStr, idx);
- } else {
- this._set[sStr] = idx;
- }
+ this._set.set(aStr, idx);
}
};
@@ -10662,12 +10680,7 @@
* @param String aStr
*/
ArraySet$1.prototype.has = function ArraySet_has(aStr) {
- if (hasNativeMap) {
- return this._set.has(aStr);
- } else {
- var sStr = util$2.toSetString(aStr);
- return has$1.call(this._set, sStr);
- }
+ return this._set.has(aStr);
};
/**
@@ -10676,18 +10689,10 @@
* @param String aStr
*/
ArraySet$1.prototype.indexOf = function ArraySet_indexOf(aStr) {
- if (hasNativeMap) {
- var idx = this._set.get(aStr);
- if (idx >= 0) {
- return idx;
- }
- } else {
- var sStr = util$2.toSetString(aStr);
- if (has$1.call(this._set, sStr)) {
- return this._set[sStr];
- }
+ var idx = this._set.get(aStr);
+ if (idx >= 0) {
+ return idx;
}
-
throw new Error('"' + aStr + '" is not in the set.');
};
@@ -26517,10 +26522,6 @@
// Replace urls
this.replaceUrls(this.ast);
- // Scope
- this.id = UUID();
- // this.addScope(this.ast, this.uuid);
-
// Replace IDs with data-id
this.replaceIds(this.ast);
@@ -26551,7 +26552,7 @@
csstree.walk(ast, {
visit: "Url",
enter: (node, item, list) => {
- this.hooks.onUrl.trigger(node, item, list);
+ _assertSync(this.hooks.onUrl.trigger(node, item, list), "onUrl");
}
});
}
@@ -26563,17 +26564,17 @@
const basename = csstree.keyword(node.name).basename;
if (basename === "page") {
- this.hooks.onAtPage.trigger(node, item, list);
+ _assertSync(this.hooks.onAtPage.trigger(node, item, list), "onAtPage");
this.declarations(node, item, list);
}
if (basename === "media") {
- this.hooks.onAtMedia.trigger(node, item, list);
+ _assertSync(this.hooks.onAtMedia.trigger(node, item, list), "onAtMedia");
this.declarations(node, item, list);
}
if (basename === "import") {
- this.hooks.onImport.trigger(node, item, list);
+ _assertSync(this.hooks.onImport.trigger(node, item, list), "onImport");
this.imports(node, item, list);
}
}
@@ -26586,7 +26587,7 @@
visit: "Rule",
enter: (ruleNode, ruleItem, rulelist) => {
- this.hooks.onRule.trigger(ruleNode, ruleItem, rulelist);
+ _assertSync(this.hooks.onRule.trigger(ruleNode, ruleItem, rulelist), "onRule");
this.declarations(ruleNode, ruleItem, rulelist);
this.onSelector(ruleNode, ruleItem, rulelist);
@@ -26599,13 +26600,13 @@
visit: "Declaration",
enter: (declarationNode, dItem, dList) => {
- this.hooks.onDeclaration.trigger(declarationNode, dItem, dList, {ruleNode, ruleItem, rulelist});
+ _assertSync(this.hooks.onDeclaration.trigger(declarationNode, dItem, dList, {ruleNode, ruleItem, rulelist}), "onDeclaration");
if (declarationNode.property === "content") {
csstree.walk(declarationNode, {
visit: "Function",
enter: (funcNode, fItem, fList) => {
- this.hooks.onContent.trigger(funcNode, fItem, fList, {declarationNode, dItem, dList}, {ruleNode, ruleItem, rulelist});
+ _assertSync(this.hooks.onContent.trigger(funcNode, fItem, fList, {declarationNode, dItem, dList}, {ruleNode, ruleItem, rulelist}), "onContent");
}
});
}
@@ -26619,13 +26620,13 @@
csstree.walk(ruleNode, {
visit: "Selector",
enter: (selectNode, selectItem, selectList) => {
- this.hooks.onSelector.trigger(selectNode, selectItem, selectList, {ruleNode, ruleItem, rulelist});
+ _assertSync(this.hooks.onSelector.trigger(selectNode, selectItem, selectList, {ruleNode, ruleItem, rulelist}), "onSelector");
if (selectNode.children.forEach(node => {if (node.type === "PseudoElementSelector") {
csstree.walk(node, {
visit: "PseudoElementSelector",
enter: (pseudoNode, pItem, pList) => {
- this.hooks.onPseudoSelector.trigger(pseudoNode, pItem, pList, {selectNode, selectItem, selectList}, {ruleNode, ruleItem, rulelist});
+ _assertSync(this.hooks.onPseudoSelector.trigger(pseudoNode, pItem, pList, {selectNode, selectItem, selectList}, {ruleNode, ruleItem, rulelist}), "onPseudoSelector");
}
});
}}));
@@ -27637,13 +27638,41 @@
this.polisher = polisher;
this.caller = caller;
+ // [PATCH: handler-self-disable] Track each (hook, bound) pair we
+ // register so handlers that find nothing to do for a given render
+ // can splice themselves back out. Footnotes uses this to disappear
+ // when the document and CSS produced no footnote-marked nodes;
+ // combined with Hook.trigger/triggerSync's empty-handlers fast
+ // path, the per-page and per-node dispatches then short-circuit.
+ this._registered = {};
+
for (let name in hooks) {
if (name in this) {
let hook = hooks[name];
- hook.register(this[name].bind(this));
+ let bound = this[name].bind(this);
+ this._registered[name] = { hook, bound };
+ hook.register(bound);
}
}
}
+
+ /**
+ * Remove this handler's registered callbacks from every hook it
+ * subscribed to. Pass the name of the hook the caller is currently
+ * inside (e.g. `"afterParsed"`) to skip its own entry -- splicing
+ * the array being iterated would make the surrounding `trigger()`
+ * loop skip a sibling handler. The skipped entry stays registered:
+ * harmless on one-shot hooks; on recurring hooks, call again later.
+ */
+ _unregisterAll(except) {
+ for (const name in this._registered) {
+ if (name === except) continue;
+ const { hook, bound } = this._registered[name];
+ const idx = hook.hooks.indexOf(bound);
+ if (idx >= 0) hook.hooks.splice(idx, 1);
+ delete this._registered[name];
+ }
+ }
}
EventEmitter(Handler.prototype);
@@ -31281,6 +31310,18 @@
afterParsed(parsed) {
this.processFootnotes(parsed, this.footnotes);
+
+ // [PATCH: footnotes-self-disable] If neither source HTML nor CSS
+ // `float: footnote` rules produced any footnote-marked nodes, the
+ // remaining hooks (renderNode per element-node, afterPageLayout +
+ // beforePageLayout + afterOverflowRemoved per page) have nothing
+ // to do for the rest of this render. Unregister them so the
+ // empty-handlers fast-path in Hook.triggerSync short-circuits.
+ // afterParsed itself is skipped via `except` -- it's a one-shot
+ // and the surrounding trigger() loop is still iterating it.
+ if (!parsed.querySelector("[data-note='footnote']")) {
+ this._unregisterAll("afterParsed");
+ }
}
processFootnotes(parsed, notes) {
@@ -31336,7 +31377,12 @@
if (node.dataset.note === "footnote") {
notes = [node];
- } else if (node.dataset.hasNotes || node.querySelectorAll("[data-note='footnote']")) {
+ } else if (node.dataset.hasNotes) {
+ // Upstream wrote `|| node.querySelectorAll(...)` here, but a
+ // NodeList is always truthy (even empty), so this branch was always
+ // taken: without hasNotes the right arm scanned and the next line
+ // scanned again -- two subtree scans per element-node clone.
+ // NOTE(review): now relies on hasNotes marking every note ancestor.
notes = node.querySelectorAll("[data-note='footnote']");
}
@@ -31642,8 +31688,14 @@
let notes = area.querySelectorAll(".pagedjs_footnote_area [data-note='footnote']");
for (let n = 0; n < notes.length; n++) {
const note = notes[n];
- // Check if the call for that footnote has been removed with the overflow
- let call = removed.querySelector(`[data-footnote-call="${note.dataset.ref}"]`);
+ // [PATCH: extract-vs-delete] Guard `removed` access -- when
+ // removeOverflow took the deleteContents fast path (no
+ // footnotes in the rendered area), `removed` is null. In
+ // that case there are no rendered footnotes for the loop
+ // to iterate either, so we never actually enter this body.
+ // The guard is defensive: it covers a future removeOverflow whose
+ // pre-check lets `removed` be null while footnotes are rendered.
+ let call = removed && removed.querySelector(`[data-footnote-call="${note.dataset.ref}"]`);
if (call) {
note.remove();
}
@@ -32725,599 +32777,21 @@
UndisplayedFilter
];
- var isImplemented$4 = function () {
- var from = Array.from, arr, result;
- if (typeof from !== "function") return false;
- arr = ["raz", "dwa"];
- result = from(arr);
- return Boolean(result && result !== arr && result[1] === "dwa");
- };
-
- var isImplemented$3;
- var hasRequiredIsImplemented$1;
-
- function requireIsImplemented$1 () {
- if (hasRequiredIsImplemented$1) return isImplemented$3;
- hasRequiredIsImplemented$1 = 1;
-
- isImplemented$3 = function () {
- if (typeof globalThis !== "object") return false;
- if (!globalThis) return false;
- return globalThis.Array === Array;
- };
- return isImplemented$3;
- }
-
- var implementation;
- var hasRequiredImplementation;
-
- function requireImplementation () {
- if (hasRequiredImplementation) return implementation;
- hasRequiredImplementation = 1;
- var naiveFallback = function () {
- if (typeof self === "object" && self) return self;
- if (typeof window === "object" && window) return window;
- throw new Error("Unable to resolve global `this`");
- };
-
- implementation = (function () {
- if (this) return this;
-
- // Unexpected strict mode (may happen if e.g. bundled into ESM module)
-
- // Thanks @mathiasbynens -> https://mathiasbynens.be/notes/globalthis
- // In all ES5+ engines global object inherits from Object.prototype
- // (if you approached one that doesn't please report)
- try {
- Object.defineProperty(Object.prototype, "__global__", {
- get: function () { return this; },
- configurable: true
- });
- } catch (error) {
- // Unfortunate case of Object.prototype being sealed (via preventExtensions, seal or freeze)
- return naiveFallback();
- }
- try {
- // Safari case (window.__global__ is resolved with global context, but __global__ does not)
- if (!__global__) return naiveFallback();
- return __global__;
- } finally {
- delete Object.prototype.__global__;
- }
- })();
- return implementation;
- }
-
- var globalThis_1;
- var hasRequiredGlobalThis;
-
- function requireGlobalThis () {
- if (hasRequiredGlobalThis) return globalThis_1;
- hasRequiredGlobalThis = 1;
-
- globalThis_1 = requireIsImplemented$1()() ? globalThis : requireImplementation();
- return globalThis_1;
- }
-
- var isImplemented$2;
- var hasRequiredIsImplemented;
-
- function requireIsImplemented () {
- if (hasRequiredIsImplemented) return isImplemented$2;
- hasRequiredIsImplemented = 1;
-
- var global = requireGlobalThis()
- , validTypes = { object: true, symbol: true };
-
- isImplemented$2 = function () {
- var Symbol = global.Symbol;
- var symbol;
- if (typeof Symbol !== "function") return false;
- symbol = Symbol("test symbol");
- try { String(symbol); }
- catch (e) { return false; }
-
- // Return 'true' also for polyfills
- if (!validTypes[typeof Symbol.iterator]) return false;
- if (!validTypes[typeof Symbol.toPrimitive]) return false;
- if (!validTypes[typeof Symbol.toStringTag]) return false;
-
- return true;
- };
- return isImplemented$2;
- }
-
- var isSymbol;
- var hasRequiredIsSymbol;
-
- function requireIsSymbol () {
- if (hasRequiredIsSymbol) return isSymbol;
- hasRequiredIsSymbol = 1;
-
- isSymbol = function (value) {
- if (!value) return false;
- if (typeof value === "symbol") return true;
- if (!value.constructor) return false;
- if (value.constructor.name !== "Symbol") return false;
- return value[value.constructor.toStringTag] === "Symbol";
- };
- return isSymbol;
- }
-
- var validateSymbol;
- var hasRequiredValidateSymbol;
-
- function requireValidateSymbol () {
- if (hasRequiredValidateSymbol) return validateSymbol;
- hasRequiredValidateSymbol = 1;
-
- var isSymbol = requireIsSymbol();
-
- validateSymbol = function (value) {
- if (!isSymbol(value)) throw new TypeError(value + " is not a symbol");
- return value;
- };
- return validateSymbol;
- }
-
- var generateName;
- var hasRequiredGenerateName;
-
- function requireGenerateName () {
- if (hasRequiredGenerateName) return generateName;
- hasRequiredGenerateName = 1;
-
- var d = dExports;
-
- var create = Object.create, defineProperty = Object.defineProperty, objPrototype = Object.prototype;
-
- var created = create(null);
- generateName = function (desc) {
- var postfix = 0, name, ie11BugWorkaround;
- while (created[desc + (postfix || "")]) ++postfix;
- desc += postfix || "";
- created[desc] = true;
- name = "@@" + desc;
- defineProperty(
- objPrototype,
- name,
- d.gs(null, function (value) {
- // For IE11 issue see:
- // https://connect.microsoft.com/IE/feedbackdetail/view/1928508/
- // ie11-broken-getters-on-dom-objects
- // https://github.com/medikoo/es6-symbol/issues/12
- if (ie11BugWorkaround) return;
- ie11BugWorkaround = true;
- defineProperty(this, name, d(value));
- ie11BugWorkaround = false;
- })
- );
- return name;
- };
- return generateName;
- }
-
- var standardSymbols;
- var hasRequiredStandardSymbols;
-
- function requireStandardSymbols () {
- if (hasRequiredStandardSymbols) return standardSymbols;
- hasRequiredStandardSymbols = 1;
-
- var d = dExports
- , NativeSymbol = requireGlobalThis().Symbol;
-
- standardSymbols = function (SymbolPolyfill) {
- return Object.defineProperties(SymbolPolyfill, {
- // To ensure proper interoperability with other native functions (e.g. Array.from)
- // fallback to eventual native implementation of given symbol
- hasInstance: d(
- "", (NativeSymbol && NativeSymbol.hasInstance) || SymbolPolyfill("hasInstance")
- ),
- isConcatSpreadable: d(
- "",
- (NativeSymbol && NativeSymbol.isConcatSpreadable) ||
- SymbolPolyfill("isConcatSpreadable")
- ),
- iterator: d("", (NativeSymbol && NativeSymbol.iterator) || SymbolPolyfill("iterator")),
- match: d("", (NativeSymbol && NativeSymbol.match) || SymbolPolyfill("match")),
- replace: d("", (NativeSymbol && NativeSymbol.replace) || SymbolPolyfill("replace")),
- search: d("", (NativeSymbol && NativeSymbol.search) || SymbolPolyfill("search")),
- species: d("", (NativeSymbol && NativeSymbol.species) || SymbolPolyfill("species")),
- split: d("", (NativeSymbol && NativeSymbol.split) || SymbolPolyfill("split")),
- toPrimitive: d(
- "", (NativeSymbol && NativeSymbol.toPrimitive) || SymbolPolyfill("toPrimitive")
- ),
- toStringTag: d(
- "", (NativeSymbol && NativeSymbol.toStringTag) || SymbolPolyfill("toStringTag")
- ),
- unscopables: d(
- "", (NativeSymbol && NativeSymbol.unscopables) || SymbolPolyfill("unscopables")
- )
- });
- };
- return standardSymbols;
- }
-
- var symbolRegistry;
- var hasRequiredSymbolRegistry;
-
- function requireSymbolRegistry () {
- if (hasRequiredSymbolRegistry) return symbolRegistry;
- hasRequiredSymbolRegistry = 1;
-
- var d = dExports
- , validateSymbol = requireValidateSymbol();
-
- var registry = Object.create(null);
-
- symbolRegistry = function (SymbolPolyfill) {
- return Object.defineProperties(SymbolPolyfill, {
- for: d(function (key) {
- if (registry[key]) return registry[key];
- return (registry[key] = SymbolPolyfill(String(key)));
- }),
- keyFor: d(function (symbol) {
- var key;
- validateSymbol(symbol);
- for (key in registry) {
- if (registry[key] === symbol) return key;
- }
- return undefined;
- })
- });
- };
- return symbolRegistry;
- }
-
- var polyfill;
- var hasRequiredPolyfill;
-
- function requirePolyfill () {
- if (hasRequiredPolyfill) return polyfill;
- hasRequiredPolyfill = 1;
-
- var d = dExports
- , validateSymbol = requireValidateSymbol()
- , NativeSymbol = requireGlobalThis().Symbol
- , generateName = requireGenerateName()
- , setupStandardSymbols = requireStandardSymbols()
- , setupSymbolRegistry = requireSymbolRegistry();
-
- var create = Object.create
- , defineProperties = Object.defineProperties
- , defineProperty = Object.defineProperty;
-
- var SymbolPolyfill, HiddenSymbol, isNativeSafe;
-
- if (typeof NativeSymbol === "function") {
- try {
- String(NativeSymbol());
- isNativeSafe = true;
- } catch (ignore) {}
- } else {
- NativeSymbol = null;
- }
-
- // Internal constructor (not one exposed) for creating Symbol instances.
- // This one is used to ensure that `someSymbol instanceof Symbol` always return false
- HiddenSymbol = function Symbol(description) {
- if (this instanceof HiddenSymbol) throw new TypeError("Symbol is not a constructor");
- return SymbolPolyfill(description);
- };
-
- // Exposed `Symbol` constructor
- // (returns instances of HiddenSymbol)
- polyfill = SymbolPolyfill = function Symbol(description) {
- var symbol;
- if (this instanceof Symbol) throw new TypeError("Symbol is not a constructor");
- if (isNativeSafe) return NativeSymbol(description);
- symbol = create(HiddenSymbol.prototype);
- description = description === undefined ? "" : String(description);
- return defineProperties(symbol, {
- __description__: d("", description),
- __name__: d("", generateName(description))
- });
- };
-
- setupStandardSymbols(SymbolPolyfill);
- setupSymbolRegistry(SymbolPolyfill);
-
- // Internal tweaks for real symbol producer
- defineProperties(HiddenSymbol.prototype, {
- constructor: d(SymbolPolyfill),
- toString: d("", function () { return this.__name__; })
- });
-
- // Proper implementation of methods exposed on Symbol.prototype
- // They won't be accessible on produced symbol instances as they derive from HiddenSymbol.prototype
- defineProperties(SymbolPolyfill.prototype, {
- toString: d(function () { return "Symbol (" + validateSymbol(this).__description__ + ")"; }),
- valueOf: d(function () { return validateSymbol(this); })
- });
- defineProperty(
- SymbolPolyfill.prototype,
- SymbolPolyfill.toPrimitive,
- d("", function () {
- var symbol = validateSymbol(this);
- if (typeof symbol === "symbol") return symbol;
- return symbol.toString();
- })
- );
- defineProperty(SymbolPolyfill.prototype, SymbolPolyfill.toStringTag, d("c", "Symbol"));
-
- // Proper implementaton of toPrimitive and toStringTag for returned symbol instances
- defineProperty(
- HiddenSymbol.prototype, SymbolPolyfill.toStringTag,
- d("c", SymbolPolyfill.prototype[SymbolPolyfill.toStringTag])
- );
-
- // Note: It's important to define `toPrimitive` as last one, as some implementations
- // implement `toPrimitive` natively without implementing `toStringTag` (or other specified symbols)
- // And that may invoke error in definition flow:
- // See: https://github.com/medikoo/es6-symbol/issues/13#issuecomment-164146149
- defineProperty(
- HiddenSymbol.prototype, SymbolPolyfill.toPrimitive,
- d("c", SymbolPolyfill.prototype[SymbolPolyfill.toPrimitive])
- );
- return polyfill;
- }
-
- var es6Symbol;
- var hasRequiredEs6Symbol;
-
- function requireEs6Symbol () {
- if (hasRequiredEs6Symbol) return es6Symbol;
- hasRequiredEs6Symbol = 1;
-
- es6Symbol = requireIsImplemented()()
- ? requireGlobalThis().Symbol
- : requirePolyfill();
- return es6Symbol;
- }
-
- var isArguments;
- var hasRequiredIsArguments;
-
- function requireIsArguments () {
- if (hasRequiredIsArguments) return isArguments;
- hasRequiredIsArguments = 1;
-
- var objToString = Object.prototype.toString
- , id = objToString.call((function () { return arguments; })());
-
- isArguments = function (value) { return objToString.call(value) === id; };
- return isArguments;
- }
-
- var isFunction;
- var hasRequiredIsFunction;
-
- function requireIsFunction () {
- if (hasRequiredIsFunction) return isFunction;
- hasRequiredIsFunction = 1;
-
- var objToString = Object.prototype.toString
- , isFunctionStringTag = RegExp.prototype.test.bind(/^[object [A-Za-z0-9]*Function]$/);
-
- isFunction = function (value) {
- return typeof value === "function" && isFunctionStringTag(objToString.call(value));
- };
- return isFunction;
- }
-
- var isImplemented$1 = function () {
- var sign = Math.sign;
- if (typeof sign !== "function") return false;
- return sign(10) === 1 && sign(-20) === -1;
- };
-
- var shim$2;
- var hasRequiredShim$2;
-
- function requireShim$2 () {
- if (hasRequiredShim$2) return shim$2;
- hasRequiredShim$2 = 1;
-
- shim$2 = function (value) {
- value = Number(value);
- if (isNaN(value) || value === 0) return value;
- return value > 0 ? 1 : -1;
- };
- return shim$2;
- }
-
- var sign$1 = isImplemented$1() ? Math.sign : requireShim$2();
-
- var sign = sign$1
- , abs$1 = Math.abs
+ var abs$1 = Math.abs
, floor$1 = Math.floor;
- var toInteger$1 = function (value) {
+ var toInteger = function (value) {
if (isNaN(value)) return 0;
value = Number(value);
if (value === 0 || !isFinite(value)) return value;
- return sign(value) * floor$1(abs$1(value));
+ return Math.sign(value) * floor$1(abs$1(value));
};
- var toInteger = toInteger$1
- , max = Math.max;
+ var max = Math.max;
var toPosInteger = function (value) { return max(0, toInteger(value)); };
- var isString;
- var hasRequiredIsString;
-
- function requireIsString () {
- if (hasRequiredIsString) return isString;
- hasRequiredIsString = 1;
-
- var objToString = Object.prototype.toString, id = objToString.call("");
-
- isString = function (value) {
- return (
- typeof value === "string" ||
- (value &&
- typeof value === "object" &&
- (value instanceof String || objToString.call(value) === id)) ||
- false
- );
- };
- return isString;
- }
-
- var shim$1;
- var hasRequiredShim$1;
-
- function requireShim$1 () {
- if (hasRequiredShim$1) return shim$1;
- hasRequiredShim$1 = 1;
-
- var iteratorSymbol = requireEs6Symbol().iterator
- , isArguments = requireIsArguments()
- , isFunction = requireIsFunction()
- , toPosInt = toPosInteger
- , callable = validCallable
- , validValue$1 = validValue
- , isValue = isValue$4
- , isString = requireIsString()
- , isArray = Array.isArray
- , call = Function.prototype.call
- , desc = { configurable: true, enumerable: true, writable: true, value: null }
- , defineProperty = Object.defineProperty;
-
- // eslint-disable-next-line complexity, max-lines-per-function
- shim$1 = function (arrayLike /*, mapFn, thisArg*/) {
- var mapFn = arguments[1]
- , thisArg = arguments[2]
- , Context
- , i
- , j
- , arr
- , length
- , code
- , iterator
- , result
- , getIterator
- , value;
-
- arrayLike = Object(validValue$1(arrayLike));
-
- if (isValue(mapFn)) callable(mapFn);
- if (!this || this === Array || !isFunction(this)) {
- // Result: Plain array
- if (!mapFn) {
- if (isArguments(arrayLike)) {
- // Source: Arguments
- length = arrayLike.length;
- if (length !== 1) return Array.apply(null, arrayLike);
- arr = new Array(1);
- arr[0] = arrayLike[0];
- return arr;
- }
- if (isArray(arrayLike)) {
- // Source: Array
- arr = new Array((length = arrayLike.length));
- for (i = 0; i < length; ++i) arr[i] = arrayLike[i];
- return arr;
- }
- }
- arr = [];
- } else {
- // Result: Non plain array
- Context = this;
- }
-
- if (!isArray(arrayLike)) {
- if ((getIterator = arrayLike[iteratorSymbol]) !== undefined) {
- // Source: Iterator
- iterator = callable(getIterator).call(arrayLike);
- if (Context) arr = new Context();
- result = iterator.next();
- i = 0;
- while (!result.done) {
- value = mapFn ? call.call(mapFn, thisArg, result.value, i) : result.value;
- if (Context) {
- desc.value = value;
- defineProperty(arr, i, desc);
- } else {
- arr[i] = value;
- }
- result = iterator.next();
- ++i;
- }
- length = i;
- } else if (isString(arrayLike)) {
- // Source: String
- length = arrayLike.length;
- if (Context) arr = new Context();
- for (i = 0, j = 0; i < length; ++i) {
- value = arrayLike[i];
- if (i + 1 < length) {
- code = value.charCodeAt(0);
- // eslint-disable-next-line max-depth
- if (code >= 0xd800 && code <= 0xdbff) value += arrayLike[++i];
- }
- value = mapFn ? call.call(mapFn, thisArg, value, j) : value;
- if (Context) {
- desc.value = value;
- defineProperty(arr, j, desc);
- } else {
- arr[j] = value;
- }
- ++j;
- }
- length = j;
- }
- }
- if (length === undefined) {
- // Source: array or array-like
- length = toPosInt(arrayLike.length);
- if (Context) arr = new Context(length);
- for (i = 0; i < length; ++i) {
- value = mapFn ? call.call(mapFn, thisArg, arrayLike[i], i) : arrayLike[i];
- if (Context) {
- desc.value = value;
- defineProperty(arr, i, desc);
- } else {
- arr[i] = value;
- }
- }
- }
- if (Context) {
- desc.value = null;
- arr.length = length;
- }
- return arr;
- };
- return shim$1;
- }
-
- var from = isImplemented$4() ? Array.from : requireShim$1();
-
- var isImplemented = function () {
- var numberIsNaN = Number.isNaN;
- if (typeof numberIsNaN !== "function") return false;
- return !numberIsNaN({}) && numberIsNaN(NaN) && !numberIsNaN(34);
- };
-
- var shim;
- var hasRequiredShim;
-
- function requireShim () {
- if (hasRequiredShim) return shim;
- hasRequiredShim = 1;
-
- shim = function (value) {
- // eslint-disable-next-line no-self-compare
- return value !== value;
- };
- return shim;
- }
-
- var isNan = isImplemented() ? Number.isNaN : requireShim();
-
- var numberIsNaN = isNan
+ var numberIsNaN = Number.isNaN
, toPosInt = toPosInteger
, value$1 = validValue
, indexOf$1 = Array.prototype.indexOf
@@ -33349,7 +32823,7 @@
, splice = Array.prototype.splice;
// eslint-disable-next-line no-unused-vars
- var remove$1 = function (itemToRemove /*, …item*/) {
+ var remove = function (itemToRemove /*, …item*/) {
forEach.call(
arguments,
function (item) {
@@ -33364,17 +32838,14 @@
var map = { function: true, object: true };
- var isObject$1 = function (value) { return (isValue(value) && map[typeof value]) || false; };
-
- var isObject = isObject$1;
+ var isObject = function (value) { return (isValue(value) && map[typeof value]) || false; };
var validObject = function (value) {
if (!isObject(value)) throw new TypeError(value + " is not an Object");
return value;
};
- var aFrom = from
- , remove = remove$1
+ var aFrom = Array.from
, value = validObject
, d = dExports
, emit = eventEmitterExports.methods.emit
@@ -33522,16 +32993,39 @@
let template;
template = body.querySelector(":scope > template[data-ref='pagedjs-content']");
- if (!template) {
- // Otherwise create one
- template = document.createElement("template");
- template.dataset.ref = "pagedjs-content";
- template.innerHTML = body.innerHTML;
- body.innerHTML = "";
- body.appendChild(template);
+ if (template) {
+ // [PATCH: wrap-content-move] Re-entrant call: the fragment we
+ // returned previously was stashed on the marker template's
+ // `_pagedjsContent` expando (template.content stays empty under
+ // the move strategy below).
+ return template._pagedjsContent || template.content;
}
- return template.content;
+ // [PATCH: wrap-content-move] Move children into a plain
+ // DocumentFragment owned by the live document instead of round-
+ // tripping through innerHTML (serialise the entire body to a
+ // string, reparse into a template). The round-trip is O(document
+ // size) twice over; the move is one O(n) detach/attach pass with
+ // no string work.
+ //
+ // Why a plain DocumentFragment and not template.content: a
+ // template's content fragment is owned by the inert "template
+ // contents owner document", and moving live elements into
+ // it triggers adoptNode which runs the spec's "update the image
+ // data" algorithm. That resets .complete and leaves the source
+ // image in a state where later cloning into the live page wrapper
+ // doesn't synchronously cache-hit -- our sync waitForImages check
+ // then throws. A plain fragment stays in the live document so
+ // adoption is a no-op and image state is preserved.
+ let fragment = document.createDocumentFragment();
+ while (body.firstChild) {
+ fragment.appendChild(body.firstChild);
+ }
+ template = document.createElement("template");
+ template.dataset.ref = "pagedjs-content";
+ template._pagedjsContent = fragment;
+ body.appendChild(template);
+ return fragment;
}
removeStyles(doc=document) {
diff --git a/perf/README.md b/perf/README.md
index 135e873d..135d2de2 100644
--- a/perf/README.md
+++ b/perf/README.md
@@ -12,6 +12,44 @@ page count roughly quadruples the total render time.
This folder holds the tools used to investigate that.
+## Profiling `paged.browser.js`: canonical command
+
+The command we reach for whenever CPU-profiling paged.js:
+
+```
+node measure.mjs --detach-pages --no-timing --render-only --cpu-profile --cpu-sampling 100
+```
+
+(`run.bat` forwards the same args.) Flag rationale:
+
+- `--detach-pages` -- inject the shipping fix. The profile reflects
+ what production actually pays, not the old O(n^2) baseline.
+- `--no-timing` -- skip the per-page `console.log` relay from
+ `timing-handler.js`. The relay costs ~2 % of render self-time on
+ the 1638-page book and muddies the bottom-up view.
+- `--render-only` -- bail out after `PagedPolyfill.preview()`
+ returns. Skips meta extraction, `parseOutline`, `page.pdf`, and
+ the pdf-lib roundtrip / incremental writer. ~47 s saved per run
+ on the book (~55 s full -> ~8 s render-only), with no effect on
+ what the `--cpu-profile` trace captures (it already covered only
+ the render phase).
+- `--cpu-profile` -- write `render.cpuprofile` (render phase only)
+ into the timestamped `results/` folder. Open in Chrome DevTools via
+ Performance -> "Load profile...", or interrogate from the terminal
+ with `analyze-profile.mjs` / `find-callers.mjs` / `find-callees.mjs`
+ / `grep-profile.mjs`.
+- `--cpu-sampling 100` -- 100 us sampling, 10x denser than the 1 ms
+ default. Resolves frames in paged.js's sub-millisecond inner loops
+ where most remaining cost lives (see "Looking past `finalizePage`"
+ and later sections). Larger profile file in return.
+
+Drop `--render-only` whenever you need to also measure generate /
+process (e.g. confirming a fix doesn't shift cost into `page.pdf()`
+or pdf-lib), or to write `book.pdf` for behavioural verification.
+
+The rest of this README is the long-form narrative -- baseline
+findings, each landed optimisation, and the residual hotspots.
+
## The plan
The render pipeline has three phases, matching what `pagedjs-cli`
@@ -60,6 +98,7 @@ DevTools-compatible trace is a few lines.
| `detach-pages.js` | `Paged.Handler` that hides each completed page from the layout tree (registered against `finalizePage`). The fix. Injected by `--detach-pages` and by `docs/book.bat`. |
| `instrument-flush-ops.js` | Wraps `getComputedStyle`, `getBoundingClientRect`, and the `offsetWidth` / `clientWidth` / `scrollWidth` family with counters + per-call timing. Injected by `--instrument`. |
| `time-hooks.js` | Wraps every task registered to `chunker.hooks.*` and `polisher.hooks.*` with a wall-clock timer. Tells you which handler's hook method is eating render time, per page. Injected by `--time-hooks`. |
+| `instrument-clones.js` | Wraps `Layout.prototype.append` to tag every source-walker clone, then walks each finalized page at `finalizePage` counting tagged survivors. Reports total appendCalls vs. survivors and the per-page overshoot distribution -- the share of clones rolled back by `removeOverflow`. Requires a one-line `window.PagedLayout = Layout` patch near the bottom of `docs/lib/paged.browser.js` (it's a private class otherwise). Injected by `--clone-count`. |
| `incremental-pdf.mjs` | Replaces the pdf-lib load+save roundtrip with a PDF 1.7 §7.5.6 incremental update appended to Chrome's bytes. Used by `--incremental`. |
| `test-incremental.mjs` | Smoke test for `incremental-pdf.mjs`: renders a tiny probe page, runs the writer, verifies the result parses (via pdf-lib re-load) and that outline + metadata land correctly. |
| `profile-load.mjs` | Standalone profiler for `PDFDocument.load`. Runs the load on a chosen PDF with a chosen `parseSpeed`; intended to be run under `node --cpu-prof`. |
@@ -68,6 +107,9 @@ DevTools-compatible trace is a few lines.
| `compare-outlines.mjs` | Diffs two PDFs' `/Outlines` trees by `(depth, title, target page)`. Used to verify whether Chrome's native outline matches the injected one. |
| `probe-outline-exclusions.mjs` | Tests which per-element attributes / styles (aria-hidden, role=presentation, hidden, display:none, CSS bookmark-level, ...) make Chrome drop a heading from its outline. |
| `analyze-profile.mjs` | Bottom-up self-time analyzer for `.cpuprofile` files. Same shape as DevTools' Performance bottom-up view, in the terminal. |
+| `find-callers.mjs` | "Who paid for this callee's time?" -- walks a `.cpuprofile` and attributes a target function's total time back to each direct caller. Used throughout the post-mortems to detect gBCR migration between callers. |
+| `find-callees.mjs` | The other direction of `find-callers.mjs`: splits a function's self+descendant time across its direct callees. Surfaces the cases where V8 has rolled native DOM work back into the calling JS frame (Range deletion in `removeOverflow`, HTML parser in `wrapContent`). |
+| `grep-profile.mjs` | Lists every node in a `.cpuprofile` whose `functionName` matches a regex, with self-time and location. Quick check for "is this frame in the profile at all, and what's it called?" |
| `run.bat` | Windows wrapper. Installs deps on first run, then invokes `node measure.mjs`. |
| `results/` | Output, one timestamped subfolder per run. Git-ignored. |
@@ -116,6 +158,8 @@ run.bat path\to\some-other.html # explicit input
run.bat --out my-run # explicit output directory
run.bat --detach-pages # inject the detach-pages fix
run.bat --cpu-profile # CPU-profile the render phase
+run.bat --render-only # bail out after render (skip generate + process, ~47s saved)
+run.bat --clone-count # report Layout.append clones appended vs survivors per page
run.bat --instrument # count + time DOM-accessor calls
run.bat --time-hooks # per-task timing of every chunker/polisher hook
run.bat --incremental # process via incremental update instead of pdf-lib roundtrip
@@ -2437,3 +2481,1641 @@ generate. After the puppeteer 25 bump it would save less than the
earlier estimate (the 64 s -> 43 s gain made the target smaller),
but it's still the only knob with a profile target large enough to
move the wall-clock total by 5+ s.
+
+## Can we make `removeChild` cheaper?
+
+After the findRef fix, `removeChild` sits at ~12 % of render
+self-time. The detach-pages handler attribution is clean -- 1651
+detaches for 1651 pages, exactly one per page, with the only
+other removeChild callers being `filterTree` at startup (9,192
+ignorable-text-node strips totalling 2.3 ms; not a hot path).
+
+Per-call cost on the 1651-page book, with `Element.prototype.removeChild`
+wrapped to measure each call:
+
+```
+[instrument] page-detach avg: 1.009 ms/call
+[instrument] page-detach median: 0.900 ms/call
+[instrument] page-detach p90: 2.000 ms/call
+[instrument] page-detach p99: 3.000 ms/call
+[instrument] avg descendants/page: 147.7
+```
+
+That's ~5-7 us per descendant LayoutObject torn down, multiplied
+by ~150 descendants per page, multiplied by ~1651 pages = ~1.7 s
+total. The distribution is tight and scales linearly with
+descendant count -- this looks like ordinary Blink teardown work
+rather than a pathological slow path.
+
+To verify, two structural variants were both tested on the same
+instrumentation harness:
+
+### Variant B: graveyard DocumentFragment
+
+Replace `parent.removeChild(page)` with
+`graveyard.appendChild(page)`, where `graveyard` is a fresh
+`DocumentFragment` held by the handler. Hypothesis: the
+move-to-out-of-document-fragment path might skip some
+LayoutObject teardown work because the destination is itself
+disconnected.
+
+| metric | A (removeChild) | B (graveyard) |
+| ------ | --------------- | ------------- |
+| avg per call | **1.009 ms** | 1.082 ms (+7 %) |
+| median | 0.900 ms | 0.900 ms |
+| p90 | 2.000 ms | 2.200 ms |
+| p99 | 3.000 ms | 3.100 ms |
+| total page wall | 1666 ms | 1785 ms |
+| render wall-clock | ~16.1 s | ~15.2 s (run-to-run noise) |
+
+The graveyard move is **slightly slower** per call. Blink tears
+down the LayoutObjects regardless of where the node lands; there's
+no fast-path for "moved to a detached parent". No win.
+
+### Variant C: `contain: layout style` on `.pagedjs_page`
+
+Inject `<style>.pagedjs_page { contain: layout style; }</style>`
+into the document before render. Hypothesis: removing a contained
+subtree might skip style/layout invalidation propagation because
+Blink already knows the subtree didn't influence its siblings or
+parent.
+
+Also tested `contain: strict` (which adds `paint` and `size`
+containment -- pages already have explicit dimensions via @page
+CSS so this is safe).
+
+| metric | A (no contain) | C (layout style) | C-strict |
+| ------ | -------------- | ---------------- | -------- |
+| avg per call | **1.009 ms** | 1.017 ms | 0.991 ms |
+| median | 0.900 ms | 0.900 ms | 0.900 ms |
+| p90 | 2.000 ms | 1.900 ms | 1.900 ms |
+| total page wall | 1666 ms | 1678 ms | 1634 ms |
+| render wall-clock | ~16.1 s | ~15.0 s | ~14.8 s |
+
+All three runs are within ~5 % of each other on per-call cost --
+well inside the run-to-run noise band. Containment doesn't unlock
+a faster removeChild path either.
+
+### Conclusion (variants B + C)
+
+The 1.7 s of `removeChild` is intrinsic Blink LayoutObject
+teardown work. The math checks out at ~5-7 us per descendant ×
+~150 descendants × 1651 pages, and three different framings
+(plain removeChild, move-to-fragment, contain + removeChild) all
+land within ~10 % of each other. The destination of the move and
+the containment metadata don't change Blink's teardown rate.
+
+The one thing we *don't* do is "remove less per page" -- removing
+a page's content as N individual leaf removals would be strictly
+worse (N × overhead instead of 1 × overhead, same teardown total).
+Each removeChild call carries DOM-mutation, style-invalidation,
+and notify overhead beyond the per-descendant cost, so consolidating
+to one removal per page is already the optimal framing.
+
+### Variant D: don't detach at all, just `contain: strict`
+
+A natural follow-up: if the per-page cost of having siblings
+around really comes from style/selector traversal, maybe Blink
+will skip a *contained* sibling subtree even when it can't skip
+a `display: none` one. Containment is a stronger signal -- it
+explicitly tells the engine "no observable interaction crosses
+this boundary" -- so the renderer ought to be able to short-circuit
+sibling-walks more aggressively.
+
+Implementation: replace the detach handler with one that sets
+`pageElement.style.contain = 'strict'` at finalizePage and clears
+the property for every page at afterRendered (so `page.pdf()`
+serializes the right paint state).
+
+Result:
+
+| metric | current detach | variant D (contain:strict, no detach) |
+| ------ | -------------- | --------------------------------------- |
+| **render wall-clock** | **~16 s** | **89.3 s** |
+| `Page.create` gBCR | ~764 ms | **31,142 ms** |
+| `hasOverflow` gBCR | ~2,478 ms | 10,922 ms |
+| total gBCR | ~4,832 ms | 45,413 ms |
+| per-page ratio (last/first) | 1.36x | 4.11x |
+
+Worse than the README's display:none baseline (`Page.create`
+gBCR 12,947 ms / render 48.5 s). Containment metadata adds work
+to per-sibling evaluation rather than removing it. **Definitive
+no.** Containment is a hint about what's inside the box; it
+doesn't make the box invisible to neighbours.
+
+### Variant E: empty the wrapper, leave it in place
+
+A second framing of the same idea: keep the page wrapper as a
+sibling, but move its children to a stash so the wrapper itself
+is a leaf (no descendants for Blink to walk through). Restore
+the children at afterRendered. This isolates the "what costs
+what" question: does sibling-walk cost depend on descendant
+count, or just on sibling count?
+
+Implementation: at finalizePage, for the previous-finalized page
+(one behind, mirroring the keep-one-back pattern), move each
+child into an array via `wrapper.removeChild(wrapper.firstChild)`,
+set `min-height: 297mm` so the wrapper still occupies its slot,
+and stash the children. At afterRendered, restore.
+
+Result:
+
+| metric | current detach | variant E (empty wrapper) |
+| ------ | -------------- | --------------------------- |
+| **render wall-clock** | **~16 s** | **21.9 s** |
+| `Page.create` gBCR | ~764 ms | 2,628 ms (+1,864) |
+| `hasOverflow` gBCR | ~2,478 ms | 5,024 ms (+2,546) |
+| `Layout` gBCR | ~294 ms | 937 ms |
+| total gBCR | ~4,832 ms | **10,127 ms (+5,295)** |
+| `removeChild` self | 2,426 ms | **854 ms (-1,572)** |
+| per-page ratio (last/first) | 1.36x | 2.93x |
+
+The removeChild *savings* are real -- with no wrapper to tear
+down, just ~150 child removals per page at sub-microsecond each.
+But the gBCR *cost* roughly doubles because the wrappers are
+still siblings, and gBCR firings have to walk them. Net is +5 s
+render, *worse* than the current detach.
+
+This experiment yields a clean cost-model decomposition. Pulling
+the gBCR deltas apart against the wrapper-vs-content split:
+
+```
+display:none baseline (full content): gBCR(Page.create) ≈ 12,947 ms
+variant E (empty wrappers, n=1651): gBCR(Page.create) ≈ 2,628 ms
+current detach (no siblings): gBCR(Page.create) ≈ 764 ms
+```
+
+Subtracting:
+
+- (variant E - current detach) = 1,864 ms for 1,651 sibling wrappers
+ → ~1.1 us per wrapper-sibling per `Page.create` gBCR call
+- (display:none - variant E) = 10,319 ms for 1,651 × 150 ≈
+ 247,650 sibling descendants
+ → ~42 us per sibling-descendant per `Page.create` gBCR call
+
+Both wrappers and their descendants contribute to the per-call
+cost. Removing the descendants helps -- variant E really is
+substantially cheaper than display:none -- but the wrapper cost
+alone is enough to lose. To zero out both contributions you have
+to take both the wrapper and its descendants out of the sibling
+list, which is exactly what the current detach does.
+
+### Variant F: `content-visibility: hidden`, no detach
+
+The CSS spec's `content-visibility: hidden` is the closest
+property to "freeze in place without disposing" -- per spec,
+rendering work is "skipped" but cached state is preserved for
+cheap restoration. Conceptually nearer to a freeze than
+`display: none` or `contain: strict` were.
+
+Implementation: at finalizePage, set
+`pageElement.style.contentVisibility = 'hidden'` and
+`containIntrinsicSize = '210mm 297mm'` (the size hint Blink uses
+when content-visibility skips a subtree). At afterRendered,
+clear both.
+
+Result:
+
+| metric | current detach | variant F (cv:hidden) |
+| ------ | -------------- | ----------------------- |
+| **render wall-clock** | **~16 s** | **95.2 s** |
+| `Page.create` gBCR | ~764 ms | **29,656 ms** |
+| `hasOverflow` gBCR | ~2,478 ms | 17,558 ms |
+| total gBCR | ~4,832 ms | 52,899 ms |
+| per-page ratio (last/first) | 1.36x | 5.12x |
+
+Worse than every other variant. The spec's "skip rendering work"
+clause covers painting and composition; it does **not** make the
+subtree invisible to sibling-walks during style and selector
+matching that gBCR forces. Three "leave in place" properties
+(`display: none`, `contain: strict`, `content-visibility: hidden`)
+have now been tested and none of them short-circuit the
+sibling-walk.
+
+### Conclusion across all six variants
+
+| variant | render | net vs current |
+| ------- | ------ | -------------- |
+| A current (removeChild, no contain) | ~16.1 s | (baseline) |
+| B graveyard fragment | ~15.2 s | flat (noise) |
+| C `contain: layout style` + removeChild | ~15.0 s | flat (noise) |
+| C-strict `contain: strict` + removeChild | ~14.8 s | flat (noise) |
+| **D `contain: strict`, no detach** | **89.3 s** | **+73 s** |
+| **E empty wrappers, no detach** | **21.9 s** | **+5.9 s** |
+| **F `content-visibility: hidden`, no detach** | **95.2 s** | **+79 s** |
+
+The flat band (A/B/C/C-strict) is the cost-of-doing-business --
+~1 ms × 1651 pages = ~1.7 s of intrinsic Blink LayoutObject
+teardown. Variations on the framing don't move it. The
+catastrophic band (D, E, F) confirms that any path where the page
+wrapper stays in the live sibling list pays meaningfully more
+than the teardown cost would have been -- ~1.1 us per
+wrapper-sibling × 1651 wrappers × several gBCR call sites per
+page comes out to several seconds of extra render even when the
+wrapper is otherwise empty and contained.
+
+The 1.7 s is the bill we pay for shrinking the live DOM from
+~150 × 1651 ≈ 250k nodes back down to 2 nodes (in-flight page +
+keeper), which is what kept `Page.create`'s gBCR flat per page
+(see "Hypothesis 2: sibling sweeps over `display: none` pages"
+above). Net savings vs the display:none variant was ~22 s render;
+the 1.7 s removeChild cost is roughly 8 % of that win paid back
+to Blink for cleanup. Worth keeping.
+
+### Aside: it's not GC, and JS references don't help
+
+A reasonable follow-up question to all of this is "can we just
+hold a reference to the detached children to avoid disposal,
+or turn off GC to skip the cleanup?" Neither applies to what
+we're measuring.
+
+Chromium maintains two trees:
+
+- **DOM tree** -- `Node` objects, JS-visible, referenceable.
+- **Render tree** -- `LayoutObject` / `LayoutBox` / `LayoutText`
+ etc., Blink-internal, NOT JS-visible.
+
+`removeChild` keeps the DOM Node alive (JS reference holders --
+including the handler's `this._detached` array -- prevent
+collection). But the corresponding LayoutObject in the render
+tree is **destroyed immediately**, synchronously, at the
+removeChild call. Re-attaching via appendChild later builds a
+new LayoutObject from scratch.
+
+There is no JS-level API to keep a LayoutObject alive across
+detach + reattach. Holding DOM references doesn't change the
+render-tree lifecycle. The 1.7 s lives entirely in
+LayoutObject teardown -- which is Blink-internal C++ work
+attributed to the `removeChild` native frame in the profile,
+not to GC.
+
+V8's GC is a separate concern and isn't the bottleneck. The
+profile reads:
+
+```
+ self_ms self_% function
+ 195.21 0.89% (garbage collector)
+```
+
+~200 ms over a ~22 s render. Even if it could be disabled
+(it can't -- Node would OOM), it would barely register.
+
+The asymmetry between variants B and E makes this concrete.
+Variant B (graveyard fragment) moves the page from
+`.pagedjs_pages` to a detached DocumentFragment; variant E
+(empty wrapper) keeps the page in `.pagedjs_pages` but moves
+its children out. The fragment-move path *does* trigger
+LayoutObject teardown (you can see the 1.08 ms / call in
+variant B's instrumentation) even though the DOM Node lives on
+in a JS-visible fragment -- because the destination is itself
+not attached to the document, so there's no live render-tree
+parent. Conversely, variant E's wrapper stays in
+`.pagedjs_pages` with a live LayoutObject the whole time, so
+the wrapper's render-tree slot doesn't get torn down; only
+its child LayoutObjects do (as the children move out). The
+"keep render objects alive" idea would have to mean keeping
+the wrapper in `.pagedjs_pages` with all its children, which
+is the display:none baseline -- ~48 s render.
+
+The trade-off is therefore not "keep things alive vs. let GC
+collect them"; it's "be a live render-tree sibling vs. not".
+Anything that keeps the wrapper as a live sibling pays the
+~1.1 us per wrapper-sibling per gBCR call shown above, and the
+gBCR firings compound that into seconds across 1651 pages.
+
+## Chasing the residual `(idle)` to requestAnimationFrame
+
+A second axis of the same investigation. The post-findRef-fix
+profile showed `(idle) 735 ms (4.6 %)` -- not huge, but non-zero
+and worth understanding. `(idle)` in a V8 CPU profile means
+samples taken while the main thread had nothing scheduled --
+waiting on async/await, microtask queue settling, requestAnimationFrame
+ticks, or other browser-internal yields.
+
+### Hypothesis 1: microtask boundaries from `await Hook.trigger(...)`
+
+The chunker's per-page loop has 5-6 `await this.hooks.X.trigger(...)`
+calls per page. `Hook.trigger()` wraps every sync handler in a fresh
+Promise and returns `Promise.all(promises)`, so the caller always
+awaits a thenable -- a microtask boundary per await even when every
+handler resolved synchronously. 5 boundaries × 1651 pages ≈ 8,255
+yields; if each yield is ~85 us in V8 it lines up with the 735 ms.
+
+Patched it: `Hook.trigger()` returns `undefined` when no handler
+returned a thenable, callers do
+`let p = hook.trigger(...); if (p) await p;` to skip the await on
+the sync fast path. Patched at six hot per-page sites (3 in
+`chunker.layout`, 3 in `chunker.handleBreaks`).
+
+Result: render went **up** by ~0.35 s on a 2-run paired A/B
+(14.57 s -> 14.92 s avg). `(idle)` in the profile went **up too**
+(735 ms -> 1223 ms in absolute terms). Microtask boundaries are
+~30 us each at the JIT level; the V8 sampler at 1 ms intervals
+hardly catches them, so they show up as `(program)` rather than
+`(idle)`. The patch shaved microtask scheduling cost in the
+single-digit percent range but added a branch on every Hook.trigger
+call -- net wash, slight regression. **Reverted.**
+
+### Hypothesis 2: ResizeObserver firing per page
+
+Per page, `Page.addResizeObserver` creates a fresh `ResizeObserver`
+that fires its callback asynchronously from the compositor thread
+back to main. The callback wraps work in `requestAnimationFrame`,
+so each RO firing schedules a frame-tick wait. 1651 pages × ~0.5 ms
+per RO-rAF round-trip ≈ ~800 ms. Plausible.
+
+Two-step probe:
+1. **Skip the rAF wrap inside the RO callback**, run synchronously.
+ Result: `(idle) 902 ms`. No improvement, possibly slightly worse.
+2. **Disable the ResizeObserver entirely** (early-return in
+ `addResizeObserver`). Result: `(idle) 1,074 ms`. Still no
+ improvement.
+
+Neither helped. The RO isn't the source -- the per-page
+`addResizeObserver` overhead is real, but it doesn't show up in
+the `(idle)` bucket. Restored upstream behaviour.
+
+### Hypothesis 3: the chunker's `Queue.tick` is `requestAnimationFrame`
+
+The chunker drives its per-page work through a `Queue` class
+(`paged.browser.js:2666`). The queue's constructor sets:
+
+```js
+this.tick = requestAnimationFrame;
+```
+
+and `Queue.run()` schedules each iteration via
+`this.tick.call(window, () => { ... });`. Chunker's `render()`
+loops over `this.q.enqueue(() => this.renderAsync(renderer))`
+once per page. Every per-page iteration therefore waits one rAF
+tick before processing.
+
+`requestAnimationFrame` waits for the next animation frame. In
+headless puppeteer with no display, rAF still delivers callbacks
+on a regular cadence (Chromium's headless mode default is around
+60 Hz off-screen / ~16 ms per frame, with the scheduler often
+batching tighter than that). Either way, per-page rAF waits
+across 1651 pages add up to several hundred milliseconds of pure
+main-thread idle.
+
+The fix is one line:
+
+```js
+this.tick = (cb) => queueMicrotask(cb);
+```
+
+`queueMicrotask` schedules the callback on the microtask queue --
+runs before returning to the event loop, microsecond-scale latency
+instead of millisecond-scale. The `Queue` doesn't depend on rAF
+semantics (no paint coordination, no frame-budget yielding --
+it's just a serializer that wants to run tasks back-to-back).
+
+Verification (paired 2-run A/B, `--detach-pages`, no
+instrumentation, no cpu-profile):
+
+| run | BEFORE render | AFTER render |
+| --- | --- | --- |
+| 1 | 14.62 s | 11.86 s |
+| 2 | 14.51 s | 12.12 s |
+| **avg** | **14.57 s** | **11.99 s** |
+
+**Δ = -2.58 s render (-18 %).** Larger than the 735 ms `(idle)`
+that prompted the look -- because rAF was costing real (program)
+work too (V8 scheduler, microtask queue draining around the rAF
+boundary), not just idle wait. CPU profile of the fixed render:
+
+```
+ self_ms self_% function
+ ------- ------ ----------------------------------------------
+ 4355.74 34.75% getBoundingClientRect
+ 1935.89 15.45% removeChild
+ 1934.11 15.43% (program) (was 5872 -- down ~4 s)
+ 636.43 5.08% removeOverflow
+ -- (idle) absent from the top 10, < 130 ms (1 %)
+```
+
+`(idle)` dropped out of the top 10 (< 130 ms / 1 %), `(program)`
+dropped from 5872 ms to 1934 ms (-4 s), `removeChild` dropped
+slightly (2426 ms -> 1935 ms; smaller render = same per-call cost
+× same call count, so this is sampling artefact, not a real
+change). PDF byte size unchanged (within standard timestamp
+drift). Shipped.
+
+### What the three hypotheses together teach
+
+`(idle)` in a V8 CPU profile attribution table is **not** primarily
+microtask scheduling -- those are too fast to sample. It's
+genuinely-waiting time, where the main thread had no V8 work to do.
+The dominant source of waiting in our render was not async/await,
+not ResizeObserver coalescing, but a `requestAnimationFrame`
+buried in the chunker's task queue. Replacing it with
+`queueMicrotask` collapses the per-page wait, and additionally
+shrinks the surrounding V8 scheduler work because each rAF
+callback came with its own setup / teardown overhead.
+
+The pattern to remember: if a profile shows non-trivial `(idle)`
+in a render-style workload, hunt for explicit `requestAnimationFrame`
+/ `setTimeout` / `requestIdleCallback` calls in the hot path before
+investigating microtask machinery. The frame-paced scheduler is a
+much bigger lever than the microtask scheduler.
+
+### Follow-up: the `Queue` itself was unnecessary indirection
+
+The chunker's `render()` routes each per-page iteration through
+`this.q.enqueue(() => this.renderAsync(renderer))`. The queue's
+job is to serialize tasks -- but an async generator is already
+inherently serial (you can't call `.next()` twice in parallel).
+With the rAF-tick fix above, the queue was reduced to a
+`queueMicrotask` hop plus a Promise/deferred allocation per page,
+for no purpose.
+
+Dropped the indirection: `render()` now iterates `renderer.next()`
+directly. The `Queue` class still exists in the bundle for the
+`onOverflow` re-render path (which is rare in practice), but the
+hot per-page loop bypasses it.
+
+This is a structural simplification more than a measurable speedup
+-- the queueMicrotask hop was already cheap and the deferred
+allocation amortizes. But it removes a layer that was doing
+nothing useful for our use case, which is the point of
+maintaining a fork.
+
+## Stripping headless-irrelevant async machinery
+
+paged.js was designed to be fully usable in interactive browser
+work. The async coordination patterns it carries -- always
+returning Promises from hook triggers, awaiting microtask
+boundaries between every phase, deferring tasks via animation
+frames -- pay off when the same engine is rendering inside a
+visible page that needs to stay responsive, coordinate with the
+compositor, and tolerate handlers that load external resources.
+
+In our headless puppeteer pipeline, none of that is true:
+
+- The page is offscreen; no compositor to coordinate with.
+- We don't care if any individual page-render blocks for tens of
+ milliseconds, because the browser isn't trying to repaint.
+- Every handler we register is synchronous. No hook needs to
+ await anything.
+- The book HTML is loaded before render starts (`page.goto(url,
+ { waitUntil: "load" })`), so every image's `.complete` flag is
+ already true. No image-loading awaits ever actually wait.
+
+Each remaining async wrapper is overhead we pay for a flexibility
+we never use. We're maintaining a task-specific fork; we can keep
+peeling layers as long as the simplifications don't change observed
+output.
+
+### Phase 1: hook fast-path
+
+`Hook.trigger()` upstream always wraps sync handler results in
+`new Promise(resolve => resolve(executing))` and returns
+`Promise.all(promises)`. The chunker's per-page loop awaits each
+of `beforePageLayout`, `afterPageLayout`, and `finalizePage`. With
+all six of our registered handlers running synchronously,
+`await trigger(...)` was a no-work microtask boundary per call.
+
+Patch: `Hook.trigger()` returns `undefined` when no handler
+returned a thenable. Callers in the per-page hot path become:
+
+```js
+let _p = this.hooks.X.trigger(...);
+if (_p) await _p;
+```
+
+The microtask boundary is skipped entirely on the sync fast
+path. Patched at six per-page sites (three in `chunker.layout`,
+three in `chunker.handleBreaks`).
+
+CPU profile comparison (post-queue-tick + drop-queue baseline vs
+post-Phase-1):
+
+| metric | baseline | Phase 1 | Δ |
+| ------ | -------- | ------- | --- |
+| samples | 7,353 | 6,902 | -451 |
+| profile duration | 13.07 s | 12.22 s | **-0.85 s (-6.5 %)** |
+| `getBoundingClientRect` self | 4,622 ms | 4,273 ms | -349 ms |
+| `(program)` self | 1,873 ms | 1,874 ms | flat |
+| `removeChild` self | 1,885 ms | 1,913 ms | flat |
+| `removeOverflow` self | 592 ms | 579 ms | flat |
+| `(idle)` self | n/a (< 130 ms) | n/a (< 130 ms) | flat |
+
+The 451 fewer samples account for ~800 ms of saved CPU work.
+`getBoundingClientRect`'s self-time dropped by ~350 ms; the rest
+is distributed across many small hot spots that all shrank
+slightly because they were each preceded by fewer microtask
+yields. No new hot spot appeared.
+
+> [!NOTE]
+> We compare CPU-profile sample counts and self-times here, not
+> wall-clock. Wall-clock includes I/O variance and system load on
+> the dev machine; CPU profile sample times are independent of
+> those and more reliable for "did this actually change CPU work."
+> Wall-clock numbers from these runs are noted where useful for
+> sanity-checking but aren't the primary signal.
+
+Shipped. The fix is small (one helper change + six call-site
+edits) and removes about 8k microtask boundaries from the
+per-page hot loop on a 1651-page render.
+
+### Phase 2: sync chain end-to-end through the per-page hot path
+
+With Phase 1 in place, every remaining per-page `await` in the
+chunker unconditionally awaits a function that returns a Promise
+even when nothing is actually awaitable. The structural answer is
+to make those functions plain sync functions.
+
+The chain, top to bottom of the per-page call tree:
+
+```
+chunker.*layout() (async generator → sync generator)
+ chunker.handleBreaks() (async → sync)
+ page.layout() (async → sync)
+ Layout.renderTo() (async → sync)
+ Layout.waitForImages() (async → sync, throws if not preloaded)
+chunker.render() loop (still async at the outer edge;
+ renderer.next() now sync)
+```
+
+Phase 2 converts each step. The only function that *could* have
+been genuinely async -- `waitForImages` -- is now a synchronous
+check: it walks the supplied `<img>` nodes and throws if any
+isn't `.complete`. In our pipeline,
+`page.goto(url, { waitUntil: "load" })` settles before paged.js
+is invoked, so every image is already loaded; the throw is a
+safety net for pipeline bugs, not a runtime path we expect to
+take.
+
+The hook triggers in the per-page hot path keep the Phase 1
+fast-path semantics but switch from
+`let _p = hook.trigger(...); if (_p) await _p;` to
+`_assertSync(hook.trigger(...), "hook-name")`. The helper throws
+if a handler ever returns a thenable -- the same safety pattern
+as `waitForImages`. None of our shipping handlers do.
+
+Dead code removed in the same pass: `Chunker.renderAsync` and
+`Chunker.renderOnIdle`, both unreachable since the drop-queue
+change above stripped their only caller. Together ~30 lines of
+async machinery that existed only to wrap the (now sync)
+`renderer.next()` call.
+
+CPU profile (Phase 1 baseline vs Phase 2):
+
+| metric | Phase 1 | Phase 2 | Δ |
+| ------ | -------- | ------- | --- |
+| samples | 6,902 | 6,948 | +46 |
+| profile duration | 12.22 s | 12.35 s | +0.13 s (noise) |
+| `getBoundingClientRect` self | 4,273 ms | 4,524 ms | +251 ms (noise) |
+| `(program)` self | 1,874 ms | 1,909 ms | +35 ms |
+| `removeChild` self | 1,913 ms | 1,883 ms | -30 ms |
+| `removeOverflow` self | 579 ms | 523 ms | -56 ms |
+
+Phase 2 sits inside the run-to-run noise band on CPU time --
+the per-call CPU cost of an `await` on an already-settled Promise
+is small (a handful of microseconds), and Phase 1 already
+eliminated most of the boundary count. **What Phase 2 buys is
+not measurable CPU time -- it's structural simplicity.**
+
+Code shape, before and after:
+
+- 6 fewer `async` keywords on hot-path methods.
+- 13 `await` keywords removed from the bodies of those
+ methods (the per-page chain no longer threads `await` through
+ any of its layers).
+- One async generator (`async *layout`) → sync generator
+ (`*layout`).
+- Two dead methods removed (`renderAsync`, `renderOnIdle`).
+- Two `_assertSync` guards added at the chunker's hook call
+ sites + one at `waitForImages` -- the contract we now rely on
+ (per-page handlers all synchronous, every `<img>` preloaded)
+ is enforced at runtime with a useful error message.
+
+PDF output is **byte-identical** to the Phase 1 build on this
+content (`async-phase1/book.pdf` and `async-phase2/book.pdf`
+both 16,893,546 bytes -- a rare 0-byte timestamp drift, but
+the structural content is identical regardless).
+
+This is the kind of cleanup that's only worth doing because
+we maintain a task-specific fork of the bundle. Upstream
+paged.js has to support handlers that await fetches or image
+loads or font measurements -- our pipeline never registers one.
+Removing the async machinery in our copy shrinks the surface to
+reason about and makes the data-flow direct: a render is a
+plain function call that produces a plain return value.
+
+### What's still async, and why
+
+The async machinery that survives this audit is now at the
+once-per-render layer, where it's load-bearing:
+
+- `Chunker.flow()` is async because `loadFonts()` waits on the
+ CSS font-face descriptor's load promise, which is actually
+ async and OS-level.
+- `Chunker.render()` stays `async` as a thin wrapper so callers
+ in `flow()` can `await` it (the alternative would be to
+ remove `async` and have `flow()` not await it, but the call
+ site reads more clearly with the `await` retained).
+- `beforeParsed`, `afterParsed`, `afterRendered` hooks are still
+ awaited with the `await hook.trigger(...)` form because they
+ fire once per render and the overhead is irrelevant.
+- The `onOverflow` recovery path (`Chunker.q.enqueue(async ...)`)
+ re-renders the document if any page overflows after paint. In
+ practice this never fires for our content, but keeping the
+ recovery code intact costs nothing and preserves behaviour for
+ edge cases.
+
+The hot per-page path is now `function`, `function*`, plain
+return values, and a `while` loop. Future work that touches
+this code can reason about it as straight-line synchronous
+flow.
+
+## Doing less work in `Layout.append()`
+
+Picking the next hotspot after the async cleanup, BreakToken
+JSON, gBCR wrapper inline, and UUID-counter changes had all
+landed. Fresh profile from a clean baseline at 100us sampling
+(V8 effectively clamped this to ~543us/sample on this Node/
+Chromium build), `--no-timing --detach-pages`, render-only:
+
+```
+ self_ms self_% function @ source
+ ------- ------ --------------------------------------------------
+ 4825.28 38.22% getBoundingClientRect (native)
+ 2021.89 16.02% (program) (native)
+ 1954.01 15.48% removeChild (native)
+ 635.95 5.04% removeOverflow paged.browser.js
+ 288.38 2.28% wrapContent paged.browser.js
+ 255.25 2.02% insertBefore (native)
+ 227.01 1.80% appendChild (native)
+ 164.01 1.30% findOverflow paged.browser.js
+ 140.66 1.11% (garbage collector) (native)
+ 138.49 1.10% afterPageLayout paged.browser.js (Splits)
+ 129.25 1.02% cloneNode (native)
+ 125.99 1.00% addRefs paged.browser.js
+ 90.15 0.71% renderTo paged.browser.js
+ 81.46 0.65% filterTree paged.browser.js
+ 80.92 0.64% importNode (native)
+ 80.38 0.64% setAttribute (native)
+ 72.77 0.58% append paged.browser.js
+ ...
+```
+
+The four heavy hitters are unchanged from earlier reports.
+`Layout.append` itself shows only 73 ms of self-time, but
+inclusively it owns a large fraction of the per-source-node
+work: `cloneNode`, `appendChild`/`insertBefore`, the
+`findElement` chain (`querySelector` + `getAttribute`), the
+`renderNode` hook dispatch, and `rebuildAncestors` at page
+boundaries all flow through it. With ~100k+ source-node
+clones per render, anything per-call adds up.
+
+Reading the body of `append()`, three things stood out as
+potentially-reducible:
+
+1. The `renderNode` hook dispatch fires for every cloned
+ node. Even if no handler is registered, `triggerSync`
+ still allocates a results array, runs `this.hooks.forEach`
+ over zero entries, and returns the empty array; the
+ caller then runs its own `.forEach` over that empty array.
+2. The `findElement(node.parentNode, dest)` lookup goes
+ through `getAttribute("data-ref")` on the parent. The
+ ref is also set on every source element at decoration
+ time, so the value could be stashed on a plain JS expando.
+3. `clone.dataset.ref` is read a second time at the end of
+ `append()` to register the clone in `dest.indexOfRefs`.
+ Same expando trick applies.
+
+Following the (1) thread first uncovered two separable wins:
+a bug inside the only registered `renderNode` handler, and
+the broader empty-handlers dispatch overhead.
+
+### `Footnotes.renderNode`: always-truthy NodeList condition
+
+The grep for `renderNode` method definitions in the bundle
+returns exactly one match: `Footnotes.renderNode` (in the
+package's footnotes-handling class). Every `append()` call
+goes through it. Its body:
+
+```js
+renderNode(node) {
+ if (node.nodeType == 1) {
+ let notes;
+ if (!node.dataset) return;
+
+ if (node.dataset.note === "footnote") {
+ notes = [node];
+ } else if (node.dataset.hasNotes ||
+ node.querySelectorAll("[data-note='footnote']")) {
+ notes = node.querySelectorAll("[data-note='footnote']");
+ }
+
+ if (notes && notes.length) {
+ this.findVisibleFootnotes(notes, node);
+ }
+ }
+}
+```
+
+The `else if` condition has an upstream bug: a `NodeList` is
+always truthy (even an empty one -- it's an object), so when
+`dataset.hasNotes` is undefined the right arm of the `||`
+runs `querySelectorAll`, the condition evaluates true, and
+the next line then runs `querySelectorAll` **a second time**.
+Two subtree scans per element-node clone, for any document
+that doesn't author `data-note='footnote'` directly.
+
+`grep -c 'data-note' docs/_site-pdf/book.html` returns 0 --
+every one of those scans on every clone of every page of
+the book was dead work.
+
+The fix narrows the `else if` to the original intent:
+
+```js
+} else if (node.dataset.hasNotes) {
+ notes = node.querySelectorAll("[data-note='footnote']");
+}
+```
+
+Profile delta (post-tojson baseline vs surgical fix):
+
+| metric | baseline | post-fix | Δ |
+| ------ | -------- | -------- | --- |
+| render wall | 12.63 s | 12.63 s | flat (within noise) |
+| `querySelectorAll` self | 67.9 ms | 52.8 ms | -15 ms |
+| samples | 23,313 | 23,250 | -63 |
+
+A small saving in absolute terms: most of the eliminated
+`querySelectorAll` calls were against tiny leaf subtrees
+that terminate in microseconds when no matches are present.
+The bug fix is upstream-clean and correct; the perf-relevant
+takeaway was that *most* of the work `append()` pays for the
+`renderNode` hook is in the dispatch wrapping the handler,
+not in the handler's body. That motivated the second of the
+two wins: the empty-handlers dispatch fast-path.
+
+### `Hook.triggerSync` empty-handlers fast-path
+
+Mirrors the README's earlier "Phase 1: hook fast-path" for
+the async `trigger()` path. `Hook.triggerSync` previously:
+
+```js
+triggerSync() {
+ var args = arguments;
+ var context = this.context;
+ var results = [];
+ this.hooks.forEach(function (task) {
+ var executing = task.apply(context, args);
+ results.push(executing);
+ });
+ return results;
+}
+```
+
+…and the four reducer call sites in `Layout` always did:
+
+```js
+let r = this.hooks.X.triggerSync(...);
+r.forEach((newVal) => { if (newVal !== undefined) target = newVal; });
+```
+
+Walking the bundle to see which of those four hook arrays
+are actually populated in our build:
+
+| call site | hook | handlers registered |
+| --------- | ---- | ------------------- |
+| `breakAt` (line 1551) | `onBreakToken` | 0 |
+| `append` (line 1640) | `renderNode` | 1 (`Footnotes`) |
+| `findBreakToken` (line 1805) | `onOverflow` | 0 |
+| `findBreakToken` (line 1815) | `onBreakToken` | 0 |
+| `Chunker.flow` (line 2910) | `filter` | 4 |
+
+Three of the four hot sites are dispatching against an empty
+handler array every call. `onOverflow` and the two
+`onBreakToken` sites all fire from the per-page break-
+detection path, which can run more than once per page when
+overflow-and-retry happens.
+
+Patch: `triggerSync` returns `undefined` on the empty path,
+callers guard their reducer `forEach` with a truthy check.
+
+```js
+triggerSync() {
+ if (this.hooks.length === 0) return undefined;
+ // ...existing body
+}
+```
+
+```js
+let r = this.hooks.X.triggerSync(...);
+if (r) r.forEach((newVal) => { ... });
+```
+
+Profile delta (post-surgical vs post-fast-path):
+
+| metric | post-surgical | post-fast-path | Δ |
+| ------ | ------------- | -------------- | --- |
+| render wall | 12.63 s | **12.14 s** | **-0.49 s** |
+| samples | 23,250 | 22,433 | -817 |
+| `getBoundingClientRect` self | 4,819 ms | 4,714 ms | -105 ms |
+| `removeChild` self | 1,962 ms | 1,902 ms | -60 ms |
+| `removeOverflow` self | 634 ms | 552 ms | -82 ms |
+| `querySelectorAll` self | 52.8 ms | 43.4 ms | -10 ms |
+
+The wall-clock drop (~490 ms) and sample drop (817 × 542 us
+≈ 443 ms) line up cleanly, so the saving is real, not run-
+to-run noise. The reductions spread across rows because the
+per-call cost of an empty `triggerSync` -- an array alloc, a
+forEach over zero entries, a return, and the caller's own
+forEach over the returned `[]` -- creates pressure on the
+allocator and the V8 inliner that compounds on the per-page
+hot path even though no single line attributes the cost.
+
+The `renderNode` site at line 1640 does **not** hit the fast
+path in this build -- `Footnotes` still occupies it with one
+handler, so `hooks.length === 1` and the body runs as
+before. The savings come entirely from the three zero-
+handler sites.
+
+### `Footnotes` self-disables when no footnotes are in source
+
+That left the per-element `Footnotes.renderNode` dispatch
+still firing on every cloned node, plus four other hook
+methods `Footnotes` registers via the `Handler` base auto-
+wiring. Inventory of what `Footnotes` is doing on a render
+with zero footnote-marked nodes:
+
+| method | fires | what it does on a footnote-free doc |
+| ------ | ----- | ----------------------------------- |
+| `onDeclaration` | per CSS declaration | quick property-name checks. Cheap. |
+| `renderNode` | per element-node clone | short-circuits after surgical fix. |
+| `beforePageLayout` | once per page | checks `this.needsLayout.length` (always 0). Cheap. |
+| `afterPageLayout` | once per page | **3 `querySelector`s + `getBoundingClientRect` + `new Layout(...)` (which does 2 more `getBoundingClientRect`s + `getComputedStyle` in its constructor) + `findOverflow()` on the footnote-inner-content area.** Real work. |
+| `afterOverflowRemoved` | per overflow detection | `querySelectorAll` returning empty. Cheap-ish. |
+
+The big hidden cost was `afterPageLayout` -- ~1,650 calls per
+render, each measuring an empty footnote area through several
+DOM ops and constructing a transient `Layout` instance whose
+constructor itself does multiple gBCRs.
+
+The detect-and-disable plan:
+
+1. Footnotes is the *only* registrant for each of its hook
+ methods (`onDeclaration` aside -- it's a polisher-time
+ hook with other registrants, but it's also cheap).
+2. By the time `afterParsed` fires, both the CSS-driven
+ selectors (populated by `onDeclaration` calls into
+ `this.footnotes`) and any source-HTML `data-note` markers
+ are accounted for. `Footnotes.afterParsed` already runs
+ `processFootnotes(parsed, this.footnotes)` which writes
+ `data-note='footnote'` on any element matching a CSS
+ selector. So a single `parsed.querySelector(
+ "[data-note='footnote']")` at the end of that pass is
+ conclusive.
+3. If null, splice `Footnotes`'s bound functions back out
+ of each hook array. With the empty-handlers fast-path
+ from the previous section already landed, the per-page
+ and per-node dispatches then return `undefined`
+ immediately and callers skip their reducer `forEach`.
+
+To enable (3), the `Handler` base class gets a small
+addition: each `(hook, bound)` pair from auto-registration
+is stashed under its hook name on `this._registered`, and a
+new `_unregisterAll(except)` method splices each entry back
+out. The `except` argument lets the caller skip the hook
+it's currently inside (`afterParsed` in this case) --
+splicing the array we're iterating would cause the
+surrounding `trigger()` loop to skip a sibling handler.
+The skipped entry stays in `this._registered` forever, but
+it's a one-shot anyway: harmless.
+
+`Footnotes.afterParsed` then becomes:
+
+```js
+afterParsed(parsed) {
+ this.processFootnotes(parsed, this.footnotes);
+ if (!parsed.querySelector("[data-note='footnote']")) {
+ this._unregisterAll("afterParsed");
+ }
+}
+```
+
+Profile delta (post-fast-path vs post-self-disable):
+
+| metric | post-fast-path | post-self-disable | Δ |
+| ------ | -------------- | ----------------- | --- |
+| render wall | 12.14 s | **11.77 s** | **-0.37 s** |
+| samples | 22,433 | 21,809 | -624 |
+| **`getBoundingClientRect` self** | **4,714 ms** | **4,198 ms** | **-516 ms** |
+| `removeChild` self | 1,902 ms | 1,898 ms | flat |
+| `(program)` self | 2,022 ms | 2,198 ms | +176 ms |
+| `append` self | 76 ms | 69 ms | -7 ms |
+
+The 516 ms `getBoundingClientRect` drop is exactly the
+`Footnotes.afterPageLayout` cost that the inventory
+predicted -- one gBCR on `noteContent` plus two more in
+the `new Layout(noteArea, ...)` constructor plus internal
+gBCRs from `findOverflow()`, multiplied by ~1,650 pages.
+The `(program)` row growing by 176 ms is V8 reattributing
+work between native and self-time as the dispatch pattern
+changes; not new work, just a different breakdown.
+
+PDF output remained byte-identical to the previous build
+on this content (16.1 MB, same checksum on the raw
+Chromium output).
+
+### `Layout.append` parent-lookup cache
+
+When the source walker emits consecutive children of the
+same parent, `findElement(node.parentNode, dest)` in
+`append()` gets called repeatedly with the same input.
+For a parent with N children that's N - 1 redundant
+lookups -- each one cheap (`getAttribute("data-ref")` +
+`dest.indexOfRefs[ref]` is an O(1) dict hit on the fast
+path), but the call count is north of 100k per render.
+
+Patch: a three-property memo on `Layout` -- last
+`srcParent`, last `dest`, last `destParent`. Hit check at
+the top of `append`, writeback at the bottom after the
+parent is resolved (whether via direct lookup or via the
+rebuild-ancestors branch, since the rebuild attaches the
+cloned ancestor into `dest`).
+
+Invalidation: reset all three at the top of every
+`renderTo`. The cache is safe within a single `renderTo`
+loop because `append()` never detaches DOM from `dest`,
+and `removeOverflow` (the one thing that does) only fires
+at loop exit. Across `renderTo` calls on the same `Layout`
+instance the previous run's `removeOverflow` may have
+detached the cached parent, so the explicit reset is the
+correctness guard.
+
+Profile delta (post-self-disable vs post-parent-cache):
+
+| metric | post-self-disable | post-parent-cache | Δ |
+| ------ | ----------------- | ----------------- | --- |
+| render wall | 11.77 s | 11.72 s | flat (within noise) |
+| samples | 21,809 | 21,688 | -121 (~65 ms) |
+| `(program)` self | 2,198 ms | 2,169 ms | -29 ms |
+| `getAttribute` (native) | 43 ms | off-list (<40 ms) | -3 ms+ |
+| `querySelector` (native) | 63 ms | 59 ms | -4 ms |
+| `Layout.append` self | 69 ms | 70 ms | flat |
+
+On the order of 50-100 ms saved depending on the row chosen, fully
+below the run-to-run wall-clock noise band but visible in
+the cpuprofile rows. The math checks: ~100k append calls
+× ~80 % sibling-cache-hit rate × ~1 us per skipped
+findElement ≈ 80 ms.
+
+PDF output byte-identical.
+
+### What didn't land: the `_ref` expando
+
+One sibling candidate to the parent-lookup cache was
+tried and reverted. The idea: mirror `data-ref` onto a
+plain JS property `_ref` at decoration time (in
+`ContentParser.addRefs`), propagate via the `cloneNode`
+helper, and read it in `findElement` and `append`'s
+postlude instead of `getAttribute("data-ref")` /
+`clone.dataset.ref`. Both reads in the hot path become
+plain JS property loads instead of going through C++ DOM
+attribute fetches or the `DOMStringMap` proxy.
+
+Measured win on the per-row breakdown:
+
+- `Layout.append` self 69 -> 47 ms (-22 ms).
+- `getAttribute` native 43 ms -> off-list (-3+ ms).
+
+About 25 ms of real per-call work removed. Reverted: the
+saving is genuinely smaller than the diff's surface --
+`cloneNode` helper has to propagate an extra property,
+the `data-ref` attribute has to stay for CSS selectors
+and the `querySelector` fallback in `findRef`, `findElement`
+needs a `||` fallback to keep direct `.cloneNode()`
+callers in `rebuildAncestors` working unchanged, and any
+future code that wants the ref has two places it could
+read from. Not worth maintaining for a saving that
+doesn't move single-run wall-clock.
+
+Lesson worth carrying forward: at this point in the
+codebase, per-call findElement / `dataset.ref` work has
+been ground down close enough to its floor that any
+further shave produces savings in the 20-50 ms band, well
+below the run-to-run wall-clock noise on this machine.
+Reading the cpuprofile per-row deltas is the only way to
+tell whether such a change is genuine; reading wall-clock
+isn't. And the bar for landing scales with the size of
+the diff -- the parent-cache landed because it's three
+property writes and one branch; the expando didn't
+because it's a propagation pattern that ripples through
+the bundle.
+
+### Cumulative effect
+
+Across all four landings:
+
+| metric | pre-investigation | post-parent-cache | Δ |
+| ------ | ----------------- | ----------------- | --- |
+| render wall | 12.63 s | 11.72 s | **-0.91 s (-7.2 %)** |
+| samples | 23,313 | 21,688 | -1,625 |
+| `getBoundingClientRect` self | 4,825 ms | 4,194 ms | -631 ms |
+| `removeChild` self | 1,954 ms | 1,897 ms | -57 ms |
+| `removeOverflow` self | 636 ms | 583 ms | -53 ms |
+| `getAttribute` (native) | ~125 ms* | off-list (<40 ms) | -85 ms+ |
+
+\* Inferred from the post-tojson baseline rank; not
+explicitly tabulated in the top-25 cut at that time.
+
+The `Handler._registered` + `_unregisterAll(except)` plumbing
+is reusable: any future handler that determines at
+parse/decoration time that it has nothing to do for a given
+render can self-disable the same way, and the
+empty-handlers fast-path will swallow the per-call dispatch
+cost for free. That's the pattern this work leaves behind --
+combine "detect once at a known-quiet point" with "remove
+yourself from the dispatch chain" and you pay zero
+ongoing cost for inactive handlers.
+
+## Skipping the `wrapContent` innerHTML round-trip
+
+The post-append-cache profile's 5th-largest JS row was
+`wrapContent` at 260 ms. It's called once per render, right
+at the top of `Chunker.flow`, so unlike the previous fixes it
+has no per-page hot path -- the absolute size is the whole
+story.
+
+`Layout.wrapContent` lifts the entire `
` elements have no `data-break-before` and no
+`data-previous-break-after`, so the fire is via
+`needsPageBreak(node, previousNode)` -- which checks
+whether `node`'s effective `data-page` differs from
+`previousNode`'s.
+
+`previousNode` is computed via
+`nodeBefore(node, limiter)`, which walks
+`node.previousSibling` then climbs via `parentNode` if
+no significant sibling exists. In the move model, after
+the previous yield was moved out of source, the current
+yield's `previousSibling` is `null` (the previous one no
+longer lives in source). The climb continues up:
+FAQ article (no `data-page`) -> looks at its previous
+sibling -> finds the **part-divider article** sitting
+right before the FAQ article in source, which DOES carry
+`data-page="divider"` (set by processBreaks for the CSS
+`page: divider;` rule on `article.part-divider`).
+
+So `needsPageBreak` saw a transition from
+`page="divider"` to (effectively) no page, fired true,
+and the chunker started a fresh page for every paragraph
+in the FAQ section. The chapter article's normal
+"siblings share the same effective page-name" property
+broke because the sibling-walk now escapes the chapter
+into the prior part-divider.
+
+### Fix: track previousLeaf in renderTo
+
+The chunker already knows the right answer: the last
+leaf it actually appended this page. Threaded through
+`shouldBreak` as a third argument, used by the
+`needsPageBreak` branch only (`needsBreakBefore` and the
+`parentBreakBefore` logic still use `nodeBefore`):
+
+```js
+let _moveLastLeaf = null;
+// ... in the loop ...
+if (hasRenderedContent &&
+ this.shouldBreak(node, start, _moveLastLeaf)) { ... }
+// ... after append ...
+if (!shallow) _moveLastLeaf = node;
+```
+
+In `shouldBreak`:
+
+```js
+let pageBreakRef = previousLeaf || nodeBefore(node, limiter);
+return ... || needsPageBreak(node, pageBreakRef);
+```
+
+With that, page count went 1740 -> 1653 (within 2 of
+baseline) and per-page content matched. PDF
+byte-equivalent to baseline within timestamp drift.
+
+### Profile diff
+
+Both runs `--detach-pages --cpu-profile --cpu-sampling
+100`, sample-time absolute, single run each (wall-clock
+on this machine is too noisy to be a useful signal --
+see "Methodology: compare profiles, not wall-clock"
+above):
+
+| function | baseline | move | Δ |
+| --- | --- | --- | --- |
+| `getBoundingClientRect` | 3539 ms | 4036 ms | **+497** |
+| `appendChild` | 137 ms | 390 ms | **+253** |
+| `restoreOverflow` (new) | -- | 168 ms | +168 |
+| `removeChild` | 1536 ms | 1635 ms | +99 |
+| `insertBefore` | <50 ms | 87 ms | ~+87 |
+| `getNodeWithNamedPage` | <50 ms | 108 ms | ~+85 |
+| `afterPageLayout` (AtPage) | 105 ms | 182 ms | +77 |
+| `(program)` | 2196 ms | 2266 ms | +70 |
+| `Layout` ctor | 23 ms | 31 ms | +8 |
+| `cloneNode` | 146 ms | <130 ms | **-146** |
+| `removeOverflow` | 124 ms | -- (replaced) | -124 |
+| **samples** | **17,481** | **19,590** | **+2,109** |
+| **CPU work** | **9.48 s** | **10.74 s** | **+1.26 s** |
+
+Net **+1.26 s of CPU work** -- the change is a clear
+regression, the opposite of what was predicted.
+
+### Why the prediction was wrong
+
+The cloneNode self-time saving (-146 ms) shows up as
+expected, but three structural costs dwarf it:
+
+1. **`appendChild` on an attached node is roughly 2x
+ the cost of `appendChild` on a fresh clone (+253 ms).**
+ A move is internally detach-from-source-parent +
+ attach-to-dest-parent; both touch Blink's child-list
+ bookkeeping. cloneNode produces an unparented node,
+ so the subsequent attach is one-sided. Intrinsic to
+ any move-based design -- no implementation choice
+ avoids it.
+
+2. **Each move dirties Blink's layout state more than
+ each clone does, distributing cost into gBCR
+ (+497 ms).** The increase is spread across every
+ gBCR call site -- `Page.create` (+225 ms),
+ `hasOverflow` (+152 ms), `Layout` ctor (+58 ms),
+ `afterPageLayout` (+31 ms), `addResizeObserver`
+ (+31 ms) -- not localized to any new code. Each
+ gBCR call flushes pending mutations; with every move
+ counting as two mutations vs one for clone+append,
+ each flush has more to do. Same migration pattern
+ the README's "Attempt B: memoize `Page.create`'s gBCR"
+ documented above -- DOM mutation cost doesn't go
+ away by elimination, it migrates to whichever frame
+ next forces a layout flush.
+
+3. **The extract-and-restore cycle adds ~340 ms of new
+ JS work.** `restoreOverflow` (168 ms) builds an
+ `extractContents` fragment + walks it for leaves +
+ inserts each back into source. `previousLeaf` makes
+ `shouldBreak` call `getNodeWithNamedPage` (108 ms)
+ on every leaf yield (it climbs parent chains looking
+ for `data-page`). `insertBefore` (87 ms) is the
+ per-restore reinsertion.
+
+The deeper structural reason: paged.js's break-and-
+resume model touches each source leaf O(pages-spanning-
+that-leaf) times in the move model -- moved into page N,
+extracted to the fragment, reinserted into source,
+moved into page N+1. Each touch is a DOM mutation. The
+clone model touches each node O(1) times -- allocated
+once, attached, thrown away with the page. Cumulative
+mutation count is structurally higher under moves.
+
+The cloneNode time the profile attributes to its native
+frame is just the *allocator* portion of cloning work --
+not the total cost of "duplicating a subtree". The rest
+hides in V8 / Blink native frames not labeled
+`cloneNode`, and that rest doesn't disappear when you
+switch to moves; it shows up as appendChild +
+invalidation cost instead.
+
+### Where this leaves the picture
+
+Reverted. The cumulative table from the previous
+section is unchanged. No row added.
+
+The pattern this attempt taught is the inverse of the
+"distributed savings often exceed direct estimates"
+heuristic the README documents elsewhere: sometimes a
+change with a direct cost saving has bigger distributed
+*regressions* that aren't visible until you measure.
+The cloneNode saving was real; the appendChild + gBCR +
+restoreOverflow overhead was bigger.
+
+The only design that would avoid all three costs is one
+that never re-moves the same node -- a single-pass
+paginator with no break-and-resume. That's not paged.js;
+it's a different algorithm. Not a small refactor.
+
+The buffer variant (pre-clone source once at startup,
+move from buffer to dest) was considered and not
+prototyped: it'd shift the cloneNode allocation cost to
+one big startup call but every per-page move would
+still hit the same appendChild + gBCR dynamic that ate
+the savings here. No structural win.
+
+This experiment also clarifies why the "Profiling
+pdf-lib's load" and "Findings: removeChild" sections
+saw allocation savings show up as wall-clock gains:
+those operations didn't have a Blink layout-tree
+mutation step downstream. Mutations are where the cost
+that *looks* like JS allocation actually lives in this
+codebase.
diff --git a/perf/analyze-heap-profile.mjs b/perf/analyze-heap-profile.mjs
new file mode 100644
index 00000000..b8d17c94
--- /dev/null
+++ b/perf/analyze-heap-profile.mjs
@@ -0,0 +1,84 @@
+// Bottom-up heap sampling profile analyzer.
+//
+// Reads a V8 .heapprofile (the JSON returned by CDP's
+// HeapProfiler.stopSampling) and prints the top allocation sites by
+// self-bytes, aggregated by (function name + source location). Same
+// shape as Chrome DevTools' Memory tab "Allocation sampling"
+// bottom-up view, but in the terminal.
+//
+// Usage:
+// node analyze-heap-profile.mjs